简单画了个图:
首先,后端程序及客户端都是分成三个版本:内部测试版,线上测试版,线上稳定版。线上测试版是小范围更新,经过一天测试没问题,然后再推到线上稳定版,更新其他服,一般游戏也都是按这个流程来更新的。
运维管理后台,记录了区服信息,提供各种简单API接口给各脚本使用。
然后批量维护脚本,create_list.py是根据运维管理后台提供的API,根据输入的参数(平台,区服范围)生成一份cqbyupdate.py需要使用的iplist文件,然后cqbyupdate.py根据这份ip文件执行相应的操作。
saltstack,是用于全服修改一些配置使用,例如批量修改zabbix的配置,批量修改nginx的配置 等等。
rsync,用于数据同步,例如给游戏服拉取最新版本。
游戏服最关键的只有一个control.py脚本,该脚本集成了管理单个游戏区服的所有操作,根据传进去的版本参数及动作参数执行对应的操作。
整套架构的优点是全服维护可用cqbyupdate.py脚本操作,如果临时想在某台游戏服上做些更新,可用单服脚本control.py操作,比较灵活;缺点是对中心机依赖比较高,万一中心机宕机了,就麻烦大了,所以搞了一台备份中心机。这套架构已经上线开服3000+。
control.py单服维护脚本:
#!/usr/bin/python
#coding=utf-8
import subprocess
import shutil
import os
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import optparse
import ConfigParser
import time
import jinja2
import urllib2
import json
import socket
try:
import fcntl
except:
pass
import struct
import MySQLdb
def get_ip_address(ifname):
    """Return the IPv4 address of network interface *ifname* as a string.

    The address is obtained with the SIOCGIFADDR ioctl issued on a
    throw-away UDP socket; bytes 20-24 of the ioctl result are decoded
    with ``socket.inet_ntoa``.

    Only the first 15 characters of *ifname* are passed to the ioctl.
    Any error raised by ``fcntl.ioctl`` propagates to the caller, and a
    ``NameError`` is raised when the guarded ``import fcntl`` at the top
    of the file failed.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        packed = fcntl.ioctl(
            s.fileno(),
            0x8915,  # SIOCGIFADDR
            struct.pack('256s', ifname[:15])
        )
    finally:
        # The socket only serves as a handle for the ioctl; release it even
        # when the interface does not exist.
        s.close()
    return socket.inet_ntoa(packed[20:24])
class Cqby(object):
    """Maintenance helper bound to one game zone on the local host.

    An instance is tied to a version directory, a platform and a zone id and
    exposes one public method per maintenance action (``create``, ``drop``,
    ``start``, ``stop``, ``updategame`` ...).  The command-line entry point
    resolves the requested action with ``getattr``, so the public method
    names are part of the CLI contract.

    Constructing an instance has side effects: it creates directories,
    queries the local IP address and calls the ops admin API.

    Methods that run an external command return its exit status (``0`` on
    success) so that callers can report failures.
    """

    # MySQL client binaries used for every database operation.
    MYSQL_BIN = '/usr/local/mysql/bin/mysql'
    MYSQLDUMP_BIN = '/usr/local/mysql/bin/mysqldump'
    # Suffix of template files; removed to obtain the rendered file name.
    TEMPLATE_SUFFIX = '.jinja2'
    # Seconds to wait for the admin API before a request counts as failed.
    API_TIMEOUT = 30

    def __init__(self, version, platform, platformid, id):
        """Set up paths, ports and credentials for zone *id*.

        version    -- name of the version directory below the work dir
        platform   -- platform name (used in log paths and API URLs)
        platformid -- platform id handed to the config templates
        id         -- zone id; digits only, given as string or int
        """
        self.version = version
        self.platform = platform
        self.platformid = platformid
        self.id = id
        # Working directory; releases are read from <workdir>/<version>/server.
        self.workdir = '/data/init'
        # Directory of this zone's game program.
        self.gamedir = '/data/game/game%s' % self.id
        self._ensureDir('/data/game', "目录已存在")
        # Chat monitor directory of this zone.
        self.chatdir = '/data/game/chat%s' % self.id
        # Listening port of the zone, stored as a string.
        self.gameport = self._gamePort(self.id)
        # Fall back to em1 when eth0 cannot be queried.
        try:
            self.localip = get_ip_address('eth0')
        except Exception:
            self.localip = get_ip_address('em1')
        # Database name of the zone.
        self.dbname = 'game%s' % self.id
        # Administrative database account used by this script.
        self.admindbuser = 'root'
        self.admindbpass = '123456'
        # Directory that receives database dumps.
        self.backup = '/data/backup'
        self._ensureDir(self.backup, "目录已经存在")
        # Log directory of the zone.
        self.gamelogdir = '/data/gamelogs/chuanqi/%s/S%s' % (self.platform, self.id)
        if not os.path.isdir(self.gamelogdir):
            os.makedirs(self.gamelogdir)
        # NOTE(review): the original nesting of this chown is ambiguous; it is
        # idempotent, so it is run on every construction.
        subprocess.call('chown www:www -R /data/gamelogs', shell=True)
        # Template files per configuration directory.
        self.binConfigDir = '%s/bin' % self.gamedir
        self.binConfigFiles = ['socket.jinja2']
        self.confConfigDir = '%s/conf' % self.gamedir
        self.confConfigFiles = ['jade.cfg.jinja2']
        self.independentConfigDir = '%s/conf/independent' % self.gamedir
        self.independentConfigFiles = [
            'auth.properties.jinja2',
            'debug.properties.jinja2',
            'fcm.properties.jinja2',
            'gm.properties.jinja2',
            'net.properties.jinja2',
            'server.properties.jinja2',
            'whiteList.properties.jinja2',
            'onlineLimit.properties.jinja2',
        ]
        self.miscConfigDir = '%s/conf/config/common' % self.gamedir
        self.miscConfigFiles = [
            'misc.properties.jinja2',
        ]
        # Hosts that receive database grants: localhost plus the per-platform
        # list; unknown platforms use the 'lianyun' entry.
        baselist = ['127.0.0.1']
        payIPListAll = {
            '37wan': [],
            'liebao': [],
            '2345': [],
            'yilewan': [],
            'renrenwang': [],
            '6711': [],
            '1360': [],
            'duowan': [],
            'baidu': [],
            'lianyun': [],
            'tencent': [],
        }
        self.platformPayList = payIPListAll.get(self.platform, payIPListAll['lianyun'])
        self.payList = baselist + self.platformPayList
        self.mergelist = self.__getMerge()

    @staticmethod
    def _gamePort(zoneid):
        """Return the port of zone *zoneid* as a string.

        Ids above 50000 are used as the port directly; all other ids are
        offset by 20000.
        """
        # NOTE(review): ids in 45536..50000 map to a port above 65535 --
        # confirm such ids are never assigned.
        if int(zoneid) > 50000:
            return str(zoneid)
        return str(20000 + int(zoneid))

    @classmethod
    def _outputName(cls, template):
        """Return *template* without its trailing template suffix.

        ``str.rstrip`` must not be used here: it strips a character set, so
        'main.jinja2' would become 'm'.
        """
        suffix = cls.TEMPLATE_SUFFIX
        if template.endswith(suffix):
            return template[:-len(suffix)]
        return template

    @staticmethod
    def _ensureDir(path, existsmsg):
        """Create *path* if missing; print *existsmsg* when it already exists.

        Creation failures are reported but not fatal, matching the
        best-effort behaviour the constructor always had.
        """
        if os.path.isdir(path):
            print(existsmsg)
            return
        try:
            os.makedirs(path)
        except OSError as e:
            print("创建目录失败:%s" % e)

    def _clientArgs(self, binary):
        """Return the argv prefix that runs *binary* with the admin account."""
        return [binary, '-u%s' % self.admindbuser, '-p%s' % self.admindbpass]

    def _mysql(self, sql):
        """Run one SQL statement through the mysql client; return its exit status."""
        return subprocess.call(self._clientArgs(self.MYSQL_BIN) + ['-e', sql])

    def __getMerge(self):
        """Fetch the merge-target list of this zone from the admin API.

        Returns the response body split on commas.  The request is tried
        three times; after the third failure the process exits with status 2.
        """
        url = 'http://yw.admin.xxx.com/yunwei/api/getmergetarget/%s/%s/' % (self.platform, self.id)
        for _attempt in range(3):
            try:
                request = urllib2.urlopen(url, timeout=self.API_TIMEOUT)
                try:
                    return request.read().split(',')
                finally:
                    request.close()
            except Exception as e:
                print("请求合服信息失败:" + str(e))
                print("正在重试。。。")
        print("请求超时!!!!!!")
        sys.exit(2)

    def createDatabase(self):
        """Create the zone database; exit with status 2 on any failure."""
        try:
            print("正在创建数据库:%s" % self.dbname)
            ret = self._mysql(
                "create database %s DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci" % self.dbname
            )
            print("执行状态:%s" % ret)
            if ret:
                print("创建数据库失败,请确认!")
                sys.exit(2)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def updateDB(self, filename):
        """Import SQL file *filename* into the zone database.

        Exits with status 2 when the file cannot be read or the mysql client
        reports an error, so that a failed import is never reported as
        success to the caller of the script.
        """
        try:
            print("正在导入SQL文件:%s" % filename)
            with open(filename, 'rb') as sqlfile:
                ret = subprocess.call(
                    self._clientArgs(self.MYSQL_BIN) + [self.dbname],
                    stdin=sqlfile
                )
            print("执行状态:%s" % ret)
            if ret:
                print("导入SQL文件失败,请确认!")
                sys.exit(2)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def dumpDatabase(self):
        """Dump the zone database into the backup directory.

        The dump is written to ``<backup>/<timestamp>-<dbname>.sql``.
        Returns the mysqldump exit status, or 1 when an exception occurred.
        """
        try:
            print("正在备份数据库:%s" % self.dbname)
            curTime = time.strftime('%Y%m%d%H%M%S')
            target = '%s/%s-%s.sql' % (self.backup, curTime, self.dbname)
            with open(target, 'wb') as out:
                ret = subprocess.call(
                    self._clientArgs(self.MYSQLDUMP_BIN) + [self.dbname],
                    stdout=out
                )
            print("执行状态:%s" % ret)
            return ret
        except Exception as e:
            print("捕捉到异常: %s" % e)
            return 1

    def dropDatabase(self):
        """Drop the zone database; return the exit status (1 on exception)."""
        try:
            print("正在删除数据库:%s" % self.dbname)
            ret = self._mysql("drop database %s" % self.dbname)
            print("执行状态:%s" % ret)
            return ret
        except Exception as e:
            print("捕捉到异常: %s" % e)
            return 1

    def createGameDir(self):
        """Copy the release tree into a new game directory.

        Exits with status 2 when the directory already exists or the copy
        fails.
        """
        try:
            print("正在检测目录是否存在:%s" % self.gamedir)
            if os.path.isdir(self.gamedir):
                print("目录已存在,请检查参数!")
                sys.exit(2)
            print("正在复制程序文件至:%s" % self.gamedir)
            shutil.copytree('%s/%s/server' % (self.workdir, self.version), self.gamedir)
        except Exception as e:
            print("捕捉到异常: %s" % e)
            sys.exit(2)

    def dropGameDir(self):
        """Remove the game directory if present; errors are only reported."""
        try:
            print("正在删除游戏目录:%s" % self.gamedir)
            if os.path.isdir(self.gamedir):
                shutil.rmtree(self.gamedir)
        except Exception as e:
            print("遇到错误: %s" % e)

    def dropGameLogDir(self):
        """Remove the zone log directory if present; errors are only reported."""
        try:
            print("正在删除日志目录:%s" % self.gamelogdir)
            if os.path.isdir(self.gamelogdir):
                shutil.rmtree(self.gamelogdir)
        except Exception as e:
            print("遇到错误: %s" % e)

    def createConfig(self, configdir, configlist):
        """Render every template of *configlist* found in *configdir*.

        Each ``NAME.jinja2`` is rendered to ``NAME`` in the same directory.
        The ``mem`` template variable is fetched from the admin API.
        Exits with status 2 on any error.
        """
        try:
            print("正在生成配置文件:%s" % configdir)
            url = 'http://yw.admin.xxx.com/yunwei/api/getmem/%s/%s' % (self.platform, self.id)
            response = urllib2.urlopen(url, timeout=self.API_TIMEOUT)
            try:
                mem = response.read()
            finally:
                response.close()
            env = jinja2.Environment(loader=jinja2.FileSystemLoader(configdir))
            context = dict(
                version=self.version,
                platformid=self.platformid,
                platform=self.platform,
                gameid=self.id,
                gameport=self.gameport,
                gamedir=self.gamedir,
                dbuser='game',
                dbpass='game123456',
                dbname=self.dbname,
                paylist=self.platformPayList,
                mem=mem,
                mergelist=self.mergelist,
            )
            for gateconfig in configlist:
                print(gateconfig)
                # Render first so a template error cannot truncate an
                # existing configuration file.
                rendered = env.get_template(gateconfig).render(**context)
                target = '%s/%s' % (configdir, self._outputName(gateconfig))
                with open(target, 'w') as f:
                    f.write(rendered)
        except Exception as e:
            print("生成配置文件遇到错误: %s" % e)
            sys.exit(2)

    def updateconfig(self):
        """Regenerate the bin, conf and independent configuration files."""
        self.createConfig(self.binConfigDir, self.binConfigFiles)
        # The rendered control script must be executable.
        os.chmod('%s/bin/socket' % self.gamedir, 0o755)
        self.createConfig(self.confConfigDir, self.confConfigFiles)
        self.createConfig(self.independentConfigDir, self.independentConfigFiles)
        # The misc templates are intentionally not rendered here.

    def updategame(self):
        """Rsync the release tree over the game directory.

        Generated configuration files and logs are excluded so that the
        zone-specific settings survive.  Returns the rsync exit status.
        """
        print("正在更新游戏程序。。。")
        excluded = [
            'socket', 'log', 'onlineLimit.properties', 'jade.cfg',
            'auth.properties', 'debug.properties', 'fcm.properties',
            'gm.properties', 'net.properties', 'server.properties',
            'whiteList.properties',
        ]
        cmd = ' rsync -avzP %s %s/%s/server/ %s/ ' % (
            ' '.join('--exclude="%s"' % name for name in excluded),
            self.workdir, self.version, self.gamedir,
        )
        print(cmd)
        return subprocess.call(cmd, shell=True)

    def start(self):
        """Start the zone service; return the exit status of ``./socket start``."""
        print("给JSVC添加执行权限:")
        os.chmod('%s/bin/jsvc' % self.gamedir, 0o755)
        print("正在启动服务:")
        cmd = '''cd %s/bin ; ./socket start ''' % self.gamedir
        return subprocess.call(cmd, shell=True)

    def stop(self):
        """Stop the zone service; return the exit status of ``./socket stop``."""
        print("正在关闭服务:")
        cmd = '''cd %s/bin ; ./socket stop ''' % self.gamedir
        return subprocess.call(cmd, shell=True)

    def clearnow(self):
        """Back up the database, then reset it and delete the zone logs.

        Exits with status 2 without touching any data when the backup fails.
        """
        if self.dumpDatabase():
            print("备份数据库失败,已取消清档操作!")
            sys.exit(2)
        self.updateDB('%s/%s/server/sql/database.sql' % (self.workdir, self.version))
        self.dropGameLogDir()

    def clear(self):
        """Reset the zone, but only while the Player table has at most 30 rows.

        Exits with status 2 when the table cannot be counted or holds more
        than 30 rows.  Otherwise the service is stopped, the data is reset
        and the service is started again; the start status is returned.
        """
        try:
            conn = MySQLdb.connect(
                user=self.admindbuser,
                passwd=self.admindbpass,
                host='localhost',
                db=self.dbname,
                unix_socket='/tmp/mysql.sock'
            )
            try:
                cursor = conn.cursor()
                # Let the server count instead of fetching every row.
                cursor.execute('select count(*) from Player')
                total = int(cursor.fetchone()[0])
                cursor.close()
            finally:
                conn.close()
        except Exception as e:
            print("连接数据库错误:%s" % e)
            sys.exit(2)
        print("数据库Player表有:%s" % total)
        if total > 30:
            print("Player表记录总数大于30!请确认后再执行清档操作!!!")
            sys.exit(2)
        print("Player表记录总数小于30,可以执行清档操作!")
        self.stop()
        self.clearnow()
        return self.start()

    def create(self):
        """Set up a new zone: database, schema, grants, program and config."""
        self.createDatabase()
        self.updateDB('%s/%s/server/sql/database.sql' % (self.workdir, self.version))
        self.mysqlgrant()
        self.createGameDir()
        self.updateconfig()
        # NOTE(review): createchat and nginxlogs are not defined in the
        # visible part of this class -- confirm they exist in the full file.
        self.createchat()
        self.nginxlogs()

    def drop(self):
        """Back up and then remove the zone's database, program and logs.

        Exits with status 2 without deleting anything when the backup fails.
        """
        if self.dumpDatabase():
            print("备份数据库失败,已取消删除操作!")
            sys.exit(2)
        self.dropDatabase()
        self.dropGameDir()
        self.dropGameLogDir()
        # NOTE(review): dropchat is not defined in the visible part of this
        # class -- confirm it exists in the full file.
        self.dropchat()

    def onekey(self):
        """Stop the service, update the program and start it again.

        The service is started even when the update failed.  Returns the
        first non-zero status of the update and start steps, else 0.
        """
        self.stop()
        # Wait before replacing the program files.
        time.sleep(10)
        updated = self.updategame()
        started = self.start()
        return updated or started

    def mysqlgrant(self):
        """Grant database access to every host in ``payList``.

        Returns 1 when at least one grant statement failed, otherwise 0.
        """
        # NOTE(review): the password granted here ('cqbygame') differs from
        # the dbpass rendered into the config files ('game123456') -- confirm
        # which one is intended.
        print("正在添加数据库授权:")
        failed = 0
        for ip in self.payList:
            print("正在添加%s权限" % ip)
            if self._mysql(
                "grant all privileges on *.* to game@'%s' Identified by 'cqbygame'" % ip
            ):
                failed = 1
        if self._mysql(
            "grant select on *.* to db@'119.131.244.178' identified by 'lizhenjie';"
        ):
            failed = 1
        return failed
if __name__ == "__main__":
    # Command-line entry point of the single-zone maintenance script.
    #
    #   -p PLATFORM            store the platform name/id of this host in
    #                          platform.ini (once per machine)
    #   -v VER -i ID -a ACTION run one maintenance action for zone ID
    #
    # The process exits non-zero when the action reports a failure, so the
    # caller can tell a failed run from a successful one.

    # Every action is the name of a Cqby method and is resolved with getattr.
    active_list = ['create', 'drop', 'updateconfig', 'start', 'stop', 'clear', 'updategame', 'updateDB', 'onekey', 'mysqlgrant', 'clearnow']
    gamever_list = ['test', '37dev', '37stable']
    usage = ''' usage: %prog -p platform
%prog -v version -i id -a action
%prog -v version -i id -a updateDB -s sqlfile
'''
    parser = optparse.OptionParser(
        usage=usage,
        version="%prog 2.0"
    )
    setplat_opts = optparse.OptionGroup(
        parser, '设置服务器平台标识',
        '一台硬件服务器设置一次即可。'
    )
    setplat_opts.add_option(
        '-p', '--platform',
        dest="platform",
        help="平台名称"
    )
    parser.add_option_group(setplat_opts)
    tools_opts = optparse.OptionGroup(
        parser, '服务器日常功能',
    )
    tools_opts.add_option(
        '-v', '--ver',
        dest="ver",
        help="版本目录",
        type="choice",
        choices=gamever_list,
        default=gamever_list[1]
    )
    tools_opts.add_option(
        '-i', '--id',
        dest='id',
        help="服务器ID"
    )
    tools_opts.add_option(
        '-a', '--action',
        dest='action',
        help="执行动作",
        type="choice",
        choices=active_list
    )
    tools_opts.add_option(
        '-s', '--sql',
        dest='sql',
        help="SQL文件(可选,配合updateDB使用)"
    )
    parser.add_option_group(tools_opts)
    options, args = parser.parse_args()
    err_msg = '参数不对,请输--help查看详细说明!'
    # Read and written relative to the current working directory.
    ini = 'platform.ini'

    if options.platform:
        # Look the platform id up in the admin API and persist it.
        apiurl = 'http://yw.admin.xxx.com/yunwei/api/getplatforminfo/'
        result = urllib2.urlopen(apiurl)
        response = json.loads(result.read())
        for code, platformid in response.items():
            if options.platform != code:
                continue
            print("正在设置服务器标识为:%s-%s" % (platformid, options.platform))
            conf = ConfigParser.ConfigParser()
            conf.add_section('platforminfo')
            conf.set('platforminfo', 'name', options.platform)
            conf.set('platforminfo', 'id', platformid)
            with open(ini, 'w') as cfd:
                conf.write(cfd)
            sys.exit(0)
        # An unknown platform must not look like a successful run.
        print("未找到平台标识:%s" % options.platform)
        sys.exit(2)

    if options.id and options.ver and options.action:
        if options.action == 'updateDB' and not options.sql:
            # Without -s the import path would end in the literal "None".
            parser.error(err_msg)
        cf = ConfigParser.ConfigParser()
        if not cf.read(ini):
            print("未找到%s,请先使用 -p 设置平台标识!" % ini)
            sys.exit(2)
        platform = cf.get('platforminfo', 'name')
        platformid = cf.get('platforminfo', 'id')
        cqby = Cqby(options.ver, platform, platformid, options.id)
        run_function = getattr(cqby, options.action)
        if options.action in ['updateDB']:
            status = run_function('%s/server/sql/%s' % (options.ver, options.sql))
        else:
            status = run_function()
        # Actions that run an external command return its exit status;
        # surface a failure through the process exit code.
        if status:
            sys.exit(1)
    else:
        parser.error(err_msg)
cqbyupdate.py批量维护脚本:
#!/usr/bin/python
#coding:utf-8
import threading
import Queue
import subprocess
import optparse
import logging
import logging.config
import datetime
import os
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
#test:
import time
#logging.basicConfig(level = logging.DEBUG,format='(%(threadName)-10s) %(message)s',)
logging.config.fileConfig("logger.conf")
logger = logging.getLogger("root")
logger2 = logging.getLogger("file")
# Work queue shared by all worker threads; one entry per server record.
queue = Queue.Queue()
# Result lines of every record whose remote command failed.
Failed_List = []


class Ahdts(threading.Thread):
    """Worker thread that runs the requested action on remote game servers.

    Each worker takes comma-separated records
    (``platform,id,ip,port,opentime``) from the queue and runs the matching
    command on the server over ssh.  The action, version and SQL file are
    read from the module-level names ``action``, ``ver`` and ``sqlfile``
    that the ``__main__`` section assigns before any record is queued.
    """

    def __init__(self, queue):
        """Bind the worker to *queue* and make sure today's log dir exists."""
        super(Ahdts, self).__init__()
        self.queue = queue
        self.workdir = '/data/init'
        # One log directory per day, below ./updatelog.
        log_path = 'updatelog'
        today = datetime.date.today()
        self.log_path_today = '%s/%s' % (log_path, today)
        if not os.path.isdir(self.log_path_today):
            try:
                os.makedirs(self.log_path_today)
            except Exception as e:
                print(e)
                sys.exit(2)

    def run(self):
        """Process queue items forever; the thread is expected to be a daemon."""
        while True:
            item = self.queue.get()
            try:
                self._process(item)
            except Exception as e:
                # A broken record must not kill the worker.
                rettxt = 'Failed to process %r: %s' % (item, e)
                Failed_List.append(rettxt)
                logging.debug(rettxt)
            finally:
                # Always acknowledge the item, otherwise queue.join() in the
                # main thread would block forever.
                self.queue.task_done()

    def _process(self, item):
        """Run the current action for one server record and log the result."""
        value = item.strip().split(',')
        platform = value[0]
        id = value[1]
        ip = value[2]
        port = value[3]
        opentime = value[4]
        logging.debug("%10s %6s %15s %15s %10s ThreadingStart!" % (platform, id, ip, action, ver))
        if action == 'rsync':
            cmd = ''' cd %s ; ./rsync ''' % self.workdir
        elif action == 'ntp':
            cmd = ''' cd %s ; ./TimeClient.py ''' % self.workdir
        elif action in ['updateDB']:
            cmd = ''' cd %s ; ./control.py -i %s -a %s -v %s -s %s ''' % (self.workdir, id, action, ver, sqlfile)
        elif action == 'platform':
            cmd = ''' cd %s ; ./control.py -p %s ''' % (self.workdir, platform)
        else:
            cmd = ''' cd %s ; ./control.py -i %s -a %s -v %s ''' % (self.workdir, id, action, ver)
        # Same argv the former shell string produced, but without a local
        # shell re-interpreting the remote command.
        sshcmd = ['ssh', 'root@%s' % ip, '-n', cmd]
        logname = '%s/%s-%s-%s-%s.log' % (self.log_path_today, platform, id, ver, action)
        with open(logname, 'a') as logfile:
            exitcode = subprocess.call(sshcmd, stdout=logfile, stderr=subprocess.STDOUT)
        if exitcode == 0:
            # Only successfully executed commands are recorded by the "file" logger.
            logger2.debug('%10s %6s %15s %15s %10s %s' % (platform, id, ip, action, ver, cmd))
        rettxt = '%10s %6s %15s %15s %10s ThreadingEnd! ExitCode:%s' % (platform, id, ip, action, ver, exitcode)
        if exitcode:
            Failed_List.append(rettxt)
        logging.debug(rettxt)


if __name__ == "__main__":
    # 'ntp' and 'platform' are handled by Ahdts and therefore selectable too.
    action_list = ['rsync', 'create', 'drop', 'start', 'stop', 'clear', 'updateconfig', 'updategame', 'updateDB', 'onekey', 'ntp', 'platform']
    gamever_list = ['test', '37dev', '37stable']
    usage = ''' usage: %prog --file <file.ini> --action <action>
Forexample: %prog -f game-test.ini -a create
%prog -f game-test.ini -a onekey
%prog -f game-test.ini -a updateDB -s test.sql
'''
    parser = optparse.OptionParser(
        usage=usage,
        version="%prog 1.4"
    )
    parser.add_option('-f', '--file', dest="file", help="IP文件列表")
    parser.add_option('-a', '--action', dest="action", help="执行动作", type="choice", choices=action_list)
    parser.add_option('-v', '--ver', dest='ver', help="版本目录标识", type="choice", choices=gamever_list)
    parser.add_option('-s', '--sql', dest='sql', help="待更新的SQL文件")
    options, args = parser.parse_args()
    err_msg = '参数不对,请输--help查看详细说明!'
    if not (options.action and options.ver and options.file):
        parser.error(err_msg)
    if options.action == 'updateDB' and not options.sql:
        # Without -s the remote command would receive the literal "None".
        parser.error(err_msg)

    with open(options.file) as file:
        # Blank lines carry no server record.
        content = [line for line in file.readlines() if line.strip()]
    # Read by the worker threads as module-level names.
    action = options.action
    ver = options.ver
    sqlfile = options.sql
    # One thread per record, capped at 200.
    maxThreadNum = min(200, len(content))
    for i in range(maxThreadNum):
        t = Ahdts(queue)
        t.setDaemon(True)
        t.start()
    logging.debug("%10s %6s %15s %15s %10s" % ('PlatForm', 'ID', 'IP', 'Action', 'Version'))
    iplist = set()
    for i in content:
        ii = i.strip().split(',')
        if len(ii) < 5:
            # Report a malformed record instead of crashing half-way.
            Failed_List.append('Malformed record skipped: %r' % i)
            continue
        ip = ii[2]
        # Host-wide actions only need to run once per physical server.
        if action in ['rsync', 'platform'] and ip in iplist:
            continue
        queue.put(i)
        iplist.add(ip)
    queue.join()
    # Print the list of failed executions.
    print('=' * 20 + '执行失败列表' + '=' * 20)
    if Failed_List:
        for i in Failed_List:
            print(i)
    else:
        print("None")
    print('=' * 52)
    logging.debug("Done")
    if Failed_List:
        sys.exit(1)
批量维护脚本其实就是ssh远程过去游戏服执行control.py脚本,后面看能不能改成用socket的方式去连接,把socket的东西练练手,整套东西感觉还是比较简单。
本文出自 “运维笔记” 博客,请务必保留此出处http://lihuipeng.blog.51cto.com/3064864/1617958