最近学习 Python 运维开发,编写了一个简单的监控系统,现记录如下,仅供学习参考。
整个程序分为7个部分:
第一部分:根据监控架构设计的文件目录结构如下:
.
├── m_client
│ ├── config
│ │ ├── __init__.py
│ │ └── __init__.pyc
│ ├── core
│ │ ├── client.py
│ │ ├── client.py~
│ │ ├── global_setting.py
│ │ ├── global_setting.py~
│ │ ├── global_setting.pyc
│ │ ├── __init__.py
│ │ ├── plugin_api.py
│ │ ├── plugin_api.py~
│ │ ├── plugin_api.pyc
│ │ ├── redis_connector.py
│ │ └── redis_connector.pyc
│ ├── __init__.py
│ └── plugins
│ ├── assetsCollection
│ │ ├── hardinfor.txt
│ │ ├── xml2json.py
│ │ └── xml2json.pyc
│ ├── assetsCollect_linux.py
│ ├── assetsCollect.py
│ ├── assetsCollect.pyc
│ ├── assets.py
│ ├── assets.pyc
│ ├── cpu.py
│ ├── cpu.py~
│ ├── cpu.pyc
│ ├── disk.py
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── json.txt
│ ├── load.py
│ ├── load.pyc
│ ├── logical_disk.py
│ ├── memory.py
│ ├── memory.pyc
│ ├── mem.py
│ ├── mem.pyc
│ ├── netinfo.py
│ ├── old
│ │ ├── cpu.py
│ │ ├── disk.py
│ │ ├── logical_disk.py
│ │ ├── memory.py
│ │ ├── netinfo.py
│ │ ├── physical_disk.py
│ │ └── sys_Info.py
│ ├── physical_disk.py
│ ├── result_deal.py
│ ├── result_deal.pyc
│ ├── sys_Info.py
│ ├── upCheck.py
│ ├── upCheck.pyc
│ ├── win_cpu.py
│ ├── win_cpu.pyc
│ ├── xml2json.py
│ └── xml2json.pyc
└── m_server
├── config
│ ├── host.py
│ ├── host.py~
│ ├── host.pyc
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── services
│ │ ├── generic.py
│ │ ├── generic.py~
│ │ ├── generic.pyc
│ │ ├── __init__.py
│ │ ├── __init__.pyc
│ │ ├── linux.py
│ │ ├── linux.pyc
│ │ └── liunx.py~
│ ├── templates.py
│ ├── templates.py~
│ └── templates.pyc
├── core
│ ├── global_setting.py
│ ├── global_setting.py~
│ ├── global_setting.pyc
│ ├── main_server.py
│ ├── main_server.py~
│ ├── redis_connector.py
│ └── redis_connector.pyc
└── __init__.py
第二部分自定义监控内容:
#!/usr/bin/env python
#coding:utf-8
from generic import DefaultService
'''
class ngnix(DefaultService):
name = "Ngnix status"
interval = 30
triggers ={
'alive': [int , 1]
}
'''
class cpu(DefaultService):
    """CPU usage service, collected on the client by the ``cpu_info`` plugin.

    ``triggers`` maps each metric name to ``[value type, number, number]``.
    NOTE(review): the two numbers look like warning/critical thresholds --
    confirm against the alerting code, which is not part of this file.
    """

    name = 'cpu'
    plugin_name = 'cpu_info'
    interval = 30  # the client compares this against elapsed seconds

    triggers = {
        'iowait': ['percentage', 5.5, 90],
        'system': ['percentage', 5, 90],
        'idle': ['percentage', 20, 10],
        'user': ['percentage', 80, 90],
        'steal': ['percentage', 80, 90],
        'nice': [None, 80, 90],  # process priority (nice) level
    }

    # Metrics and title used when drawing the graph.
    graph_index = {
        'index': ['iowait', 'system', 'idle', 'user'],
        'title': 'CPU usage',
    }

    # NOTE(review): presumably the metrics that alert when the value drops
    # below the threshold instead of above it -- confirm.
    lt_operator = ['idle']
class load(DefaultService):
    """System load service, collected on the client by the ``load_info`` plugin.

    ``triggers`` maps each metric name to ``[value type, number, number]``.
    NOTE(review): the two numbers look like warning/critical thresholds --
    confirm against the alerting code, which is not part of this file.
    """

    name = 'load'
    plugin_name = 'load_info'
    interval = 300  # the client compares this against elapsed seconds

    triggers = {
        'uptime': ['string', 'd', 90],
        #'ptime': ['string', 'd',90],
        'load1': [int, 4, 9],
        'load5': [int, 3, 7],
        'load15': [int, 3, 9],
    }

    # Metrics and title used when drawing the graph.
    graph_index = {
        'index': ['load1', 'load5', 'load15'],
        'title': 'Load status',
    }
class memory(DefaultService):
    """Memory usage service, collected on the client by the ``mem_info`` plugin.

    No ``interval`` is set here.
    NOTE(review): presumably it is inherited from ``DefaultService`` -- confirm.
    """

    name = 'memory'
    plugin_name = 'mem_info'

    # Metric name -> [value type, number, number].
    # NOTE(review): the numbers look like warning/critical thresholds -- confirm.
    triggers = {
        'SwapUsage_p': ['percentage', 66, 91],
        'MemUsage_p': ['percentage', 68, 92],
        'MemUsage': [None, 60, 65],
    }

    # Metrics used when drawing the graph (no title is configured).
    graph_index = {
        'index': ['MemUsage', 'SwapUsage'],
    }
第三部分:设计模板
#!/usr/bin/env python
#coding:utf-8
from services import generic, linux
class BaseTemplate:
    """Base monitoring template; every field defaults to ``None``.

    Subclasses and per-host instances override the fields they need.
    """

    # Identity and alerting.
    name = None
    alert_policy = None

    # Which machines the template applies to.
    groups = None
    hosts = None

    # Connection details of a concrete host.
    ip = None
    port = None

    # Mapping of service name -> service definition instance.
    services = None

    # Operating system description of a concrete host.
    os = None
class LinuxGeneralServices(BaseTemplate):
    """Template bundling the general services monitored on Linux hosts."""

    name = 'Linux General Services'
    groups = ['TestGroup']
    #hosts = ['localhost','www.baidu.com']

    # Each value is an instance of a service definition class; this is how
    # the template is linked to the classes in the services package.
    services = {
        'cpu': linux.cpu(),
        'memory': linux.memory(),
        'load': linux.load(),
        'upCheck': generic.upCheck(),
    }
class WindowsGeneralService(BaseTemplate):
    """Template for Windows hosts; it reuses the Linux service definitions."""

    name = 'Windows General Services'
    #groups = ['BJ']
    hosts = ['localhost', 'www.baidu.com']

    services = {
        'load': linux.load(),
        'memory': linux.memory(),
        'cpu': linux.cpu(),
    }
# One instance of every template that is switched on.
enabled_templates = (
    LinuxGeneralServices(),
    WindowsGeneralService(),
    #TestBaseTemplate(),
)
第四部分:设置主机服务实例化
#!/usr/bin/env python
#coding:utf-8
import templates
import copy
def _detach_services(host_obj):
    """Give ``host_obj`` private copies of its services and cpu triggers.

    ``services`` is shared through the template class, so it is deep-copied
    onto the instance. The cpu service's ``triggers`` are then deep-copied as
    well, so that editing one host's thresholds does not affect other hosts.
    """
    host_obj.services = copy.deepcopy(host_obj.services)
    cpu_service = host_obj.services['cpu']
    cpu_service.triggers = copy.deepcopy(cpu_service.triggers)
    return host_obj


# host h1
h1 = _detach_services(templates.LinuxGeneralServices())
h1.name = 'yue'
h1.ip = '192.168.1.3'
h1.port = 22
h1.os = 'ubuntu 15.0.4'
h1.services['cpu'].interval = 40
# Per-host override of the template's iowait trigger.
h1.services['cpu'].triggers['iowait'] = ['percentage', 20, 50]
#print h1.services['cpu']

# host h2
h2 = _detach_services(templates.LinuxGeneralServices())
h2.name = 'ubuntu'
h2.ip = '192.168.76.144'
h2.port = 8000
h2.os = 'ubuntu 16.0.4'
h2.services['cpu'].triggers['system'] = [int, 80, 90]

# Every host the server pushes configuration for.
monitored_hosts = (
    h1,
    h2,
)
第五部分:配置环境变量及编写连接redis服务代码
#!/usr/bin/env python
#coding:utf-8
import os,sys
# Derive the package root (the parent of the directory holding this file)
# from the file's own location instead of a hard-coded absolute path, so the
# project keeps working when it is checked out somewhere else. abspath() is
# needed because __file__ may be relative, in which case dirname() of it can
# collapse to an empty string.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put the package root on the module search path so that sibling packages
# (for example ``config``) can be imported from the scripts in this directory.
sys.path.append(BASE_DIR)
#!/usr/bin/env python
import redis
# Shared Redis client used by the other modules (imported as
# ``redis_connector.r``); it targets database 0 of a local Redis server.
r = redis.Redis(
    host='localhost',
    port=6379,
    db=0,
)
第六部分:使用redis订阅和发布功能,并编写接收客户端消息的代码
#!/usr/bin/env python
#coding:utf-8
import global_setting
from config import host
import redis_connector as redis
import json
def push_configure_data_to_redis():
for h in host.monitored_hosts:
config_dic={}
for k,v in h.services.items():
config_dic[k]=[v.interval,v.plugin_name,0]#0是为起始初始时间
print config_dic
redis.r['configuration::%s'%h.name]=json.dumps(config_dic)#dumps至redis
#push_configure_data_to_redis()
#第二部分接收客户端发来的消息
count =0
channel ='chan_103'
msg_queue=redis.r.pubsub()#bind listen instance
msg_queue.subscribe(channel)
msg_queue.parse_response()
while True:
data=msg_queue.parse_response()
#print data
print 'round %s::'%count,json.loads(data[2])
count+=1
第七部分:客户端编写
首先根据插件集编写相应的插件,简单的插件集如下:
import global_setting
from plugins import upCheck,cpu,load,memory
def upCheck_info():
    """Return whatever the ``upCheck`` plugin's ``monitor()`` reports."""
    result = upCheck.monitor()
    return result
def cpu_info():
    """Return whatever the ``cpu`` plugin's ``monitor()`` reports."""
    return cpu.monitor()
def load_info():
    """Return whatever the ``load`` plugin's ``monitor()`` reports."""
    return load.monitor()
def mem_info():
    """Return whatever the ``memory`` plugin's ``monitor()`` reports."""
    result = memory.monitor()
    return result
#load_info()
然后编写从redis取配置及把消息异步发送到redis的程序模块
#!/usr/bin/env python
#coding:utf-8
import global_setting
import json
import time,sys
import threading
import plugin_api
import redis_connector as redis
# Host name of this client; it selects the 'configuration::<name>' key that is
# read from Redis and is sent along with every published result.
name='yue'
# Redis pub/sub channel that the monitoring results are published to.
channel='chan_103'
def pull_config_from_redis():
    """Fetch and decode this host's monitoring configuration from Redis.

    Returns:
        The JSON-decoded value stored under ``configuration::<name>``.

    Exits the process with an error message when the key does not exist.
    """
    raw = redis.r.get('configuration::%s' % name)
    if raw is None:
        sys.exit('Error:could not find any config_data')
    return json.loads(raw)
#print pull_config_rom_redis()
def run(service_config):
service_name,interval,plugin_name=service_config
#print service_name,interval,plugin_name
plugin_func=getattr(plugin_api,plugin_name)
res=plugin_func()
#把数据发送服务端,但是必须标明主机名,所发送内容的分类及时间
c_time= str(time.strftime("%Y-%m-%d %H:%M:%X"))
data={'time':c_time,'hostname':name,'service_name':service_name,'data':res}
redis.r.publish(channel,json.dumps(data))
print res
return res
host_config=pull_config_from_redis()
'''
#多线程起来各自获取配置后运行起来监控
for k,v in host_config.items():
temp=(k,v[0],v[1])
#print temp
t=threading.Thread(target=run,args=(temp,))
t.start()
'''
while True:
for k,v in host_config.items():
interval,plugin_name,last_run=v
if(time.time()-last_run)>=interval:
t=threading.Thread(target=run,args=((k,interval,plugin_name),))
t.start()
#update time stamp
host_config[k][2]=time.time()
else:
next_run_time=interval-(time.time()-last_run)
print "\033[31;1m services %s will run in next %s..\033[0m"%(k,next_run_time)
time.sleep(1)#防止无限刷屏