MongoDB监控脚本

时间:2021-08-27 23:24:24

mongodb_server.py

#! /bin/env python
#-*- coding:utf8 -*-
"""Collect MongoDB metrics (serverStatus, replica set, sharding) as flat dicts.

The metric names produced here are the keys later pushed to the falcon agent
by the companion monitor script.

NOTE(review): the copy of this file under review had lost its indentation and
every numeric literal.  The structure and the literals below were restored
from the surrounding comments (role numbering, 0/1 flags, 1 second timeout,
MB -> bytes, ".xx hours"); please confirm them against the deployed version.
"""

import sys
import os
from bson.timestamp import Timestamp

import pymongo
from pymongo import MongoClient


class mongodbMonitor(object):
    """Gather monitoring metrics from a single MongoDB instance."""

    # Node roles returned by get_mongo_role().  The numbering follows the
    # original comment: data node (standalone / replset primary / replset
    # secondary), mongos, config server.
    ROLE_DATA_NODE = 1
    ROLE_MONGOS = 2
    ROLE_CONFIG_SERVER = 3

    # replSetGetStatus member states used for the replication lag metric.
    _STATE_PRIMARY = 1
    _STATE_SECONDARY = 2

    # serverStatus lock mode letter -> suffix used in the metric name.
    _LOCK_NAMES = {"R": "Slock", "W": "Xlock", "r": "ISlock", "w": "IXlock"}

    _BYTES_PER_MB = 1024 * 1024

    def mongodb_connect(self, host=None, port=None, user=None, password=None):
        """Connect to a MongoDB instance and verify that it answers.

        Returns:
            ``(0, conn)`` on success, ``(exception, None)`` on any failure.
            The caller only tests the first element against ``0``.
        """
        try:
            # Server selection timeout of 1 second, so a dead instance is
            # reported quickly instead of blocking the whole collection run.
            conn = MongoClient(host, port, serverSelectionTimeoutMS=1000)
            if user and password:
                conn["admin"].authenticate(user, password)
            # Forces a round trip; raises if the server is unreachable.
            conn.server_info()
            print(conn)
        except Exception as exc:
            # Boundary handler: any connection/auth problem means "instance
            # is not alive" for the caller, which reports it as a metric.
            return exc, None
        return 0, conn

    def get_mongo_role(self, conn):
        """Return ROLE_DATA_NODE, ROLE_MONGOS or ROLE_CONFIG_SERVER for *conn*."""
        mongo_role = self.ROLE_DATA_NODE
        conn.server_info()
        if conn.is_mongos:
            mongo_role = self.ROLE_MONGOS
        elif "chunks" in conn.get_database("config").collection_names():
            # Not a mongos but owns config.chunks: treat it as a config server.
            mongo_role = self.ROLE_CONFIG_SERVER
        return mongo_role

    def get_mongo_monitor_data(self, conn):
        """Return every metric that applies to the node behind *conn*.

        Data nodes get serverStatus metrics plus replica set metrics when they
        are a primary or secondary; mongos nodes get sharding metrics; config
        servers only get the alive flag.
        """
        mongo_monitor_dict = {}
        # Alive flag reported for every node type.
        mongo_monitor_dict["mongo_local_alive"] = 1
        mongo_role = self.get_mongo_role(conn)

        if mongo_role == self.ROLE_DATA_NODE:
            mongodb_role, serverStatus_dict = self.serverStatus(conn)
            mongo_monitor_dict.update(serverStatus_dict)
            if mongodb_role in ("master", "secondary"):
                mongo_monitor_dict.update(self.repl_status(conn))
            else:
                print("this is standalone node")
        elif mongo_role == self.ROLE_MONGOS:
            mongo_monitor_dict.update(self.shard_status(conn))
        return mongo_monitor_dict

    @staticmethod
    def _version_tuple(version):
        """Return ``(major, minor)`` ints parsed from a version string.

        Comparing version strings directly is lexicographic ("10.0" < "3.0"),
        so the version is compared numerically instead.
        """
        parts = []
        for piece in str(version).split(".")[:2]:
            number = ""
            for ch in piece:
                if not ch.isdigit():
                    break
                number += ch
            parts.append(int(number) if number else 0)
        while len(parts) < 2:
            parts.append(0)
        return tuple(parts)

    @staticmethod
    def _add_prefixed(target, prefix, section, skip=()):
        """Copy every key of *section* into *target* as ``prefix + key``."""
        for key in section:
            if key not in skip:
                target[prefix + key] = section[key]

    @staticmethod
    def _optime_seconds(optime):
        """Return the seconds part of a member optime.

        Handles both layouts: a document carrying the timestamp under "ts"
        (the form this file reads) and a bare timestamp object (the form used
        by the commented-out older code).
        """
        if isinstance(optime, dict):
            optime = optime["ts"]
        return optime.time

    def serverStatus(self, connection):
        """Run the serverStatus command and flatten it into metric names.

        Returns:
            ``(mongodb_role, metrics)`` where ``mongodb_role`` is "master",
            "secondary", "standalone" or "" (replica set member that is
            neither primary nor secondary) and ``metrics`` is a flat dict.
        """
        status = connection.admin.command("serverStatus")
        metrics = {}
        mongo_version = status["version"]

        # uptime metric
        metrics["uptime"] = int(status["uptime"])

        # "asserts" section metrics
        self._add_prefixed(metrics, "asserts_", status["asserts"])

        # "extra_info" section metrics: page_faults (falcon counter type).
        # Guarded because the field is not guaranteed to be present.
        if "page_faults" in status.get("extra_info", {}):
            metrics["page_faults"] = status["extra_info"]["page_faults"]

        # "connections" section metrics
        connections = status["connections"]
        current_conn = connections["current"]
        available_conn = connections["available"]
        metrics["connections_current"] = current_conn
        metrics["connections_available"] = available_conn
        # Used percentage.  Multiply by a float first: plain int division
        # truncates to 0 under Python 2.
        total_conn = current_conn + available_conn
        metrics["connections_used_percent"] = (
            int(current_conn * 100.0 / total_conn) if total_conn else 0)
        # Total connections created since the server started (counter).
        metrics["connections_totalCreated"] = connections["totalCreated"]

        # "globalLock" currentQueue
        current_queue = status["globalLock"]["currentQueue"]
        for queue_key in ("total", "readers", "writers"):
            metrics["globalLock_currentQueue_" + queue_key] = current_queue[queue_key]

        # "locks" section; its layout changed in version 3.0.
        if "locks" in status and self._version_tuple(mongo_version) >= (3, 0):
            # scope: Global, Database, Collection, Oplog, ...
            for lock_scope, scope_stats in status["locks"].items():
                for lock_metric, by_type in scope_stats.items():
                    for lock_type, value in by_type.items():
                        # Unknown mode letters keep their raw letter instead
                        # of silently reusing the previous iteration's name.
                        lock_name = self._LOCK_NAMES.get(lock_type, lock_type)
                        lock_metric_key = "locks_%s_%s_%s" % (
                            lock_scope, lock_metric, lock_name)
                        metrics[lock_metric_key] = value

        # "network" section metrics: bytesIn, bytesOut, numRequests (counters)
        if "network" in status:
            self._add_prefixed(metrics, "network_", status["network"])

        # "opcounters" section metrics (counters)
        if "opcounters" in status:
            self._add_prefixed(metrics, "opcounters_", status["opcounters"])

        # "opcountersRepl" section metrics (counters).  Values must come from
        # the opcountersRepl section itself, not from opcounters.
        if "opcountersRepl" in status:
            self._add_prefixed(metrics, "opcountersRepl_", status["opcountersRepl"])

        # "mem" section metrics: sizes are reported in MB, exported as bytes.
        if "mem" in status:
            for mem_metric, mem_value in status["mem"].items():
                if mem_metric not in ("bits", "supported"):
                    mem_value = mem_value * self._BYTES_PER_MB
                metrics["mem_" + mem_metric] = mem_value

        # "dur" section metrics (MB values exported as bytes)
        if "dur" in status:
            dur = status["dur"]
            metrics["dur_journaledBytes"] = dur["journaledMB"] * self._BYTES_PER_MB
            metrics["dur_writeToDataFilesBytes"] = (
                dur["writeToDataFilesMB"] * self._BYTES_PER_MB)
            metrics["dur_commitsInWriteLock"] = dur["commitsInWriteLock"]

        # "repl" section: derive the node's replication role.
        mongodb_role = ""
        if "repl" in status and "secondary" in status["repl"]:
            if status["repl"].get("ismaster"):
                mongodb_role = "master"
            if status["repl"]["secondary"]:
                mongodb_role = "secondary"
        else:
            # not in replica set mode
            mongodb_role = "standalone"

        # "backgroundFlushing" section metrics, only for MMAPv1.
        # last_finished is discarded, as in the original code.
        if "backgroundFlushing" in status:
            self._add_prefixed(metrics, "backgroundFlushing_",
                               status["backgroundFlushing"],
                               skip=("last_finished",))

        # cursor counters from the "metrics" section
        if "cursor" in status.get("metrics", {}):
            cursor_status = status["metrics"]["cursor"]
            metrics["cursor_timedOut"] = cursor_status["timedOut"]
            for open_key in ("noTimeout", "pinned", "total"):
                metrics["cursor_open_" + open_key] = cursor_status["open"][open_key]

        # "wiredTiger" section
        if "wiredTiger" in status:
            self._add_wiredtiger_metrics(metrics, status["wiredTiger"])

        # "rocksdb" engine
        if "rocksdb" in status:
            rocksdb = status["rocksdb"]
            for rocks_key in ("num-immutable-mem-table",
                              "mem-table-flush-pending",
                              "compaction-pending",
                              "background-errors",
                              "num-entries-active-mem-table",
                              "num-entries-imm-mem-tables",
                              "num-snapshots",
                              "oldest-snapshot-time",
                              "num-live-versions",
                              "total-live-recovery-units"):
                metrics["rocksdb_" + rocks_key.replace("-", "_")] = rocksdb[rocks_key]

        # "PerconaFT" engine
        if "PerconaFT" in status:
            self._add_perconaft_metrics(metrics, status["PerconaFT"])

        return (mongodb_role, metrics)

    @staticmethod
    def _add_wiredtiger_metrics(metrics, wired_tiger):
        """Add the wt_* metrics taken from the "wiredTiger" section."""
        # cache
        wt_cache = wired_tiger["cache"]
        metrics["wt_cache_used_total_bytes"] = wt_cache["bytes currently in the cache"]
        metrics["wt_cache_dirty_bytes"] = wt_cache["tracked dirty bytes in the cache"]
        metrics["wt_cache_readinto_bytes"] = wt_cache["bytes read into cache"]
        metrics["wt_cache_writtenfrom_bytes"] = wt_cache["bytes written from cache"]

        # concurrentTransactions: guarded so a server that does not report
        # this sub-section does not abort the whole collection run.
        if "concurrentTransactions" in wired_tiger:
            tickets = wired_tiger["concurrentTransactions"]
            metrics["wt_concurrentTransactions_write"] = tickets["write"]["available"]
            metrics["wt_concurrentTransactions_read"] = tickets["read"]["available"]

        # "block-manager" section
        wt_block_manager = wired_tiger["block-manager"]
        metrics["wt_bm_bytes_read"] = wt_block_manager["bytes read"]
        metrics["wt_bm_bytes_written"] = wt_block_manager["bytes written"]
        metrics["wt_bm_blocks_read"] = wt_block_manager["blocks read"]
        metrics["wt_bm_blocks_written"] = wt_block_manager["blocks written"]

    @staticmethod
    def _add_perconaft_metrics(metrics, percona):
        """Add the PerconaFT_* metrics taken from the "PerconaFT" section."""
        for log_key in ("count", "time", "bytes"):
            metrics["PerconaFT_log_" + log_key] = percona["log"][log_key]
        for fsync_key in ("count", "time"):
            metrics["PerconaFT_fsync_" + fsync_key] = percona["fsync"][fsync_key]

        # cachetable
        cachetable_size = percona["cachetable"]["size"]
        for size_key in ("current", "writing", "limit"):
            metrics["PerconaFT_cachetable_size_" + size_key] = cachetable_size[size_key]

        # checkpoint
        checkpoint = percona["checkpoint"]
        metrics["PerconaFT_checkpoint_count"] = checkpoint["count"]
        metrics["PerconaFT_checkpoint_time"] = checkpoint["time"]
        for node_kind in ("nonleaf", "leaf"):
            written = checkpoint["write"][node_kind]
            prefix = "PerconaFT_checkpoint_write_" + node_kind
            metrics[prefix + "_count"] = written["count"]
            metrics[prefix + "_time"] = written["time"]
            metrics[prefix + "_bytes_compressed"] = written["bytes"]["compressed"]
            metrics[prefix + "_bytes_uncompressed"] = written["bytes"]["uncompressed"]

        # serializeTime
        for item, values in percona["serializeTime"].items():
            prefix = "PerconaFT_serializeTime_" + item
            for value_key in values:
                metrics[prefix + "_" + value_key] = values[value_key]

        # compressionRatio
        for item, ratio in percona["compressionRatio"].items():
            metrics["PerconaFT_compressionRatio_" + item] = ratio

    def repl_status(self, connection):
        """Return replica set metrics: state, health, optime, lag, oplog window."""
        replStatus = connection.admin.command("replSetGetStatus")
        print(replStatus)
        repl_status_dict = {}

        my_state = replStatus["myState"]
        repl_status_dict["repl_myState"] = my_state

        repl_status_members = replStatus["members"]
        # Stays 0 when this node cannot see a primary; repl_lag is then the
        # negated local optime, exactly as in the original code.
        master_optime = 0   # primary oplog time
        myself_optime = 0   # this node's oplog time
        print("开始打印repl_status_members")
        print(repl_status_members)
        print("结束打印repl_status_members")

        for repl_member in repl_status_members:
            if repl_member.get("self"):
                repl_status_dict["repl_health"] = repl_member["health"]
                myself_optime = self._optime_seconds(repl_member["optime"])
                print(myself_optime)
                repl_status_dict["repl_optime"] = myself_optime
                # The member document uses "electionTime"/"configVersion";
                # testing the "repl_"-prefixed names never matched.
                if "electionTime" in repl_member:
                    repl_status_dict["repl_electionTime"] = repl_member["electionTime"].time
                if "configVersion" in repl_member:
                    repl_status_dict["repl_configVersion"] = repl_member["configVersion"]
            # On a secondary, remember the primary's optime to compute the lag.
            if (my_state == self._STATE_SECONDARY
                    and repl_member["state"] == self._STATE_PRIMARY):
                master_optime = self._optime_seconds(repl_member["optime"])

        if my_state == self._STATE_SECONDARY:
            repl_status_dict["repl_lag"] = master_optime - myself_optime

        # oplog window, in hours
        oplog_collection = connection["local"]["oplog.rs"]
        oplog_tFirst = oplog_collection.find({}, {"ts": 1}).sort(
            "$natural", pymongo.ASCENDING).limit(1).next()
        oplog_tLast = oplog_collection.find({}, {"ts": 1}).sort(
            "$natural", pymongo.DESCENDING).limit(1).next()

        oplogrs_collstats = connection["local"].command("collstats", "oplog.rs")
        # When oplog.rs is not full yet, scale the observed window by how many
        # times the current data would fit in the allotted size.
        used_bytes = (oplogrs_collstats.get("count", 0)
                      * oplogrs_collstats.get("avgObjSize", 0))
        if "maxSize" in oplogrs_collstats:
            capacity_bytes = oplogrs_collstats["maxSize"]
        else:
            capacity_bytes = oplogrs_collstats["storageSize"]
        # Float division: int division truncates the multiple (possibly to 0)
        # under Python 2.  An empty collstats falls back to a multiple of 1.
        window_multiple = float(capacity_bytes) / used_bytes if used_bytes else 1

        observed_hours = (oplog_tLast["ts"].time - oplog_tFirst["ts"].time) / 3600.0
        # Reported with two decimals (.xx hours).
        repl_status_dict["repl_oplog_window"] = round(observed_hours * window_multiple, 2)

        return repl_status_dict

    def shard_status(self, conn):
        """Return sharding metrics read from the config database (mongos only)."""
        config_db = conn["config"]
        settings_col = config_db["settings"]
        balancer_doc = settings_col.find_one({'_id': 'balancer'})

        shards_dict = {}
        # shards_BalancerState: 0 when the balancer is explicitly stopped,
        # 1 otherwise (including when no balancer settings document exists).
        if balancer_doc is not None and balancer_doc.get("stopped"):
            shards_dict["shards_BalancerState"] = 0
        else:
            shards_dict["shards_BalancerState"] = 1

        # shards_activeWindow metric: 0 = no window configured, 1 = configured.
        # shards_activeWindow_start / _stop: { "start" : "23:30", "stop" : "6:00" }
        # is exported as "23.30" / "6.00".
        shards_dict["shards_activeWindow"] = 0
        if balancer_doc is not None and "activeWindow" in balancer_doc:
            shards_dict["shards_activeWindow"] = 1
            active_window = balancer_doc["activeWindow"]
            if "start" in active_window:
                shards_dict["shards_activeWindow_start"] = (
                    active_window["start"].replace(":", "."))
            if "stop" in active_window:
                shards_dict["shards_activeWindow_stop"] = (
                    active_window["stop"].replace(":", "."))

        # shards_chunkSize metric
        chunksize_doc = settings_col.find_one({"_id": "chunksize"})
        if chunksize_doc is not None:
            shards_dict["shards_chunkSize"] = chunksize_doc["value"]

        # shards_isBalancerRunning metric
        locks_col = config_db["locks"]
        balancer_lock_doc = locks_col.find_one({'_id': 'balancer'})
        if balancer_lock_doc is None:
            print("config.locks collection empty or missing. be sure you are connected to a mongos")
            shards_dict["shards_isBalancerRunning"] = 0
        elif balancer_lock_doc["state"] > 0:
            shards_dict["shards_isBalancerRunning"] = 1
        else:
            shards_dict["shards_isBalancerRunning"] = 0

        # shards_size metric
        shards_col = config_db["shards"]
        shards_dict["shards_size"] = shards_col.count()

        # shards_mongosSize metric
        mongos_col = config_db["mongos"]
        shards_dict["shards_mongosSize"] = mongos_col.count()

        return shards_dict

mongodb_monitor.py

#! /bin/env python
#-*- coding:utf8 -*-
"""Push MongoDB metrics of every configured local instance to the falcon agent.

For each entry under "items" in ../conf/mongomon.conf the script connects to
127.0.0.1:<port>, collects the metrics through mongodbMonitor and POSTs them
to the local agent.  An unreachable instance is reported with the single
metric mongo_local_alive = 0.

NOTE(review): the copy of this file under review had lost its indentation and
its numeric literals.  The success code 0 and the dead-instance value 0 follow
the original comments; the step of 60 seconds is an assumption - confirm it
against the deployed version.
"""

import sys
import os
import time
import datetime
import socket
import yaml
import requests
import json

from mongodb_server import mongodbMonitor

falcon_client = "http://127.0.0.1:1988/v1/push"
ts = int(time.time())

# Reporting interval, in seconds, sent with every item (see NOTE above).
FALCON_STEP = 60

# All metrics that must be uploaded with the falcon COUNTER type; everything
# else is uploaded as GAUGE.
mongodb_counter_metric = [
    "uptime",
    "asserts_msg",
    "asserts_regular",
    "asserts_rollovers",
    "asserts_user",
    "asserts_warning",
    "page_faults",
    "connections_totalCreated",
    "locks_Global_acquireCount_ISlock",
    "locks_Global_acquireCount_IXlock",
    "locks_Global_acquireCount_Slock",
    "locks_Global_acquireCount_Xlock",
    "locks_Global_acquireWaitCount_ISlock",
    "locks_Global_acquireWaitCount_IXlock",
    "locks_Global_timeAcquiringMicros_ISlock",
    "locks_Global_timeAcquiringMicros_IXlock",
    "locks_Database_acquireCount_ISlock",
    "locks_Database_acquireCount_IXlock",
    "locks_Database_acquireCount_Slock",
    "locks_Database_acquireCount_Xlock",
    "locks_Collection_acquireCount_ISlock",
    "locks_Collection_acquireCount_IXlock",
    "locks_Collection_acquireCount_Xlock",
    "opcounters_command",
    "opcounters_insert",
    "opcounters_delete",
    "opcounters_update",
    "opcounters_query",
    "opcounters_getmore",
    "opcountersRepl_command",
    "opcountersRepl_insert",
    "opcountersRepl_delete",
    "opcountersRepl_update",
    "opcountersRepl_query",
    "opcountersRepl_getmore",
    "network_bytesIn",
    "network_bytesOut",
    "network_numRequests",
    "backgroundFlushing_flushes",
    "backgroundFlushing_last_ms",
    "cursor_timedOut",
    "wt_cache_readinto_bytes",
    "wt_cache_writtenfrom_bytes",
    "wt_bm_bytes_read",
    "wt_bm_bytes_written",
    "wt_bm_blocks_read",
    "wt_bm_blocks_written",
]


def _falcon_item(metric, value, tags, counter_type):
    """Build one item of the JSON list posted to the falcon agent."""
    return {
        "endpoint": mongodb_hostname,
        "metric": metric,
        "tags": tags,
        "timestamp": ts,
        "value": value,
        "step": FALCON_STEP,
        "counterType": counter_type,
    }


# The endpoint name comes from the agent's own configuration file.
with open('../../cfg.json') as f:
    data = f.read().replace('\n', '')
jsonlist = json.loads(data)
mongodb_hostname = jsonlist['hostname']

# safe_load instead of yaml.load: the file only needs plain data, and
# yaml.load without an explicit Loader can construct arbitrary objects.
with open("../conf/mongomon.conf") as f:
    y = yaml.safe_load(f)
mongodb_items = y["items"]

for mongodb_ins in mongodb_items:
    mongodb_monitor = mongodbMonitor()
    mongodb_tag = "mongo=" + str(mongodb_ins["port"])
    err, conn = mongodb_monitor.mongodb_connect(
        host="127.0.0.1",
        port=mongodb_ins["port"],
        user=mongodb_ins["user"],
        password=mongodb_ins["password"])

    mongodb_upate_list = []
    if err != 0:
        # The instance is dead: upload "mongo_local_alive = 0" only, then
        # move on to the next configured instance.
        mongodb_upate_list.append(
            _falcon_item("mongo_local_alive", 0, mongodb_tag, "GAUGE"))
        r = requests.post(falcon_client, data=json.dumps(mongodb_upate_list))
        continue

    mongodb_dict = mongodb_monitor.get_mongo_monitor_data(conn)
    for mongodb_metric in mongodb_dict:
        if mongodb_metric in mongodb_counter_metric:
            counter_type = "COUNTER"
        else:
            counter_type = "GAUGE"
        mongodb_upate_list.append(
            _falcon_item(mongodb_metric, mongodb_dict[mongodb_metric],
                         mongodb_tag, counter_type))

    print("开始上报")
    print(json.dumps(mongodb_upate_list))
    r = requests.post(falcon_client, data=json.dumps(mongodb_upate_list))
    print(r)