#!/usr/bin/python # -*- coding:utf-8 -*- __author__ = 'lvnian' #!/usr/bin env python # coding: utf-8 import MySQLdb as mysql import sys,os,re db = mysql.connect(user="root",passwd="mysql",db="nginx_log",host="192.168.11.251") #数据库连接信息 db.autocommit(True) cur = db.cursor() cur.execute('set names utf8') dict_list = {} log = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) \<br> (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[,])""", re.IGNORECASE) with open('access.log') as logfin: for line in logfin.readlines(): data = re.search(log, line) if data: datadict = data.groupdict() date = datadict["dateandtime"] ip = datadict["ip"] url = datadict["url"] # ip url 和status当key,每次统计+1 dict_list[(date,ip,url)] = dict_list.get((date,ip,url),0)+1 # 转换成列表 ip_list = [(k[0],k[1],k[2],v) for k,v in dict_list.items()] # 按照统计数量排序,排序后保存到数据库。 for insert in sorted(ip_list,key=lambda x:x[3],reverse=True): print insert #测试用的,可以不要。整个功能是输出一行,然后保存到数据库。 sql = 'insert loginfo values ("%s","%s","%s","%s")' % insert print sql cur.execute(sql)