# 【使用 Python 脚本筛分重复日志】 Use a Python script to pick out duplicate log entries.
import json
import sys
# Path of the log file to scan, taken from the first command-line argument.
filepath = sys.argv[1]

# Running counters, updated by dataClear().
total = 0   # lines passed to dataClear()
count = 0   # lines whose signature (without uri) was already seen
count2 = 0  # lines whose signature (including uri) was already seen

# toUserName -> list of signatures seen so far.
temp = {}   # signature: userID_externalUserID_createTime
temp2 = {}  # signature: userID_externalUserID_createTime_uri

# toUserName -> list of signatures that showed up again (one entry per repeat).
result = {}
result2 = {}

# Per-company summary text, built and printed at the end of the script.
resultStr = ''
def dataClear(s):
    """Record one callback log line and flag it if it repeats an earlier one.

    The JSON payload and the callback uri are cut out of the line, then two
    signatures are built from the payload fields:

    * ``userID_externalUserID_createTime``
    * the same signature with ``_<last uri path segment>`` appended

    Both are tracked per ``toUserName`` in the module-level ``temp`` /
    ``temp2`` dicts. A signature that was already recorded bumps ``count`` /
    ``count2`` and is handed to ``putResult`` / ``putResult2``.

    Args:
        s: One log line, without its trailing newline.

    Raises:
        json.JSONDecodeError: The text between the two payload markers is
            not valid JSON (this also happens when a marker is missing,
            because the slice is then taken from the wrong positions).
        KeyError: The payload lacks one of ``toUserName``, ``userID``,
            ``externalUserID`` or ``createTime``.
    """
    global count, total, count2
    total += 1

    # Extract the callback object: it sits between the callback marker and
    # 'logSeq:0'. The extra -1 drops the single character right before
    # 'logSeq:0' (presumably a separator -- confirm against the log format).
    payload_prefix = '**客户回调信息:'
    payload_suffix = 'logSeq:0'
    payload_text = s[s.find(payload_prefix) + len(payload_prefix):
                     s.find(payload_suffix) - 1]
    payload = json.loads(payload_text)

    # Extract the callback uri: take the text from 'uri:' up to three
    # characters before 'method:', then keep only the last path segment.
    uri_prefix = 'uri:'
    uri_suffix = 'method:'
    uri_tail = s[s.find(uri_prefix):s.find(uri_suffix) - 3].split('/')[-1]

    key = payload['toUserName']
    val = (payload['userID'] + '_' + payload['externalUserID'] + '_'
           + str(payload['createTime']))
    val2 = val + '_' + uri_tail

    # Duplicate check that ignores the uri.
    seen = temp.setdefault(key, [])
    if val in seen:
        count += 1
        putResult(key, val)
    else:
        seen.append(val)

    # Duplicate check that also requires the same uri.
    seen_with_uri = temp2.setdefault(key, [])
    if val2 in seen_with_uri:
        count2 += 1
        putResult2(key, val2)
    else:
        seen_with_uri.append(val2)
def putResult(key, val):
    """Append ``val`` to the list stored under ``key`` in ``result``.

    The list is created on first use, so every call adds exactly one entry
    and repeated values are kept.

    Args:
        key: Dictionary key to file the value under.
        val: Value to record.
    """
    result.setdefault(key, []).append(val)
def putResult2(key, val):
    """Append ``val`` to the list stored under ``key`` in ``result2``.

    The list is created on first use, so every call adds exactly one entry
    and repeated values are kept.

    Args:
        key: Dictionary key to file the value under.
        val: Value to record.
    """
    result2.setdefault(key, []).append(val)
# Feed every line of the log file to dataClear(). Iterating the file object
# streams it line by line instead of loading the whole log into memory.
with open(filepath, 'r', encoding='utf-8') as f:
    for line in f:
        dataClear(line.strip('\n'))

# Print every duplicate grouped by company, collecting one summary line per
# company along the way.
summary_lines = []
for keys in result:
    same_uri_vals = result2.get(keys, [])
    summary_lines.append(
        '公司:' + keys + ',重复:' + str(len(result[keys]))
        + '条,同uri重复:' + str(len(same_uri_vals)) + '条' + '\n'
    )
    for vals in result[keys]:
        print('公司:' + keys + ',重复回调:' + vals)
    for vals in same_uri_vals:
        print('公司:' + keys + ',同uri重复回调:' + vals)
resultStr = resultStr + ''.join(summary_lines)

# Overall totals first, then the per-company summary.
print('总回调数:' + str(total) + ',重复条数:' + str(count) + '条,同uri重复条数:' + str(count2) + '条')
print(resultStr)