linux统计cdn日志慢请求

时间:2023-11-24 14:54:38

./stat_ip.sh live-https.log-0510.gz 1000

#首先用shell脚本统计出指定日志中请求耗时超过指定阈值的慢请求所对应的ip及其调用次数(传两个参数:第一个是日志文件名,第二个是耗时阈值)

#!/bin/bash
# Usage: ./stat_ip.sh <gzipped-log-file-name> <threshold>
#   $1 - name of the gzipped log file to analyse
#   $2 - request-time threshold; requests above it are counted as slow
log=$1
threshold=$2
#######################################
# Set the file locations shared by the other functions.
# Globals:
#   log (read)
#   ori_log_path, tmp_log_path, tmp_log_path2, confirm_path (written)
#######################################
define() {
  # Input: the log file named by ${log}.
  ori_log_path="/home/bjliuzezhou/${log}"

  # Scratch files.
  tmp_log_path="/home/bjliuzezhou/temp.log"
  tmp_log_path2="/home/bjliuzezhou/temp2.log"

  # Result file.
  confirm_path="/home/bjliuzezhou/previewlist.log"
}
#######################################
# Extract the NewsApp/previewlist requests from the gzipped log, compute
# summary statistics and write the per-IP count of slow requests.
# Globals:
#   ori_log_path, tmp_log_path, tmp_log_path2, confirm_path, threshold (read)
#   log_num, request_time, ave_request_time, log_num2, ratio (written)
# Outputs:
#   Start/end banners on stdout; "<count> <first field>" lines, sorted by
#   count, in ${confirm_path}.
#######################################
gather() {
  echo 'gather start-----------------------------------------------------------------'

  # Keep "<field NF-3> <field 1>" for every matching line; the first column is
  # summed below as the request time, the second is what gets counted.
  gzip -dc -- "${ori_log_path}" \
    | grep 'NewsApp' \
    | grep 'previewlist' \
    | awk '{print $(NF-3), $1}' > "${tmp_log_path}"

  log_num=$(awk 'END {print NR}' "${tmp_log_path}")
  # "sum + 0" yields 0 instead of an empty string when nothing matched.
  request_time=$(awk '{sum += $1} END {print sum + 0}' "${tmp_log_path}")
  # Values are handed to awk with -v rather than spliced into the program
  # text, and the division is guarded so an empty result set reports 0
  # instead of aborting with a syntax or division-by-zero error.
  ave_request_time=$(awk -v total="${request_time}" -v n="${log_num}" \
    'BEGIN { if (n > 0) print total / n; else print 0 }')

  # Second column of every line whose first column exceeds the threshold.
  awk -v th="${threshold}" '$1 > th {print $2}' "${tmp_log_path}" \
    > "${tmp_log_path2}"
  log_num2=$(awk 'END {print NR}' "${tmp_log_path2}")
  ratio=$(awk -v slow="${log_num2}" -v n="${log_num}" \
    'BEGIN { if (n > 0) print slow / n; else print 0 }')

  # Group identical values, count them, then order by ascending count.
  sort "${tmp_log_path2}" | uniq -c | sort -n > "${confirm_path}"
  # awk -f ip_cn.awk ${confirm_path}

  # Remove the scratch files through the same variables they were written to.
  rm -f -- "${tmp_log_path}" "${tmp_log_path2}"
  echo 'gather end--------------------------------------------------------------------'
}
#######################################
# Print the statistics report, one metric per line.
# Globals:
#   request_time, log_num, ave_request_time, log_num2, ratio (read)
# Outputs:
#   Five lines on stdout.
#######################################
output() {
  printf '%s\n' \
    "request total time is ${request_time}" \
    "request total number is ${log_num}" \
    "aver request time is ${ave_request_time}" \
    "long request total number is ${log_num2}" \
    "long request time ratio is ${ratio}"
}
#######################################
# Run the three stages in order: set up paths, collect statistics, print
# the report.
#######################################
main() {
  define
  gather
  output
}
# Script entry point: run the stages wired together in main().
main

nohup ./ip_operator.sh preview.log &

#然后将ip后8位置为0(原因参照全国ip段划分),统计出ip段以及对应的次数,并且解析出对应的运营商
#参数要传刚获取的preview.log,抓取运营商时间较长,慎重!并且需要注意一个坑:awk脚本中不能直接执行shell脚本,需要通过system()来调用!
#!/bin/bash
# Usage: ./ip_operator.sh <file>
#   $1 - input file whose lines contain a dotted IPv4 address
#
# Writes two files in the current directory:
#   temp2.log - each input line cut to its first three dot-separated fields
#               plus ".0", with a leading occurrence count from uniq -c
#   zzz.log   - concatenated output of getip.sh, one call per temp2.log line

# Keep the first three dot-separated fields, append ".0" as the last octet,
# then count identical lines. Piping directly avoids a scratch temp.log.
cut -d . -f 1,2,3 -- "$1" \
  | sed 's/$/.0/' \
  | sort -n \
  | uniq -c > temp2.log

# Look up the third whitespace-separated field of every line. Presumably the
# input lines are "<count> <ip>", so after uniq -c prepends its own count the
# address segment is field three - confirm against the actual input.
# The value is passed as a quoted argument instead of being concatenated into
# a command string, and stdin is detached so the lookup cannot consume the
# loop's input.
while read -r _ _ segment _; do
  sh getip.sh "${segment}" < /dev/null
done < temp2.log > zzz.log
#!/bin/bash
# Usage: sh getip.sh <ip>
#   $1 - address to look up
# Requests ip.cn with the address as the "ip" query parameter and prints the
# response on stdout.
# The URL is quoted so the shell never treats "?" as a glob character and the
# argument is not word-split.
curl "ip.cn?ip=$1" < /dev/null

将最后解析出的temp2.log和zzz.log数据放在excel进行整合,分列,然后去重,开始快乐的统计之旅吧