1.1.1 统计访问日志中流量最大的 Top 10 文件,并按流量倒序排序
第一次处理:从日志中提取两列(文件路径、文件大小)保存到新的文件中。
用 sed 截取字符串中间的内容。注意:sed 的正则默认是贪婪匹配,不支持非贪婪匹配。找出图片所在的列和图片大小,输出到 test1 文件。
本题需要输出三个指标:【访问次数】【访问次数*单个文件大小】【文件名(可以带URL)】
测试数据
59.33.26.105 - - [08/Dec/2010:15:43:56 +0800] "GET /static/images/photos/2.jpg HTTP/1.1" 200 11299 "http://oldboy.blog.51cto.com/static/web/column/17/index.shtml?courseId=43" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"
59.33.26.105 - - [08/Dec/2010:15:43:56 +0800] "GET /static/images/photos/2.jpg HTTP/1.1" 200 11299 "http://oldboy.blog.51cto.com/static/web/column/17/index.shtml?courseId=43" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"
59.33.26.105 - - [08/Dec/2010:15:44:02 +0800] "GET /static/flex/vedioLoading.swf HTTP/1.1" 200 3583 "http://oldboy.blog.51cto.com/static/flex/AdobeVideoPlayer.swf?width=590&height=328&url=/[[DYNAMIC]]/2" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"
124.115.4.18 - - [08/Dec/2010:15:44:15 +0800] "GET /?= HTTP/1.1" 200 46232 "-" "-"
124.115.4.18 - - [08/Dec/2010:15:44:25 +0800] "GET /static/js/web_js.js HTTP/1.1" 200 4460 "-" "-"
124.115.4.18 - - [08/Dec/2010:15:44:25 +0800] "GET /static/js/jquery.lazyload.js HTTP/1.1" 200 1627 "-" "-"
============================答题步骤
用 sed 截取字符串中间的内容。注意:sed 的正则默认是贪婪匹配,不支持非贪婪匹配。找出图片所在的列和图片大小,输出到 test1 文件。
sed -rn 's/.*GET (.*) HTTP.*200 (.*) /\1 \t \2/gp' bb.txt >>test1.txt
/static/images/photos/2.jpg 11299
/static/images/photos/2.jpg 11299
/static/flex/vedioLoading.swf 3583
/?= 46232 "-" "-"
/static/js/web_js.js 4460 "-" "-"
/static/js/jquery.lazyload.js 1627 "-" "-"
awk -F " " '{print $2}' test1.txt |sort -nr   # 对数字列(文件大小)按数值倒序排序
uniq -c   # 统计相邻重复行出现的次数,因此使用前必须先用 sort 排序
awk -F " " '{print $1"\t"$2}' test1.txt |sort -n|uniq -c>test2.txt #第二次处理
1 /?= 46232
1 /static/flex/vedioLoading.swf 3583
2 /static/images/photos/2.jpg 11299
1 /static/js/jquery.lazyload.js 1627
1 /static/js/web_js.js 4460
[root@ob data]# awk -F " " '{print (($1*$3))"\t" $2}' test2.txt|sort -nru #最后合并排序
46232 /?=
22598 /static/images/photos/2.jpg
4460 /static/js/web_js.js
3583 /static/flex/vedioLoading.swf
1627 /static/js/jquery.lazyload.js
======================================================#sed
awk -F"GET |HTTP/1.1|200 " '{print $2,$4}' /data/bb.txt
/static/images/photos/2.jpg 11299
/static/images/photos/2.jpg 11299
/static/flex/vedioLoading.swf 3583
/?= 46232 "-" "-"
/static/js/web_js.js 4460 "-" "-"
/static/js/jquery.lazyload.js 1627 "-" "-"
[root@ob1 mytmp]# awk -F"GET |HTTP/1.1|200 " '{TP[$2]++}END{for (i in TP) print i,TP[i]}' /data/bb.txt
7
/static/js/jquery.lazyload.js 1
/static/flex/vedioLoading.swf 1
/?= 1
/static/images/photos/2.jpg 2
/static/js/web_js.js 1
[root@ob1 mytmp]# awk -F"GET |HTTP/1.1|200 " '{TP[$2]++}END{for (i in TP) print i,TP[i],$4}' /data/bb.txt|awk -F ' ' '{print $2*$3,$1}'|sort -nrk1
3254 /static/images/photos/2.jpg
1627 /static/js/web_js.js
1627 /static/js/jquery.lazyload.js
1627 /static/flex/vedioLoading.swf
1627 /?=
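0 7
注意:上面这组结果是错误的(例如 2.jpg 应为 2*11299=22598,而不是 3254)。原因是 END 块中的 $4 只保留最后一条记录的取值(这里是 1627),所以每个文件的访问次数都被乘以了 1627;空行产生的空键则输出了 "0 7"。
正确做法是在逐行处理时把文件大小也存入数组,并跳过空行,同时输出【访问次数】【访问次数*单个文件大小】【文件名】三个指标,例如:
awk -F"GET |HTTP/1.1|200 " '$2!=""{cnt[$2]++; size[$2]=$4+0} END{for (u in cnt) print cnt[u], cnt[u]*size[u], u}' /data/bb.txt | sort -k2,2nr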