/******************************************************************************************
* 版权声明
* 本文为本人原创,本人拥有此文的版权。鉴于本人持续受益于开源软件社区,
* 本人声明:任何个人及团体均可不受限制的转载和复制本文,无论是否用于盈利
* 之目的,但不得修改文章内容,并必须在转载及复制时同时保留本版权声明,否
* 则为侵权行为,本人保留追究相应主体法律责任之权利。
* speng2005@gmail.com
* 2016-1
******************************************************************************************/
大道至简!
相信抓包是程序员,运维工程师,架构师,都必不可少的一项技能。但是能够深入掌握好这门技艺的人,确实需要有开发,网络,运维,架构等“跨界”背景才能比较好地发挥抓包神技的威力。本文是纯干货,重点不在于理论,更注重实战技能,尤其注重对抓包数据的分析。本文中的命令追求的是使用最简单,最普及的Linux系统自带工具包实现各种抓包分析,具有尽可能广泛的移植性和可用性。文中给出的命令均在CentOS 6.3,tcpdump 4.1版本下测试可用;其他平台及环境,可能需要你自己微调部分命令及脚本才可以运行。文中多数命令及脚本都严重依赖于tcpdump命令输出文本数据格式,微调代码时应格外注意这一点。注意,本文中的命令适用于一般的基于TCP连接的,请求响应模型的网络服务,但不适用于使用pipeline模式的网络服务。如果想理解本文命令思路的话,需要你熟悉TCP/IP协议,网络OSI模型,常见网络通讯协议,socket编程,Linux脚本编程,awk脚本编程,数据挖掘思维方式等知识,不足者请自行脑补。理解本文的思路后,还可以在这些命令基础上有许多种灵活搭配和变种,请自行研究。不想费脑细胞的,运气好的话,很多命令都可以直接使用。尽管这里讲的是Linux抓包,但如果使用流行的Wireshark在Windows上抓包后保存成tcpdump格式文件,然后上传到Linux系统上照样可以使用本文中的命令进行分析。
1.盲抓
盲抓就是瞎抓,尤其在你接触到一台陌生机器,感觉有点抓瞎的时候,应当使用本节中的方法,其目的是找出这台机器上的重点网络服务是哪些ip和端口。如果你明确的知道要抓的包是哪个ip,哪个端口,可以直接跳至下一小节。
在目标机器上抓取任意tcp数据(需要root权限):
tcpdump -i any -nn tcp > 123.pkg.head.txt
分析目标机器上热点ip:port(按数据包数量统计):
cat 123.pkg.head.txt | awk 'NF>5 && $2=="IP" {print $3,"[out]"; print substr($5,1,length($5)-1), "[in]"}' | sort | uniq -c | awk '{buf[NR]=$0;sum+=$1} END{for(i=1;i<=NR;i++) print buf[i], sum}' | awk '{printf("%s %s %s %.2f %%\n",$1, $2, $3, 100*$1/$4)}' | sort -nr -k 4 | less 示例: 2359 10.71.28.21.80 [in] 23.59 % 2057 10.71.28.21.80 [out] 20.57 % 331 10.71.28.20.8080 [in] 3.31 % 253 10.71.28.20.8080 [out] 2.53 % 190 218.92.220.60.39147 [out] 1.90 % 154 218.92.220.60.39147 [in] 1.54 % 106 218.92.220.56.60503 [out] 1.06 %分析目标机器上热点ip:port(按字节数,即带宽统计):
cat 123.pkg.head.txt | awk 'BEGIN{header_len=0x36;inp=0;inb=0;inbb=0;outp=0;outb=0;outbb=0;start="";end=""} function getBodyLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c)return a[2];else return 0;} NF>5 && $2=="IP" {len=header_len + getBodyLen();print $3,"[out]", len; print substr($5,1,length($5)-1), "[in]", len; }' | sort | awk 'BEGIN{ipport="";dir="";bc=0} {if($1!=ipport || $2!=dir){if(bc>0)print ipport,dir,bc;ipport=$1;dir=$2;bc=$3;}else{bc+=$3;}} END{if(bc>0)print ipport,dir,bc;}' | awk '{buf[NR]=$0;sum+=$3} END{for(i=1;i<=NR;i++) print buf[i], sum}' | awk '{printf("%s %s %s %.2f %%\n",$1, $2, $3, 100*$3/$4)}' | sort -nr -k 4 | less 示例: 10.71.28.21.80 [out] 2559403 42.46 % 218.92.220.60.39147 [in] 417277 6.92 % 10.71.28.21.80 [in] 306507 5.08 % 101.26.37.38.39906 [in] 271432 4.50 % 122.225.28.115.48239 [in] 170901 2.84 % 218.92.220.56.60503 [in] 151630 2.52 % 218.92.220.62.59711 [in] 151322 2.51 % 218.92.220.56.25777 [in] 138038 2.29 % 60.210.23.239.7490 [in] 131700 2.18 % 10.71.28.20.8080 [out] 121366 2.01 %分析数据流量概要信息(调整ipportRegex的值来控制统计口径):
cat 123.pkg.head.txt | awk 'BEGIN{ipportRegex="10.71.28.21.80";header_len=0x36;inp=0;inb=0;inbb=0;outp=0;outb=0;outbb=0;start="";end=""} function getBodyLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c)return a[2];else return 0;} $3~ipportRegex{outp++;bl=getBodyLen();outb+=header_len+bl;outbb+=bl;if(""==start)start=$1;end=$1;} $5~ipportRegex{inp++;bl=getBodyLen();inb+=header_len+bl;inbb+=bl;if(""==start)start=$1;end=$1;} function timeSub(s,e){split(substr(s,1,8),sA1,":");s1=sA1[1]*3600+sA1[2]*60+sA1[3];us1=substr(s,10);split(substr(e,1,8),sA2,":");s2=sA2[1]*3600+sA2[2]*60+sA2[3];us2=substr(e,10);return s2*1000000+us2-s1*1000000-us1;} END{second=timeSub(start,end)/1000000;if(second<1) second=1; printf("total time: %.1f seconds\n", second);printf("[In] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", inp,inp/second,inb,inb/second/1024,inbb,inbb/second/1024,100*inbb/inb);printf("[Out] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", outp,outp/second,outb,outb/second/1024,outbb,outbb/second/1024,100*outbb/outb);}'
cat 123.pkg.head.txt | awk 'BEGIN{ipportRegex="10.71.28.21.";header_len=0x36;inp=0;inb=0;inbb=0;outp=0;outb=0;outbb=0;start="";end=""} function getBodyLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c)return a[2];else return 0;} $3~ipportRegex{outp++;bl=getBodyLen();outb+=header_len+bl;outbb+=bl;if(""==start)start=$1;end=$1;} $5~ipportRegex{inp++;bl=getBodyLen();inb+=header_len+bl;inbb+=bl;if(""==start)start=$1;end=$1;} function timeSub(s,e){split(substr(s,1,8),sA1,":");s1=sA1[1]*3600+sA1[2]*60+sA1[3];us1=substr(s,10);split(substr(e,1,8),sA2,":");s2=sA2[1]*3600+sA2[2]*60+sA2[3];us2=substr(e,10);return s2*1000000+us2-s1*1000000-us1;} END{second=timeSub(start,end)/1000000;if(second<1) second=1; printf("total time: %.1f seconds\n", second);printf("[In] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", inp,inp/second,inb,inb/second/1024,inbb,inbb/second/1024,100*inbb/inb);printf("[Out] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", outp,outp/second,outb,outb/second/1024,outbb,outbb/second/1024,100*outbb/outb);}'
cat 123.pkg.head.txt | awk 'BEGIN{ipportRegex="10.";header_len=0x36;inp=0;inb=0;inbb=0;outp=0;outb=0;outbb=0;start="";end=""} function getBodyLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c)return a[2];else return 0;} $3~ipportRegex{outp++;bl=getBodyLen();outb+=header_len+bl;outbb+=bl;if(""==start)start=$1;end=$1;} $5~ipportRegex{inp++;bl=getBodyLen();inb+=header_len+bl;inbb+=bl;if(""==start)start=$1;end=$1;} function timeSub(s,e){split(substr(s,1,8),sA1,":");s1=sA1[1]*3600+sA1[2]*60+sA1[3];us1=substr(s,10);split(substr(e,1,8),sA2,":");s2=sA2[1]*3600+sA2[2]*60+sA2[3];us2=substr(e,10);return s2*1000000+us2-s1*1000000-us1;} END{second=timeSub(start,end)/1000000;if(second<1) second=1; printf("total time: %.1f seconds\n", second);printf("[In] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", inp,inp/second,inb,inb/second/1024,inbb,inbb/second/1024,100*inbb/inb);printf("[Out] pkg count: %s, pkg rate: %.2f pkg/s, bytes count: %s, bytes rate: %.2f KB/s, body bytes: %s, body bytes rate: %.2f KB/s, payload percent: %.1f%%\n", outp,outp/second,outb,outb/second/1024,outbb,outbb/second/1024,100*outbb/outb);}'
示例:
total time: 45.7 seconds
[In] pkg count: 2359, pkg rate: 51.59 pkg/s, bytes count: 306507, bytes rate: 6.55 KB/s, body bytes: 179121, body bytes rate: 3.83 KB/s, payload percent: 58.4%
[Out] pkg count: 2057, pkg rate: 44.99 pkg/s, bytes count: 2559403, bytes rate: 54.66 KB/s, body bytes: 2448325, body bytes rate: 52.29 KB/s, payload percent: 95.7%
2.抓取源数据
明确了要抓取的目标服务的ip和端口后,就要开始正式抓包了,这些抓包数据是后续分析工作的基础。当然,你可能在抓包之前还要做一些其他工作,以保证抓包时的工作场景就是你想要分析的目标场景。抓包时你需要root权限。
抓取不含包体的包:
tcpdump -nn port 80 > 123.pkg.txt
抓取含有包体的包并进行初步解包:
tcpdump port 80 -w 123.pkg -s 120 tcpdump port 80 -w 123.pkg -s 0 tcpdump -r 123.pkg -XXnn > 123.pkg.txt 示例: 11:12:01.894222 IP 10.64.12.14.59493 > 10.70.60.56.1521: Flags [P.], seq 3862499245:3862499262, ack 627056474, win 501, options [nop,nop,TS val 2916000715 ecr 736856730], length 17 0x0000: 0000 0c07 ace9 0022 195d 2445 0800 4500 .......".]$E..E. 0x0010: 0045 cb3b 4000 4006 12ac 0a40 0c0e 0a46 .E.;@.@....@...F 0x0020: 3c38 e865 05f1 e639 0fad 2560 1f5a 8018 <8.e...9..%`.Z.. 0x0030: 01f5 5d03 0000 0101 080a adce a3cb 2beb ..]...........+. 0x0040: 8a9a 0011 0000 0600 0000 0000 0305 0001 ................ 0x0050: 0301 0a ... 11:12:01.894226 IP 10.64.12.14.59514 > 10.70.60.56.1521: Flags [P.], seq 3903163064:3903163580, ack 4171729186, win 501, options [nop,nop,TS val 2916000715 ecr 736856729], length 516 0x0000: 0000 0c07 ace9 0022 195d 2445 0800 4500 .......".]$E..E. 0x0010: 0238 f2e9 4000 4006 e90a 0a40 0c0e 0a46 .8..@.@....@...F 0x0020: 3c38 e87a 05f1 e8a5 8ab8 f8a7 8922 8018 <8.z.........".. 0x0030: 01f5 5ef6 0000 0101 080a adce a3cb 2beb ..^...........+. 0x0040: 8a99 0204 0000 0600 0000 0000 1169 0001 .............i..
3.过滤
有时候,需要分析的问题是包含在抓取目标服务的全部数据包中的一小部分,这个时候就要对包数据进行过滤。当然也可以在上一节进行抓取时直接进行过滤,但这里主要满足事后的过滤需求。
过滤掉含有包体的数据文件,只保留数据包头:
cat 123.pkg.txt | awk 'substr($1,1,3)!="0x0"' > 123.pkg.head.txt 示例: 11:12:01.894222 IP 10.64.12.14.59493 > 10.70.60.56.1521: Flags [P.], seq 3862499245:3862499262, ack 627056474, win 501, options [nop,nop,TS val 2916000715 ecr 736856730], length 17 11:12:01.894226 IP 10.64.12.14.59514 > 10.70.60.56.1521: Flags [P.], seq 3903163064:3903163580, ack 4171729186, win 501, options [nop,nop,TS val 2916000715 ecr 736856729], length 516 11:12:01.894228 IP 10.64.12.14.59518 > 10.70.60.56.1521: Flags [P.], seq 3897659932:3897660175, ack 3171500152, win 501, options [nop,nop,TS val 2916000715 ecr 736856729], length 243 11:12:01.894231 IP 10.64.12.14.59512 > 10.70.60.56.1521: Flags [P.], seq 3898476274:3898476788, ack 1748466768, win 501, options [nop,nop,TS val 2916000715 ecr 736856730], length 514按时间段过滤含有包体的数据文件(调整start和end的值以改变时间范围):
cat 123.pkg.txt | awk 'BEGIN {start="11:13:49.0";end="11:13:49.9";flag=0} {if(substr($1,1,3)=="0x0"){if(1==flag){print $0}}else{cur=substr($1,1,length(start));if(cur>=start && cur<=end){print $0;flag=1}else{flag=0}}}' > new123.pkg.txt按ip端口过滤含有包体的数据文件(调整ip和port的值):
cat new123.pkg.txt | awk 'BEGIN {ip="192.168.122.180";port="54365";flag=0} {if(substr($1,1,3)=="0x0"){if(1==flag){print $0}}else if($0~ip"."port){print $0;flag=1;}else{flag=0;}}' > newnew123.pkg.txt按文本关键字过滤含有包体的数据文件(调整keyword的值):
cat newnew123.pkg.txt | awk 'BEGIN{keyword="id_10bc3c9";buf="";found=0} {if(substr($1,1,2)=="0x"){if($0~keyword) found=1;buf=buf"\n"$0}else{if(1==found){print buf;found=0;}if($0~keyword) found=1;buf=$0}} END{if(1==found) print buf}' > tmp.pkg.txt按二进制串关键字从二进制数据包中过滤含有包体的数据文件(性能稍低,对于大文件耐心等待;例子中是过滤包含"test_dava_11968@163.com"字符串的数据包):
cat newnew123.pkg.txt | awk 'BEGIN{keyword="746573745f646176615f3131393638403136332e636f6d";bin="";buf=""} {if(substr($1,1,2)=="0x"){for(i=2;i<NF;i++) bin=bin""$i;buf=buf"\n"$0}else{if(length(bin)>0&&bin~keyword) print buf;buf=$0;bin=""}} END{if(length(bin)>0&&bin~keyword) print buf}' > tmp2.pkg.txt
4.分类
通过分类,将抓包文件中混杂在一起的多个客户端的数据包拆分成各自单独的文件,以利于人工查看以及后续分析。新生成的文件放在当前目录下的子目录中,例如10.64.12.14.443_detail。
按client端ip端口将混杂在一起的含有包体的包拆分成单独的文件:
cat 123.pkg.txt | awk 'BEGIN{server="10.70.60.56.1521";buf="";client="";dir=server"_detail";system("mkdir "dir" 2>/dev/null")} {if(substr($1,1,2)=="0x"){buf=buf"\n"$0}else{if(length(client)>0) print buf>>dir"/"client".txt";if($3==server){client=substr($5,1,length($5)-1);}else if($5~server){client=$3}else{client=""} buf=$0}}'按client端ip端口将混杂在一起的含有包体的包拆分的单独文件(超过1000个client后因受同时打开文件数限制awk性能非常低,可增加dd命令的bs缓冲区大小,并使用limit参数进行限制):
dd if=123.pkg.txt bs=100M 2>/dev/null | awk 'BEGIN{server="10.70.60.56.1521";limit=1000;buf="";client="";count=0;dir=server"_detail";system("mkdir "dir" 2>/dev/null")} {if(substr($1,1,2)=="0x"){if(length(client)>0) buf=buf"\n"$0}else{if(length(client)>0) print buf>>dir"/"client".txt";if($3==server){client=substr($5,1,length($5)-1)}else if($5~server){client=$3}else{client=""}if(length(client)>0){if("ok"==all_clients[client]){buf=$0}else if(count<limit){count++;all_clients[client]="ok";buf=$0}else{client=""}}}}'
5.交互分析
前面的分析处理仅限于数据包本身的ip和port,而本节的交互分析则将数据包归类到一个个具体的tcp连接之中,提取有关的交互“成本”数据,并尝试分析数据包在请求响应模型中的交互语义。本节中的命令需要系列awk脚本支持,为不影响理解本节主要内容,相关脚本源码附录在文章末尾供参考,其中包含主脚本parseTcp.awk,以及若干插件脚本:fixLenParser.awk,lineParser.awk,oracleSqlParser.awk,httpParser.awk。
分析单个不含包体的数据文件的交互过程:
cat 123.pkg.txt | /data/speng/tcpdump/parseTcp.awk 61.135.169.125.80 > interaction.txt
分析单个不含包体的数据文件中只与某特定客户端有关的交互过程:
cat 123.pkg.txt | /data/speng/tcpdump/parseTcp.awk 61.135.169.125.80 1 172.20.154.45.3752 > interaction.txt 示例: 14:00:53.123517 client 172.20.154.45.3752_2455096098 connect take(us): 3291 slient_before(us): 72266270 result: ok 14:00:53.132710 client 172.20.154.45.3752_2455096098 round-trip 1 slient_before(us): 5902 total take(us): 5927 req_time(us): 76 req_bytes: 1635 server_process(us): 5402 response_time(us): 449 response_bytes: 6587 14:00:53.263446 client 172.20.154.45.3752_2455096098 round-trip 2 slient_before(us): 124809 total take(us): 17767 req_time(us): 88 req_bytes: 1620 server_process(us): 17679 response_time(us): 0 response_bytes: 946 14:00:53.472286 client 172.20.154.45.3752_2455096098 round-trip 3 slient_before(us): 191073 total take(us): 9319 req_time(us): 53 req_bytes: 1620 server_process(us): 8517 response_time(us): 749 response_bytes: 10361 14:00:54.587117 client 172.20.154.45.3752_2455096098 round-trip 4 slient_before(us): 1105512 total take(us): 13233 req_time(us): 92 req_bytes: 1618 server_process(us): 13113 response_time(us): 28 response_bytes: 2777 14:01:14.598513 client 172.20.154.45.3752_2455096098 close take(us): 589 slient_before(us): 19998163 close_bytes: 0 direction: server->client result: full_close分析多个分类后含有包体的数据文件的交互过程(cd进入上一节分类后所生成的目录下执行命令):
ls | xargs -n 10 awk 'BEGIN{f=""} {if(FILENAME!=f){print "newfile",FILENAME;f=FILENAME}if(substr($1,1,2)!="0x") print $0}' | ../parseTcp.awk 10.70.60.56.1521 > ../interaction.txt分析多个分类后含有包体的数据文件的交互过程(cd进入分类后文件所在目录下执行命令。支持识别包体中的特征数据,并打印出来便于后续业务分析。可编写类似oracleSqlParser.awk或httpParser.awk插件脚本进行新协议或业务扩展。源码附录在文章末尾):
ls | xargs -n 10 awk -f ../oracleSqlParser.awk --source 'function getLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c) return a[2];else return 0;} BEGIN{f="";h="";b=0;} {if(FILENAME!=f){if(1==b)parserEnd();if(""!=h)print h;print "newfile",FILENAME;f=FILENAME;h="";b=0;}if(substr($1,1,2)=="0x"){if(1==b)parserWork();}else{if(1==b)parserEnd();b=0;if(""!=h)print h;h=$0;bl=getLen();if(bl>0){parserInit(bl);b=1;}}} END{if(1==b)parserEnd();if(""!=h)print h;}' | ../parseTcp.awk 10.70.60.56.1521 > ../interaction.txt ls | xargs -n 10 awk -f ../httpParser.awk --source 'function getLen(){if("length"==$(NF-1)) return $NF;t_c=split($7,a,/[()]/);if(3==t_c) return a[2];else return 0;} BEGIN{f="";h="";b=0;} {if(FILENAME!=f){if(1==b)parserEnd();if(""!=h)print h;print "newfile",FILENAME;f=FILENAME;h="";b=0;}if(substr($1,1,2)=="0x"){if(1==b)parserWork();}else{if(1==b)parserEnd();b=0;if(""!=h)print h;h=$0;bl=getLen();if(bl>0){parserInit(bl);b=1;}}} END{if(1==b)parserEnd();if(""!=h)print h;}' | ../parseTcp.awk 10.70.66.120.80 > ../interaction.txt 示例: sql: 11:12:29.422380 client 10.64.12.14.59573_3961647293 connect take(us): 567 slient_before(us): 0 result: ok 11:12:29.423124 client 10.64.12.14.59573_3961647293 round-trip 1 slient_before(us): 177 total take(us): 54042 req_time(us): 0 req_bytes: 211 server_process(us): 54042 response_time(us): 0 response_bytes: 8 11:12:29.478165 client 10.64.12.14.59573_3961647293 round-trip 2 slient_before(us): 999 total take(us): 2021 req_time(us): 1409 req_bytes: 363 server_process(us): 612 response_time(us): 0 response_bytes: 127 11:12:29.480257 client 10.64.12.14.59573_3961647293 round-trip 3 slient_before(us): 71 total take(us): 1420 req_time(us): 0 req_bytes: 33 server_process(us): 1420 response_time(us): 0 response_bytes: 188 11:12:29.481825 client 10.64.12.14.59573_3961647293 round-trip 4 slient_before(us): 148 total take(us): 1036 req_time(us): 0 req_bytes: 779 server_process(us): 1036 response_time(us): 0 response_bytes: 
834 11:12:29.483414 client 10.64.12.14.59573_3961647293 round-trip 5 slient_before(us): 553 total take(us): 3983 req_time(us): 810 req_bytes: 184 server_process(us): 3173 response_time(us): 0 response_bytes: 73 11:12:29.487836 client 10.64.12.14.59573_3961647293 round-trip 6 slient_before(us): 439 total take(us): 4891 req_time(us): 0 req_bytes: 574 server_process(us): 4891 response_time(us): 0 response_bytes: 1180 11:12:29.492988 client 10.64.12.14.59573_3961647293 round-trip 7 slient_before(us): 261 total take(us): 3220 req_time(us): 0 req_bytes: 505 server_process(us): 3220 response_time(us): 0 response_bytes: 358 11:12:29.496568 client 10.64.12.14.59573_3961647293 round-trip 8 slient_before(us): 360 total take(us): 2276 req_time(us): 0 req_bytes: 245 server_process(us): 2276 response_time(us): 0 response_bytes: 393 req_appflag: select.usrid,mailadd,mailflg,mobile,verifyflg,user_seq_id,is_oauth_type,lastlogin,pwd_strict.from.tab_users.where.mailadd.=:1--con_id_52d2c4 11:12:29.499283 client 10.64.12.14.59573_3961647293 round-trip 9 slient_before(us): 439 total take(us): 704 req_time(us): 0 req_bytes: 17 server_process(us): 704 response_time(us): 0 response_bytes: 208 11:12:29.500267 client 10.64.12.14.59573_3961647293 round-trip 10 slient_before(us): 280 total take(us): 766 req_time(us): 0 req_bytes: 245 server_process(us): 766 response_time(us): 0 response_bytes: 393 req_appflag: select.usrid,mailadd,mailflg,mobile,verifyflg,user_seq_id,is_oauth_type,lastlogin,pwd_strict.from.tab_users.where.mailadd.=:1--con_id_52d2c4 11:12:29.501945 client 10.64.12.14.59573_3961647293 round-trip 11 slient_before(us): 912 total take(us): 681 req_time(us): 0 req_bytes: 17 server_process(us): 681 response_time(us): 0 response_bytes: 208 11:12:29.503286 client 10.64.12.14.59573_3961647293 round-trip 12 slient_before(us): 660 total take(us): 4956 req_time(us): 0 req_bytes: 513 server_process(us): 4956 response_time(us): 0 response_bytes: 64 req_appflag: 
update.tab_users.tu.set.tu.lastlogin_third=tu.lastlogin,tu.lastlogin=to_date(:1,'yyyy-mm-dd.hh24:mi:ss'),tu.uct_time=to_date(:2,'yyyy-mm-dd.hh24:mi:ss'),tu.uct=:3,tu.auto_login_flag=:4,login_ip=:5.where.mailadd.=:6--con_id_52d2c4 11:12:29.510362 client 10.64.12.14.59573_3961647293 round-trip 13 slient_before(us): 2120 total take(us): 3591 req_time(us): 0 req_bytes: 515 server_process(us): 3591 response_time(us): 0 response_bytes: 63 req_appflag: update.tab_users.tu.set.tu.lastlogin_third=tu.lastlogin,tu.lastlogin=to_date(:1,'yyyy-mm-dd.hh24:mi:ss'),tu.uct_time=to_date(:2,'yyyy-mm-dd.hh24:mi:ss'),tu.uct=:3,tu.auto_login_flag=:4,login_ip=:5.where.mailadd.=:6--con_id_52d2c4 11:12:29.515085 client 10.64.12.14.59573_3961647293 round-trip 14 slient_before(us): 1132 total take(us): 3660 req_time(us): 0 req_bytes: 513 server_process(us): 3660 response_time(us): 0 response_bytes: 63 req_appflag: update.tab_users.tu.set.tu.lastlogin_third=tu.lastlogin,tu.lastlogin=to_date(:1,'yyyy-mm-dd.hh24:mi:ss'),tu.uct_time=to_date(:2,'yyyy-mm-dd.hh24:mi:ss'),tu.uct=:3,tu.auto_login_flag=:4,login_ip=:5.where.mailadd.=:6--con_id_52d2c4 11:12:29.518969 client 10.64.12.14.59573_3961647293 round-trip 15 slient_before(us): 224 total take(us): 724 req_time(us): 0 req_bytes: 245 server_process(us): 724 response_time(us): 0 response_bytes: 392 req_appflag: select.usrid,mailadd,mailflg,mobile,verifyflg,user_seq_id,is_oauth_type,lastlogin,pwd_strict.from.tab_users.where.mailadd.=:1--con_id_52d2c4 11:12:29.519737 client 10.64.12.14.59573_3961647293 round-trip 16 slient_before(us): 44 total take(us): 664 req_time(us): 0 req_bytes: 17 server_process(us): 664 response_time(us): 0 response_bytes: 208 11:12:29.522030 client 10.64.12.14.59573_3961647293 round-trip 17 slient_before(us): 1629 total take(us): 4037 req_time(us): 0 req_bytes: 513 server_process(us): 4037 response_time(us): 0 response_bytes: 63 req_appflag: 
update.tab_users.tu.set.tu.lastlogin_third=tu.lastlogin,tu.lastlogin=to_date(:1,'yyyy-mm-dd.hh24:mi:ss'),tu.uct_time=to_date(:2,'yyyy-mm-dd.hh24:mi:ss'),tu.uct=:3,tu.auto_login_flag=:4,login_ip=:5.where.mailadd.=:6--con_id_52d2c4 11:12:29.526610 client 10.64.12.14.59573_3961647293 round-trip 18 slient_before(us): 543 total take(us): 709 req_time(us): 0 req_bytes: 245 server_process(us): 709 response_time(us): 0 response_bytes: 392 req_appflag: select.usrid,mailadd,mailflg,mobile,verifyflg,user_seq_id,is_oauth_type,lastlogin,pwd_strict.from.tab_users.where.mailadd.=:1--con_id_52d2c4 11:12:29.527617 client 10.64.12.14.59573_3961647293 round-trip 19 slient_before(us): 298 total take(us): 1490 req_time(us): 0 req_bytes: 17 server_process(us): 1490 response_time(us): 0 response_bytes: 208 http: 14:00:53.123517 client 172.20.154.45.3752_2455096098 connect take(us): 3291 slient_before(us): 72266270 result: ok 14:00:53.132710 client 172.20.154.45.3752_2455096098 round-trip 1 slient_before(us): 5902 total take(us): 5927 req_time(us): 76 req_bytes: 1635 server_process(us): 5402 response_time(us): 449 response_bytes: 6587 req_appflag: GET./static/api/js/share.js?v=89860593.js?cdnversion=403517 response_appflag: 200.OK 14:00:53.263446 client 172.20.154.45.3752_2455096098 round-trip 2 slient_before(us): 124809 total take(us): 17767 req_time(us): 88 req_bytes: 1620 server_process(us): 17679 response_time(us): 0 response_bytes: 946 req_appflag: GET./static/js/shell_v2.js?cdnversion=403519 response_appflag: 200.OK 14:00:53.472286 client 172.20.154.45.3752_2455096098 round-trip 3 slient_before(us): 191073 total take(us): 9319 req_time(us): 53 req_bytes: 1620 server_process(us): 8517 response_time(us): 749 response_bytes: 10361 req_appflag: GET./static/js/bds_s_v2.js?cdnversion=403519 response_appflag: 200.OK 14:00:54.587117 client 172.20.154.45.3752_2455096098 round-trip 4 slient_before(us): 1105512 total take(us): 13233 req_time(us): 92 req_bytes: 1618 server_process(us): 13113 
response_time(us): 28 response_bytes: 2777 req_appflag: GET./static/js/logger.js?cdnversion=403519 response_appflag: 200.OK 14:01:14.598513 client 172.20.154.45.3752_2455096098 close take(us): 589 slient_before(us): 19998163 close_bytes: 0 direction: server->client result: full_close https: 16:53:29.658744 client 10.64.12.16.32769_169947140 connect take(us): 89 slient_before(us): 0 result: ok 16:53:29.658857 client 10.64.12.16.32769_169947140 round-trip 1 slient_before(us): 24 total take(us): 6865 req_time(us): 0 req_bytes: 117 server_process(us): 1535 response_time(us): 5330 response_bytes: 5140 16:53:29.670379 client 10.64.12.16.32769_169947140 round-trip 2 slient_before(us): 4657 total take(us): 6071 req_time(us): 0 req_bytes: 198 server_process(us): 6071 response_time(us): 0 response_bytes: 59 16:53:29.676530 client 10.64.12.16.32769_169947140 round-trip 3 slient_before(us): 80 total take(us): 983303 req_time(us): 0 req_bytes: 133 server_process(us): 983303 response_time(us): 0 response_bytes: 842 16:53:30.659883 client 10.64.12.16.32769_169947140 close take(us): 4412 slient_before(us): 50 close_bytes: 37 direction: server->client result: full_close 16:54:39.540752 client 10.64.12.16.32769_239829149 connect take(us): 91 slient_before(us): 68876457 result: ok 16:54:39.540866 client 10.64.12.16.32769_239829149 round-trip 1 slient_before(us): 23 total take(us): 7459 req_time(us): 0 req_bytes: 117 server_process(us): 2127 response_time(us): 5332 response_bytes: 5140 16:54:39.552971 client 10.64.12.16.32769_239829149 round-trip 2 slient_before(us): 4646 total take(us): 5452 req_time(us): 0 req_bytes: 198 server_process(us): 5452 response_time(us): 0 response_bytes: 59 16:54:39.558487 client 10.64.12.16.32769_239829149 round-trip 3 slient_before(us): 64 total take(us): 965470 req_time(us): 0 req_bytes: 133 server_process(us): 965470 response_time(us): 0 response_bytes: 842 16:54:40.524004 client 10.64.12.16.32769_239829149 close take(us): 115 slient_before(us): 47 
close_bytes: 37 direction: server->client result: full_close
6.统计分析
统计分析用于掌握一批数据包反映的整体交互规律和指标数据。配合使用上一节交互分析中获得的业务关键字过滤,还可以分析具有某些特定特征的交互指标数据。
分析多行模式输出的交互过程的请求响应对的平均响应时间(含请求响应的网络传输时间,但不含socket建立阶段):
cat interaction.txt | grep round-trip | awk '{sum+=($10/1000000)} END {print "Lines = "NR", Sum = "sum"(seconds), Average = "sum/NR"(seconds)"}' 示例: Lines = 678796, Sum = 4234.81(seconds), Average = 0.00623871(seconds)分析多行模式输出的交互过程的请求响应对的服务器平均处理时间:
cat interaction.txt | grep round-trip | awk '{sum+=($16/1000000)} END {print "Lines = "NR", Sum = "sum"(seconds), Average = "sum/NR"(seconds)"}' 示例: Lines = 678796, Sum = 1612.32(seconds), Average = 0.00237526(seconds)分析多行模式输出的交互过程的请求响应对的平均请求大小:
cat interaction.txt | grep round-trip | awk '{sum+=$14} END {print "Lines = "NR", Sum = "sum"(bytes), Average = "sum/NR"(bytes)"}'分析多行模式输出的交互过程的请求响应对的平均响应大小:
cat interaction.txt | grep round-trip | awk '{sum+=$20} END {print "Lines = "NR", Sum = "sum"(bytes), Average = "sum/NR"(bytes)"}'分析某多行模式输出的https短连接交互过程的ssl握手阶段的平均处理时间:
cat interaction.txt | grep round-trip | awk '$5==1 {start=$1;r1_time=$10} $5==2 {slient=$7;r2_time=$10;print start, r1_time+slient+r2_time}' | awk '{sum+=($2/1000000)} END {print "Lines = "NR", Sum = "sum"(seconds), Average = "sum/NR"(seconds)"}' 示例: Lines = 284861, Sum = 12216.7(seconds), Average = 0.0428866(seconds)分析某多行模式输出的https短连接交互过程的ssl握手阶段的tps及每秒平均处理时间(可导入excel作图):
cat interaction.txt | grep round-trip | awk '$5==1 {start=$1;r1_time=$10} $5==2 {slient=$7;r2_time=$10;print start, r1_time+slient+r2_time}' | sort | awk '{print $2,$1}' | awk -F '.' '{print $1}' | awk 'BEGIN{sum=0;count=0;time=""} {if($2!=time){if(count>0) print time,count,sum/count/1000000;sum=$1;count=1;time=$2}else{sum+=$1;count++}} END{if(count>0) print time,count,sum/count/1000000}'上例做图如下:
分析某多行模式输出的https短连接交互过程的socket建立完成至服务器accept此连接经历的平均等待时间(可导入excel作图):
cat interaction.txt | grep "round-trip 1 " | awk '{print $1,$7}' | sort | awk '{print $2,$1}' | awk -F '.' '{print $1}' | awk 'BEGIN{sum=0;count=0;time=""} {if($2!=time){if(count>0) printf("%s %s %.6f\n", time,count,sum/count/1000000);sum=$1;count=1;time=$2}else{sum+=$1;count++}} END{if(count>0) printf("%s %s %.6f\n", time,count,sum/count/1000000)}'分析某多行模式输出含有包体特征数据的oracle长连接交互过程中select语句及update语句的平均响应时间:
cat interaction.txt | grep select | awk '{sum+=($10/1000000)} END {print "Lines = "NR", Sum = "sum"(seconds), Average = "sum/NR"(seconds)"}' cat interaction.txt | grep update | awk '{sum+=($10/1000000)} END {print "Lines = "NR", Sum = "sum"(seconds), Average = "sum/NR"(seconds)"}'
附录:
parseTcp.awk源码(文件需有可执行权限):
#!/bin/awk -f
#
# parseTcp.awk -- reconstruct TCP request/response interactions from tcpdump
# text output read on stdin.
#
# Usage:
#   cat pkg.txt | ./parseTcp.awk <server ip.port> [<multi_line 0|1> [<client ip.port>]]
#
# Input lines understood by the main rule:
#   * tcpdump packet header lines ($1 = HH:MM:SS.usec, $3 = source ip.port,
#     $5 = destination ip.port followed by ":").
#   * "app_flag <text>" lines: <text> is attached to the packet header line
#     that follows it.
#   * "newfile <name>" lines: flush pending output and reset the per-connection
#     state.
#
# Output: one record per connect / round-trip / close / reset event. With
# multi_line=1 every record is printed on its own line; otherwise the records
# of one connection are joined with " || ".
#
# State machine (variable "state"):
#   ""         nothing seen yet for the current input
#   "listen"   waiting for a SYN sent to the server
#   "syn_recv" client SYN seen, waiting for the server's SYN reply
#   "syn_ack"  server SYN reply seen, waiting for the next client packet
#   "request"  collecting client->server payload packets
#   "response" collecting server->client payload packets
#
# NOTE: each statement sits on its own line on purpose. The shebang must be
# the first line of the file, and a "#" comment runs to the end of its line,
# so the script must not be collapsed onto a single line.

BEGIN {
    if(ARGC<2)
    {
        printf("Usage:\n");
        printf(" ./parseTcp.awk <server ip port> [<print in multi line, 0 or 1, default to 1> [client ip port]]\n");
        printf("Example:\n");
        printf(" cat ./pkg.txt | ./parseTcp.awk 10.64.12.14.443 1 10.64.12.16.60990\n");
        exit;
    }
    server=ARGV[1];
    multi_line=1;
    if(ARGC>2) multi_line=ARGV[2];
    default_client="";
    if(ARGC>3) default_client=ARGV[3];
    # Both limits are compared against timeSub() results, i.e. microseconds.
    max_close_wait=240000000;
    max_connect_wait=max_close_wait;
    isn=0;
    need_newline=0;
    # Hide the command-line arguments from awk so they are not opened as
    # input files and the records are read from stdin instead.
    ARGC=1; # don't delete this line!
}

# Return 1 when the flags field of the current packet matches the regex
# "flag", else 0. The flags are in $7 when $6 is the literal word "Flags",
# otherwise in $6.
function checkflag(flag)
{
    pkgFlag=$6;
    if("Flags"==pkgFlag) pkgFlag=$7;
    if(pkgFlag~flag) return 1;
    else return 0
}

# Return the payload length of the current packet: the last field when the
# line ends in "length N", otherwise the number between parentheses in $7
# (e.g. "1:18(17)"), otherwise 0.
function getBodyLen()
{
    if("length"==$(NF-1)) return $NF;
    count=split($7,a,/[()]/);
    if(3==count) return a[2];
    else return 0;
}

# Return the sequence number of the current packet: $9 without its last
# character when $8 is "seq", otherwise the part of $7 before ":", otherwise 0.
function getTcpISN()
{
    if("seq"==$8) return substr($9,1,length($9)-1);
    count=split($7,a,":");
    if(2==count) return a[1];
    else return 0;
}

# Return end-start in microseconds for two "HH:MM:SS.uuuuuu" timestamps.
# The timestamps carry no date, so the result is negative when "end" is
# earlier in the day than "start".
function timeSub(start,end)
{
    split(substr(start,1,8),sA1,":");
    s1=sA1[1]*3600+sA1[2]*60+sA1[3];
    us1=substr(start,10);
    split(substr(end,1,8),sA2,":");
    s2=sA2[1]*3600+sA2[2]*60+sA2[3];
    us2=substr(end,10);
    return s2*1000000+us2-s1*1000000-us1;
}

# Print the "connect" record. A failed connect (reset/timeout) terminates the
# output line immediately; a successful one leaves the line open so that
# round-trip records can follow.
function printConnectInfo()
{
    if(1==connect_reset) connect_result="reset_by_server";
    else if(1==connect_timeout) connect_result="timeout";
    else connect_result="ok";
    printf("%s client %s_%s connect take(us): %s slient_before(us): %s result: %s",connect_start, client, isn, timeSub(connect_start,$1), slient_time, connect_result);
    need_newline=1;
    if("ok"!=connect_result)
    {
        printf("\n");
        need_newline=0;
    }
}

# Print one "round-trip" record (request plus response), preceded by the
# record separator selected by multi_line.
function printRoundInfo()
{
    if(1==multi_line){printf("\n")}else{printf(" || ")}
    printf("%s client %s_%s round-trip %d slient_before(us): %s total take(us): %s req_time(us): %s req_bytes: %s server_process(us): %s response_time(us): %s response_bytes: %s",req_start, client, isn, round, slient_time, timeSub(req_start,response_end), timeSub(req_start,req_end), req_len, server_process, timeSub(response_start,response_end), response_len);
    if(""!=req_appflag) printf(" req_appflag: %s",req_appflag);
    if(""!=response_appflag) printf(" response_appflag: %s",response_appflag);
    need_newline=1;
}

# Print the "close" record and terminate the output line.
function printCloseInfo()
{
    if(1==server_close) direction="server->client";
    else direction="client->server";
    if(1==close_timeout) close_result="close_timeout";
    else if(2==client_close || 2==server_close) close_result="full_close";
    else close_result="half_close";
    if(1==multi_line){printf("\n")}else{printf(" || ")}
    printf("%s client %s_%s close take(us): %s slient_before(us): %s close_bytes: %s direction: %s result: %s\n", close_start, client, isn, timeSub(close_start,close_end), close_slient_time, close_bytes, direction, close_result);
    need_newline=0;
}

# Print the "reset" record for the current packet and terminate the output line.
function printResetInfo()
{
    if(1==multi_line){printf("\n")}else{printf(" || ")};
    if($3==server) resetPeer="server";
    else resetPeer="client";
    printf("%s client %s_%s reset connect by %s\n", $1, client, isn, resetPeer);
    need_newline=0;
}

# A SYN towards the server: remember the client, its sequence number and the
# connect start time, and reset the close tracking.
function onNewConnect()
{
    client=$3;
    isn=getTcpISN();
    slient_time=timeSub(slient_start,$1);
    connect_start=$1;
    connect_timeout=0;
    connect_reset=0;
    prepareClose();
}

# Reset the round-trip counters before the first request of a connection.
function onFirstRoundTrip()
{
    req_start="";
    round=0;
}

# First payload packet of a new request.
function onNewRequest()
{
    req_start=$1;
    req_end=$1;
    req_len=bodyLen;
    round++;
    slient_time=timeSub(slient_start,$1);
    process_start=$1;
    req_appflag=last_appflag;
}

# Further payload packet belonging to the current request.
function mergeRequest()
{
    req_end=$1;
    req_len+=bodyLen;
    process_start=$1;
    if(""!=last_appflag) req_appflag=last_appflag;
}

# First payload packet of the response to the current request.
function onNewResponse()
{
    response_start=$1;
    response_end=$1;
    response_len=bodyLen;
    server_process=timeSub(req_end,$1);
    slient_start=$1;
    response_appflag=last_appflag;
}

# Further payload packet belonging to the current response.
function mergeResponse()
{
    response_end=$1;
    response_len+=bodyLen;
    slient_start=$1;
    if(""!=last_appflag) response_appflag=last_appflag;
}

# Reset all close-tracking variables.
function prepareClose()
{
    close_start="";
    close_end="";
    close_bytes=0;
    close_timeout=0;
    client_close=0;
    server_close=0;
    close_slient_time=0;
}

# Track the close handshake for the current packet.
# Returns:
#   0  no close in progress and this packet does not start one
#   1  close in progress (started by this packet or still waiting)
#   2  the opposite side sent its FIN as well (full close)
#   3  the close has been pending for max_close_wait or longer
function checkCloseState()
{
    if(""==close_start)
    {
        if(1==checkflag("F") && ($3==client || $5==client":"))
        {
            close_start=$1;
            close_end=$1;
            if($3==server) server_close=1;
            else client_close=1;
            close_slient_time=timeSub(slient_start,$1);
            close_bytes+=bodyLen;
            return 1;
        }
        else return 0;
    }
    if(timeSub(close_start,$1)>=max_close_wait)
    {
        close_end=$1;
        close_timeout=1;
        return 3;
    }
    if($3==client || $5==client":")
    {
        close_bytes+=bodyLen;
        close_end=$1;
        if(1==checkflag("F"))
        {
            if($3==server && 1==client_close)
            {
                server_close=2;
                return 2;
            }
            else if($3==client && 1==server_close)
            {
                client_close=2;
                return 2;
            }
        }
    }
    return 1;
}

# Main rule: runs once per input record.
{
    last_appflag="";
    if("app_flag"==$1)
    {
        # Keep the text after "app_flag" and move on to the packet line it
        # annotates.
        last_appflag=substr($0,length($1) + length(FS) + 1);
        getline;
    }
    if("newfile"==$1)
    {
        # Flush whatever is pending, then start from scratch.
        if("response"==state) printRoundInfo();
        if(""!=close_start) printCloseInfo();
        if(1==need_newline) printf("\n");
        need_newline=0;
        isn=0;
        slient_start="";
        state="";
        next
    }
    if(""==slient_start) slient_start=$1;
    # Skip packets that do not involve the server (or the selected client).
    if($3!=server && $5!=server":") next;
    if(""!=default_client && $3!=default_client && $5!=default_client":") next;
    bodyLen=getBodyLen();
    if(""==state)
    {
        if($5!=server":") next;
        if(1==checkflag("S")) state="listen";
        else if(bodyLen>0)
        {
            # Capture started in the middle of a connection: treat the first
            # client payload packet as the start of a request.
            client=$3;
            onFirstRoundTrip();
            prepareClose();
            state="request";
        }
        else next;
    }
    if("listen"==state)
    {
        if($5==server":" && 1==checkflag("S"))
        {
            onNewConnect();
            slient_start=$1;
            state="syn_recv";
        }
    }
    else if("syn_recv"==state)
    {
        if(timeSub(connect_start,$1)>=max_connect_wait)
        {
            connect_timeout=1;
            printConnectInfo();
            slient_start=$1;
            state="listen";
        }
        else if($5==client":")
        {
            if(1==checkflag("S")) state="syn_ack";
            else if(1==checkflag("R"))
            {
                connect_reset=1;
                printConnectInfo();
                slient_start=$1;
                state="listen";
            }
        }
        else if($5==server":" && 1==checkflag("S"))
        {
            # Another SYN towards the server: restart the connect tracking.
            onNewConnect();
            slient_start=$1;
            state="syn_recv";
        }
    }
    else if("syn_ack"==state)
    {
        if(timeSub(connect_start,$1)>=max_connect_wait)
        {
            connect_timeout=1;
            printConnectInfo();
            slient_start=$1;
            state="";
        }
        else if(0==checkflag("R") && 0==checkflag("S") && $3==client)
        {
            printConnectInfo();
            onFirstRoundTrip();
            slient_start=$1;
            state="request";
        }
        else if(1==checkflag("R") && ($3==client || $5==client":"))
        {
            connect_reset=1;
            printConnectInfo();
            slient_start=$1;
            state="listen";
        }
    }
    else if("request"==state)
    {
        if(bodyLen>0)
        {
            if($3==client)
            {
                if(""==req_start) onNewRequest();
                else mergeRequest();
                if(checkCloseState()>1)
                {
                    printCloseInfo();
                    close_start="";
                    state="listen";
                }
                slient_start=$1;
            }
            else if($5==client":")
            {
                onNewResponse();
                if(checkCloseState()>1)
                {
                    printRoundInfo();
                    printCloseInfo();
                    close_start="";
                    state="listen";
                }
                else state="response";
                slient_start=$1;
            }
        }
        else if(checkCloseState()>1)
        {
            printCloseInfo();
            close_start="";
            slient_start=$1;
            state="listen";
        }
        else if(1==checkflag("R") && ($3==client || $5==client":"))
        {
            printResetInfo();
            slient_start=$1;
            state="listen";
        }
    }
    else if("response"==state)
    {
        if(bodyLen>0)
        {
            if($5==client":")
            {
                mergeResponse();
                if(checkCloseState()>1)
                {
                    printRoundInfo();
                    printCloseInfo();
                    close_start="";
                    state="listen";
                }
                slient_start=$1;
            }
            else if($3==client)
            {
                # Client payload after a response: the previous round-trip is
                # complete and a new request begins.
                printRoundInfo();
                if(checkCloseState()>1)
                {
                    printCloseInfo();
                    close_start="";
                    state="listen";
                }
                else
                {
                    onNewRequest();
                    state="request";
                }
                slient_start=$1;
            }
        }
        else if(checkCloseState()>1)
        {
            printRoundInfo();
            printCloseInfo();
            close_start="";
            slient_start=$1;
            state="listen";
        }
        else if(1==checkflag("R") && ($3==client || $5==client":"))
        {
            printRoundInfo();
            printResetInfo();
            slient_start=$1;
            state="listen";
        }
    }
}

# Flush the round-trip / close record that is still pending at end of input.
END {
    if("response"==state) printRoundInfo();
    if(""!=close_start) printCloseInfo();
    if(1==need_newline) printf("\n");
}
fixLenParser.awk源码(这是一个插件,不可独立运行):
# Fixed-length payload tagger (plugin: defines functions only).
# The host program that calls these functions is not part of this file;
# presumably it calls parserInit() once per packet, parserWork() for every
# hex-dump line of that packet and parserEnd() when the packet is complete
# -- TODO confirm against the host script.
#
# Globals shared between the functions:
#   bin              hex digits collected so far (fields 2..NF-1 of each line)
#   text             last field of each line, concatenated (one char per byte)
#   pkgHeaderBinLen  header length in hex digits once it could be determined
#   ignore           1 = nothing more to do for the current packet
#   maxLen           number of payload characters to emit

# Returns the combined link-layer + IP + TCP header length in hex digits,
# 0 when not enough data has been collected yet, or -1 when the frame is not
# recognised as IPv4 behind a 14- or 16-byte link-layer header.
function getHeaderBinLen(    hexDigits, ipWords, tcpWords, tcpLenPos) {
    # check min length of (Ethernet + IP + TCP) packet header
    if (length(bin) < 108)
        return 0;

    # Ethertype 0x0800 directly followed by the IP version nibble "4" tells
    # where the IPv4 header starts.
    if ("08004" == substr(bin, 25, 5))
        ethernetHeaderLen = 28;
    else if ("08004" == substr(bin, 29, 5))
        ethernetHeaderLen = 32;
    else
        return -1;    # unknown ethernet frame packet

    # The header-length fields are single HEX digits counting 32-bit words
    # (8 hex digits each). Decode them explicitly: using the character as a
    # number turns "a".."f" into 0 and breaks every header of 40+ bytes.
    hexDigits = "0123456789abcdef";

    ipWords = index(hexDigits, tolower(substr(bin, ethernetHeaderLen + 2, 1))) - 1;
    if (ipWords < 0)
        ipWords = 0;
    ipHeaderLen = 8 * ipWords;

    # With IP options the TCP data-offset digit may not have arrived yet.
    tcpLenPos = ethernetHeaderLen + ipHeaderLen + 25;
    if (length(bin) < tcpLenPos)
        return 0;

    tcpWords = index(hexDigits, tolower(substr(bin, tcpLenPos, 1))) - 1;
    if (tcpWords < 0)
        tcpWords = 0;
    tcpHeaderLen = 8 * tcpWords;

    return ethernetHeaderLen + ipHeaderLen + tcpHeaderLen;
}

# Resets the per-packet state. pkgBodyLen is accepted but not used here.
function parserInit(pkgBodyLen) {
    maxLen = 100;
    bin = "";
    text = "";
    pkgHeaderBinLen = 0;
    ignore = 0;
}

# Consumes the current input line ($2..$(NF-1) hex groups, $NF text column)
# and prints "app_flag <payload prefix>" as soon as maxLen payload characters
# are available.
function parserWork() {
    if (1 == ignore)
        return;

    for (field = 2; field < NF; field++)
        bin = bin "" $field;
    text = text "" $NF;

    if (pkgHeaderBinLen <= 0) {
        pkgHeaderBinLen = getHeaderBinLen();
        #print "pkgHeaderBinLen:", pkgHeaderBinLen
        if (-1 == pkgHeaderBinLen)
            ignore = 1;
        if (pkgHeaderBinLen <= 0)
            return;
    }

    # text holds one character per byte, hence header length / 2.
    if (length(text) >= pkgHeaderBinLen / 2 + maxLen) {
        print "app_flag", substr(text, pkgHeaderBinLen / 2 + 1, maxLen);
        ignore = 1;
        return;
    }
}

# Emits whatever payload was collected when the packet ended before maxLen
# characters were seen.
function parserEnd() {
    if (1 == ignore)
        return;
    print "app_flag", substr(text, pkgHeaderBinLen / 2 + 1);
}
lineParser.awk源码(这是一个插件,不可独立运行):
# First-line payload tagger (plugin: defines functions only).
# The host program that calls these functions is not part of this file;
# presumably it calls parserInit() once per packet, parserWork() for every
# hex-dump line of that packet and parserEnd() when the packet is complete
# -- TODO confirm against the host script.
#
# Globals shared between the functions:
#   bin, text               hex digits / text column collected so far
#   pkgHeaderBinLen         header length in hex digits once known
#   lineEndBin              hex form of the line terminator ("0a")
#   newLineStartInBodyBin0  0-based hex offset in the payload of the next line
#   maxline, lineCount      number of lines wanted / found so far
#   output                  payload text collected for the app_flag record
#   ignore                  1 = nothing more to do for the current packet

# Returns the combined link-layer + IP + TCP header length in hex digits,
# 0 when not enough data has been collected yet, or -1 when the frame is not
# recognised as IPv4 behind a 14- or 16-byte link-layer header.
function getHeaderBinLen(    hexDigits, ipWords, tcpWords, tcpLenPos) {
    # check min length of (Ethernet + IP + TCP) packet header
    if (length(bin) < 108)
        return 0;

    if ("08004" == substr(bin, 25, 5))
        ethernetHeaderLen = 28;
    else if ("08004" == substr(bin, 29, 5))
        ethernetHeaderLen = 32;
    else
        return -1;    # unknown ethernet frame packet

    # The header-length fields are single HEX digits counting 32-bit words
    # (8 hex digits each). Decode them explicitly: using the character as a
    # number turns "a".."f" into 0 and breaks every header of 40+ bytes.
    hexDigits = "0123456789abcdef";

    ipWords = index(hexDigits, tolower(substr(bin, ethernetHeaderLen + 2, 1))) - 1;
    if (ipWords < 0)
        ipWords = 0;
    ipHeaderLen = 8 * ipWords;

    # With IP options the TCP data-offset digit may not have arrived yet.
    tcpLenPos = ethernetHeaderLen + ipHeaderLen + 25;
    if (length(bin) < tcpLenPos)
        return 0;

    tcpWords = index(hexDigits, tolower(substr(bin, tcpLenPos, 1))) - 1;
    if (tcpWords < 0)
        tcpWords = 0;
    tcpHeaderLen = 8 * tcpWords;

    return ethernetHeaderLen + ipHeaderLen + tcpHeaderLen;
}

# Returns the 1-based position of the first occurrence of pattern in hexStr
# that starts on a byte boundary (odd position), or 0 if there is none.
# hexStr must itself start on a byte boundary.
function alignedHexIndex(hexStr, pattern,    skipped, hit) {
    skipped = 0;
    while ((hit = index(substr(hexStr, skipped + 1), pattern)) > 0) {
        skipped += hit;
        if (skipped % 2 == 1)
            return skipped;
    }
    return 0;
}

# Resets the per-packet state; payloads shorter than 2 bytes are skipped.
function parserInit(pkgBodyLen) {
    maxline = 1;
    bin = "";
    text = "";
    pkgHeaderBinLen = 0;
    lineEndBin = "0a";
    newLineStartInBodyBin0 = 0;
    lineCount = 0;
    output = "";
    if (pkgBodyLen < 2)
        ignore = 1;
    else
        ignore = 0;
}

# Consumes the current input line ($2..$(NF-1) hex groups, $NF text column)
# and prints "app_flag <text>" once maxline payload lines have been seen.
function parserWork() {
    if (1 == ignore)
        return;

    for (field = 2; field < NF; field++)
        bin = bin "" $field;
    text = text "" $NF;

    if (pkgHeaderBinLen <= 0) {
        pkgHeaderBinLen = getHeaderBinLen();
        #print "pkgHeaderBinLen:", pkgHeaderBinLen
        if (-1 == pkgHeaderBinLen)
            ignore = 1;
        if (pkgHeaderBinLen <= 0)
            return;
    }

    while (1) {
        pkgBodyBin = substr(bin, pkgHeaderBinLen + newLineStartInBodyBin0 + 1);
        #print "debug:pkgBodyBin", pkgBodyBin;

        # Only accept a terminator that starts on a byte boundary; a plain
        # index() also hits the digits "0a" straddling two bytes (e.g. 80 a6).
        lineEndPos = alignedHexIndex(pkgBodyBin, lineEndBin);
        if (0 == lineEndPos)
            return;

        # lineEndPos is odd, so int(lineEndPos / 2) is the number of bytes in
        # front of the terminator; +1 keeps the terminator character itself.
        output = output "" substr(text, (pkgHeaderBinLen + newLineStartInBodyBin0) / 2 + 1, int(lineEndPos / 2) + 1);
        #print "debug:output", output;

        lineCount++;
        if (lineCount >= maxline) {
            print "app_flag", output;
            ignore = 1;
            return;
        }
        # continue right behind the terminator byte
        newLineStartInBodyBin0 += lineEndPos + 1;
    }
}

# Emits the unterminated remainder when the packet ended before maxline
# lines were found.
function parserEnd() {
    if (1 == ignore)
        return;
    output = output "" substr(text, (pkgHeaderBinLen + newLineStartInBodyBin0) / 2 + 1);
    print "app_flag", output;
}
oracleSqlParser.awk源码(这是一个插件,不可独立运行):
# Oracle SQL statement tagger (plugin: defines functions only).
# The host program that calls these functions is not part of this file;
# presumably it calls parserInit() once per packet, parserWork() for every
# hex-dump line of that packet and parserEnd() when the packet is complete
# -- TODO confirm against the host script.
#
# Globals shared between the functions:
#   bin, text           hex digits / text column collected so far
#   pkgHeaderBinLen     header length in hex digits once known
#   sqlStartInBodyBin0  hex offset in the payload where the SQL text is expected
#   slqEndBin           hex digits that mark the end of the SQL text
#   foundSqlStart       1 once the payload was recognised as a SQL statement
#   sql                 extracted statement text
#   ignore              1 = nothing more to do for the current packet

# Returns the combined link-layer + IP + TCP header length in hex digits,
# 0 when not enough data has been collected yet, or -1 when the frame is not
# recognised as IPv4 behind a 14- or 16-byte link-layer header.
function getHeaderBinLen(    hexDigits, ipWords, tcpWords, tcpLenPos) {
    # check min length of (Ethernet + IP + TCP) packet header
    if (length(bin) < 108)
        return 0;

    if ("08004" == substr(bin, 25, 5))
        ethernetHeaderLen = 28;
    else if ("08004" == substr(bin, 29, 5))
        ethernetHeaderLen = 32;
    else
        return -1;    # unknown ethernet frame packet

    # The header-length fields are single HEX digits counting 32-bit words
    # (8 hex digits each). Decode them explicitly: using the character as a
    # number turns "a".."f" into 0 and breaks every header of 40+ bytes.
    hexDigits = "0123456789abcdef";

    ipWords = index(hexDigits, tolower(substr(bin, ethernetHeaderLen + 2, 1))) - 1;
    if (ipWords < 0)
        ipWords = 0;
    ipHeaderLen = 8 * ipWords;

    # With IP options the TCP data-offset digit may not have arrived yet.
    tcpLenPos = ethernetHeaderLen + ipHeaderLen + 25;
    if (length(bin) < tcpLenPos)
        return 0;

    tcpWords = index(hexDigits, tolower(substr(bin, tcpLenPos, 1))) - 1;
    if (tcpWords < 0)
        tcpWords = 0;
    tcpHeaderLen = 8 * tcpWords;

    return ethernetHeaderLen + ipHeaderLen + tcpHeaderLen;
}

# Resets the per-packet state; payloads too short to hold a 6-character SQL
# keyword at the expected offset are skipped.
function parserInit(pkgBodyLen) {
    bin = "";
    text = "";
    pkgHeaderBinLen = 0;
    sqlStartInBodyBin0 = 100;
    slqEndBin = "01010";
    sql = "";
    foundSqlStart = 0;
    if (pkgBodyLen < sqlStartInBodyBin0 / 2 + 6)
        ignore = 1;
    else
        ignore = 0;
}

# Consumes the current input line ($2..$(NF-1) hex groups, $NF text column).
# First checks for a SQL keyword at the expected payload offset; on later
# calls looks for the end marker and prints "app_flag <sql>" when found.
function parserWork() {
    if (1 == ignore)
        return;

    for (field = 2; field < NF; field++)
        bin = bin "" $field;
    text = text "" $NF;

    if (pkgHeaderBinLen <= 0) {
        pkgHeaderBinLen = getHeaderBinLen();
        #print "pkgHeaderBinLen:", pkgHeaderBinLen
        if (-1 == pkgHeaderBinLen)
            ignore = 1;
        if (pkgHeaderBinLen <= 0)
            return;
    }

    if (0 == foundSqlStart) {
        if (length(text) >= (pkgHeaderBinLen + sqlStartInBodyBin0) / 2 + 6) {
            sqlCmd = substr(text, (pkgHeaderBinLen + sqlStartInBodyBin0) / 2 + 1, 6);
            if ("select" == sqlCmd || "update" == sqlCmd || "delete" == sqlCmd || "create" == sqlCmd)
                foundSqlStart = 1;
            else
                ignore = 1;
        }
        return;
    }

    # The search window deliberately keeps the original start position (one
    # hex digit in front of the SQL text); NOTE(review): whether the 5-digit
    # marker is meant to match across byte boundaries cannot be told from here.
    tmpBin = substr(bin, pkgHeaderBinLen + sqlStartInBodyBin0);
    sqlEndPos = index(tmpBin, slqEndBin);
    if (sqlEndPos > 0) {
        # Convert the hex position to a whole number of text characters.
        sql = substr(text, (pkgHeaderBinLen + sqlStartInBodyBin0) / 2 + 1, int((sqlEndPos - 1) / 2));
        print "app_flag", sql;
        ignore = 1;
    }
}

# Emits the statement collected so far when the packet ended before the end
# marker was seen.
function parserEnd() {
    if (1 == ignore)
        return;
    if (1 == foundSqlStart) {
        sql = substr(text, (pkgHeaderBinLen + sqlStartInBodyBin0) / 2 + 1);
        print "app_flag", sql;
    }
}
httpParser.awk源码(这是一个插件,不可独立运行):
# HTTP first-line tagger (plugin: defines functions only).
# The host program that calls these functions is not part of this file;
# presumably it calls parserInit() once per packet, parserWork() for every
# hex-dump line of that packet and parserEnd() when the packet is complete
# -- TODO confirm against the host script.
#
# The text column is expected to show every non-graphic byte (including the
# space character) as "."; the code below splits on "." for that reason.
#
# Globals shared between the functions:
#   bin, text          hex digits / text column collected so far
#   pkgHeaderBinLen    header length in hex digits once known
#   lineEndBin         hex form of CRLF ("0d0a")
#   spaceBin           hex form of a space ("20")
#   maybeHttpReq       request method once the payload looks like a request
#   maybeHttpResponse  1 once the payload looks like a response
#   ignore             1 = nothing more to do for the current packet

# Returns the combined link-layer + IP + TCP header length in hex digits,
# 0 when not enough data has been collected yet, or -1 when the frame is not
# recognised as IPv4 behind a 14- or 16-byte link-layer header.
function getHeaderBinLen(    hexDigits, ipWords, tcpWords, tcpLenPos) {
    # check min length of (Ethernet + IP + TCP) packet header
    if (length(bin) < 108)
        return 0;

    if ("08004" == substr(bin, 25, 5))
        ethernetHeaderLen = 28;
    else if ("08004" == substr(bin, 29, 5))
        ethernetHeaderLen = 32;
    else
        return -1;    # unknown ethernet frame packet

    # The header-length fields are single HEX digits counting 32-bit words
    # (8 hex digits each). Decode them explicitly: using the character as a
    # number turns "a".."f" into 0 and breaks every header of 40+ bytes.
    hexDigits = "0123456789abcdef";

    ipWords = index(hexDigits, tolower(substr(bin, ethernetHeaderLen + 2, 1))) - 1;
    if (ipWords < 0)
        ipWords = 0;
    ipHeaderLen = 8 * ipWords;

    # With IP options the TCP data-offset digit may not have arrived yet.
    tcpLenPos = ethernetHeaderLen + ipHeaderLen + 25;
    if (length(bin) < tcpLenPos)
        return 0;

    tcpWords = index(hexDigits, tolower(substr(bin, tcpLenPos, 1))) - 1;
    if (tcpWords < 0)
        tcpWords = 0;
    tcpHeaderLen = 8 * tcpWords;

    return ethernetHeaderLen + ipHeaderLen + tcpHeaderLen;
}

# Returns the 1-based position of the first occurrence of pattern in hexStr
# that starts on a byte boundary (odd position), or 0 if there is none.
# hexStr must itself start on a byte boundary.
function alignedHexIndex(hexStr, pattern,    skipped, hit) {
    skipped = 0;
    while ((hit = index(substr(hexStr, skipped + 1), pattern)) > 0) {
        skipped += hit;
        if (skipped % 2 == 1)
            return skipped;
    }
    return 0;
}

# Resets the per-packet state; payloads shorter than 16 bytes are skipped.
function parserInit(pkgBodyLen) {
    bin = "";
    text = "";
    pkgHeaderBinLen = 0;
    lineEndBin = "0d0a";
    spaceBin = "20";
    maybeHttpReq = "";
    maybeHttpResponse = 0;
    if (pkgBodyLen < 16)
        ignore = 1;
    else
        ignore = 0;
}

# Consumes the current input line ($2..$(NF-1) hex groups, $NF text column).
# Prints "app_flag <method>.<target>" for a request line, or
# "app_flag <status>.<reason>" for a status line, once the first CRLF is seen.
function parserWork() {
    if (1 == ignore)
        return;

    for (field = 2; field < NF; field++)
        bin = bin "" $field;
    text = text "" $NF;

    if (pkgHeaderBinLen <= 0) {
        pkgHeaderBinLen = getHeaderBinLen();
        #print "pkgHeaderBinLen:", pkgHeaderBinLen
        if (-1 == pkgHeaderBinLen)
            ignore = 1;
        if (pkgHeaderBinLen <= 0)
            return;
    }

    # Classify the payload from its first 9 characters (once available).
    if ("" == maybeHttpReq && 0 == maybeHttpResponse) {
        if (length(text) < pkgHeaderBinLen / 2 + 9)
            return;
        tmp_count = split(substr(text, pkgHeaderBinLen / 2 + 1, 9), words, ".");
        if (tmp_count <= 1) {
            ignore = 1;
            return;
        }
        if ("GET" == words[1] || "POST" == words[1] || "PUT" == words[1] || "DELETE" == words[1] || "OPTIONS" == words[1] || "HEAD" == words[1])
            maybeHttpReq = words[1];
        else if (words[1] ~ /^HTTP\//)
            maybeHttpResponse = 1;
        else {
            ignore = 1;
            return;
        }
    }

    pkgBodyBin = substr(bin, pkgHeaderBinLen + 1);
    # Only accept a CRLF that starts on a byte boundary; a plain index() also
    # hits the digits "0d0a" straddling bytes (e.g. 80 d0 a0) and would cut
    # the first line short.
    lineEndPos = alignedHexIndex(pkgBodyBin, lineEndBin);
    if (0 == lineEndPos)
        return;

    firstLineBin = substr(pkgBodyBin, 1, lineEndPos - 1);
    # lineEndPos is odd, so int(lineEndPos / 2) is the byte count before CRLF.
    firstLineText = substr(text, pkgHeaderBinLen / 2 + 1, int(lineEndPos / 2));
    #print "debug:pkgBodyBin", pkgBodyBin;
    #print "debug:firstLineBin", firstLineBin;
    #print "debug:firstLineText", firstLineText;

    if ("" != maybeHttpReq) {
        # exactly "<method> <target> <version>" and one ".HTTP/" separator
        if (3 == split(firstLineBin, a1, spaceBin) && 2 == split(firstLineText, a2, ".HTTP/"))
            print "app_flag", a2[1];
        ignore = 1;
    } else if (1 == maybeHttpResponse) {
        # a2[3] is the status code because "HTTP/x.y" itself contains a "."
        if (split(firstLineBin, a1, spaceBin) >= 3 && split(firstLineText, a2, ".") >= 4 && a2[3] + 0 >= 100)
            print "app_flag", substr(firstLineText, length(a1[1]) / 2 + 2);
        ignore = 1;
    }
}

# Emits the partial request line when the packet ended before a CRLF was seen.
function parserEnd() {
    if (1 == ignore)
        return;
    if ("" != maybeHttpReq)
        print "app_flag", substr(text, pkgHeaderBinLen / 2 + 1);
}