高级C代码的汇编分析

时间:2021-11-17 07:34:11

在windows上,常用的函数调用方式有:

Pascal方式,WINAPI(_stdcall)方式 和C方式(_cdecl)

_cdecl调用规则:

1,参数从右到左入堆栈

2,在函数返回后,调用者要负责清除堆栈

由于每个调用点之后都要附带清理堆栈的指令(例如 add esp,N),所以这种调用常会生成较大的可执行文件。

_stdcall又称为WINAPI调用方式,规则:

1,参数从右向左入堆栈

2,被调用的函数在返回前自行清理堆栈

所以这种调用会生成比cdecl小的代码

Pascal调用方式,主要用在WIN16函数库中,现在基本不用

规则:

1,参数从左向右入堆栈

2,被调用函数在返回前自行清理堆栈

此外,在Windows内核中还常见的有快速调用方式(_fastcall)

在C++编译的代码中有this call方式(_thiscall)

在32位Windows中,不管哪种方式,整数和指针类型的返回值都写在eax中(64位整数用edx:eax,浮点数用浮点寄存器ST(0)),外部从中获取返回值

_cdecl方式步骤

1,保存ebp

2,保存esp到ebp

3,在堆栈中腾出一个区域来保存局部变量

4,保存ebx,esi,edi到堆栈中,函数调用完后恢复

5,把局部变量区域初始化为0CCCCCCCCh,其中每个字节0CCh实际上是int 3指令的机器码,这是一个断点软中断

6,做函数里应该做的事情

7,恢复ebx,esi,edi,esp,ebp,最后返回

2:    int func(int a,int b)
3: {
00401010 push ebp
00401011 mov ebp,esp
00401013 sub esp,44h
00401016 push ebx
00401017 push esi
00401018 push edi
00401019 lea edi,[ebp-44h]
0040101C mov ecx,11h
00401021 mov eax,0CCCCCCCCh
00401026 rep stos dword ptr [edi]
4: int c = a + b;
00401028 mov eax,dword ptr [ebp+8]
0040102B add eax,dword ptr [ebp+0Ch]
0040102E mov dword ptr [ebp-4],eax
5: return c;
00401031 mov eax,dword ptr [ebp-4]
6: }
00401034 pop edi
00401035 pop esi
00401036 pop ebx
00401037 mov esp,ebp
00401039 pop ebp
0040103A ret

for循环的汇编代码分析:

6:        int i;
7: for(i = 0 ;i < 50 ; i ++)
0040B501 mov dword ptr [ebp-8],0
0040B508 jmp func+33h (0040b513)
0040B50A mov ecx,dword ptr [ebp-8]
0040B50D add ecx,1
0040B510 mov dword ptr [ebp-8],ecx
0040B513 cmp dword ptr [ebp-8],32h
0040B517 jge func+44h (0040b524)
8: c = c + i;
0040B519 mov edx,dword ptr [ebp-4]
0040B51C add edx,dword ptr [ebp-8]
0040B51F mov dword ptr [ebp-4],edx
0040B522 jmp func+2Ah (0040b50a)
9:
10: return c;
0040B524 mov eax,dword ptr [ebp-4]
11: }

从上面的汇编代码可以分析出,for循环就是cmp指令加上条件跳转(jge)和无条件跳转(jmp)指令

根据cmp的比较结果决定跳转到哪个位置继续执行代码

do...while循环分析

5:
6: int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: do {
9: c = c +i;
0040B508 mov ecx,dword ptr [ebp-4]
0040B50B add ecx,dword ptr [ebp-8]
0040B50E mov dword ptr [ebp-4],ecx
10: }while(c < 50);
0040B511 cmp dword ptr [ebp-4],32h
0040B515 jl func+28h (0040b508)
11:
12: return c;
0040B517 mov eax,dword ptr [ebp-4]
13: }
0040B51A pop edi
0040B51B pop esi
0040B51C pop ebx
0040B51D mov esp,ebp
0040B51F pop ebp
0040B520 ret

从上面代码可以看出

本质do...while循环和for差不多

while循环:

6:        int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: while(i < 50)
0040B508 cmp dword ptr [ebp-8],32h
0040B50C jge func+39h (0040b519)
9: {
10: c = c +i;
0040B50E mov ecx,dword ptr [ebp-4]
0040B511 add ecx,dword ptr [ebp-8]
0040B514 mov dword ptr [ebp-4],ecx
11: };
0040B517 jmp func+28h (0040b508)
12:
13: return c;
0040B519 mov eax,dword ptr [ebp-4]
14: }
0040B51C pop edi
0040B51D pop esi
0040B51E pop ebx
0040B51F mov esp,ebp
0040B521 pop ebp
0040B522 ret

if...else if...else语句分析

5:
6: int i = 0;
0040B501 mov dword ptr [ebp-8],0
7:
8: if(c>0 && c < 10)
0040B508 cmp dword ptr [ebp-4],0
0040B50C jle func+43h (0040b523)
0040B50E cmp dword ptr [ebp-4],0Ah
0040B512 jge func+43h (0040b523)
9: {
10: printf("c > 0");
0040B514 push offset string "c > 0" (0041ff5c)
0040B519 call printf (0040b780)
0040B51E add esp,4
11: }
12: else if(c>10 && c<00)
0040B521 jmp func+6Bh (0040b54b)
0040B523 cmp dword ptr [ebp-4],0Ah
0040B527 jle func+5Eh (0040b53e)
0040B529 cmp dword ptr [ebp-4],0
0040B52D jge func+5Eh (0040b53e)
13: {
14: printf("c>10 && c<100");
0040B52F push offset string "c>10 && c<100" (0041ff4c)
0040B534 call printf (0040b780)
0040B539 add esp,4
15: }
16: else
0040B53C jmp func+6Bh (0040b54b)
17: {
18: printf("c>10 && c < 100");
0040B53E push offset string "c>10 && c < 100" (0041ff3c)
0040B543 call printf (0040b780)
0040B548 add esp,4
19: }
20:
21: return c;
0040B54B mov eax,dword ptr [ebp-4]
22: }
0040B54E pop edi
0040B54F pop esi
0040B550 pop ebx
0040B551 add esp,48h
0040B554 cmp ebp,esp
0040B556 call __chkesp (0040b4a0)
0040B55B mov esp,ebp
0040B55D pop ebp
0040B55E ret

switch...case 代码分析

4:        int c = a + b;
0040B4F8 mov eax,dword ptr [ebp+8]
0040B4FB add eax,dword ptr [ebp+0Ch]
0040B4FE mov dword ptr [ebp-4],eax
5:
6: switch(c)
7: {
0040B501 mov ecx,dword ptr [ebp-4]
0040B504 mov dword ptr [ebp-8],ecx
0040B507 cmp dword ptr [ebp-8],0
0040B50B je func+35h (0040b515)
0040B50D cmp dword ptr [ebp-8],1
0040B511 je func+42h (0040b522)
0040B513 jmp func+51h (0040b531)
8: case 0:
9: printf("c>0");
0040B515 push offset string "c>0" (0041ff4c)
0040B51A call printf (0040b780)
0040B51F add esp,4
10: case 1:
11: printf("c>10 && c<100");
0040B522 push offset string "c>10 && c<100" (0041ff3c)
0040B527 call printf (0040b780)
0040B52C add esp,4
12: break;
0040B52F jmp func+5Eh (0040b53e)
13: default:
14: printf("c>10 && c<100");
0040B531 push offset string "c>10 && c<100" (0041ff3c)
0040B536 call printf (0040b780)
0040B53B add esp,4
15: }
16:
17: return c;
0040B53E mov eax,dword ptr [ebp-4]
18: }
0040B541 pop edi
0040B542 pop esi
0040B543 pop ebx
0040B544 add esp,48h
0040B547 cmp ebp,esp
0040B549 call __chkesp (0040b4a0)
0040B54E mov esp,ebp
0040B550 pop ebp
0040B551 ret

结构体分析

1:
2: typedef struct {
3: int a;
4: int b;
5: int c;
6: }mystruct;
7:
8: int func(int a,int b)
9: {
0040B800 push ebp
0040B801 mov ebp,esp
0040B803 sub esp,1D8h
0040B809 push ebx
0040B80A push esi
0040B80B push edi
0040B80C lea edi,[ebp-1D8h]
0040B812 mov ecx,76h
0040B817 mov eax,0CCCCCCCCh
0040B81C rep stos dword ptr [edi]
10:
11: unsigned char *buf[100];
12: mystruct *strs = (mystruct *)buf;
0040B81E lea eax,[ebp-190h]
0040B824 mov dword ptr [ebp-194h],eax
13: int i;
14: for(i=0; i<5; i++)
0040B82A mov dword ptr [ebp-198h],0
0040B834 jmp func+45h (0040b845)
0040B836 mov ecx,dword ptr [ebp-198h]
0040B83C add ecx,1
0040B83F mov dword ptr [ebp-198h],ecx
0040B845 cmp dword ptr [ebp-198h],5
0040B84C jge func+94h (0040b894)
15: {
16: strs[i].a=0;
0040B84E mov edx,dword ptr [ebp-198h]
0040B854 imul edx,edx,0Ch
0040B857 mov eax,dword ptr [ebp-194h]
0040B85D mov dword ptr [eax+edx],0
17: strs[i].b=1;
0040B864 mov ecx,dword ptr [ebp-198h]
0040B86A imul ecx,ecx,0Ch
0040B86D mov edx,dword ptr [ebp-194h]
0040B873 mov dword ptr [edx+ecx+4],1
18: strs[i].c=2;
0040B87B mov eax,dword ptr [ebp-198h]
0040B881 imul eax,eax,0Ch
0040B884 mov ecx,dword ptr [ebp-194h]
0040B88A mov dword ptr [ecx+eax+8],2
19: }
0040B892 jmp func+36h (0040b836)
20:
21: return 0;
0040B894 xor eax,eax
22: }
0040B896 pop edi
0040B897 pop esi
0040B898 pop ebx
0040B899 mov esp,ebp
0040B89B pop ebp
0040B89C ret

从上面不难看出,访问结构体数组时,先把数组基址存放到一个局部变量([ebp-194h])中

然后用下标乘以结构体大小(0Ch)算出每个结构体的偏移量,再加上各成员的固定偏移(0、4、8)对每个成员赋值

枚举,联合,结构结合分析:

1:    typedef enum {
2: ENUM_1 = 1,
3: ENUM_2 = 2,
4: ENUM_3,
5: ENUM_4
6: }myenum;
7:
8: typedef struct {
9: int a;
10: int b;
11: int c;
12: }mystruct;
13:
14: typedef union {
15: mystruct s;
16: myenum e[3];
17: }myunion;
18:
19: int func(int a,int b)
20: {
00401020 push ebp
00401021 mov ebp,esp
00401023 sub esp,0ACh
00401029 push ebx
0040102A push esi
0040102B push edi
0040102C lea edi,[ebp-0ACh]
00401032 mov ecx,2Bh
00401037 mov eax,0CCCCCCCCh
0040103C rep stos dword ptr [edi]
21: unsigned char buf[100] = {0};
0040103E mov byte ptr [ebp-64h],0
00401042 mov ecx,18h
00401047 xor eax,eax
00401049 lea edi,[ebp-63h]
0040104C rep stos dword ptr [edi]
0040104E stos word ptr [edi]
00401050 stos byte ptr [edi]
22: myunion *uns = (myunion *)buf;
00401051 lea eax,[ebp-64h]
00401054 mov dword ptr [ebp-68h],eax
23:
24: int i;
25:
26: for(i = 0; i < 5; i++)
00401057 mov dword ptr [ebp-6Ch],0
0040105E jmp func+49h (00401069)
00401060 mov ecx,dword ptr [ebp-6Ch]
00401063 add ecx,1
00401066 mov dword ptr [ebp-6Ch],ecx
00401069 cmp dword ptr [ebp-6Ch],5
0040106D jge func+83h (004010a3)
27: {
28: uns[i].s.a=0;
0040106F mov edx,dword ptr [ebp-6Ch]
00401072 imul edx,edx,0Ch
00401075 mov eax,dword ptr [ebp-68h]
00401078 mov dword ptr [eax+edx],0
29: uns[i].s.b = 1;
0040107F mov ecx,dword ptr [ebp-6Ch]
00401082 imul ecx,ecx,0Ch
00401085 mov edx,dword ptr [ebp-68h]
00401088 mov dword ptr [edx+ecx+4],1
30: uns[i].e[2] = ENUM_4;
00401090 mov eax,dword ptr [ebp-6Ch]
00401093 imul eax,eax,0Ch
00401096 mov ecx,dword ptr [ebp-68h]
00401099 mov dword ptr [ecx+eax+8],4
31: }
004010A1 jmp func+40h (00401060)
32:
33: return 0;
004010A3 xor eax,eax
34: }
004010A5 pop edi
004010A6 pop esi
004010A7 pop ebx
004010A8 mov esp,ebp
004010AA pop ebp
004010AB ret

我们发现这段代码和上面的汇编后代码基本一样,因此我们知道,汇编中对共用体和枚举类型没有特别的处理

并不会引入新的代码,因为共用体和枚举都是方便给程序员用的,本质没什么改变

其实上面这些控制语句,对反汇编来说很容易分析,逆向工程中最令人蛋疼的是算法

一个3*3矩阵算法的逆向分析

main函数

 int main()
13: {
0040B640 push ebp
0040B641 mov ebp,esp
0040B643 sub esp,0ACh
0040B649 push ebx
0040B64A push esi
0040B64B push edi
0040B64C lea edi,[ebp-0ACh]
0040B652 mov ecx,2Bh
0040B657 mov eax,0CCCCCCCCh
0040B65C rep stos dword ptr [edi]
14: int a[3][3] = {{1,2,3},{2,3,4},{3,4,5}};
0040B65E mov dword ptr [ebp-24h],1
0040B665 mov dword ptr [ebp-20h],2
0040B66C mov dword ptr [ebp-1Ch],3
0040B673 mov dword ptr [ebp-18h],2
0040B67A mov dword ptr [ebp-14h],3
0040B681 mov dword ptr [ebp-10h],4
0040B688 mov dword ptr [ebp-0Ch],3
0040B68F mov dword ptr [ebp-8],4
0040B696 mov dword ptr [ebp-4],5
15: int b[3][3] = {{2,3,4},{2,4,1},{6,2,1}};
0040B69D mov dword ptr [ebp-48h],2
0040B6A4 mov dword ptr [ebp-44h],3
0040B6AB mov dword ptr [ebp-40h],4
0040B6B2 mov dword ptr [ebp-3Ch],2
0040B6B9 mov dword ptr [ebp-38h],4
0040B6C0 mov dword ptr [ebp-34h],1
0040B6C7 mov dword ptr [ebp-30h],6
0040B6CE mov dword ptr [ebp-2Ch],2
0040B6D5 mov dword ptr [ebp-28h],1
16: int c[3][3];
17:
18: func(a,b,c);
0040B6DC lea eax,[ebp-6Ch]
0040B6DF push eax
0040B6E0 lea ecx,[ebp-48h]
0040B6E3 push ecx
0040B6E4 lea edx,[ebp-24h]
0040B6E7 push edx
0040B6E8 call @ILT+5(_func) (0040100a)
0040B6ED add esp,0Ch
19:
20: return 0;
0040B6F0 xor eax,eax
21: }
0040B6F2 pop edi
0040B6F3 pop esi
0040B6F4 pop ebx
0040B6F5 add esp,0ACh
0040B6FB cmp ebp,esp
0040B6FD call __chkesp (00401130)
0040B702 mov esp,ebp
0040B704 pop ebp
0040B705 ret

算法函数:

1:    int func(int a[3][3],int b[3][3],int c[3][3])
2: {
0040B580 push ebp
0040B581 mov ebp,esp
0040B583 sub esp,48h
0040B586 push ebx
0040B587 push esi
0040B588 push edi
0040B589 lea edi,[ebp-48h]
0040B58C mov ecx,12h
0040B591 mov eax,0CCCCCCCCh
0040B596 rep stos dword ptr [edi]
3: int i,j;
4: for(i = 0 ; i < 3; i++)
0040B598 mov dword ptr [ebp-4],0
0040B59F jmp func+2Ah (0040b5aa)
0040B5A1 mov eax,dword ptr [ebp-4]
0040B5A4 add eax,1
0040B5A7 mov dword ptr [ebp-4],eax
0040B5AA cmp dword ptr [ebp-4],3
0040B5AE jge func+0AAh (0040b62a)
5: {
6: for(j = 0 ; j < 3; j ++)
0040B5B0 mov dword ptr [ebp-8],0
0040B5B7 jmp func+42h (0040b5c2)
0040B5B9 mov ecx,dword ptr [ebp-8]
0040B5BC add ecx,1
0040B5BF mov dword ptr [ebp-8],ecx
0040B5C2 cmp dword ptr [ebp-8],3
0040B5C6 jge func+0A5h (0040b625)
7: c[i][j] = a[i][0]*b[0][j]+a[i][1]*b[1][j]+a[i][2]*b[2][j];
0040B5C8 mov edx,dword ptr [ebp-4]
0040B5CB imul edx,edx,0Ch
0040B5CE mov eax,dword ptr [ebp+8]
0040B5D1 mov ecx,dword ptr [ebp-8]
0040B5D4 mov esi,dword ptr [ebp+0Ch]
0040B5D7 mov edx,dword ptr [eax+edx]
0040B5DA imul edx,dword ptr [esi+ecx*4]
0040B5DE mov eax,dword ptr [ebp-4]
0040B5E1 imul eax,eax,0Ch
0040B5E4 mov ecx,dword ptr [ebp+8]
0040B5E7 mov esi,dword ptr [ebp-8]
0040B5EA mov edi,dword ptr [ebp+0Ch]
0040B5ED mov eax,dword ptr [ecx+eax+4]
0040B5F1 imul eax,dword ptr [edi+esi*4+0Ch]
0040B5F6 add edx,eax
0040B5F8 mov ecx,dword ptr [ebp-4]
0040B5FB imul ecx,ecx,0Ch
0040B5FE mov eax,dword ptr [ebp+8]
0040B601 mov esi,dword ptr [ebp-8]
0040B604 mov edi,dword ptr [ebp+0Ch]
0040B607 mov ecx,dword ptr [eax+ecx+8]
0040B60B imul ecx,dword ptr [edi+esi*4+18h]
0040B610 add edx,ecx
0040B612 mov eax,dword ptr [ebp-4]
0040B615 imul eax,eax,0Ch
0040B618 mov ecx,dword ptr [ebp+10h]
0040B61B add ecx,eax
0040B61D mov eax,dword ptr [ebp-8]
0040B620 mov dword ptr [ecx+eax*4],edx
0040B623 jmp func+39h (0040b5b9)
8: }
0040B625 jmp func+21h (0040b5a1)
9: return 0;
0040B62A xor eax,eax
10: }
0040B62C pop edi
0040B62D pop esi
0040B62E pop ebx
0040B62F mov esp,ebp
0040B631 pop ebp
0040B632 ret

从上面的代码我们可以看出,Debug模式下对二维数组的操作方式如下:

mov eax,<数组元素下标>
imul eax,eax,<每行(或结构体)的大小>
mov ecx,<数组(或结构体)开始地址>
mov eax,dword ptr [ecx+eax]
访问内部变量的时候,还要加上成员的偏移量,例如 mov eax,dword ptr [ecx+eax+0CH]