堆栈是向下拓展的
一个很简单的例子,将两个32位(4字节)的数字压入堆栈,查看esp堆栈寄存器前后的变化。
.section .text
.global main
main:
nop            # one-byte filler; the gdb session breaks at main+1, i.e. just before the first push
pushl $1       # esp -= 4, then the 32-bit immediate 1 is stored at (%esp)
pushl $2       # esp -= 4 again, so esp ends up 8 below its starting value; no ret/exit follows, the demo is only meant to be stepped in gdb
调试查看:
[edemon@CentOS workspace]$ gcc -gdwarf-2 -o pushpop pushpop.s
[edemon@CentOS workspace]$ gdb pushpop
...
(gdb) b *main+1
Breakpoint 1 at 0x80483ed: file pushpop.s, line 5.
(gdb) r
Starting program: /home/edemon/workspace/pushpop
Breakpoint 1, main () at pushpop.s:5
5 pushl $1
(gdb) p $esp
$5 = (void *) 0xbffff4dc
(gdb) n
main () at pushpop.s:6
6 pushl $2
(gdb) n
0x080483f1 in main ()
(gdb) p $esp
$6 = (void *) 0xbffff4d4
可以发现前后的esp寄存器地址值相差8,且数值变小。
函数调用call
用简单的函数调用计算2^4
# Raise 2 to the 4th power by calling a doubling helper three times, then
# print the value with printf and leave through libc exit.
.data
result:
    .string "the result is %d\n"

.text
.globl main
main:
    nop
    movl $2, %eax           # eax carries the running value in and out of func
    .rept 3
    call func               # each call doubles eax
    .endr
    pushl %eax              # printf argument: the computed value
    pushl $result           # printf argument: format string
    call printf
    pushl $0                # exit status
    call exit               # does not return, so the pushed arguments are never popped

# func: doubles eax in place. In/out: eax. Sets up and tears down an ebp frame.
func:
    pushl %ebp
    movl %esp, %ebp
    addl %eax, %eax
    leave                   # same as movl %ebp,%esp followed by popl %ebp
    ret
loop
loop指令自动使用ECX寄存器作为计数器,每一次迭代过程中递减并测试这个计数器。
下面的程序用于计算2^5:
# calculate 2^5
# Doubles ebx four times with the `loop` instruction (which uses ecx as its
# counter), prints the result through printf and exits through libc.
.section .data
output:
    .asciz "2^5 is %d\n"

.section .text
.global main
main:
    movl $4,%ecx            # loop counter: four doublings
    movl $2,%ebx            # running value, starts at 2
myloop:
    addl %ebx,%ebx          # ebx = ebx * 2
    loop myloop             # ecx -= 1; jump back while ecx != 0
    pushl %ebx              # printf argument: the result
    pushl $output           # printf argument: format string
    call printf
    addl $8,%esp            # pop the two printf arguments
    pushl $0                # exit status
    call exit               # libc exit flushes stdio; a raw sys_exit here would drop
                            # the buffered text whenever stdout is a pipe or a file
for循环
计算前20位的斐波纳契
代码:
.section .data                  # Fibonacci demo: fills values[0..19] with the first 20 terms
values:
.int 1,1                        # seed terms: values[0] = values[1] = 1
.fill 18,4,0                    # reserve values[2..19] so the stores stay inside the table
# (the table now holds 20 ints; with only the two seeds reserved, the loop overwrote adjacent data)
.section .text
.globl main
main:
nop
movl $0,%ecx                    # ecx = index of the older of the two terms being summed
loop:
movl $0,%eax                    # eax accumulates the next Fibonacci element
add values(,%ecx,4),%eax        # add the older term
add $1,%ecx
add values(,%ecx,4),%eax        # add the newer term
add $1,%ecx
movl %eax,values(,%ecx,4)       # store the sum in the slot right after the newer term
sub $1,%ecx                     # net effect of the iteration: ecx advanced by one
cmp $18,%ecx
jl loop                         # continue while ecx < 18, so the last store is values[19]
nop                             # handy breakpoint location once the table is complete
movl $0,%ebx                    # exit status 0
movl $1,%eax                    # sys_exit
int $0x80                       # terminate instead of running off the end of .text
gdb查看内存:
[edemon@CentOS workspace]$ gdb feibo1
(gdb) b 21
Breakpoint 1 at 0x804841a: file feibo1.s, line 21.
(gdb) r
Starting program:
Breakpoint 1, loop () at feibo1.s:21
21 nop
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.192.el6.i686
(gdb) x/20 &values
0x8049684 <values>: 1 1 2 3
0x8049694 <completed.6190>: 5 8 13 21
0x80496a4: 34 55 89 144
0x80496b4: 233 377 610 987
0x80496c4: 1597 2584 4181 6765
if分支
写一个简单的if分支语句,用于处理1和2的比较。
# Compare 1 with 2 and write the matching message to stdout using raw Linux
# int 0x80 system calls (eax = call number, ebx/ecx/edx = arguments).
.section .data
str1:
    .ascii "1 <= 2."            # .ascii adds no NUL, so len1 counts only the visible bytes
len1 = . - str1
str2:
    .ascii "1 > 2."
len2 = . - str2

.section .text
.global main
main:
    movl $1,%ecx
    movl $2,%edx
    cmpl %edx,%ecx              # flags from ecx - edx, i.e. 1 - 2
    jle L1                      # signed compare: taken when ecx <= edx
    movl $len2,%edx             # write length
    movl $str2,%ecx             # write buffer
    jmp end
L1:
    movl $len1,%edx             # write length
    movl $str1,%ecx             # write buffer; falls through to end
end:
    movl $1,%ebx                # fd 1 = stdout
    movl $4,%eax                # sys call number for sys_write
    int $0x80                   # call kernel
    movl $0,%ebx                # exit code
    movl $1,%eax                # sys call number for sys_exit
    int $0x80
当然,最终的结果是1 <= 2
long long
AT&T汇编中的.quad数据类型对应着long long。
例子:
一个quad数组标签,然后进行内存查看。
# Five 64-bit values laid out back to back so that their memory image can be
# inspected from gdb; the program itself only exits with status 0.
.data
values:
    .quad 14
    .quad 28
    .quad -1
    .quad 90
    .quad -60

.text
.globl main
main:
    nop
    movl $1,%eax            # sys_exit
    movl $0,%ebx            # exit status 0
    int $0x80
gdb查看内存:
(gdb) x/5d &values
0x8049664 <values>: 14 0 28 0
0x8049674 <values+16>: -1
(gdb) x/10d &values
0x8049664 <values>: 14 0 28 0
0x8049674 <values+16>: -1 -1 90 0
0x8049684 <values+32>: -60 -1
(gdb) x/5g &values
0x8049664 <values>: 14 28
0x8049674 <values+16>: -1 90
0x8049684 <values+32>: -60
通过打印对比,可以发现quad是4字,8字节,64位。x/d默认以2字(4字节,32位)为单位显示,所以每个quad要占两个显示单元(低32位在前,高32位在后);改用x/g以8字节为单位显示时,每个quad正好对应一个单元。
溢出分析
对于无符号的整数溢出,进位标志carry flag将会被设置成1. 在这种情况下(进位标志carry flag被设置成为1)可以使用jc进行跳转.
# Unsigned 8-bit overflow demo: 200 + 100 does not fit in one byte, so the
# add sets the carry flag and jc is taken.
# Exit status: 0 when the add carried, otherwise the 8-bit sum.
.section .text
.global main
main:
    movl $0,%ebx            # clear all of ebx so that only bl holds data
    movb $200,%bl
    movb $100,%al
    addb %al,%bl            # bl = (200 + 100) mod 256 = 44, CF = 1
    jc overflag
    jmp quit                # no carry: report the sum left in ebx
overflag:
    movl $0,%ebx            # carry: report 0
quit:
    movl $1,%eax            # sys_exit; without this eax holds no valid exit request
    int $0x80
gdb调试:
(gdb) p $ebx
$1 = 200
(gdb) n
10 addb %al,%bl
(gdb) p $ebx
$2 = 200
(gdb) n
11 jc overflag
(gdb) p $ebx
$3 = 44
(gdb) p/t $ebx
$4 = 101100
300的二进制表示是100101100,因为一个字节只有8位,所以只能存储结果00101100(十进制44)。多余的那个最高位的1直接被舍去了。carry flag被设置成1,满足条件进入overflag块。
对于有符号的整数,当出现溢出的情况,相应的溢出标志overflow flag被设置成1。
code:
# Signed 32-bit overflow demo: INT_MIN + (-1) cannot be represented, so the
# add sets the overflow flag and jo is taken.
# Exit status: 0 when the add overflowed, otherwise the low byte of the sum.
.section .text
.global main
main:
    movl $-2147483648,%eax  # smallest 32-bit signed value
    movl $-1,%ebx
    addl %eax,%ebx          # wraps around to 2147483647 and sets OF
    jo over
    jmp quit                # no overflow: report the sum left in ebx
over:
    movl $0,%ebx            # overflow: report 0
quit:
    movl $1,%eax            # sys_exit; eax still held the first operand, which is not a valid call number
    int $0x80
调试:
(gdb) p $eax
$1 = -2147483648
(gdb) p/t $eax
$2 = 10000000000000000000000000000000
(gdb) n
6 addl %eax,%ebx
(gdb) p $ebx
$3 = -1
(gdb) p/t $ebx
$4 = 11111111111111111111111111111111
(gdb) n
7 jo over
(gdb) p $ebx
$5 = 2147483647
(gdb) p/t $ebx
$6 = 1111111111111111111111111111111
(gdb) n
over () at overflag.s:10
10 movl $0,%ebx
(gdb) n
11 int $0x80
(gdb) n
0x08048403 in over ()
整个过程是这样的:
十进制:
-2147483648 + -1 = 2147483647
二进制:
10000000000000000000000000000000 + 11111111111111111111111111111111 =
01111111111111111111111111111111
因为溢出,所以overflow flag(OF)被设置成1,jo的条件满足,跳转到over块。
long long add
一个long long (quad)是64位,需要两个32位寄存器保存该值。
例子:
val1 EAX EBX + val2 ECX EDX = ans ECX EDX
code:
# 64-bit addition on a 32-bit machine: val1 + val2, done as two 32-bit halves.
# Register roles (high:low): val1 = eax:ebx, val2 and the sum = ecx:edx.
.section .data
val1:
    .quad 4000000000
val2:
    .quad 5000000000
report:
    .asciz "the answer is %qd\n"    # %qd behaves like %lld

.section .text
.global main
main:
    movl val1,%ebx          # low dword of val1 (stored first, little-endian)
    movl val1+4,%eax        # high dword of val1, 4 bytes further on
    movl val2,%edx          # low dword of val2
    movl val2+4,%ecx        # high dword of val2
    addl %ebx,%edx          # add the low halves; CF = carry out of bit 31
    adcl %eax,%ecx          # add the high halves plus that carry
    pushl %ecx              # high dword is pushed first so it sits at the higher address
    pushl %edx              # low dword
    pushl $report
    call printf
    addl $12,%esp           # drop three dwords: format pointer + 64-bit value
    pushl $0                # exit status
    call exit               # libc exit flushes stdio; a raw sys_exit would drop
                            # buffered output when stdout is not a terminal
# build and run:
#   gcc -gdwarf -o exe addtest.s
#   ./exe
#   the answer is 9000000000
long long sub
和long long的加法一样,我们需要两个32位寄存器来保存64位整数值。`sbb source, destination`指令在做减法操作的时候还会额外减去进位标志(即低位减法产生的借位)。
例子:
val2 ECX EDX - val1 EAX EBX = ans ECX EDX
# 64-bit subtraction on a 32-bit machine, done as two 32-bit halves.
# Register roles (high:low): val1 = eax:ebx, val2 and the result = ecx:edx.
# The result is val2 - val1 = 4000000000 - (-3000000000).
.section .data
val1:
    .quad -3000000000
val2:
    .quad 4000000000
report:
    .asciz "the answer is %qd\n"

.section .text
.global main
main:
    movl val1,%ebx          # low dword of val1
    movl val1+4,%eax        # high dword of val1
    movl val2,%edx          # low dword of val2
    movl val2+4,%ecx        # high dword of val2
    subl %ebx,%edx          # low halves: edx = edx - ebx; CF = borrow
    sbbl %eax,%ecx          # high halves: ecx = ecx - eax - borrow
    pushl %ecx              # high dword is pushed first so it sits at the higher address
    pushl %edx              # low dword
    pushl $report
    call printf
    addl $12,%esp           # drop three dwords: format pointer + 64-bit value
    pushl $0                # exit status
    call exit               # libc exit flushes stdio; a raw sys_exit would drop
                            # buffered output when stdout is not a terminal
# expected output: the answer is 7000000000
int int multiply
下面实现100000 * 200000的简单乘法。
# Unsigned 32x32 -> 64-bit multiply: 100000 * 200000 = 20000000000.
# mull leaves the product in edx:eax (high:low); it is stored to ans and printed.
.section .data
val1:
    .int 100000
val2:
    .int 200000
ans:
    .quad 0
report:
    .asciz "the answer is %qd\n"

.section .text
.global main
main:
    nop
    movl val1,%eax
    mull val2               # edx:eax = eax * val2
    movl %eax,ans           # low dword goes first: x86 stores quads little-endian
    movl %edx,ans+4         # high dword at the higher address
    # A single "pushl ans" would pass only 4 of the 8 bytes that %qd reads.
    pushl %edx              # high dword of the printf argument
    pushl %eax              # low dword
    pushl $report
    call printf
    addl $12,%esp           # drop three dwords: format pointer + 64-bit value
    pushl $0                # exit status
    call exit
# expected output: the answer is 20000000000
analyses result:
we use gdb to watch binary expression of ans:
(gdb) x/2t &ans
0x804a02c: 00000000000000000000000000000100 10101000000101111100100000000000
it's equal to 20000000000
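but there is a catch:
if we replace the two register pushes with a single `pushl ans` and print the answer, ./exe shows:
the answer is -5233720230622003196
`pushl ans` pushes only one 32-bit word, while %qd consumes 64 bits, so printf combines that word with whatever 32-bit value happens to sit next to it on the stack.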
右移1和除以2是等价的吗?(负数右移)
我们知道-1在计算机中的表示为32个1,右移直接操作该二进制数字。那么-1右移的是怎样的呢,符号位会变化吗?
test codes:
#include <stdio.h>
#include <stdlib.h>
int main(){ /* shift -1 right by one bit four times and print what is left */
int num = -1, i;
for(i=0;i<4;i++){
num = num>>1; /* right-shifting a negative int is implementation-defined in C; the gdb session shows the value staying all ones */
}
printf("%d\n",num);
return 0;
}
gdb debugs:
[edemon@CentOS workspace]$ gcc -gdwarf-2 -o div div.c
[edemon@CentOS workspace]$ gdb div
GNU gdb (GDB) Red Hat Enterprise Linux (7.2-90.el6)
...
(gdb) p/t num
$1 = 11111111111111111111111111111111
(gdb) n
6 for(i=0;i<4;i++){
(gdb) p/t num
$2 = 11111111111111111111111111111111
(gdb) n
7 num = num>>1;
(gdb) n
6 for(i=0;i<4;i++){
(gdb) p/t num
$3 = 11111111111111111111111111111111
(gdb) n
7 num = num>>1;
(gdb) p/t num
$4 = 11111111111111111111111111111111
(gdb) n
6 for(i=0;i<4;i++){
(gdb) n
7 num = num>>1;
(gdb) n
6 for(i=0;i<4;i++){
(gdb) n
9 printf("%d\n",num);
(gdb) n
-1
10 return 0;
(gdb) p/t num
$5 = 11111111111111111111111111111111
可以看出-1不管右移多少次,数值都是不变的。
我们修改右移变成除以2,再次查看结果.
(gdb) n
6 for(i=0;i<4;i++){
(gdb) n
7 num = num/2;
(gdb) p/t num
$1 = 11111111111111111111111111111111
(gdb) n
6 for(i=0;i<4;i++){
(gdb) p/t num
$2 = 0
(gdb) n
7 num = num/2;
(gdb) c
Continuing.
0
结果就得到-1/2 = 0的”正确”结果。所以除了效率的差异,>>1和/2有时还会得到不同的结果。
负数不断左移变成0
负数在不断左移的过程中会有变小的阶段(符号位仍然等于1),但是最后一定是等于0。左移时空出来的低位用0填充,负数右移时空出的高位用1填充。
7 num = num<<1;
(gdb) p/t num
$1 = 11111111111111111111111111111111
...
(gdb) p/t num
$37 = 11111000000000000000000000000000
(gdb) n
7 num = num<<1;
...
(gdb) n
6 for(i=0;i<33;i++){
(gdb) p/t num
$38 = 11000000000000000000000000000000
...
(gdb) n
6 for(i=0;i<33;i++){
(gdb) p/t num
$39 = 0
精度控制和时间消耗
下面是一个计算函数 f(x) = -x*x/3 + 8 在 [-1000,1000] 内最大值的程序:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(){ /* scan x from -1000 up to 1000 in 0.0001 steps, track the largest -x*x/3+8, and time the scan with clock() */
double ans = 0; /* running maximum; it starts at 0, so the reported value is never below 0 */
double x;
clock_t start,finish;
start = clock();
for(x=-1000;x<1000;){ /* no increment clause: x is advanced inside the body */
double temp = -x*x/3 + 8; /* function value at the current x */
x=x+0.0001;
ans = ans>temp?ans:temp; /* keep the larger of the two */
}
finish = clock();
printf("max value is %lf, and spent time is %.8lf seconds\n",ans,(double)(finish-start)/CLOCKS_PER_SEC); /* clock ticks converted to seconds */
return 0;
}
/* sample output: max value is 8.000000, and spent time is 0.26000000 seconds */
我们生成汇编文本,修改FPU的控制寄存器的精度控制位,再生成新的可执行文件:
[edemon@CentOS workspace]$ gcc -S -o mydiv.s div.c
[edemon@CentOS workspace]$ vim mydiv.s
[edemon@CentOS workspace]$ gcc -gdwarf -o mydiv mydiv.s
[edemon@CentOS workspace]$ ./mydiv
FPU的控制寄存器是16位寄存器,最低6位(bit 0–5)是异常屏蔽位,第7、8位(bit 6–7)保留,第9、10位(bit 8–9)是精度控制位。精度控制位的设置:
00 —— 单精度
01 —— 未使用
10 —— 双精度
11 —— 拓展双精度
mydiv.s:
# gcc -S output for the C max/timing program, with a few hand-added lines
# (marked "added by hand") that load a new x87 control word before main's
# own code runs and dump it to memory so it can be checked from gdb.
.file "div.c"
.section .rodata
.align 4
.LC7:
.string "max value is %lf, and spent time is %.8lf seconds\n"
precflag: # added by hand: new x87 control word 0x007f (precision-control bits 8-9 = 00, exception mask bits 0-5 all set)
.byte 0x7f,0x00 # added by hand: the two bytes of 0x007f, low byte first
.bss # added by hand
.lcomm ctrl_regis,2 # added by hand: 2-byte slot that receives the control word for inspection
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
fstcw ctrl_regis # added by hand: dump the control word as it is on entry
fldcw precflag # added by hand: load the new control word
fstcw ctrl_regis # added by hand: dump it again so the change can be verified
leal 4(%esp), %ecx
.cfi_def_cfa 1, 0
andl $-16, %esp # align the stack to 16 bytes
pushl -4(%ecx)
pushl %ebp
.cfi_escape 0x10,0x5,0x2,0x75,0
movl %esp, %ebp
pushl %ecx
.cfi_escape 0xf,0x3,0x75,0x7c,0x6
subl $68, %esp
fldz
fstpl -16(%ebp) # -16(%ebp) = 0.0; corresponds to `ans` in the C source
call clock
movl %eax, -28(%ebp) # first clock() result; corresponds to `start`
fldl .LC1
fstpl -24(%ebp) # -24(%ebp) = -1000.0; corresponds to `x`
jmp .L2 # enter the loop at its condition check
.L6: # loop body
fldl -24(%ebp)
fchs
fmull -24(%ebp) # st(0) = -x * x
fldl .LC2
fdivrp %st, %st(1) # divide by the constant at .LC2 (3.0)
fldl .LC3
faddp %st, %st(1) # add the constant at .LC3 (8.0)
fstpl -40(%ebp) # -40(%ebp) corresponds to `temp`
fldl -24(%ebp)
fldl .LC4
faddp %st, %st(1)
fstpl -24(%ebp) # x = x + step constant at .LC4
fldl -16(%ebp)
fldl -40(%ebp)
fxch %st(1) # st(0) = ans, st(1) = temp
fucomip %st(1), %st # compare ans with temp, pop ans
fstp %st(0) # pop temp
jbe .L9 # ans <= temp (or unordered): take temp
fldl -16(%ebp)
jmp .L5
.L9:
fldl -40(%ebp)
.L5:
fstpl -16(%ebp) # ans = the selected value
.L2: # loop condition
fldl .LC5
fldl -24(%ebp)
fxch %st(1) # st(0) = constant at .LC5, st(1) = x
fucomip %st(1), %st
fstp %st(0)
ja .L6 # keep looping while the .LC5 constant (1000.0) > x
call clock
movl %eax, -44(%ebp) # second clock() result; corresponds to `finish`
movl -44(%ebp), %eax
subl -28(%ebp), %eax # finish - start
movl %eax, -60(%ebp)
fildl -60(%ebp) # convert the tick difference to floating point
fldl .LC6
fdivrp %st, %st(1) # divide by the constant at .LC6
subl $12, %esp
leal -8(%esp), %esp # make room for one double on the stack
fstpl (%esp) # printf argument: elapsed seconds
pushl -12(%ebp) # printf argument: high dword of ans
pushl -16(%ebp) # printf argument: low dword of ans
pushl $.LC7 # printf argument: format string
call printf
addl $32, %esp
movl $0, %eax # return value 0
movl -4(%ebp), %ecx
.cfi_def_cfa 1, 0
leave
.cfi_restore 5
leal -4(%ecx), %esp # restore the pre-alignment stack pointer
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
.size main, .-main
.section .rodata
.align 8
.LC1: # IEEE-754 double -1000.0 (low dword, then high dword)
.long 0
.long -1064353792
.align 8
.LC2: # IEEE-754 double 3.0
.long 0
.long 1074266112
.align 8
.LC3: # IEEE-754 double 8.0
.long 0
.long 1075838976
.align 8
.LC4: # IEEE-754 double 0.0001
.long -350469331
.long 1058682594
.align 8
.LC5: # IEEE-754 double 1000.0
.long 0
.long 1083129856
.align 8
.LC6: # IEEE-754 double 1000000.0 (presumably CLOCKS_PER_SEC on this system)
.long 0
.long 1093567616
.ident "GCC: (GNU) 4.9.1"
.section .note.GNU-stack,"",@progbits
在gdb中观察到的情况:
[edemon@CentOS workspace]$ gdb mydiv
...
(gdb) start
...
(gdb) n
19 fldcw precflag
(gdb) x/2t &ctrl_regis
0x8049840 <ctrl_regis>: 00000000000000000000001101111111 00000000000000000000000000000000
(gdb) n
20 fstcw ctrl_regis
(gdb) n
21 leal 4(%esp), %ecx
(gdb) x/2t &ctrl_regis
0x8049840 <ctrl_regis>: 00000000000000000000000001111111 00000000000000000000000000000000
可以看到精度控制位已经被设置为00,即单精度。
程序新结果: max value is 8.000000, and spent time is 0.14000000 seconds
可以发现时间明显减小。不过这是牺牲精度换来的。