HexDump2程序是HexDump程序的改进版本。HexDump2把代码封装成函数,功能上更完善一些,实现也比前面的版本复杂得多。
先介绍几个指令:
pushad指令,会把所有的通用寄存器压入堆栈,通用寄存器包括eax,ebx,ecx,edx,esp,ebp,esi,edi。
popad指令与pushad指令相对应,会从堆栈弹出所有通用寄存器的值。
test指令,对两个操作数进行按位与运算,但不保存运算结果(两个操作数都不会被修改),只根据结果设置SF,ZF和PF标志位,并把CF和OF清零。
section .bss                            ; uninitialised data

BUFFLEN equ     10                      ; size of Buff in bytes; LoadBuff passes it to sys_read as the read size
Buff:   resb    BUFFLEN                 ; input buffer that LoadBuff fills from stdin
section .data                           ; initialised, writable data

; Output line template, in two back-to-back pieces:
;   DumpLin - hex column: one " hh" slot per input byte plus a trailing space
;   ASCLin  - character column between vertical bars, ending in a newline (10)
; ASCLin directly follows DumpLin in memory, so both can be written out
; with one sys_write of FULLLEN bytes starting at DumpLin (see PrintLine).
; DumpLin on its own carries no end-of-line character.
DumpLin:        db " 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "
DUMPLEN         equ $-DumpLin           ; length of the hex column only
ASCLin:         db "|................|",10
ASCLEN          equ $-ASCLin            ; length of the character column incl. newline
FULLLEN         equ $-DumpLin           ; length of both columns together

; Nybble value (0..15) -> hex digit character. Entries are one byte each,
; so the nybble is used as an unscaled index: [HexDigits+reg].
HexDigits:      db "0123456789ABCDEF"

; 256-entry byte translation table for the character column.
; 20h..7Eh map to themselves; every other value (00h..1Fh, 7Fh, 80h..FFh)
; maps to '.' (2Eh).
DotXlat:
                times 20h db 2Eh
; Emit the identity range 20h..7Eh one byte at a time.
%assign printable 20h
%rep 7Fh - 20h
                db printable
%assign printable printable + 1
%endrep
                times 100h - 7Fh db 2Eh
SECTION .text ; Section containing code
;-----------------------------------------------------------------------
; ClearLine - reset every slot of the dump line to its blank state
; In:     nothing
; Out:    via DumpChar, all 16 hex slots in DumpLin become "00" and all
;         16 character slots in ASCLin become '.' (DotXlat[0] is 2Eh)
; Saves:  all general-purpose registers (pushad/popad)
; Clobb:  flags
;-----------------------------------------------------------------------
ClearLine:
        pushad                          ; keep the caller's registers intact
        mov     edx, 15                 ; start at the last slot, walk down to slot 0
.slot:  xor     eax, eax                ; value to poke is 0; DumpChar overwrites EAX, so re-zero each pass
        call    DumpChar                ; write "00" / '.' into slot EDX
        sub     edx, 1                  ; SUB (unlike DEC) sets CF when 0 - 1 borrows
        jnc     .slot                   ; no borrow yet -> slot EDX is still valid
        popad
        ret
;-----------------------------------------------------------------------
; DumpChar - poke one byte value into both columns of the dump line
; In:     EAX = byte value to display; used directly as an index into the
;               256-entry DotXlat table (callers in this file pass 0..255)
;         EDX = slot number within the line (callers in this file pass 0..15)
; Out:    ASCLin[EDX+1]    = DotXlat[EAX]
;         DumpLin[3*EDX+1] = hex digit of bits 7..4
;         DumpLin[3*EDX+2] = hex digit of bits 3..0
; Saves:  EBX, EDI
; Clobb:  EAX (left holding the low-nybble digit character), flags
; Note:   no range checking is done on EAX or EDX.
;-----------------------------------------------------------------------
DumpChar:
        push    ebx
        push    edi

        lea     edi, [edx + edx*2]              ; EDI = EDX * 3: each hex slot is 3 characters wide

        ; Character column: translate through DotXlat and store.
        mov     bl, byte [DotXlat + eax]
        mov     byte [ASCLin + edx + 1], bl     ; +1 skips the leading '|'

        ; Split the value into its two nybbles.
        mov     ebx, eax
        and     eax, 0000000Fh                  ; EAX = low nybble
        and     ebx, 000000F0h
        shr     ebx, 4                          ; EBX = high nybble

        ; Turn each nybble into its digit character.
        mov     al, byte [HexDigits + eax]
        mov     bl, byte [HexDigits + ebx]

        ; Hex column: high digit first, then low digit (+1 skips the leading space).
        mov     byte [DumpLin + edi + 2], al
        mov     byte [DumpLin + edi + 1], bl

        pop     edi
        pop     ebx
        ret
;-----------------------------------------------------------------------
; PrintLine - write the whole dump line (hex + character column) to stdout
; In:     nothing
; Out:    FULLLEN bytes starting at DumpLin are handed to sys_write on fd 1
; Saves:  all general-purpose registers (pushad/popad); the sys_write
;         return value in EAX is therefore discarded
;-----------------------------------------------------------------------
PrintLine:
        pushad
        mov     edx, FULLLEN            ; byte count: DumpLin and ASCLin together
        mov     ecx, DumpLin            ; start address of the text
        mov     ebx, 1                  ; file descriptor 1 = standard output
        mov     eax, 4                  ; syscall number 4 = sys_write
        int     80h
        popad
        ret
;-----------------------------------------------------------------------
; LoadBuff - refill Buff from standard input
; In:     nothing
; Out:    EBP = value returned by sys_read (bytes placed in Buff; 0 at
;               end of file; the kernel reports errors as negative values)
;         ECX = 0, ready to be used as the index of the first byte in Buff
; Saves:  EAX, EBX, EDX
; Clobb:  ECX, EBP, flags
;-----------------------------------------------------------------------
LoadBuff:
        push    eax
        push    ebx
        push    edx

        mov     edx, BUFFLEN            ; read at most one buffer's worth
        mov     ecx, Buff               ; destination address
        xor     ebx, ebx                ; file descriptor 0 = standard input
        mov     eax, 3                  ; syscall number 3 = sys_read
        int     80h

        mov     ebp, eax                ; hand the read result back in EBP
        xor     ecx, ecx                ; rewind the buffer index

        pop     edx
        pop     ebx
        pop     eax
        ret
GLOBAL _start
; ------------------------------------------------------------------------
; MAIN PROGRAM BEGINS HERE
;
; Reads stdin through LoadBuff and prints one dump line per 16 input bytes,
; plus a final line when input ends.
;
; Register roles inside the loop:
;   ESI = total number of bytes processed so far
;   ECX = index of the next byte to take from Buff (LoadBuff resets it to 0)
;   EBP = sys_read result for the current buffer fill
;
; The sys_read result is tested with a SIGNED branch (jle). The kernel
; reports errors as negative values; an unsigned test (jbe) would only catch
; 0 and would treat an error code as a huge byte count, sending the scan
; loop far past the end of Buff.
;
; Note: when the buffer runs out exactly on a 16-byte boundary, LoadBuff is
; called before the line is printed, so on an interactive stdin the completed
; line appears only after the next read returns.
;-------------------------------------------------------------------------
_start:
        nop                             ; No-ops for GDB
        nop

        ; Initialization before the scan loop:
        xor     esi, esi                ; total byte counter = 0
        call    LoadBuff                ; read the first buffer from stdin
        cmp     ebp, 0                  ; 0 = EOF, negative = read error
        jle     Exit                    ; nothing to dump in either case

; Take one byte from the buffer and poke it into the dump line:
Scan:
        xor     eax, eax                ; clear EAX so the byte is zero-extended
        mov     al, byte [Buff+ecx]     ; fetch the next input byte
        mov     edx, esi
        and     edx, 0000000Fh          ; slot within the line = total count mod 16
        call    DumpChar

        ; Advance both counters and see whether the buffer is used up:
        inc     esi
        inc     ecx
        cmp     ecx, ebp
        jb      .modTest                ; bytes remain in the buffer
        call    LoadBuff                ; buffer exhausted: refill it
        cmp     ebp, 0                  ; 0 = EOF, negative = read error
        jle     Done                    ; no more data: flush and quit

; Print the line once a full group of 16 bytes has been poked:
.modTest:
        test    esi, 0000000Fh          ; low 4 bits zero <=> count is a multiple of 16
        jnz     Scan                    ; line not full yet
        call    PrintLine
        call    ClearLine               ; reset the template for the next line
        jmp     Scan

; Input finished: print whatever is in the line, then exit.
Done:
        call    PrintLine
Exit:   mov     eax, 1                  ; syscall number 1 = sys_exit
        mov     ebx, 0                  ; exit status 0
        int     80H
程序分析:
ClearLine函数,此函数的作用是把ASCLin和DumpLin两个字节数组的内容恢复到初始值。
pushad //保存所有通用寄存器
mov edx,15 //edx=15
.poke: mov eax,0 //eax = 0
call DumpChar //调用DumpChar函数,因为传入的eax=0,所以通过表转换,每次调用会把ASCLin[edx+1]填充成“.”,把DumpLin[3*edx+1]和DumpLin[3*edx+2]的值都填充成“0”;循环结束后,ASCLin[1]到ASCLin[16]都是“.”。
sub edx,1 //edx = edx-1
jae .poke //sub在0减1时产生借位(CF=1);只要没有借位(相减后的edx仍大于等于0),就跳转到.poke继续循环,这样共循环16次。
popad //恢复所有通用寄存器
ret //返回
DumpChar函数,此函数入参是eax和edx,eax是输入的字符的ASCII值,edx是字节数组的索引值。此函数用于修改两个字节数组:ASCLin和DumpLin。
对于ASCLin,修改ASCLin[edx+1]的值,如果eax是可见字符,则ASCLin[edx+1]=eax,否则ASCLin[edx+1]='.'。
对于DumpLin,修改DumpLin[3*edx+1]和DumpLin[3*edx+2]的值,其中DumpLin[3*edx+1]存储al高4位对应的16进制数字的ASCII码值,DumpLin[3*edx+2]存储al低4位对应的16进制数字的ASCII码值。例如:eax='a'(即61h),al的高4位是6,低4位是1,于是DumpLin[3*edx+1]='6',DumpLin[3*edx+2]='1'。
push ebx //保存ebx寄存器
push edi //保存edi寄存器
mov bl,byte [DotXlat+eax] //bl= DotXlat[eax],把ASCII表DotXlat[eax]的值装入bl
mov byte [ASCLin+edx+1],bl //ASCLin[edx+1]=bl,转换的结果是不可见字符转换成了“.”,可见字符不变。
mov ebx,eax //ebx = eax
lea edi,[edx*2+edx] //edi=3*edx
and eax,0000000Fh //只保留al的低4位
mov al,byte [HexDigits+eax] //al= HexDigits[al],通过查表,al转换成16进制数的ASCII码值。
mov byte [DumpLin+edi+2],al // DumpLin[3*edx+2]=al
and ebx,000000F0h //只保留bl的高4位。
shr ebx,4 //逻辑右移四位,把bl的高四位移到低四位(高位用0填充)。
mov bl,byte [HexDigits+ebx] //bl = HexDigits[bl],通过查表,bl转换成16进制数ASCII码值。
mov byte [DumpLin+edi+1],bl // DumpLin[3*edx+1]=bl
pop edi //恢复edi寄存器
pop ebx //恢复ebx寄存器
ret //返回
PrintLine函数,打印输入行。
pushad //保存所有通用寄存器
mov eax,4 //sys_write系统调用号
mov ebx,1 //输出到标准输出
mov ecx,DumpLin //输出的字节数组DumpLin
mov edx,FULLLEN //DumpLin和ASCLin的总长度,DumpLin和ASCLin在数据段的内存是连在一起的。
int 80h //执行系统调用
popad //恢复通用寄存器
ret //返回
LoadBuff函数,从标准输入读取字符到Buff。
push eax //保存eax寄存器
push ebx //保存ebx寄存器
push edx //保存edx寄存器
mov eax,3 // sys_read系统调用号
mov ebx,0 //标准输入
mov ecx,Buff //读入到Buff
mov edx,BUFFLEN //Buff的长度
int 80h //系统调用
mov ebp,eax //保存返回值到ebp
xor ecx,ecx //对ecx清零
pop edx //恢复edx
pop ebx //恢复ebx
pop eax //恢复eax
ret //返回
_start入口后面的汇编代码:
xor esi,esi //对esi清零
call LoadBuff //调用LoadBuff函数,读取输入字符到缓存。
cmp ebp,0 //比较LoadBuff函数返回结果和0
jbe Exit //如果结果值小于等于0,退出。
Scan:
xor eax,eax //对eax清零
mov al,byte[Buff+ecx] //al=Buff[ecx],初次调用时,ecx已经在调用LoadBuff函数时清零。
mov edx,esi //edx=esi,初次调用时,esi在程序入口处已经清零。
and edx,0000000Fh //保留低4位
call DumpChar //调用函数DumpChar
inc esi //esi=esi+1
inc ecx //ecx=ecx+1
cmp ecx,ebp //比较ecx和读入缓存的字节数
jb .modTest //如果ecx小于读入缓存的字节数,跳转到.modTest
call LoadBuff //调用LoadBuff函数,读入字符到缓存
cmp ebp,0 //比较返回值和0
jbe Done //如果小于等于0,退出。
.modTest:
test esi,0000000Fh //测试esi和15相与的结果:如果esi不是16的整数倍(例如在[1,15]之间),结果非0;如果esi是16的整数倍(例如esi=16),结果为0。
jnz Scan //如果不等于0,则跳转到Scan继续循环
call PrintLine //调用PrintLine函数打印出一行
call ClearLine //清除缓存中的数据
jmp Scan //跳转到Scan继续循环
Scan标记的这段代码有些复杂,有两个控制循环的变量,ecx用于和读入缓存的字节数进行比较,esi用于判断输出缓存的字节数是否是16的整数倍,如果是则打印一次转换的内容,并且清空缓存。当输入缓存的字节数不足16个字节时,程序不会打印,而是等待输入到16个字节后再打印。
Done:
call PrintLine //打印剩余的字节
Exit: mov eax,1 //以下是执行退出的系统调用
mov ebx,0
int 80H
makefile文件:
hexdump2: hexdump2.o
	ld -o hexdump2 hexdump2.o
hexdump2.o: hexdump2.asm
	nasm -f elf -g -F stabs hexdump2.asm
测试:
[root@bogon hexdump2]# make
nasm -f elf -g -F stabs hexdump2.asm
ld -o hexdump2 hexdump2.o
[root@bogon hexdump2]# ./hexdump2
ab
cdefghijklmn
opq
 61 62 0A 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 0A |ab.cdefghijklmn.|
当程序从标准输入恰好获取16个字符时,也不会打印,不会打印的原因是:在ecx=ebp场景下,jb .modTest不成立,程序没有调用代码test esi,0000000Fh检测是否缓存已经到16个字符了。
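在jb .modTest指令之后、call LoadBuff指令之前加入如下几行代码可以解决这个问题:
test esi,0000000Fh
jnz .loadBuff
call PrintLine
call ClearLine
.loadBuff:
注意:按这种改法,打印并清空之后esi仍然是16的整数倍,LoadBuff返回后程序还会经过.modTest(读到数据时)或Done(读到文件末尾时)再调用一次PrintLine,多输出一行全为00的空行;要彻底解决,还需要让这两条路径在esi为16的整数倍时跳过打印。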
测试一下:
[root@bogon hexdump2]# ./hexdump2
123456789012345
 31 32 33 34 35 36 37 38 39 30 31 32 33 34 35 0A |123456789012345.|
果然可以解决问题。