ELF文件加载过程分析

从编译、链接和运行的角度，应用程序和库程序的链接有两种方式。一种是静态链接，库程序的二进制代码链接进应用程序的映像中；一种是动态链接，库函数的代码不放入应用程序映像，而是在启动时，将库程序的映像加载到应用程序进程空间。

在动态链接中，GNU将动态链接ELF文件的工作做了分工：ELF映像的载入与启动由Linux内核完成，而动态链接过程由用户空间的glibc实现，glibc为此提供了一个“解释器”工具ld-linux.so.2。

Linux内核中，使用struct linux_binfmt结构来描述一种可执行文件格式（例如ELF）的加载方法：

/* binfmts.h */

/*
 * Describes one executable-file format handler. Each supported binary
 * format fills in one of these with its callbacks (see the ELF instance
 * elf_format in binfmt_elf.c).
 */
struct linux_binfmt {

    /* list linkage; presumably chains this handler into the kernel's
     * list of registered formats -- TODO confirm against the registration code */
    struct list_head lh;

    /* module that provides this handler (the ELF instance sets THIS_MODULE) */
    struct module *module;

    /* callback that handles an executable program of this format */
    int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);

    /* callback taking an open file; by its name it loads a shared
     * library -- NOTE(review): semantics not shown in this excerpt */
    int (*load_shlib)(struct file *);

    /* callback taking the core-dump parameters; by its name it writes
     * a core dump -- NOTE(review): semantics not shown in this excerpt */
    int (*core_dump)(struct coredump_params *cprm);

    unsigned long min_coredump; /* minimal dump size */

};

load_binary函数指针指向的是一个可执行程序的处理函数。我们研究的ELF文件格式的定义如下：

/* binfmt_elf.c */

/*
 * The linux_binfmt instance for the ELF format: wires the ELF-specific
 * callbacks defined in binfmt_elf.c into the generic handler structure.
 */
static struct linux_binfmt elf_format = {

    .module     = THIS_MODULE,

    /* entry point used when an ELF executable is run */
    .load_binary    = load_elf_binary,

    .load_shlib = load_elf_library,

    .core_dump  = elf_core_dump,

    /* minimal dump size is set to ELF_EXEC_PAGESIZE */
    .min_coredump   = ELF_EXEC_PAGESIZE,

};

Linux内核将这个数据结构注册到可执行程序队列，当运行一个可执行程序时，所有注册的处理程序（这里的load_elf_binary）逐一前来认领，若发现格式相符，则载入并启动该程序。

/*
 * load_binary handler of elf_format: validates the ELF header found in
 * bprm->buf, reads the program header table, looks for a PT_INTERP
 * segment naming the "interpreter" (dynamic linker), maps the PT_LOAD
 * segments, picks the user-space entry point and calls start_thread().
 *
 * This is an abridged excerpt: every "......" comment marks code that
 * was cut out, so several locals (retval, i, elf_ppnt, elf_phdata, size,
 * elf_entry, ...) are declared in elided parts and the braces shown do
 * not all pair up.
 *
 * bprm: the binary being executed; bprm->file and bprm->buf are read here.
 * regs: register set handed to start_thread().
 *
 * On the failure paths shown, retval is set to a negative errno value
 * before jumping to a cleanup label; the cleanup code and the return
 * statement themselves are elided.
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)

{

    struct file *interpreter = NULL; /* to shut gcc up */

    unsigned long load_addr = 0, load_bias = 0;

    int load_addr_set = 0;

    char * elf_interpreter = NULL;  //path name of the "interpreter"; stays NULL when there is no PT_INTERP segment

        /*......*/

    struct {

        struct elfhdr elf_ex;

        struct elfhdr interp_elf_ex;

    } *loc; //ELF headers: elf_ex for the executable, interp_elf_ex for the interpreter

 

    loc = kmalloc(sizeof(*loc), GFP_KERNEL);

        /*......*/

     

    /* Get the exec-header */

    loc->elf_ex = *((struct elfhdr *)bprm->buf);  //bprm->buf holds the head of the image already read in by the kernel (128 bytes per the article -- TODO confirm against BINPRM_BUF_SIZE)

 

    retval = -ENOEXEC;

    /* First of all, some simple consistency checks */

    if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)   //compare the leading magic bytes of the file header with ELFMAG ("\177ELF" per the article)

        goto out;

 

    if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)    //only ET_EXEC (executable) and ET_DYN (shared object) are accepted

        goto out;

        /*......*/

 

    /* Now read in all of the header information */

        /*......*/

 

    retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, // read the program header table, located at file offset e_phoff

                 (char *)elf_phdata, size);

        /*......*/

 

    for (i = 0; i < loc->elf_ex.e_phnum; i++) {   //scan the program headers for the interpreter: read its path, open it and read its header (it is mapped later by load_elf_interp)

        if (elf_ppnt->p_type == PT_INTERP) { //PT_INTERP is the "interpreter" segment

            /* This is the program interpreter used for

             * shared libraries - for now assume that this

             * is an a.out format binary

             */

                /*......*/

 

            retval = kernel_read(bprm->file, elf_ppnt->p_offset,  //read p_filesz bytes at file offset p_offset

                         elf_interpreter,   //what is read is the interpreter's path name, e.g. "/lib/ld-linux.so.2", not its code

                         elf_ppnt->p_filesz);

                /*......*/

            /* make sure path is NULL terminated */

            retval = -ENOEXEC;

            if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')

                goto out_free_interp;

 

            interpreter = open_exec(elf_interpreter);   //open the interpreter file

            retval = PTR_ERR(interpreter);

            if (IS_ERR(interpreter))

                goto out_free_interp;

 

            /*

             * If the binary is not readable then enforce

             * mm->dumpable = 0 regardless of the interpreter's

             * permissions.

             */

            would_dump(bprm, interpreter);

 

            retval = kernel_read(interpreter, 0, bprm->buf,  //read the first BINPRM_BUF_SIZE bytes of the interpreter (its header) into bprm->buf, overwriting the executable's header there

                         BINPRM_BUF_SIZE);

                    /*......*/

 

            /* Get the exec headers */

            loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);

            break;

        }

        elf_ppnt++;

    }

        /*......*/

 

    /* Some simple consistency checks for the interpreter */

    if (elf_interpreter) { //sanity checks on the interpreter (body elided)

        /*......*/

    }

 

        /*......*/

    for(i = 0, elf_ppnt = elf_phdata;

        i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {

        int elf_prot = 0, elf_flags;

        unsigned long k, vaddr;

 

        if (elf_ppnt->p_type != PT_LOAD) //only PT_LOAD segments (segments that must be loaded) are handled

            continue;

 

        if (unlikely (elf_brk > elf_bss)) {

            /*......*/

        }

 

            /*......*/

        /* NOTE(review): the brace below closes a block whose opening
         * lies in the elided code above */
        }

 

        error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,

                elf_prot, elf_flags, 0); //map a contiguous range of the file into the user virtual address space

            /*......*/

    }

 

        /*......*/

 

    if (elf_interpreter) { //an interpreter was requested via PT_INTERP, i.e. the dynamically linked case

        unsigned long uninitialized_var(interp_map_addr);

 

        elf_entry = load_elf_interp(&loc->interp_elf_ex,

                        interpreter,

                        &interp_map_addr,

                        load_bias);     //map the interpreter's image

        if (!IS_ERR((void *)elf_entry)) {

            /*

             * load_elf_interp() returns relocation

             * adjustment

             */

            interp_load_addr = elf_entry;

            elf_entry += loc->interp_elf_ex.e_entry; //user-space entry point becomes the interpreter's entry (its load adjustment + e_entry)

        }

        if (BAD_ADDR(elf_entry)) {

            force_sig(SIGSEGV, current);

            retval = IS_ERR((void *)elf_entry) ?

                    (int)elf_entry : -EINVAL;

            goto out_free_dentry;

        }

        reloc_func_desc = interp_load_addr;

 

        allow_write_access(interpreter);

        fput(interpreter);

        kfree(elf_interpreter);

    } else { //no interpreter, i.e. the statically linked case

        elf_entry = loc->elf_ex.e_entry; //user-space entry point is the executable's own e_entry

        if (BAD_ADDR(elf_entry)) {

            force_sig(SIGSEGV, current);

            retval = -EINVAL;

            goto out_free_dentry;

        }

    }

 

    kfree(elf_phdata);

    /*......*/

 

    start_thread(regs, elf_entry, bprm->p);  //hand elf_entry and bprm->p to start_thread; per the article this sets the instruction and stack pointers (eip/esp on 32-bit x86) used when returning to user mode

    /*......*/

 

    /* error cleanup */

    /*......*/

}

我们这样一个Hello world程序，除非在编译时指定-static选项，否则都是动态链接的：

#include <stdio.h>

/*
 * Minimal test program: prints "Hello world." followed by a newline
 * and returns 0. The article's disassembly is taken from exactly this
 * source, so the printf() call is intentionally left as written.
 */
int main()

{

        printf("Hello world.\n");

        return 0;

}

Hello world程序被内核载入内存后，控制权先交给“解释器”，“解释器”完成动态库的装载后，再将控制权交给用户程序。

ELF文件符号的动态解析

“解释器”将所有动态库文件加载到内存后，形成一个链表，后面的符号解析过程主要是在这个链表中搜索符号的定义。

我们以上面Hello world程序为例，分析程序如何调用动态库中的printf函数：

000000000040052d <main>:

  40052d:   55                      push   %rbp

  40052e:   48 89 e5                mov    %rsp,%rbp

  400531:   bf d4 05 40 00          mov    $0x4005d4,%edi

  400536:   e8 d5 fe ff ff          callq  400410 <puts@plt>

  40053b:   b8 00 00 00 00          mov    $0x0,%eax

  400540:   5d                      pop    %rbp

  400541:   c3                      retq  

  400542:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)

  400549:   00 00 00

  40054c:   0f 1f 40 00             nopl   0x0(%rax)

从汇编代码看到，printf调用被换成了puts，其中callq指令就是调用的puts函数，它使用了puts@plt标号。要分析这段汇编代码，需要先了解2个基本概念：GOT（global offset table）和PLT（procedure linkage table）

GOT

当程序引用某个动态库中的符号时（如puts()函数），编译链接阶段并不知道这个符号在内存中的具体位置，只有在动态链接器将共享库加载到内存后，即在运行阶段，符号地址才会最终确定。因此要有一个结构来保存符号的绝对地址，这就是GOT。这样通过表中的某一项，就可以引用某符号的地址。

GOT表前3项是保留项，用于保存特殊的数据结构地址，其中GOT[1]保存共享库列表地址，上文提到“解释器”加载的所有共享库以列表形式组织。GOT[2]保存函数_dl_runtime_resolve的地址，这个函数的主要作用是找到某个符号的地址，并把它写到相应GOT项中，然后将控制转移到目标函数。

PLT

在编译链接时，链接器不能将控制从一个可执行文件或共享库文件转到另外一个，因为如前面所说的，这时函数地址还未确定。因此链接器将控制转移到PLT中的一项，PLT通过引用GOT的绝对地址，实现控制转移。

实际通过objdump查看ELF文件时可以看到，GOT表位于名称为.got.plt的section中，PLT表位于名称为.plt的section中。

21 .got          00000008  0000000000600ff8  0000000000600ff8  00000ff8  2**3

                 CONTENTS, ALLOC, LOAD, DATA

22 .got.plt      00000030  0000000000601000  0000000000601000  00001000  2**3

                 CONTENTS, ALLOC, LOAD, DATA

回到上面的汇编代码，我们看一下puts@plt是什么内容：

ezreal@ez:~/workdir$ objdump -d hello

...

Disassembly of section .plt:

 

0000000000400400 <puts@plt-0x10>:

  400400:   ff 35 02 0c 20 00       pushq  0x200c02(%rip)        # 601008 <_GLOBAL_OFFSET_TABLE_+0x8>

  400406:   ff 25 04 0c 20 00       jmpq   *0x200c04(%rip)        # 601010 <_GLOBAL_OFFSET_TABLE_+0x10>

  40040c:   0f 1f 40 00             nopl   0x0(%rax)

 

0000000000400410 <puts@plt>:

  400410:   ff 25 02 0c 20 00       jmpq   *0x200c02(%rip)        # 601018 <_GLOBAL_OFFSET_TABLE_+0x18>

  400416:   68 00 00 00 00          pushq  $0x0

  40041b:   e9 e0 ff ff ff          jmpq   400400 <_init+0x20>

 

0000000000400420 <__libc_start_main@plt>:

  400420:   ff 25 fa 0b 20 00       jmpq   *0x200bfa(%rip)        # 601020 <_GLOBAL_OFFSET_TABLE_+0x20>

  400426:   68 01 00 00 00          pushq  $0x1

  40042b:   e9 d0 ff ff ff          jmpq   400400 <_init+0x20>

 

0000000000400430 <__gmon_start__@plt>:

  400430:   ff 25 f2 0b 20 00       jmpq   *0x200bf2(%rip)        # 601028 <_GLOBAL_OFFSET_TABLE_+0x28>

  400436:   68 02 00 00 00          pushq  $0x2

  40043b:   e9 c0 ff ff ff          jmpq   400400 <_init+0x20>

我们看到puts@plt包含3条指令，程序中所有对puts的调用都会先来到这里。还可以看出除了PLT0（puts@plt-0x10标号）外，其余PLT项形式都是一样的，最后的jmpq指令都是跳转到400400即PLT0处。整个PLT表就像一个数组，除PLT0外，每一项的第一条指令都是一个间接跳转。以puts@plt为例，从0x200c02(%rip)处的注释可以看到，这条指令跳转到GOT中地址为0x601018的一项所保存的地址（RIP相对寻址模式：下一条指令地址0x400416 + 0x200c02 = 0x601018）。在符号尚未解析时，该GOT项的初始内容是0x400416，也即puts@plt的第二条指令。

秒客网

ELF文件加载与动态链接（一）

ELF文件加载过程分析

ELF文件符号的动态解析

相关文章