Linux启动过程的内核代码分析

时间:2021-09-11 04:58:33

参考上文:

http://www.cnblogs.com/long123king/p/3543872.html

http://www.cnblogs.com/long123king/p/3545688.html

 

补充:linker script documentation

http://www.nacad.ufrj.br/online/sgi/860-0247-001/sgi_html/ldLinker_scripts.html

参考:http://blog.chinaunix.net/uid-20499746-id-1663135.html

http://blog.csdn.net/redredbird/article/details/5986035

 

同类文章参考:

http://blog.chinaunix.net/uid-1701789-id-148056.html

http://www.cnblogs.com/cybertitan/archive/2012/09/29/2708184.html

1. 内核代码的布局

我们知道,内核代码被加载到物理内存1MB处,然后in_pm32跳转到1MB物理内存处执行。

那么1MB物理内存处存放的是什么代码呢?

 

我们先看一个链接器的脚本文件arch/x86/boot/compressed/vmlinux.lds.S

   1: #include <asm-generic/vmlinux.lds.h>
   2:  
   3: OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
   4:  
   5: #undef i386
   6:  
   7: #include <asm/cache.h>
   8: #include <asm/page_types.h>
   9:  
  10: #ifdef CONFIG_X86_64
  11: OUTPUT_ARCH(i386:x86-64)
  12: ENTRY(startup_64)
  13: #else
  14: OUTPUT_ARCH(i386)
  15: ENTRY(startup_32)
  16: #endif
  17:  
  18: SECTIONS
  19: {
  20:     /* Be careful parts of head_64.S assume startup_32 is at
  21:      * address 0.
  22:      */
  23:     . = 0;
  24:     .head.text : {
  25:         _head = . ;
  26:         HEAD_TEXT
  27:         _ehead = . ;
  28:     }
  29:     .rodata..compressed : {
  30:         *(.rodata..compressed)
  31:     }
  32:     .text :    {
  33:         _text = .;     /* Text */
  34:         *(.text)
  35:         *(.text.*)
  36:         _etext = . ;
  37:     }
  38:     .rodata : {
  39:         _rodata = . ;
  40:         *(.rodata)     /* read-only data */
  41:         *(.rodata.*)
  42:         _erodata = . ;
  43:     }
  44:     .got : {
  45:         _got = .;
  46:         KEEP(*(.got.plt))
  47:         KEEP(*(.got))
  48:         _egot = .;
  49:     }
  50:     .data :    {
  51:         _data = . ;
  52:         *(.data)
  53:         *(.data.*)
  54:         _edata = . ;
  55:     }
  56:     . = ALIGN(L1_CACHE_BYTES);
  57:     .bss : {
  58:         _bss = . ;
  59:         *(.bss)
  60:         *(.bss.*)
  61:         *(COMMON)
  62:         . = ALIGN(8);    /* For convenience during zeroing */
  63:         _ebss = .;
  64:     }
  65: #ifdef CONFIG_X86_64
  66:        . = ALIGN(PAGE_SIZE);
  67:        .pgtable : {
  68:         _pgtable = . ;
  69:         *(.pgtable)
  70:         _epgtable = . ;
  71:     }
  72: #endif
  73:     _end = .;
  74: }

可见,在vmlinux即内核映像的0地址处存放的是.head.text段。

#define __HEAD        .section    ".head.text","ax"

 

因此,我们找到下面的代码:arch/x86/boot/compressed/head_32.S

   1:  
   2:     __HEAD
   3: ENTRY(startup_32)
   4:     cld
   5:     /*
   6:      * Test KEEP_SEGMENTS flag to see if the bootloader is asking
   7:      * us to not reload segments
   8:      */
   9:     testb    $(1<<6), BP_loadflags(%esi)
  10:     jnz    1f
  11:  
  12:     cli
  13:     movl    $__BOOT_DS, %eax
  14:     movl    %eax, %ds
  15:     movl    %eax, %es
  16:     movl    %eax, %fs
  17:     movl    %eax, %gs
  18:     movl    %eax, %ss
  19: 1:

以及arch/x86/kernel/head_32.S

   1: /*
   2:  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
   3:  * %esi points to the real-mode code as a 32-bit pointer.
   4:  * CS and DS must be 4 GB flat segments, but we don't depend on
   5:  * any particular GDT layout, because we load our own as soon as we
   6:  * can.
   7:  */
   8: __HEAD
   9: ENTRY(startup_32)
  10:     movl pa(stack_start),%ecx
  11:     
  12:     /* test KEEP_SEGMENTS flag to see if the bootloader is asking
  13:         us to not reload segments */
  14:     testb $(1<<6), BP_loadflags(%esi)
  15:     jnz 2f
  16:  
  17: /*
  18:  * Set segments to known values.
  19:  */
  20:     lgdt pa(boot_gdt_descr)
  21:     movl $(__BOOT_DS),%eax
  22:     movl %eax,%ds
  23:     movl %eax,%es
  24:     movl %eax,%fs
  25:     movl %eax,%gs
  26:     movl %eax,%ss
  27: 2:
  28:     leal -__PAGE_OFFSET(%ecx),%esp

那么这二者有什么先后顺序吗?


原来,思路到了这里是走进了一个误区:arch/x86/boot/compressed目录下存放的并不是内核本身,而是负责解压缩内核的引导代码(链接时把压缩后的内核作为数据放在.rodata..compressed段中),主要功能是解压缩内核;而arch/x86/kernel目录下的才是真正的内核代码。

在/kernel目录下也有一个链接器的脚本文件arch/x86/kernel/vmlinux.lds.S

   1: /*
   2:  * ld script for the x86 kernel
   3:  *
   4:  * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
   5:  *
   6:  * Modernisation, unification and other changes and fixes:
   7:  *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>
   8:  *
   9:  *
  10:  * Don't define absolute symbols until and unless you know that symbol
  11:  * value is should remain constant even if kernel image is relocated
  12:  * at run time. Absolute symbols are not relocated. If symbol value should
  13:  * change if kernel is relocated, make the symbol section relative and
  14:  * put it inside the section definition.
  15:  */
  16:  
  17: #ifdef CONFIG_X86_32
  18: #define LOAD_OFFSET __PAGE_OFFSET
  19: #else
  20: #define LOAD_OFFSET __START_KERNEL_map
  21: #endif
  22:  
  23: #include <asm-generic/vmlinux.lds.h>
  24: #include <asm/asm-offsets.h>
  25: #include <asm/thread_info.h>
  26: #include <asm/page_types.h>
  27: #include <asm/cache.h>
  28: #include <asm/boot.h>
  29:  
  30: #undef i386     /* in case the preprocessor is a 32bit one */
  31:  
  32: OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
  33:  
  34: #ifdef CONFIG_X86_32
  35: OUTPUT_ARCH(i386)
  36: ENTRY(phys_startup_32)
  37: jiffies = jiffies_64;
  38: #else
  39: OUTPUT_ARCH(i386:x86-64)
  40: ENTRY(phys_startup_64)
  41: jiffies_64 = jiffies;
  42: #endif
  43:  
  44: #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
  45: /*
  46:  * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
  47:  * we retain large page mappings for boundaries spanning kernel text, rodata
  48:  * and data sections.
  49:  *
  50:  * However, kernel identity mappings will have different RWX permissions
  51:  * to the pages mapping to text and to the pages padding (which are freed) the
  52:  * text section. Hence kernel identity mappings will be broken to smaller
  53:  * pages. For 64-bit, kernel text and kernel identity mappings are different,
  54:  * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
  55:  * as well as retain 2MB large page mappings for kernel text.
  56:  */
  57: #define X64_ALIGN_DEBUG_RODATA_BEGIN    . = ALIGN(HPAGE_SIZE);
  58:  
  59: #define X64_ALIGN_DEBUG_RODATA_END                \
  60:         . = ALIGN(HPAGE_SIZE);                \
  61:         __end_rodata_hpage_align = .;
  62:  
  63: #else
  64:  
  65: #define X64_ALIGN_DEBUG_RODATA_BEGIN
  66: #define X64_ALIGN_DEBUG_RODATA_END
  67:  
  68: #endif
  69:  
  70: PHDRS {
  71:     text PT_LOAD FLAGS(5);          /* R_E */
  72:     data PT_LOAD FLAGS(6);          /* RW_ */
  73: #ifdef CONFIG_X86_64
  74:     user PT_LOAD FLAGS(5);          /* R_E */
  75: #ifdef CONFIG_SMP
  76:     percpu PT_LOAD FLAGS(6);        /* RW_ */
  77: #endif
  78:     init PT_LOAD FLAGS(7);          /* RWE */
  79: #endif
  80:     note PT_NOTE FLAGS(0);          /* ___ */
  81: }
  82:  
  83: SECTIONS
  84: {
  85: #ifdef CONFIG_X86_32
  86:         . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
  87:         phys_startup_32 = startup_32 - LOAD_OFFSET;
  88: #else
  89:         . = __START_KERNEL;
  90:         phys_startup_64 = startup_64 - LOAD_OFFSET;
  91: #endif
  92:  
  93:     /* Text and read-only data */
  94:     .text :  AT(ADDR(.text) - LOAD_OFFSET) {
  95:         _text = .;
  96:         /* bootstrapping code */
  97:         HEAD_TEXT
  98: #ifdef CONFIG_X86_32
  99:         . = ALIGN(PAGE_SIZE);
 100:         *(.text..page_aligned)
 101: #endif
 102:         . = ALIGN(8);
 103:         _stext = .;
 104:         TEXT_TEXT
 105:         SCHED_TEXT
 106:         LOCK_TEXT
 107:         KPROBES_TEXT
 108:         ENTRY_TEXT
 109:         IRQENTRY_TEXT
 110:         *(.fixup)
 111:         *(.gnu.warning)
 112:         /* End of text section */
 113:         _etext = .;
 114:     } :text = 0x9090
 115:  
 116:     NOTES :text :note
 117:  
 118:     EXCEPTION_TABLE(16) :text = 0x9090
 119:  
 120: #if defined(CONFIG_DEBUG_RODATA)
 121:     /* .text should occupy whole number of pages */
 122:     . = ALIGN(PAGE_SIZE);
 123: #endif
 124:     X64_ALIGN_DEBUG_RODATA_BEGIN
 125:     RO_DATA(PAGE_SIZE)
 126:     X64_ALIGN_DEBUG_RODATA_END
 127:  
 128:     /* Data */
 129:     .data : AT(ADDR(.data) - LOAD_OFFSET) {
 130:         /* Start of data section */
 131:         _sdata = .;
 132:  
 133:         /* init_task */
 134:         INIT_TASK_DATA(THREAD_SIZE)
 135:  
 136: #ifdef CONFIG_X86_32
 137:         /* 32 bit has nosave before _edata */
 138:         NOSAVE_DATA
 139: #endif
 140:  
 141:         PAGE_ALIGNED_DATA(PAGE_SIZE)
 142:  
 143:         CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
 144:  
 145:         DATA_DATA
 146:         CONSTRUCTORS
 147:  
 148:         /* rarely changed data like cpu maps */
 149:         READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
 150:  
 151:         /* End of data section */
 152:         _edata = .;
 153:     } :data
 154:  
 155: #ifdef CONFIG_X86_64
 156:  
 157: #define VSYSCALL_ADDR (-10*1024*1024)
 158:  
 159: #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
 160: #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
 161:  
 162: #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
 163: #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
 164: #define EMIT_VVAR(x, offset) .vsyscall_var_ ## x    \
 165:     ADDR(.vsyscall_0) + offset             \
 166:     : AT(VLOAD(.vsyscall_var_ ## x)) {             \
 167:         *(.vsyscall_var_ ## x)            \
 168:     }                        \
 169:     x = VVIRT(.vsyscall_var_ ## x);
 170:  
 171:     . = ALIGN(4096);
 172:     __vsyscall_0 = .;
 173:  
 174:     . = VSYSCALL_ADDR;
 175:     .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
 176:         *(.vsyscall_0)
 177:     } :user
 178:  
 179:     . = ALIGN(L1_CACHE_BYTES);
 180:     .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
 181:         *(.vsyscall_fn)
 182:     }
 183:  
 184:     .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
 185:         *(.vsyscall_1)
 186:     }
 187:     .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
 188:         *(.vsyscall_2)
 189:     }
 190:  
 191:     .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
 192:         *(.vsyscall_3)
 193:     }
 194:  
 195: #define __VVAR_KERNEL_LDS
 196: #include <asm/vvar.h>
 197: #undef __VVAR_KERNEL_LDS
 198:  
 199:     . = __vsyscall_0 + PAGE_SIZE;
 200:  
 201: #undef VSYSCALL_ADDR
 202: #undef VLOAD_OFFSET
 203: #undef VLOAD
 204: #undef VVIRT_OFFSET
 205: #undef VVIRT
 206: #undef EMIT_VVAR
 207:  
 208: #endif /* CONFIG_X86_64 */
 209:  
 210:     /* Init code and data - will be freed after init */
 211:     . = ALIGN(PAGE_SIZE);
 212:     .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
 213:         __init_begin = .; /* paired with __init_end */
 214:     }
 215:  
 216: #if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
 217:     /*
 218:      * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
 219:      * output PHDR, so the next output section - .init.text - should
 220:      * start another segment - init.
 221:      */
 222:     PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
 223: #endif
 224:  
 225:     INIT_TEXT_SECTION(PAGE_SIZE)
 226: #ifdef CONFIG_X86_64
 227:     :init
 228: #endif
 229:  
 230:     INIT_DATA_SECTION(16)
 231:  
 232:     /*
 233:      * Code and data for a variety of lowlevel trampolines, to be
 234:      * copied into base memory (< 1 MiB) during initialization.
 235:      * Since it is copied early, the main copy can be discarded
 236:      * afterwards.
 237:      */
 238:      .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) {
 239:         x86_trampoline_start = .;
 240:         *(.x86_trampoline)
 241:         x86_trampoline_end = .;
 242:     }
 243:  
 244:     .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
 245:         __x86_cpu_dev_start = .;
 246:         *(.x86_cpu_dev.init)
 247:         __x86_cpu_dev_end = .;
 248:     }
 249:  
 250:     /*
 251:      * start address and size of operations which during runtime
 252:      * can be patched with virtualization friendly instructions or
 253:      * baremetal native ones. Think page table operations.
 254:      * Details in paravirt_types.h
 255:      */
 256:     . = ALIGN(8);
 257:     .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
 258:         __parainstructions = .;
 259:         *(.parainstructions)
 260:         __parainstructions_end = .;
 261:     }
 262:  
 263:     /*
 264:      * struct alt_inst entries. From the header (alternative.h):
 265:      * "Alternative instructions for different CPU types or capabilities"
 266:      * Think locking instructions on spinlocks.
 267:      */
 268:     . = ALIGN(8);
 269:     .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
 270:         __alt_instructions = .;
 271:         *(.altinstructions)
 272:         __alt_instructions_end = .;
 273:     }
 274:  
 275:     /*
 276:      * And here are the replacement instructions. The linker sticks
 277:      * them as binary blobs. The .altinstructions has enough data to
 278:      * get the address and the length of them to patch the kernel safely.
 279:      */
 280:     .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 281:         *(.altinstr_replacement)
 282:     }
 283:  
 284:     /*
 285:      * struct iommu_table_entry entries are injected in this section.
 286:      * It is an array of IOMMUs which during run time gets sorted depending
 287:      * on its dependency order. After rootfs_initcall is complete
 288:      * this section can be safely removed.
 289:      */
 290:     .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
 291:         __iommu_table = .;
 292:         *(.iommu_table)
 293:         __iommu_table_end = .;
 294:     }
 295:  
 296:     . = ALIGN(8);
 297:     .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
 298:         __apicdrivers = .;
 299:         *(.apicdrivers);
 300:         __apicdrivers_end = .;
 301:     }
 302:  
 303:     . = ALIGN(8);
 304:     /*
 305:      * .exit.text is discard at runtime, not link time, to deal with
 306:      *  references from .altinstructions and .eh_frame
 307:      */
 308:     .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
 309:         EXIT_TEXT
 310:     }
 311:  
 312:     .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
 313:         EXIT_DATA
 314:     }
 315:  
 316: #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
 317:     PERCPU_SECTION(INTERNODE_CACHE_BYTES)
 318: #endif
 319:  
 320:     . = ALIGN(PAGE_SIZE);
 321:  
 322:     /* freed after init ends here */
 323:     .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
 324:         __init_end = .;
 325:     }
 326:  
 327:     /*
 328:      * smp_locks might be freed after init
 329:      * start/end must be page aligned
 330:      */
 331:     . = ALIGN(PAGE_SIZE);
 332:     .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
 333:         __smp_locks = .;
 334:         *(.smp_locks)
 335:         . = ALIGN(PAGE_SIZE);
 336:         __smp_locks_end = .;
 337:     }
 338:  
 339: #ifdef CONFIG_X86_64
 340:     .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
 341:         NOSAVE_DATA
 342:     }
 343: #endif
 344:  
 345:     /* BSS */
 346:     . = ALIGN(PAGE_SIZE);
 347:     .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 348:         __bss_start = .;
 349:         *(.bss..page_aligned)
 350:         *(.bss)
 351:         . = ALIGN(PAGE_SIZE);
 352:         __bss_stop = .;
 353:     }
 354:  
 355:     . = ALIGN(PAGE_SIZE);
 356:     .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
 357:         __brk_base = .;
 358:         . += 64 * 1024;        /* 64k alignment slop space */
 359:         *(.brk_reservation)    /* areas brk users have reserved */
 360:         __brk_limit = .;
 361:     }
 362:  
 363:     _end = .;
 364:  
 365:         STABS_DEBUG
 366:         DWARF_DEBUG
 367:  
 368:     /* Sections to be discarded */
 369:     DISCARDS
 370:     /DISCARD/ : { *(.eh_frame) }
 371: }
 372:  
 373:  
 374: #ifdef CONFIG_X86_32
 375: /*
 376:  * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
 377:  */
 378: . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
 379:        "kernel image bigger than KERNEL_IMAGE_SIZE");
 380: #else
 381: /*
 382:  * Per-cpu symbols which need to be offset from __per_cpu_load
 383:  * for the boot processor.
 384:  */
 385: #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
 386: INIT_PER_CPU(gdt_page);
 387: INIT_PER_CPU(irq_stack_union);
 388:  
 389: /*
 390:  * Build-time check on the image size:
 391:  */
 392: . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 393:        "kernel image bigger than KERNEL_IMAGE_SIZE");
 394:  
 395: #ifdef CONFIG_SMP
 396: . = ASSERT((irq_stack_union == 0),
 397:            "irq_stack_union is not at start of per-cpu area");
 398: #endif
 399:  
 400: #endif /* CONFIG_X86_32 */
 401:  
 402: #ifdef CONFIG_KEXEC
 403: #include <asm/kexec.h>
 404:  
 405: . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
 406:            "kexec control code size is too big");
 407: #endif
 408:  

 

SECTIONS
{
#ifdef CONFIG_X86_32
       . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
        phys_startup_32 = startup_32 - LOAD_OFFSET;
#else
        . = __START_KERNEL;
        phys_startup_64 = startup_64 - LOAD_OFFSET;
#endif

    /* Text and read-only data */
    .text :  AT(ADDR(.text) - LOAD_OFFSET) {
        _text = .;
        /* bootstrapping code */
        HEAD_TEXT
#ifdef CONFIG_X86_32
        . = ALIGN(PAGE_SIZE);
        *(.text..page_aligned)
#endif
        . = ALIGN(8);
        _stext = .;
        TEXT_TEXT
        SCHED_TEXT
        LOCK_TEXT
        KPROBES_TEXT
        ENTRY_TEXT
        IRQENTRY_TEXT
        *(.fixup)
        *(.gnu.warning)
        /* End of text section */
        _etext = .;
    } :text = 0x9090

其中

   1:  
   2: #ifdef CONFIG_X86_32
   3: #define LOAD_OFFSET __PAGE_OFFSET
   4: #else
   5: #define LOAD_OFFSET __START_KERNEL_map
   6: #endif
   1: /* Physical address where kernel should be loaded. */
   2: #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
   3:                 + (CONFIG_PHYSICAL_ALIGN - 1)) \
   4:                 & ~(CONFIG_PHYSICAL_ALIGN - 1))

config PHYSICAL_START
    hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
    default "0x1000000"
    ---help---
      This gives the physical address where the kernel is loaded.

      If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
      bzImage will decompress itself to above physical address and
      run from there. Otherwise, bzImage will run from the address where
      it has been loaded by the boot loader and will ignore above physical
      address.

[arch/x86/Kconfig]

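因此,SECTIONS开头的语句 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; 将.[当前位置计数器]定位到虚拟地址 __PAGE_OFFSET + LOAD_PHYSICAL_ADDR 处。按上面Kconfig中的默认值CONFIG_PHYSICAL_START=0x1000000计算,即3GB+16MB(0xC1000000)虚拟地址处,对应16MB物理内存处;只有当CONFIG_PHYSICAL_START被配置为0x100000时,才是3GB+1MB(0xC0100000)虚拟地址处,对应1MB物理内存处。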

#define HEAD_TEXT  *(.head.text)

 

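因此,可以确认,内核映像起始的物理地址LOAD_PHYSICAL_ADDR处,存放的是arch/x86/kernel/head_32.S中的startup_32函数(它位于.head.text段,被HEAD_TEXT放在.text段的最前面)。而bzImage的保护模式部分被引导程序加载到的1MB物理内存处,对应的是arch/x86/boot/compressed/head_32.S中的startup_32,它负责解压内核,然后再跳转到真正的内核入口。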

 

2. startup_32函数

该函数也是通过汇编定义

#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)

 

/* Number of possible pages in the lowmem region */
LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
   
/* Enough space to fit pagetables for the low memory linear map */
MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT

/*
* Worst-case size of the kernel mapping we need to make:
* a relocatable kernel can live anywhere in lowmem, so we need to be able
* to map all of lowmem.
*/
KERNEL_PAGES = LOWMEM_PAGES

INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE

LOWMEM_PAGES是内核态线性地址空间(从__PAGE_OFFSET到4GB,共1GB)所包含的页数。

((1 << 32) - __PAGE_OFFSET) >> PAGE_SHIFT = (0x100000000 - 0xC0000000) >> 12 = 0x40000000 >> 12 = 0x00040000

即需要0x40000个页表项来表示内核可能用到的地址空间。

PAGE_TABLE_SIZE(0x40000) = 0x40000 / 1024 = 0x100

需要页目录能包含0x100(256)个项目,每个项目用于指定对应的页表的物理地址,每个页目录项为32位。因此需要

0x100 * 4 = 1024bytes = 1KB

来保存内核需要的页目录项目。

接下来,分配存放内核页表的内存空间:

RESERVE_BRK(pagetables, INIT_MAP_SIZE)

 

   1: /*
   2:  * Reserve space in the brk section.  The name must be unique within
   3:  * the file, and somewhat descriptive.  The size is in bytes.  Must be
   4:  * used at file scope.
   5:  *
   6:  * (This uses a temp function to wrap the asm so we can pass it the
   7:  * size parameter; otherwise we wouldn't be able to.  We can't use a
   8:  * "section" attribute on a normal variable because it always ends up
   9:  * being @progbits, which ends up allocating space in the vmlinux
  10:  * executable.)
  11:  */
  12: #define RESERVE_BRK(name,sz)                        \
  13:     static void __section(.discard.text) __used notrace        \
  14:     __brk_reservation_fn_##name##__(void) {                \
  15:         asm volatile (                        \
  16:             ".pushsection .brk_reservation,\"aw\",@nobits;" \
  17:             ".brk." #name ":"                \
  18:             " 1:.skip %c0;"                    \
  19:             " .size .brk." #name ", . - 1b;"        \
  20:             " .popsection"                    \
  21:             : : "i" (sz));                    \
  22:     }

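相当于在.brk_reservation段中保留一块以.brk.pagetables为标号的空间,大小为INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE = 0x100 * 4096 bytes = 1MB,即256张页表、每张占一个4KB的页(注意不是上面算出的页目录项所占的1KB)。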

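下面这段代码,检查bootloader有没有通过loadflags中的KEEP_SEGMENTS标志(第6位)明确地指示不要重新设置各个段选择子的内容;如果该标志没有置位,就先用lgdt加载boot_gdt_descr,再将各个数据段选择子(ds、es、fs、gs、ss)都重置为__BOOT_DS段选择子;如果置位,则直接跳到标号2处,跳过这段设置。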

   1: /* test KEEP_SEGMENTS flag to see if the bootloader is asking
   2:     us to not reload segments */
   3: testb $(1<<6), BP_loadflags(%esi)
   4: jnz 2f
   5:  
   6: /*
   7:  * Set segments to known values.
   8:  */
   9: lgdt pa(boot_gdt_descr)
  10: movl $(__BOOT_DS),%eax
  11: movl %eax,%ds
  12: movl %eax,%es
  13: movl %eax,%fs
  14: movl %eax,%gs
  15: movl %eax,%ss

startup_32的第一条指令movl pa(stack_start),%ecx所引用的标号stack_start,定义在arch/x86/kernel/head_32.S的.data section中:

   1: .data
   2: .balign 4
   3: ENTRY(stack_start)
   4:     .long init_thread_union+THREAD_SIZE
   5:  
   6: early_recursion_flag:
   7:     .long 0
   8:  
   9: ready:    .byte 0
  10:  
  11: int_msg:
  12:     .asciz "Unknown interrupt or fault at: %p %p %p\n"
  13:  
  14: fault_msg:
  15: /* fault info: */
  16:     .ascii "BUG: Int %d: CR2 %p\n"
  17: /* pusha regs: */
  18:     .ascii "     EDI %p  ESI %p  EBP %p  ESP %p\n"
  19:     .ascii "     EBX %p  EDX %p  ECX %p  EAX %p\n"
  20: /* fault frame: */
  21:     .ascii "     err %p  EIP %p   CS %p  flg %p\n"
  22:     .ascii "Stack: %p %p %p %p %p %p %p %p\n"
  23:     .ascii "       %p %p %p %p %p %p %p %p\n"
  24:     .asciz "       %p %p %p %p %p %p %p %p\n"
  25:  
  26: #include "../../x86/xen/xen-head.S"