由内核Makefile分析可知,
文件linux/arch/arm/boot/compressed/head.S是linux内核启动过程执行的第一个文件。
.align
start:
.type
start,#function //type指定start这个符号是函数类型
.rept
8
mov
r0, r0 //空操作,重复八次
.endr
b
1f //跳转
.word
0x016f2818
@ Magic numbers to help the loader
//魔数0x016f2818是在bootloader中用于判断zImage的存在,这是内核和bootloader约定好的。
.word
start
@ absolute load/run zImage address
.word
_edata
@ zImage end address
1:
mov
r7, r1
@ save architecture ID //bootloader 传递过来的r1 r2
mov
r8, r2
@ save atags pointer
//r1和r2中分别存放着由bootloader传递过来的architecture ID和指向标记列表的指针
#ifndef __ARM_ARCH_2__
/*
* Booting from Angel - need to enter
SVC mode and disable
* FIQs/IRQs (numeric definitions from angel arm.h source).
* We only do this if we were in user mode on entry.
*/
mrs
r2, cpsr
@ get current mode
tst
r2, #3
@ not user?
bne
not_angel
mov
r0, #0x17
@ angel_SWIreason_EnterSVC
swi
0x123456
@ angel_SWI_ARM
not_angel:
mrs
r2, cpsr
@ turn off interrupts to
orr
r2, r2, #0xc0
@ prevent angel from running
msr
cpsr_c, r2 //关闭IRQ FIQ
#else
teqp
pc, #0x0c000003
@ turn off interrupts
#endif
.text
adr
r0,
LC0 //下面有解释
ldmia
r0, {r1, r2, r3, r4, r5, r6, ip, sp}
subs
r0, r0, r1
@ calculate the delta offset 获取偏移量
@ if delta is zero, we are
beq
not_relocated
@ running at the address we
@ were linked at.
//如果delta为0,说明当前运行地址就是链接地址,无需重定位,直接跳转到not_relocated;否则(通常都是这种情况)需要执行下面的地址修正
/*
* We're running at a different address. We need to fix
* up various pointers:
* r5 - zImage base address
* r6 - GOT start
* ip - GOT end
*/
add
r5, r5, r0 //修改内核映像基地址
add
r6, r6, r0 //修改got表的起始和结束地址
add
ip, ip, r0
/*
其中LC0表是链接文件(arch/arm/boot/compressed/vmlinux.lds)的各段入口。源码如下:
OUTPUT_ARCH(arm)
ENTRY(_start)
SECTIONS
{
. = 0;
_text = .;
.text : {
_start = .;
*(.start)
*(.text)
*(.text.*)
*(.fixup)
*(.gnu.warning)
*(.rodata)
*(.rodata.*)
*(.glue_7)
*(.glue_7t)
*(.piggydata)
. = ALIGN(4);
}
_etext = .;
_got_start = .;
.got
: { *(.got) }
_got_end = .;
.got.plt
: { *(.got.plt) }
.data
: { *(.data) }
_edata = .;
. = ALIGN(4);
__bss_start = .;
.bss
: { *(.bss) }
_end = .;
.stack (NOLOAD)
: { *(.stack) }
.stab 0
: { *(.stab) }
.stabstr 0
: { *(.stabstr) }
.stab.excl 0
: { *(.stab.excl) }
.stab.exclstr 0
: { *(.stab.exclstr) }
.stab.index 0
: { *(.stab.index) }
.stab.indexstr 0
: { *(.stab.indexstr) }
.comment 0
: { *(.comment) }
}
依次是.text .got .data .bss .stack .stab,另外链接地址都是位置无关的,即都是以0地址为基址的偏移。
而此时内核已被bootloader搬移到实际的运行地址,所以这些链接地址都应该加上运行地址与链接地址之间的偏移量。
*/
#ifndef CONFIG_ZBOOT_ROM
/*
* If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
* we need to fix up pointers into the BSS region.
* r2 - BSS start
* r3 - BSS end
* sp - stack pointer
*/
add
r2, r2, r0 //修改bss段和堆栈的地址
add
r3, r3, r0
add
sp, sp, r0
/*
* Relocate all entries in the GOT table.
*/
1:
ldr
r1, [r6, #0]
@ relocate entries in the GOT 重定位got表
add
r1, r1, r0
@ table. This fixes up the
str
r1, [r6], #4
@ C references.
cmp
r6, ip
blo
1b //blo:无符号小于则跳转(bhs:无符号大于等于跳转,bls:无符号小于等于跳转)
#else
/*
* Relocate entries in the GOT table. We only relocate
* the entries that are outside the (relocated) BSS region.
*/
1:
ldr
r1, [r6, #0]
@ relocate entries in the GOT
cmp
r1, r2
@ entry < bss_start ||
cmphs
r3, r1
@ _end < entry
addlo
r1, r1, r0
@ table. This fixes up the
str
r1, [r6], #4
@ C references.
cmp
r6, ip
blo
1b
#endif
not_relocated:
mov
r0, #0
1:
str
r0, [r2], #4
@ clear bss 清bss段,所有arm程序都需要做这些
str
r0, [r2], #4
str
r0, [r2], #4
str
r0, [r2], #4
cmp
r2, r3
blo
1b
/*
* The C runtime environment should now be setup
* sufficiently. Turn the cache on, set up some
* pointers, and start decompressing.
*/
bl
cache_on //打开cache
mov
r1, sp
@ malloc space above stack
add
r2, sp, #0x10000
@ 64k max 解压函数需要的内存缓存
/*
* Check to see if we will overwrite ourselves.
*
r4 = final kernel address
*
r5 = start of this image
*
r2 = end of malloc space (and therefore this image)
* We basically want:
* r4 >= r2 -> OK
* r4 + image length <= r5 -> OK
*/
cmp
r4, r2 //r4是内核最终的执行地址,r2是刚分配给解压函数的64K缓存的结束地址(也就是本映像的末尾);若r4>=r2,解压不会覆盖当前映像,直接跳到wont_overwrite,否则继续做下面的检查
bhs
wont_overwrite //branch if higher or same
sub
r3, sp, r5
@ > compressed kernel size 得到映像大小
add
r0, r4, r3, lsl #2
@ allow for 4x expansion 将这个大小乘以4
cmp
r0, r5
bls
wont_overwrite
mov
r5, r2
@ decompress after malloc space
mov
r0, r5 //r5为映像解压后的起始地址,紧接着r2
mov
r3, r7 //r7中存放的是architecture ID
bl
decompress_kernel
add
r0, r0, #127 + 128
@ alignment + stack
bic
r0, r0, #127
@ align the kernel length 清除低7位(bic #127),配合上面加的127,使长度向上按128字节对齐
/*
其中 decompress_kernel解压函数在arch/arm/boot/compressed/misc.c中。
ulg
decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
int arch_id)
{
output_data
= (uch *)output_start;
/* Points to kernel start */解压后内核输出的起始地址
free_mem_ptr
= free_mem_ptr_p; 解压函数缓存的起始地址
free_mem_ptr_end
= free_mem_ptr_end_p; 解压函数缓存的结束地址
__machine_arch_type
= arch_id; 体系结构ID
arch_decomp_setup();
makecrc();
putstr("Uncompressing Linux...");
gunzip();
putstr(" done, booting the kernel.\n"); //这个打印信息是不是很熟悉啊
return output_ptr;
}
*/
此时(decompress_kernel返回并完成对齐之后),
r0:调用前存放映像解压后的起始地址(mov r0, r5),返回后为解压后内核的长度(已加上栈空间并按128字节对齐)
r1:解压函数缓存的起始地址
r2:解压函数缓存的结束地址
r3:原来存映像大小,后来存放体系结构ID(mov r3, r7)
r4:内核执行地址
r5:映像解压后的起始地址(mov r5, r2)
r6:处理器ID
r7:体系结构ID
r8:标记列表地址
r9-r14:corrupted
所以decompress_kernel函数带入的参数就是r0-r3
add
r1, r5, r0
@ end of decompressed kernel r1:解压后内核代码的结束地址
adr
r2, reloc_start //设定重定位代码的开始地址和结束地址
ldr
r3, LC1
add
r3, r2, r3
1:
ldmia
r2!, {r9 - r14}
@ copy relocation code 拷贝内核重定位的代码,不至于被覆盖
stmia
r1!, {r9 - r14}
ldmia
r2!, {r9 - r14}
stmia
r1!, {r9 - r14}
cmp
r2, r3
blo
1b
add
sp, r1, #128
@ relocate the stack 改变堆栈指针
bl
cache_clean_flush //刷新cache
add
pc, r5, r0
@ call relocation code 跳转到刚才拷贝到解压后内核末尾的重定位代码
/*
* We're not in danger of overwriting ourselves. Do this the simple way.
*
* r4 = kernel execution address
* r7 = architecture ID
*/
wont_overwrite:
		/*
		 * Reached from either overlap check when decompressing to the
		 * final address cannot overwrite this image.  This is about
		 * overlap, not about whether the bootloader moved the image.
		 * decompress_kernel(output_start, free_mem_ptr_p,
		 * free_mem_ptr_end_p, arch_id) takes r0-r3; r1 and r2 were
		 * set up before the branch to this label.
		 */
		mov	r0, r4			@ output_start = kernel execution address
		mov	r3, r7			@ arch_id = architecture ID
		bl	decompress_kernel
		b	call_kernel		@ no relocation copy needed
/*
 * reloc_start - copy the decompressed kernel from r5 to r4.
 *
 * This code is itself copied behind the decompressed kernel before it
 * is entered.  On entry (per the caller in this file):
 *   r0 = decompressed length, padded for the stack and 128-byte aligned
 *   r4 = kernel execution address (copy destination)
 *   r5 = start of the decompressed kernel (copy source)
 * Falls through into call_kernel, which follows this block.
 */
		.align	5
reloc_start:
		add	r9, r5, r0		@ r9 = source start + padded length
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4			@ r1 = running destination pointer
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r14}	@ 8 regs x 4 reps = 128 bytes per pass
		.endr

		cmp	r5, r9			@ copied up to the end marker?
		blo	1b			@ unsigned lower: keep copying
		add	sp, r1, #128		@ relocate the stack
		debug_reloc_end
/*
 * call_kernel - hand over to the decompressed kernel at r4.
 * Restores the values saved at entry: r7 (architecture ID) and
 * r8 (atags pointer) go back into r1 and r2.
 */
call_kernel:
		bl	cache_clean_flush
		bl	cache_off		@ turn the cache off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel
//内核映像zImage是由压缩后的内核piggy.o,加上一段初始化及解压功能的代码组成的。
linux/arch/arm/kernel/head.S是linux内核映像解压后执行的第一个文件。
//PAGE_OFFSET = 0xc0000000; TEXT_OFFSET = 0x00008000;
//PHYS_OFFSET = 0xa0000000;
#define KERNEL_RAM_VADDR
(PAGE_OFFSET + TEXT_OFFSET)
#define KERNEL_RAM_PADDR
(PHYS_OFFSET + TEXT_OFFSET)
/*
链接脚本文件arch/arm/kernel/vmlinux.lds指定了编译时程序段存放的位置。
OUTPUT_ARCH(arm)
ENTRY(stext) //这里对应head.S中的ENTRY(stext)
jiffies = jiffies_64;
SECTIONS
{
. = (0xc0000000) + 0x00008000;
.text.head : {
_stext = .;
_sinittext = .;
*(.text.head)
}
.init : { /* Init code and data
*/
*(.init.text) *(.cpuinit.text) *(.meminit.text)
_einittext = .;
__proc_info_begin = .;
*(.proc.info.init)
__proc_info_end = .;
__arch_info_begin = .;
*(.arch.info.init)
__arch_info_end = .;
__tagtable_begin = .;
*(.taglist.init)
__tagtable_end = .;
. = ALIGN(16);
__setup_start = .;
*(.init.setup)
__setup_end = .;
__early_begin = .;
*(.early_param.init)
__early_end = .;
__initcall_start = .;
……
}
*/
/*
.section是GNU ASM的语法。格式如下:
.section name[,"flags"[,@type]] 其中,name是必须的,flags是可选。
"ax"表示:a为section is allocatable,x为executable。
*/
/*
 * stext - kernel entry point, the first code run after decompression.
 * It lives in section .text.head, flagged allocatable ("a") and
 * executable ("x").
 *
 * Register use visible below:
 *   r9  = processor ID read from CP15
 *   r10 = procinfo pointer (copy of r5 after __lookup_processor_type)
 *   r8  = machinfo pointer (copy of r5 after __lookup_machine_type)
 */
	.section ".text.head", "ax"
	.type	stext, %function
ENTRY(stext)					@ kernel entry point
	@ MSR copies a value into a status register.  The _c suffix selects
	@ the control field of the CPSR (mode bits and the I/F mask bits).
	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
						@ and irqs disabled
	mrc	p15, 0, r9, c0, c0		@ get processor id
	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
						@ r5 = 0 if the CPU is not supported
	movs	r10, r5				@ invalid processor (r5=0)?
	beq	__error_p			@ yes, error 'p'
	bl	__lookup_machine_type		@ r5=machinfo
						@ r5 = 0 if the board is not supported
	movs	r8, r5				@ invalid machine (r5=0)?
	beq	__error_a			@ yes, error 'a'
	bl	__vet_atags			@ validate the atags list from the bootloader
	bl	__create_page_tables		@ build the initial page tables

	/*
	 * The following calls CPU specific code in a position independent
	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
	 * xxx_proc_info structure selected by __lookup_processor_type
	 * above.  On return, the CPU will be ready for the MMU to be
	 * turned on, and r0 will hold the CPU control register value.
	 */
	ldr	r13, __switch_data		@ address to jump to after
						@ mmu has been enabled
						@ (loads the first word stored at
						@ __switch_data, i.e. the address of
						@ __mmap_switched, not the table address)
	adr	lr, __enable_mmu		@ return (PIC) address
	add	pc, r10, #PROCINFO_INITFUNC	@ jump into the procinfo entry at
						@ offset PROCINFO_INITFUNC; r10 is the
						@ procinfo pointer saved by movs r10, r5
ENTRY(secondary_startup)
……
第二个cpu的检测和设置。
下面主要来说说
__lookup_processor_type和
__lookup_machine_type。它们都在/arch/arm/kernel/head-common.S实现。
__lookup_processor_type 检测CPU
内核支持的,每一种CPU类型都由结构体proc_info_list来描述,在linux/include/asm-arm/procinfo.h定义
struct proc_info_list {
unsigned int
cpu_val;
unsigned int
cpu_mask;
unsigned long
__cpu_mm_mmu_flags;
/* used by head.S */
unsigned long
__cpu_io_mmu_flags;
/* used by head.S */
unsigned long
__cpu_flush;
/* used by head.S */
const char
*arch_name;
const char
*elf_name;
unsigned int
elf_hwcap;
const char
*cpu_name;
struct processor
*proc;
struct cpu_tlb_fns
*tlb;
struct cpu_user_fns
*user;
struct cpu_cache_fns
*cache;
};
对于我们的CPU来说,其对应的结构体在文件arch/arm/mm/proc-xsc3.S中
.section ".proc.info.init", #alloc, #execinstr
.type
__xsc3_proc_info,#object
__xsc3_proc_info:
.long
0x69056000
.long
0xffffe000
.long
PMD_TYPE_SECT | \
PMD_SECT_BUFFERABLE | \
PMD_SECT_CACHEABLE | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
.long
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
b
__xsc3_setup
.long
cpu_arch_name
.long
cpu_elf_name
.long
HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
.long
cpu_xsc3_name
.long
xsc3_processor_functions
.long
v4wbi_tlb_fns
.long
xsc3_mc_user_fns
.long
xsc3_cache_fns
.size
__xsc3_proc_info, . - __xsc3_proc_info
不同的proc_info_list结构被用来支持不同的CPU,它们都定义在“.proc.info.init”段中。在链接文件arch/arm/kernel/vmlinux.lds中可知,
__proc_info_begin = .;
*(.proc.info.init)
__proc_info_end = .;
所有CPU类型对应的被初始化的proc_info_list结构体都放在__proc_info_begin和__proc_info_end之间。
/*
* Read processor ID register (CP#15, CR0), and look up in the linker-built
* supported processor list. Note that we can't use the absolute addresses
* for the __proc_info lists since we aren't running with the MMU on
* (and therefore, we are not in the correct address space). We have to
* calculate the offset.
*
*
r9 = cpuid
* Returns:
*
r3, r4, r6 corrupted
*
r5 = proc_info pointer in physical address space
*
r9 = cpuid (preserved)
*/
/*
 * __lookup_processor_type - find the proc_info_list entry matching r9.
 *
 * In:   r9 = cpuid
 * Out:  r5 = run-time address of the matching entry, or 0 if none
 *       r9 preserved
 * Corrupts r3, r4, r6; r7 is also overwritten by the ldmda below.
 *
 * The three words ending at local label 3 in this file are
 * __proc_info_begin, __proc_info_end and the link-time address of
 * label 3 itself.
 */
	.type	__lookup_processor_type, %function
__lookup_processor_type:
	adr	r3, 3f				@ r3 = run-time address of label 3
	ldmda	r3, {r5 - r7}			@ decrement-after load, no writeback:
						@ r5 = [r3-8], r6 = [r3-4], r7 = [r3]
						@ r3 itself is left unchanged
	sub	r3, r3, r7			@ get offset between virt&phys
	add	r5, r5, r3			@ convert virt addresses to
	add	r6, r6, r3			@ physical address space
1:
	ldmia	r5, {r3, r4}			@ value, mask (cpu_val, cpu_mask)
	and	r4, r4, r9			@ mask wanted bits
	teq	r3, r4
	beq	2f				@ match: return with r5 -> entry
	@ Step to the next entry.  The 13-word proc_info_list shown in this
	@ file would be 52 bytes on 32-bit ARM, not the 48 quoted in the
	@ earlier note.  NOTE(review): confirm PROC_INFO_SZ in asm-offsets.
	add	r5, r5, #PROC_INFO_SZ		@ sizeof(proc_info_list)
	cmp	r5, r6				@ reached the end of the list?
	blo	1b				@ unsigned lower: try the next entry
	mov	r5, #0				@ unknown processor
2:
	mov	pc, lr				@ return to caller
/*
* This provides a C-API version of the above function.
*/
/*
 * lookup_processor_type - C-callable wrapper for __lookup_processor_type.
 * In:  r0 = cpuid
 * Out: r0 = value the lookup leaves in r5 (entry pointer, or 0)
 */
ENTRY(lookup_processor_type)
	stmfd	sp!, {r4 - r7, r9, lr}		@ save the registers the lookup overwrites
	mov	r9, r0				@ the lookup expects cpuid in r9
	bl	__lookup_processor_type
	mov	r0, r5				@ hand the result back in r0
	ldmfd	sp!, {r4 - r7, r9, pc}		@ restore and return (saved lr -> pc)
/*
* Look in include/asm-arm/procinfo.h and arch/arm/kernel/arch.[ch] for
* more information about the __proc_info and __arch_info structures.
*/
.long
__proc_info_begin
.long
__proc_info_end
3:
.long
. //“.”表示当前这行代码编译链接后的虚拟地址
.long
__arch_info_begin
.long
__arch_info_end
__lookup_machine_type 检测开发板
与lookup_processor_type类似,每一个CPU平台都可能有其不一样的结构体,描述这个平台的结构体是machine_desc。这个结构体在文件/include/asm/mach/arch.h中定义
struct machine_desc {
/*
* Note! The first four elements are used
* by assembler code in head.S, head-common.S
*/
unsigned int
nr;
/* architecture number
*/
unsigned int
phys_io;
/* start of physical io
*/
unsigned int
io_pg_offst;
/* byte offset for io
* page tabe entry
*/
const char
*name;
/* architecture name
*/
unsigned long
boot_params;
/* tagged list
*/
unsigned int
video_start;
/* start of video RAM
*/
unsigned int
video_end;
/* end of video RAM
*/
unsigned int
reserve_lp0 :1;
/* never has lp0
*/
unsigned int
reserve_lp1 :1;
/* never has lp1
*/
unsigned int
reserve_lp2 :1;
/* never has lp2
*/
unsigned int
soft_reboot :1;
/* soft reboot
*/
void
(*fixup)(struct machine_desc *,
struct tag *, char **,
struct meminfo *);
void
(*map_io)(void);/* IO mapping function
*/
void
(*init_irq)(void);
struct sys_timer
*timer;
/* system tick timer
*/
void
(*init_machine)(void);
};
对应的结构在文件arch/arm/mach-pxa/littleton.c
MACHINE_START(LITTLETON, "Marvell Form Factor Development Platform (aka Littleton)")
.phys_io
= 0x40000000,
.boot_params
= 0xa0000100,
.io_pg_offst
= (io_p2v(0x40000000) >> 18) & 0xfffc,
.map_io
= pxa_map_io,
.init_irq
= pxa3xx_init_irq,
.timer
= &pxa_timer,
.init_machine
= littleton_init,
MACHINE_END
内核中对于每种支持的开发板都会使用宏MACHINE_START、MACHINE_END来定义一个machine_desc结构,宏MACHINE_START的具体定义也在文件/include/asm/mach/arch.h中。
#define MACHINE_START(_type,_name)
\
static const struct machine_desc __mach_desc_##_type
\
__used
\
__attribute__((__section__(".arch.info.init"))) = {
\
.nr
= MACH_TYPE_##_type,
\
.name
= _name,
#define MACHINE_END
\
};
#endif
/*
* Lookup machine architecture in the linker-build list of architectures.
* Note that we can't use the absolute addresses for the __arch_info
* lists since we aren't running with the MMU on (and therefore, we are
* not in the correct address space). We have to calculate the offset.
*
* r1 = machine architecture number
* Returns:
* r3, r4, r6 corrupted
* r5 = mach_info pointer in physical address space
*/
/*
 * __lookup_machine_type - find the machine_desc whose type equals r1.
 *
 * In:   r1 = machine architecture number
 * Out:  r5 = run-time address of the matching entry, or 0 if none
 * Corrupts r3, r4, r6.
 *
 * The three words starting at local label 3 in this file are the
 * link-time address of label 3, __arch_info_begin and __arch_info_end.
 */
	.type	__lookup_machine_type, %function
__lookup_machine_type:
	adr	r3, 3b				@ r3 = run-time address of label 3
	ldmia	r3, {r4, r5, r6}		@ r4 = [r3], r5 = [r3+4], r6 = [r3+8]
	sub	r3, r3, r4			@ get offset between virt&phys
	add	r5, r5, r3			@ convert virt addresses to
	add	r6, r6, r3			@ physical address space
1:
	ldr	r3, [r5, #MACHINFO_TYPE]	@ get machine type
	teq	r3, r1				@ matches loader number?
	beq	2f				@ found
	add	r5, r5, #SIZEOF_MACHINE_DESC	@ next machine_desc
	cmp	r5, r6				@ reached the end of the list?
	blo	1b				@ unsigned lower: try the next entry
	mov	r5, #0				@ unknown machine
2:
	mov	pc, lr				@ return to caller
/*
* This provides a C-API version of the above function.
*/
/*
 * lookup_machine_type - C-callable wrapper for __lookup_machine_type.
 * In:  r0 = machine architecture number
 * Out: r0 = value the lookup leaves in r5 (entry pointer, or 0)
 */
ENTRY(lookup_machine_type)
	stmfd	sp!, {r4 - r6, lr}		@ save the registers the lookup overwrites
	mov	r1, r0				@ the lookup expects the number in r1
	bl	__lookup_machine_type
	mov	r0, r5				@ hand the result back in r0
	ldmfd	sp!, {r4 - r6, pc}		@ restore and return (saved lr -> pc)
__vet_atags 检测参数列表
//检查bootloader传入的参数列表atags的合法性。
先看看结构体在include/asm-arm/setup.h
struct tag {
struct tag_header hdr;
union {
struct tag_core
core;
struct tag_mem32
mem;
struct tag_videotext
videotext;
struct tag_ramdisk
ramdisk;
struct tag_initrd
initrd;
struct tag_serialnr
serialnr;
struct tag_revision
revision;
struct tag_videolfb
videolfb;
struct tag_cmdline
cmdline;
/*
* Acorn specific
*/
struct tag_acorn
acorn;
/*
* DC21285 specific
*/
struct tag_memclk
memclk;
} u;
};
struct tag_header {
__u32 size;
__u32 tag;
};
//其中 size:表示整个 tag 结构体的大小(用字的个数来表示,而不是字节的个数),等于
tag_header的大小加上 u联合体的大小。
#define ATAG_CORE
0x54410001 atag开始
#define ATAG_NONE
0x00000000 atag结束
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
/* Determine validity of the r2 atags pointer. The heuristic requires
* that the pointer be aligned, in the first 16k of physical RAM and
* that the ATAG_CORE marker is first and present. Future revisions
* of this function may be more lenient with the physical address and
* may also be able to move the ATAGS block if necessary.
*
* r8 = machinfo
*
* Returns:
* r2 either valid atags pointer, or zero
* r5, r6 corrupted
*/
/*
 * __vet_atags - sanity-check the atags pointer in r2.
 *
 * Checks performed here: the pointer is word aligned, the first word
 * (tag_header.size) equals ATAG_CORE_SIZE, and the second word
 * (tag_header.tag) equals ATAG_CORE.
 *
 * Out:  r2 unchanged if all checks pass, otherwise 0
 * Corrupts r5, r6.
 */
	.type	__vet_atags, %function
__vet_atags:
	tst	r2, #0x3			@ aligned?
	bne	1f

	ldr	r5, [r2, #0]			@ is first tag ATAG_CORE?
						@ word 0 = size of the first tag
	subs	r5, r5, #ATAG_CORE_SIZE		@ size must equal ATAG_CORE_SIZE
	bne	1f
	ldr	r5, [r2, #4]			@ word 1 = tag id of the first tag
	ldr	r6, =ATAG_CORE
	cmp	r5, r6				@ first tag must be ATAG_CORE
	bne	1f

	mov	pc, lr				@ atag pointer is ok

1:
	mov	r2, #0				@ reject: report "no atags"
	mov	pc, lr
__create_page_tables 建立页表
/*
* Setup the initial page tables. We only setup the barest
* amount which are required to get the kernel running, which
* generally means mapping in the kernel code.
*
* r8 = machinfo
* r9 = cpuid
* r10 = procinfo
*
* Returns:
* r0, r3, r6, r7 corrupted
* r4 = physical page table address
*/
.type
__create_page_tables, %function
__create_page_tables:
pgtbl
r4
@ page table address 转换表的物理基地址
/*
* Clear the 16K level 1 swapper page table //为内核代码存储区域创建页表,首先将内核起始地址-0x4000~内核起始地址之间的16K 存储器清0,将创建的页表存于此处。
*/
mov
r0, r4
mov
r3, #0
add
r6, r0, #0x4000
1:
str
r3, [r0], #4
str
r3, [r0], #4
str
r3, [r0], #4
str
r3, [r0], #4
teq
r0, r6
bne
1b
ldr
r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags //从proc_info_list结构中获取字段__cpu_mm_mmu_flags,该字段包含了存储空间访问权限等
/*
* Create identity mapping for first MB of kernel to
* cater for the MMU enable. This identity mapping
* will be removed by paging_init(). We use our current program
* counter to determine corresponding section base address.
*/
mov
r6, pc, lsr #20
@ start of kernel section
orr
r3, r7, r6, lsl #20
@ flags + kernel base
str
r3, [r4, r6, lsl #2]
@ identity mapping
/*
* Now setup the pagetables for our kernel direct
* mapped region.
*/
add
r0, r4, #(KERNEL_START & 0xff000000) >> 18
str
r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]! //带写回的前变址:写完后r0指向KERNEL_START对应的页表项地址
ldr
r6, =(KERNEL_END - 1) //获取内核结束地址
add
r0, r0, #4 //r0指向下一个页表项的存放地址(第一个页表项已在上面写入)
add
r6, r4, r6, lsr #18 //计算最后一个地址条目存放的位置
1:
cmp
r0, r6
add
r3, r3, #1 << 20
strls
r3, [r0], #4
bls
1b
#ifdef CONFIG_XIP_KERNEL //如果是XIP内核,还需要另外映射一段RAM,用来存放.data和.bss段
/*
* Map some ram to cover our .data and .bss areas.
*/
orr
r3, r7, #(KERNEL_RAM_PADDR & 0xff000000)
.if
(KERNEL_RAM_PADDR & 0x00f00000)
orr
r3, r3, #(KERNEL_RAM_PADDR & 0x00f00000)
.endif
add
r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> 18
str
r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]!
ldr
r6, =(_end - 1)
add
r0, r0, #4
add
r6, r4, r6, lsr #18
1:
cmp
r0, r6
add
r3, r3, #1 << 20
strls
r3, [r0], #4
bls
1b
#endif
/*
* Then map first 1MB of ram in case it contains our boot params.映射开始的1M空间
*/
add
r0, r4, #PAGE_OFFSET >> 18
orr
r6, r7, #(PHYS_OFFSET & 0xff000000)
.if
(PHYS_OFFSET & 0x00f00000)
orr
r6, r6, #(PHYS_OFFSET & 0x00f00000)
.endif
str
r6, [r0]
#ifdef CONFIG_DEBUG_LL //下面是为了调试而做的相关映射,可以跳过
ldr
r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*/
ldr
r3, [r8, #MACHINFO_PGOFFIO]
add
r0, r4, r3
rsb
r3, r3, #0x4000
@ PTRS_PER_PGD*sizeof(long)
cmp
r3, #0x0800
@ limit to 512MB
movhi
r3, #0x0800
add
r6, r0, r3
ldr
r3, [r8, #MACHINFO_PHYSIO]
orr
r3, r3, r7
1:
str
r3, [r0], #4
add
r3, r3, #1 << 20
teq
r0, r6
bne
1b
#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder or CATS, we also need to map
* in the 16550-type serial port for the debug messages
*/
add
r0, r4, #0xff000000 >> 18
orr
r3, r7, #0x7c000000
str
r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug. This is
* only for Acorn RiscPC architectures.
*/
add
r0, r4, #0x02000000 >> 18
orr
r3, r7, #0x02000000
str
r3, [r0]
add
r0, r4, #0xd8000000 >> 18
str
r3, [r0]
#endif
#endif
mov
pc, lr
.ltorg
setup 禁止cache
在head.S中
add
pc, r10, #PROCINFO_INITFUNC 会跳到proc_info_list结构中__cpu_flush字段所在的位置,对于__xsc3_proc_info,该位置存放的正是指令b __xsc3_setup,所以就是调用__xsc3_setup。
.type
__xsc3_setup, #function
__xsc3_setup:
mov
r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr
cpsr_c, r0
mcr
p15, 0, ip, c7, c7, 0
@ invalidate L1 caches and BTB
mcr
p15, 0, ip, c7, c10, 4
@ data write barrier
mcr
p15, 0, ip, c7, c5, 4
@ prefetch flush
mcr
p15, 0, ip, c8, c7, 0
@ invalidate I and D TLBs
#if L2_CACHE_ENABLE
orr
r4, r4, #0x18
@ cache the page table in L2
#endif
mcr
p15, 0, r4, c2, c0, 0
@ load page table pointer
mov
r0, #0
@ don't allow CP access
mcr
p15, 0, r0, c15, c1, 0
@ write CP access register
mrc
p15, 0, r0, c1, c0, 1
@ get auxiliary control reg
and
r0, r0, #2
@ preserve bit P bit setting
#if L2_CACHE_ENABLE
orr
r0, r0, #(1 << 10)
@ enable L2 for LLR cache
#endif
mcr
p15, 0, r0, c1, c0, 1
@ set auxiliary control reg
adr
r5, xsc3_crval
ldmia
r5, {r5, r6}
mrc
p15, 0, r0, c1, c0, 0
@ get control register
bic
r0, r0, r5
@ ..V. ..R. .... ..A.
orr
r0, r0, r6
@ ..VI Z..S .... .C.M (mmu)
@ ...I Z..S .... .... (uc)
#if L2_CACHE_ENABLE
orr
r0, r0, #0x04000000
@ L2 enable
#endif
mov
pc, lr //跳到adr lr, __enable_mmu
.size
__xsc3_setup, . - __xsc3_setup
.type
xsc3_crval, #object
xsc3_crval:
crval
clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
__INITDATA
__enable_mmu 使能mmu
/*
* Setup common bits before finally enabling the MMU. Essentially
* this is just loading the page table pointer and domain access
* registers.
*/
/*
 * __enable_mmu - adjust the control-register value in r0 according to
 * the kernel configuration, load the domain access register and the
 * page table pointer, then branch to __turn_mmu_on.
 *
 * In:  r0 = control register value to be written later
 *      r4 = page table address
 * Overwrites r5.
 */
	.type	__enable_mmu, %function
__enable_mmu:
#ifdef CONFIG_ALIGNMENT_TRAP
	orr	r0, r0, #CR_A			@ set the A bit
#else
	bic	r0, r0, #CR_A			@ clear the A bit
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
	bic	r0, r0, #CR_C			@ keep the data cache disabled
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
	bic	r0, r0, #CR_Z			@ clear the Z bit
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
	bic	r0, r0, #CR_I			@ keep the instruction cache disabled
#endif
	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
	b	__turn_mmu_on
/*
* Enable the MMU. This completely changes the structure of the visible
* memory space. You will not be able to trace execution through this.
* If you have an enquiry about this, *please* check the linux-arm-kernel
* mailing list archives BEFORE sending another post to the list.
*
* r0 = cp#15 control register
* r13 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
*/
/*
 * __turn_mmu_on - write the control register value in r0, then jump
 * to the address held in r13.
 *
 * In:  r0  = cp#15 control register value
 *      r13 = virtual address to continue at
 * Overwrites r3.
 */
	.align	5
	.type	__turn_mmu_on, %function
__turn_mmu_on:
	mov	r0, r0				@ no-op
	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
	mrc	p15, 0, r3, c0, c0, 0		@ read id reg
	mov	r3, r3				@ no-op
	mov	r3, r3				@ no-op; NOTE(review): the original note says
						@ these give already-fetched instructions
						@ time to execute - confirm
	mov	pc, r13				@ continue at the address loaded earlier
						@ by "ldr r13, __switch_data"
__switch_data 数据转换
在文件linux/arch/arm/kernel/head-common.S中:
/*
 * __switch_data - table of addresses consumed by __mmap_switched.
 * Word 0 is what "ldr r13, __switch_data" loads.  The remaining words
 * are loaded in two groups by ldmia; the register named in each
 * comment is the one that word lands in.
 */
	.type	__switch_data, %object
__switch_data:
	.long	__mmap_switched			@ word 0: address of __mmap_switched
	.long	__data_loc			@ r4  where the data is stored
	.long	__data_start			@ r5  start of the data segment
	.long	__bss_start			@ r6  start of bss
	.long	_end				@ r7  end of bss
	.long	processor_id			@ r4
	.long	__machine_arch_type		@ r5
	.long	__atags_pointer			@ r6
	.long	cr_alignment			@ r7
	.long	init_thread_union + THREAD_START_SP @ sp
/*
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags pointer
* r9 = processor ID
*/
/*
 * __mmap_switched - first code run with the MMU on.  Copies the data
 * segment if its load and run addresses differ, clears the BSS, sets
 * the stack pointer, stores the boot values, and branches to
 * start_kernel.
 *
 * In:  r0 = cp#15 control register
 *      r1 = machine ID
 *      r2 = atags pointer
 *      r9 = processor ID
 */
	.type	__mmap_switched, %function
__mmap_switched:
	adr	r3, __switch_data + 4		@ skip word 0 of the table
	ldmia	r3!, {r4, r5, r6, r7}		@ r4 = __data_loc, r5 = __data_start,
						@ r6 = __bss_start, r7 = _end
	cmp	r4, r5				@ Copy data segment if needed
1:
	cmpne	r5, r6				@ while r4 != r5 initially and r5 != r6 ...
	ldrne	fp, [r4], #4			@ ... copy one word from r4 ...
	strne	fp, [r5], #4			@ ... to r5, advancing both
	bne	1b

	mov	fp, #0				@ Clear BSS (and zero fp)
1:
	cmp	r6, r7
	strcc	fp, [r6],#4			@ store zero while r6 < r7 (unsigned)
	bcc	1b

	ldmia	r3, {r4, r5, r6, r7, sp}	@ r4 = processor_id, r5 = __machine_arch_type,
						@ r6 = __atags_pointer, r7 = cr_alignment,
						@ sp = init_thread_union + THREAD_START_SP
	str	r9, [r4]			@ Save processor ID
	str	r1, [r5]			@ Save machine type
	str	r2, [r6]			@ Save atags pointer
	bic	r4, r0, #CR_A			@ Clear 'A' bit
	stmia	r7, {r0, r4}			@ Save control register values
	b	start_kernel
小结:
第一阶段主要做以下两个步骤:
1.内核链接时使用的是虚拟地址,所以要先设置页表、使能MMU;在此之前还要确认内核是否支持当前的CPU和开发板
2.调用C函数start_kernel的准备工作:复制数据段,清除bss段,设置栈指针,保存processor ID,保存machine type,调用start_kernel。
第一阶段到此结束。