程序首先是从head.S里面进行执行的。这个代码有点长,我一次贴出
#define __ASSEMBLY__
#include "s3c2440.h"
#include "smdk2440.h"
#include "parameters.h"
@ Start of executable code
/* Fin = 12MHz */
#define S3C2440_UPLL_48MHZ_Fin12MHz ((0x38<<12)|(0x02<<4)|(0x02))
@
@ Exception vector table (physical address = 0x00000000)
@
.section .text.FirstSector
	.globl	first_sector

@ Exception vector table: eight 4-byte branch instructions at offsets
@ 0x00..0x1c, followed at 0x20 by a fixed-address parameter block.
@ Every vector except Reset and IRQ branches to itself, i.e. the CPU
@ simply spins if that exception is ever taken.
first_sector:
	@ 0x00: Reset
	b	Reset

	@ 0x04: Undefined instruction exception
UndefEntryPoint:
	b	UndefEntryPoint

	@ 0x08: Software interrupt exception
SWIEntryPoint:
	b	SWIEntryPoint

	@ 0x0c: Prefetch Abort (Instruction Fetch Memory Abort)
PrefetchAbortEnteryPoint:
	b	PrefetchAbortEnteryPoint

	@ 0x10: Data Access Memory Abort
DataAbortEntryPoint:
	b	DataAbortEntryPoint

	@ 0x14: Not used
NotUsedEntryPoint:
	b	NotUsedEntryPoint

	@ 0x18: IRQ (Interrupt Request) exception
IRQEntryPoint:
	b	IRQHandle

	@ 0x1c: FIQ (Fast Interrupt Request) exception
FIQEntryPoint:
	b	FIQEntryPoint

	@ 0x20: Fixed address global values. Will be replaced by downloader.
	.long	ZBOOT_MAGIC
	.byte	OS_TYPE, HAS_NAND_BIOS, (LOGO_POS & 0xFF), ((LOGO_POS >> 8) & 0xFF)
	.long	OS_START
	.long	OS_LENGTH
	.long	OS_RAM_START
	.string	LINUX_CMD_LINE
.section .text

@ Reset handler: quiesce the watchdog and interrupts, program the clocks
@ and the memory controller, route the UART pins, initialise the UART and
@ finally call the C entry point Main.
Reset:
	@ disable watch dog timer
	mov	r1, #0x53000000		// WTCON
	mov	r2, #0x0
	str	r2, [r1]

	@ disable all interrupts
	mov	r1, #INT_CTL_BASE	// SRCPND S3c2440.h
	mov	r2, #0xffffffff
	str	r2, [r1, #oINTMSK]
	ldr	r2, =0x7ff
	str	r2, [r1, #oINTSUBMSK]

	@ initialise system clocks
	mov	r1, #CLK_CTL_BASE
	mvn	r2, #0xff000000		@ r2 = 0x00ffffff
	str	r2, [r1, #oLOCKTIME]

	mov	r1, #CLK_CTL_BASE
	ldr	r2, clkdivn_value
	str	r2, [r1, #oCLKDIVN]

	mrc	p15, 0, r1, c1, c0, 0	@ read ctrl register
	orr	r1, r1, #0xc0000000	@ Asynchronous
	mcr	p15, 0, r1, c1, c0, 0	@ write ctrl register

	@ UPLL is written first; the nops space it from the MPLL write below
	mov	r1, #CLK_CTL_BASE
	ldr	r2, =S3C2440_UPLL_48MHZ_Fin12MHz
	str	r2, [r1, #oUPLLCON]
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop

	ldr	sp, DW_STACK_START	@ setup stack pointer

	ldr	r2, mpll_value_USER	@ clock user set 12MHz
	str	r2, [r1, #oMPLLCON]	@ r1 still holds CLK_CTL_BASE

	bl	memsetup

	@ set GPIO for UART
	mov	r1, #GPIO_CTL_BASE
	add	r1, r1, #oGPIO_H
	ldr	r2, gpio_con_uart
	str	r2, [r1, #oGPIO_CON]
	ldr	r2, gpio_up_uart
	str	r2, [r1, #oGPIO_UP]

	bl	InitUART

	@ get ready to call C functions
	mov	fp, #0			@ no previous frame, so fp=0
	mov	a2, #0			@ set argv to NULL
	bl	Main

	@ spin forever if Main ever returns
1:	b	1b
/*
* subroutines
*/
@ memsetup: copy the 13 words stored at mem_cfg_val into the 13
@ consecutive registers starting at MEM_CTL_BASE (13 * 4 = 52 bytes).
@ Uses r1-r4 as scratch; returns with mov pc, lr.
memsetup:
	@ initialise the static memory
	@ set memory control registers
	mov	r1, #MEM_CTL_BASE	@ r1 = destination register address
	adrl	r2, mem_cfg_val		@ r2 = source table
	add	r3, r1, #52		@ r3 = one past the last register
1:	ldr	r4, [r2], #4
	str	r4, [r1], #4
	cmp	r1, r3
	bne	1b
	mov	pc, lr
.globl ReadPage512
ReadPage512:
stmfdsp!, {r2-r7}
movr2, #0x200
1:
ldrr4, [r1]
ldrr5, [r1]
ldrr6, [r1]
ldrr7, [r1]
stmiar0!, {r4-r7}
ldrr4, [r1]
ldrr5, [r1]
ldrr6, [r1]
ldrr7, [r1]
stmiar0!, {r4-r7}
ldrr4, [r1]
ldrr5, [r1]
ldrr6, [r1]
ldrr7, [r1]
stmiar0!, {r4-r7}
ldrr4, [r1]
ldrr5, [r1]
ldrr6, [r1]
ldrr7, [r1]
stmiar0!, {r4-r7}
subsr2, r2, #64
bne1b;
ldmfdsp!, {r2-r7}
movpc,lr
@ Initialize UART
@
@ Programs the UART whose register base is stored at SerBase. No input
@ register is read: the port is fixed by SerBase, r0 is ignored.
@ Uses r1-r3 as scratch.
InitUART:
	ldr	r1, SerBase
	mov	r2, #0x0
	str	r2, [r1, #oUFCON]
	str	r2, [r1, #oUMCON]
	mov	r2, #0x3		@ NOTE(review): presumably 8N1 framing - confirm in the SoC manual
	str	r2, [r1, #oULCON]
	ldr	r2, =0x245
	str	r2, [r1, #oUCON]
//#define UART_BAUD_RATE115200
//#define UART_PCLK_400_148 50000000
//#define UART_PCLK UART_PCLK_400_148
#define UART_BRD ((UART_PCLK / (UART_BAUD_RATE * 16)) - 1)
	mov	r2, #UART_BRD
	str	r2, [r1, #oUBRDIV]

	@ Short settle delay: count r3 down from 100 to 0.
	@ (The previous form tested r3 against a zero register with tst, which
	@ always sets Z, so the loop fell through after a single pass.)
	mov	r3, #100
1:	subs	r3, r3, #0x1
	bne	1b
	mov	pc, lr
@ IRQHandle: forward the IRQ exception by loading pc with the fixed
@ address 0x33f00000 + 0x18.
IRQHandle:
	ldr	pc, =0x33f00000+0x18
	nop
	nop
@
@ Data Area
@
@ Memory configuration values: 13 words, copied in this order by memsetup
@ to the registers starting at MEM_CTL_BASE.
	.align 4
mem_cfg_val:
	.long	vBWSCON
	.long	vBANKCON0
	.long	vBANKCON1
	.long	vBANKCON2
	.long	vBANKCON3
	.long	vBANKCON4
	.long	vBANKCON5
	.long	vBANKCON6
	.long	vBANKCON7
	.long	vREFRESH
	.long	vBANKSIZE
	.long	vMRSRB6
	.long	vMRSRB7

@ Processor clock values (loaded by the Reset code)
	.align 4
mpll_value_USER:
	.long	vMPLLCON_NOW_USER
clkdivn_value:
	.long	vCLKDIVN_NOW

@ initial values for serial
@ (not referenced by the code shown in this file; InitUART uses immediates)
uart_ulcon:
	.long	vULCON
uart_ucon:
	.long	vUCON
uart_ufcon:
	.long	vUFCON
uart_umcon:
	.long	vUMCON

@ initial values for GPIO (written to port H by the Reset code)
gpio_con_uart:
	.long	vGPHCON
gpio_up_uart:
	.long	vGPHUP

@ Initial stack pointer, loaded into sp by the Reset code
	.align 2
DW_STACK_START:
	.word	0x34000000-4

@ Register base of the UART programmed by InitUART
	.align 4
SerBase:
	.long	UART0_CTL_BASE
程序从第19行开始执行,20~29行定义了异常向量表,可以发现其中第一条指令是一条跳转指令,所以说程序开始后直接跳转到第61行开始进行执行。在上面的异常向量表中,只有复位异常实现了(IRQ只是转跳到0x33f00000+0x18处的入口),其余的都没有实现。52~58行是数据的定义。下面开始从61行看,62~65行关闭看门狗,这是一个常见的用法,也是arm汇编指令的LOAD/STORE模式的典型体现。这里以这个作为例子说下,后面很多同样的用法不再重复。0x53000000正好是看门狗控制寄存器WTCON的地址,63行就是将立即数0x53000000放入寄存器R1中,然后64行将立即数0放到寄存器R2中,65行将R2寄存器中的内容写入R1寄存器所保存的地址处。用C实现的话很简单,这三行的意思就是WTCON=0;关闭看门狗。同样,68行~72行是屏蔽中断,75~100行设置系统时钟和USB时钟,想具体了解的可以看看手册。102行:设置堆栈。106行:跳到标号memsetup处执行,进行存储器控制器设置。也就是说跳转到131行继续执行,135行到142行进行存储器相关的13个控制寄存器的设置。MEM_CTL_BASE=0x48000000,在s3c2440.h中定义。136行是一条伪指令,其中的mem_cfg_val在215行,215行开始的地方,表示连续分配了13个int型的空间。所以这几行的意思就是将mem_cfg_val开始的13个数值依次赋值给0x48000000开始的13个寄存器。这13个寄存器如下图所示:
然后执行142行,意思是从调用的这个子函数中返回,程序跳到109行继续执行,109~114行设置管脚的功能,也就是设置为UART,115行跳转到InitUART对串口进行初始化,179~202行实现了此函数。122行 bl Main,跳到Main.c中的Main()函数去执行。
/*
 * Main - C entry point of the loader.
 *
 * Enables the caches, initialises the GPIO ports and the NAND controller,
 * then loads the boot parameters and copies the kernel image out of NAND.
 * If the NAND type is still PAGE_UNKNOWN after NandInit(), an error is
 * printed on the UART and the function never returns.
 */
void Main(void)
{
	/* Bring-up order: caches, GPIO, NAND controller. */
	MMU_EnableICache();
	MMU_EnableDCache();
	Port_Init();
	NandInit();

	if (g_page_type == PAGE_UNKNOWN) {
		Uart_SendString("\r\nunsupport NAND\r\n");
		while (1) {
			/* nothing to boot from: halt here */
		}
	}

	GetParameters();
	Uart_SendString("load Image of Linux...\n\r");
	ReadImageFromNand();
}
上面3,4行打开缓存。
6行,是端口初始化函数,进入这个函数看下
(在244x_lib.c中)
/*
 * Port_Init - load fixed values into the GPIO port registers.
 *
 * Writes the control (GPxCON), data (GPxDAT) and pull-up (GPxUP) registers
 * of ports A..H and then the three EXTINT registers, in the order below.
 * All values are hard-coded; the meaning of each bit field comes from the
 * SoC manual. NOTE(review): presumably matched to one specific board -
 * confirm against the schematic before reusing on other hardware.
 */
void Port_Init(void)
{
/* Port A */
GPACON = 0x7fffff;
/* Port B */
GPBCON = 0x044555;
GPBUP = 0x7ff;// The pull up function is disabled GPB[10:0]
/* Port C */
GPCCON = 0xaaaaaaaa;
GPCUP = 0xffff;// The pull up function is disabled GPC[15:0]
/* Port D: control, output data, then pull-ups */
GPDCON = 0x00151544;
GPDDAT = 0x0430;
GPDUP = 0x877A;
/* Port E */
GPECON = 0xaa2aaaaa;
GPEUP = 0xf7ff;// GPE11 is NC
/* Port F */
GPFCON = 0x55aa;
GPFUP = 0xff;// The pull up function is disabled GPF[7:0]
/* Port G: only bit 8 of the control register is set; data cleared */
GPGCON = 1<<8;
GPGDAT = 0;
/* Port H */
GPHCON = 0x16faaa;
GPHUP = 0x7ff;// The pull up function is disabled GPH[10:0]
/* External interrupt configuration, same value for all three registers */
EXTINT0 = 0x22222222;// EINT[7:0]
EXTINT1 = 0x22222222;// EINT[15:8]
EXTINT2 = 0x22222222;// EINT[23:16]
}
/*
 * NandInit - configure the NAND flash controller and probe the chip.
 *
 * Programs the timing fields of NFCONF, writes the control word to NFCONT,
 * clears NFSTAT, then resets the chip and reads its ID (NandCheckId()
 * records the detected page type in g_page_type).
 */
void NandInit(void)
{
	/* Timing fields at bits 12, 8 and 4; the low bits stay 0. */
	NFCONF = (TACLS << 12) | (TWRPH0 << 8) | (TWRPH1 << 4);

	/*
	 * Only bits 4, 1 and 0 are set; every other field is written as 0.
	 * NOTE(review): per the SoC manual these look like "init ECC",
	 * "chip select inactive" and "controller enable" - confirm.
	 */
	NFCONT = (1 << 4) | (1 << 1) | (1 << 0);

	NFSTAT = 0;

	NandReset();
	NandCheckId();
}
static inline U32 NandCheckId(void)
{
U8 Mid, Did, DontCare, id4th;
NF_nFCE_L();
NF_CMD(0x90);
NF_ADDR(0x0);
delay();
Mid = NF_RDDATA8();
Did = NF_RDDATA8();
DontCare = NF_RDDATA8();
id4th = NF_RDDATA8();
NF_nFCE_H();
switch(Did) {
case 0x76:
g_page_type = PAGE512;
break;
case 0xF1:
case 0xD3:
case 0xDA:
case 0xDC:
g_page_type = PAGE2048;
break;
default:
;
}
return (U32) ((Mid << 24) | (Did << 16) | (DontCare << 8) | id4th);
}
在上面检查NAND ID的函数里要注意,g_page_type这个变量决定了页的类型,如果探测到的页类型是未定义的,就直接进入死循环。
继续看Main()函数的下面几行就知道了,如下
/* Excerpt from Main(): report the problem on the UART and halt forever
 * when the page type is still PAGE_UNKNOWN after NAND initialisation. */
if (g_page_type == PAGE_UNKNOWN) {
Uart_SendString("\r\nunsupport NAND\r\n");
for(;;);
}
main的14行GetParameters(); 进入这个函数看看,就在Main.c文件中
/*
 * GetParameters - set the boot parameters and fetch the kernel command line.
 *
 * The OS type, NAND start offset, image length and RAM load address are
 * set to fixed values. One sector is then read from NAND offset 0x48000;
 * if it starts with the two magic words below, the bytes that follow
 * replace g_linux_cmd_line. Otherwise g_linux_cmd_line is left untouched.
 */
static inline void GetParameters(void)
{
	/* Header words marking a stored command line; as little-endian bytes
	 * they spell "VIVI" and "CMDL". */
	enum {
		CMDLINE_MAGIC0 = 0x49564956,
		CMDLINE_MAGIC1 = 0x4C444D43
	};
	U32 Buf[2048];

	g_os_type = OS_LINUX;
	g_os_start = 0x60000;		/* offset of the image in NAND */
	g_os_length = 0x500000;		/* bytes to copy */
	g_os_ram_start = 0x30008000;	/* load address in RAM */

	// vivi LINUX CMD LINE
	NandReadOneSector((U8 *)Buf, 0x48000);
	if (Buf[0] == CMDLINE_MAGIC0 && Buf[1] == CMDLINE_MAGIC1) {
		memcpy(g_linux_cmd_line, (char *)&(Buf[2]), sizeof g_linux_cmd_line);
		/* The flash contents are not guaranteed to contain a terminator
		 * within the copied range, so always end the string. */
		g_linux_cmd_line[sizeof g_linux_cmd_line - 1] = '\0';
	}
}
第4行确定了系统的类型为linux,第5行确定了系统在flash中的开始地址,第六行是系统的长度 ,第8行是系统在ram中的起始地址,第11行从nand地址0x48000地址处读一个扇区。下面两行是判断如果幻数码正确,就能判定下面的相应字节数为命令行参数。对上面的NandReadOneSector((U8 *)Buf, 0x48000);函数进行跟踪下
/*
 * NandReadOneSector - read one sector at NAND offset addr into buffer.
 *
 * Dispatches on the page type detected at init time and returns whatever
 * the page-size-specific reader returns. With any other page type the
 * function never returns.
 */
int NandReadOneSector(U8 * buffer, U32 addr)
{
	if (g_page_type == PAGE512)
		return NandReadOneSectorP512(buffer, addr);

	if (g_page_type == PAGE2048)
		return NandReadOneSectorP2048(buffer, addr);

	/* Unknown page geometry: nothing sensible can be read, hang here. */
	while (1) {
	}
}
上面就是根据一开始检测出来的页大小调用相应的函数,这里是调用下面一个,也就是NandReadOneSectorP2048(buffer, addr);再对这个函数进行跟踪
/*
 * NandReadOneSectorP2048 - read one 2048-byte page into buffer.
 *
 * addr is a byte offset; its low 11 bits are discarded, so the read always
 * starts at the beginning of the page containing addr. The page is pulled
 * out of NFDATA in four 512-byte chunks via ReadPage512(). The ECC calls
 * are compiled out. Always returns 1.
 */
static inline int NandReadOneSectorP2048(U8 * buffer, U32 addr)
{
	const U32 page = addr >> 11;	/* page index */
	unsigned int chunk;

	delay();
	NandReset();
#if 0
	NF_RSTECC();
	NF_MECC_UnLock();
#endif

	/* Select the chip and issue the read: command 0x00, two zero
	 * address bytes, three page-index bytes, then command 0x30. */
	NF_nFCE_L();
	NF_CLEAR_RB();
	NF_CMD(0x00);
	NF_ADDR(0x00);
	NF_ADDR(0x00);
	NF_ADDR(page & 0xff);
	NF_ADDR((page >> 8) & 0xff);
	NF_ADDR((page >> 16) & 0xff);
	NF_CMD(0x30);
	delay();
	NF_DETECT_RB();

	/* 4 x 512 bytes = one full page */
	for (chunk = 0; chunk < 4; chunk++)
		ReadPage512(buffer + chunk * 512, &NFDATA);
#if 0
	NF_MECC_Lock();
#endif
	NF_nFCE_H();

	return 1;
}
这里不懂的就要去了解下NAND的读写了,这个很重要。这里重要的函数是ReadPage512(buffer + 0 * 512, &NFDATA);,这个函数是在head.S中用汇编实现的,在最上面的head.S中的147~174行,这个函数每次读取512字节。
下面就是执行Main()中的ReadImageFromNand();实现linux内核从nandflash复制到ram中,下面是实现
/*
 * ReadImageFromNand - copy the OS image from NAND to RAM and start it.
 *
 * g_os_length is rounded up to a whole number of blocks. Starting at the
 * block containing g_os_start, every good block is copied sector by sector
 * to consecutive RAM addresses beginning at g_os_ram_start; blocks that
 * NandIsGoodBlock() rejects are skipped. Finishes by calling CallLinux().
 */
void ReadImageFromNand(void)
{
	const unsigned int block_shift = BYTE_SECTOR_SHIFT + SECTOR_BLOCK_SHIFT;
	unsigned int total;
	unsigned int copied;
	unsigned block;
	U8 *dst;

	/* Round the image length up to a multiple of the block size. */
	total = g_os_length;
	total = ((total + BLOCK_SIZE - 1) >> block_shift) << block_shift;

	block = g_os_start >> block_shift;
	dst = (U8 *) g_os_ram_start;

	for (copied = 0; copied < total; copied += BLOCK_SIZE) {
		unsigned int offset = 0;

		/* Advance past bad blocks until a good one is found. */
		while (!NandIsGoodBlock(block << block_shift))
			block++;

		/* Copy every sector of the current block. */
		while (offset < BLOCK_SIZE) {
			NandReadOneSector(dst, (block << block_shift) + offset);
			dst += SECTOR_SIZE;
			offset += SECTOR_SIZE;
		}

		block++;
	}

	CallLinux();
}
这个首先进行个地址的转换,然后判断是否是坏块,如果是坏块,就直接跳过,不是坏块就去执行从NAND到ram的复制操作, 最后调用CallLinux();看其实现
static void CallLinux(void)
{
struct param_struct {
union {
struct {
unsigned long page_size;/* 0 */
unsigned long nr_pages;/* 4 */
unsigned long ramdisk_size;/* 8 */
unsigned long flags;/* 12 */
unsigned long rootdev;/* 16 */
unsigned long video_num_cols;/* 20 */
unsigned long video_num_rows;/* 24 */
unsigned long video_x;/* 28 */
unsigned long video_y;/* 32 */
unsigned long memc_control_reg;/* 36 */
unsigned char sounddefault;/* 40 */
unsigned char adfsdrives;/* 41 */
unsigned char bytes_per_char_h;/* 42 */
unsigned char bytes_per_char_v;/* 43 */
unsigned long pages_in_bank[4];/* 44 */
unsigned long pages_in_vram;/* 60 */
unsigned long initrd_start;/* 64 */
unsigned long initrd_size;/* 68 */
unsigned long rd_start;/* 72 */
unsigned long system_rev;/* 76 */
unsigned long system_serial_low;/* 80 */
unsigned long system_serial_high;/* 84 */
unsigned long mem_fclk_21285;/* 88 */
} s;
char unused[256];
} u1;
union {
char paths[8][128];
struct {
unsigned long magic;
char n[1024 - sizeof(unsigned long)];
} s;
} u2;
char commandline[1024];
};
struct param_struct *p = (struct param_struct *)0x30000100;
memset(p, 0, sizeof(*p));
memcpy(p->commandline, g_linux_cmd_line, sizeof(g_linux_cmd_line));
p->u1.s.page_size = 4 * 1024;
p->u1.s.nr_pages = 64 * 1024 * 1024 / (4 * 1024);
{
unsigned int *pp = (unsigned int *)(0x30008024);
if (pp[0] == 0x016f2818) { // Magic number of zImage
//Uart_SendString("\n\rOk\n\r");
} else {
Uart_SendString("\n\rWrong Linux Kernel\n\r");
for (;;) ;
}
}
asm (
"movr5, %2\n"
"movr0, %0\n"
"movr1, %1\n"
"movip, #0\n"
"movpc, r5\n"
"nop\n" "nop\n":/* no outpus */
:"r"(0), "r"(1999), "r"(g_os_ram_start)
);
}
先定义了一个struct param_struct结构体变量,从这里就可以看出,vboot用的是旧的方式,新的是用tag方式,U-boot里面有实现,可以去看一下,struct param_struct与内核里定义的一样。第41~59行,看注释可以明白,第60~67行,是内核的一些约定:
R0 = 0
R1 = 机器ID
。。。
最后第65行,设置pc为内核映像在内存中的起始地址,直接跳到内核映像的入口,从而开始内核代码的执行......
总结一下:本来看这个bootloader不大,想仔仔细细介绍一下,后来发现不大容易,如果要一条条语句介绍,那也实在没啥意思。如果基本的ARM汇编都懂的话,那这个也不难了。装个sourceinsight软件对这个代码进行跟踪,条理很清晰。vboot的好处是很简单,代码量在4K内,因此可以直接在SOC自带的sram里面执行,进行系统的引导。但是目前只支持2440,只能从NANDFLASH启动,功能不是单一,而是唯一,那就是用来引导系统,不能提供UBOOT那样强大的功能。如果学bootloader先从vboot开始,我想是非常好的。因为UBOOT相对来说较为复杂,很容易让人头晕。这些类型的选取,要根据项目而定,也并不是越复杂越好。
需要vboot源码的,请留言留下邮箱。
ZJW