可以参见:https://github.com/mengning/mykernel
首先感谢:http://www.euryugasaki.com/archives/1014
1.搭建实验环境(实验环境centos6.5)
# Download the Linux kernel 3.9.4 source code.
wget https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.9.4.tar.xz
# Download mykernel_for_linux3.9.4sc.patch.
# NOTE(review): --no-check-certificate disables TLS verification; drop it if the host's CA bundle is current.
wget --no-check-certificate https://raw.github.com/mengning/mykernel/master/mykernel_for_linux3.9.4sc.patch
# Unpack the source tree and apply the patch.
xz -d linux-3.9.4.tar.xz
tar -xvf linux-3.9.4.tar
cd linux-3.9.4
patch -p1 < ../mykernel_for_linux3.9.4sc.patch
# Build a minimal kernel.
make allnoconfig
make
# If the system has no `qemu` command, create it as a symlink to qemu-system-i386
# BEFORE trying to boot the kernel (writing to /usr/bin needs root).
command -v qemu >/dev/null 2>&1 || ln -s /usr/bin/qemu-system-i386 /usr/bin/qemu
# Boot the freshly built kernel image.
qemu -kernel arch/x86/boot/bzImage
2.代码分析
2.1 mypcb.h
/*
 * mypcb.h - process control block (PCB) definitions for the mykernel
 * time-slice scheduling demo.
 */
#ifndef MYPCB_H
#define MYPCB_H

#define MAX_TASK_NUM        4           /* number of slots in the task table */
/* Bytes of kernel stack per task; parenthesized so it is safe inside any expression. */
#define KERNEL_STACK_SIZE   (1024 * 8)

/* CPU-specific state of a task: the saved instruction and stack pointers. */
struct Thread {
    unsigned long ip;
    unsigned long sp;
};

/* Process control block. */
typedef struct PCB {
    int pid;                        /* process id */
    volatile long state;            /* -1 unrunnable, 0 runnable, >0 stopped */
    char stack[KERNEL_STACK_SIZE];  /* kernel stack of this task */
    /* CPU-specific state of this task */
    struct Thread thread;
    unsigned long task_entry;       /* entry point of the task (for an ordinary program this would be main) */
    struct PCB *next;               /* tasks are chained together in a linked list */
} tPCB;

/* The scheduler. */
void my_schedule(void);

#endif /* MYPCB_H */
2.2 mymain.c
#include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/tty.h> #include <linux/vmalloc.h> #include "mypcb.h" tPCB task[MAX_TASK_NUM]; //声明tPCB类型的数组 tPCB * my_current_task = NULL; //声明当前task的指针 volatile int my_need_sched = 0; //是否需要调度 void my_process(void); void __init my_start_kernel(void) { int pid = 0; int i; /* Initialize process 0*/ task[pid].pid = pid; //初始化0号进程 task[pid].state = 0;/* -1 unrunnable, 0 runnable, >0 stopped ,状态正在运行*/ task[pid].task_entry = task[pid].thread.ip = (unsigned long)my_process; //入口 task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE-1];// task[pid].next = &task[pid]; //指向自己,系统启动只有0号进程 /*fork more process */ for(i=1;i<MAX_TASK_NUM;i++) { memcpy(&task[i],&task[0],sizeof(tPCB)); task[i].pid = i; task[i].state = -1; task[i].thread.sp = (unsigned long)&task[i].stack[KERNEL_STACK_SIZE-1]; task[i].next = task[i-1].next; //新进程加到进程链表尾部 task[i-1].next = &task[i]; } /* start process 0 by task[0] */ pid = 0; my_current_task = &task[pid]; asm volatile( "movl %1,%%esp\n\t" /* set task[pid].thread.sp(%1) to esp */ "pushl %1\n\t" /* push ebp */ "pushl %0\n\t" /* push task[pid].thread.ip */ "ret\n\t" /* pop task[pid].thread.ip to eip ,ret之后0号进程正式启动*/ "popl %%ebp\n\t" : : "c" (task[pid].thread.ip),"d" (task[pid].thread.sp) /* input c or d mean %ecx/%edx*/ ); } void my_process(void) { int i = 0; while(1) { i++; if(i%10000000 == 0) //循环1000万次判断是否需要调度 { printk(KERN_NOTICE "this is process %d -\n",my_current_task->pid); if(my_need_sched == 1) { my_need_sched = 0; my_schedule(); } printk(KERN_NOTICE "this is process %d +\n",my_current_task->pid); } } }
2.3 myinterrupt.c
#include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/tty.h> #include <linux/vmalloc.h> #include "mypcb.h" extern tPCB task[MAX_TASK_NUM]; extern tPCB * my_current_task; extern volatile int my_need_sched; volatile int time_count = 0; /* * Called by timer interrupt. * it runs in the name of current running process, * so it use kernel stack of current running process */ void my_timer_handler(void) { #if 1 if(time_count%1000 == 0 && my_need_sched != 1) //设置时间片的大小,时间片用完时设置调度的标志 { printk(KERN_NOTICE ">>>my_timer_handler here<<<\n"); my_need_sched = 1; } time_count ++ ; #endif return; } void my_schedule(void) { tPCB * next; tPCB * prev; if(my_current_task == NULL || my_current_task->next == NULL) { return; } printk(KERN_NOTICE ">>>my_schedule<<<\n"); /* schedule */ next = my_current_task->next; prev = my_current_task; if(next->state == 0)/* -1 unrunnable, 0 runnable, >0 stopped */ { my_current_task = next; printk(KERN_NOTICE ">>>switch %d to %d<<<\n",prev->pid,next->pid); /* switch to next process */ asm volatile( "pushl %%ebp\n\t" /* save ebp */ "movl %%esp,%0\n\t" /* save esp */ "movl %2,%%esp\n\t" /* restore esp */ "movl $1f,%1\n\t" /* save eip,%1f指接下来的标号为1的位置 */ "pushl %3\n\t" "ret\n\t" /* restore eip */ "1:\t" /* next process start here */ "popl %%ebp\n\t" : "=m" (prev->thread.sp),"=m" (prev->thread.ip) : "m" (next->thread.sp),"m" (next->thread.ip) ); } else { next->state = 0; my_current_task = next; printk(KERN_NOTICE ">>>switch %d to %d<<<\n",prev->pid,next->pid); /* switch to new process */ asm volatile( "pushl %%ebp\n\t" /* save ebp */ "movl %%esp,%0\n\t" /* save esp */ "movl %2,%%esp\n\t" /* restore esp */ "movl %2,%%ebp\n\t" /* restore ebp */ "movl $1f,%1\n\t" /* save eip */ "pushl %3\n\t" "ret\n\t" /* restore eip */ : "=m" (prev->thread.sp),"=m" (prev->thread.ip) : "m" (next->thread.sp),"m" (next->thread.ip) ); } return; }
2.4总结如下
2.4.1
my_start_kernel()帮助我们创建进程;
my_timer_handler()来记录时间,触发调度;
my_start_kernel()中创建的0号进程的入口地址是my_process()。
2.4.2
my_process()作为每个进程的入口地址,开始逐个执行;
当时间片用完时,my_process()会调用my_schedule()来保存进程的堆栈现场,完成进程间的切换;
在mymain.c中实现内核的启动,通过my_start_kernel()来初始化进程;
2.4.3总体框架(不够完善)
2.4.4具体分析
2.4.4.1 首先看入口函数my_start_kernel()
int pid = 0;
int i;

/* Initialize process 0 */
task[pid].pid = pid;
task[pid].state = 0;    /* -1 unrunnable, 0 runnable, >0 stopped */
task[pid].thread.ip = (unsigned long)my_process;    /* entry point */
task[pid].task_entry = task[pid].thread.ip;
task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE - 1];
task[pid].next = &task[pid];    /* only process 0 exists at startup, so it points to itself */
上述完成了对0号进程的初始化,包括
设置task[pid].state;
0号进程的入口地址为my_process();
task[0]的进程属性中ip被设置成了my_process()函数的入口地址,sp被设置成了该进程内核堆栈的栈顶,即stack数组最后一个元素的地址(x86的堆栈向低地址方向增长)
/* fork more processes */
for (i = 1; i < MAX_TASK_NUM; i++) {
    memcpy(&task[i], &task[0], sizeof(tPCB));
    task[i].pid = i;
    task[i].state = -1;
    task[i].thread.sp = (unsigned long)&task[i].stack[KERNEL_STACK_SIZE - 1];
    /* insert the new task into the list right after task[i-1] */
    task[i].next = task[i - 1].next;
    task[i - 1].next = &task[i];
}
以0号进程为模板复制了MAX_TASK_NUM-1个进程。每个新进程都被插入到前一个进程之后,因此进程链表是一个循环链表:task[0] → task[1] → task[2] → task[3] → task[0]。
之后:
/* start process 0 by task[0] */
pid = 0;
my_current_task = &task[pid];
asm volatile(
    "movl %1,%%esp\n\t"     /* set task[pid].thread.sp(%1) to esp */
    "pushl %1\n\t"          /* push %1, i.e. task[pid].thread.sp */
    "pushl %0\n\t"          /* push task[pid].thread.ip */
    "ret\n\t"               /* pop task[pid].thread.ip to eip; process 0 starts running after this ret */
    "popl %%ebp\n\t"        /* placed after ret, so not reached by falling through */
    :
    : "c" (task[pid].thread.ip), "d" (task[pid].thread.sp)  /* input c or d mean %ecx/%edx */
);
第一步(set task[pid].thread.sp(%1) to esp),将task[0].thread.sp拿去修改esp的值,这时候内核堆栈的栈顶被修改到了task[0]的sp位置;
第二步(pushl %1,代码注释中写作push ebp),在task[0]的栈顶处压入的实际上是%1,即task[0].thread.sp的值;由于此时task[0]的堆栈还是空的,栈底ebp与栈顶esp相同,所以这一步相当于压入了新堆栈的ebp;
第三步(push task[pid].thread.ip ,pop task[pid].thread.ip to eip),设置task[0].thread.ip的值给eip,这样就能够保证cpu下一步能够执行0号进程,完成了进入my_process()的过程。
注意:此时eip的值已经被修改,CPU进入my_process(),所以最后一句的popl ebp并不会被立即执行了。
2.4.4.2 再看my_process()与my_timer_handler()
void my_process(void) { int i = 0; while(1) { i++; if(i%10000000 == 0) //循环1000万次判断是否需要调度 { printk(KERN_NOTICE "this is process %d -\n",my_current_task->pid); if(my_need_sched == 1) { my_need_sched = 0; my_schedule(); } printk(KERN_NOTICE "this is process %d +\n",my_current_task->pid); } } }
void my_timer_handler(void) { #if 1 if(time_count%1000 == 0 && my_need_sched != 1) //设置时间片的大小,时间片用完时设置调度的标志 { printk(KERN_NOTICE ">>>my_timer_handler here<<<\n"); my_need_sched = 1; } time_count ++ ; #endif return; }
经过前面所述步骤,程序执行转到了my_process()。此时只有task[0]处于可运行状态,并且my_need_sched == 0,因此仅靠my_process()自身的循环永远不会触发调度。但是my_timer_handler()函数会在时钟中断到来时被linux内核自动调用:每次执行时,它会检查时间计数time_count以及是否已有尚未处理的调度请求,当满足条件后,把全局变量my_need_sched置为1。0号进程在自己的循环中检测到my_need_sched == 1后,先把它清零,再主动调用my_schedule()函数让出CPU。
2.4.4.3 最后分析核心函数my_schedule()
/* nothing to do without a current task or a successor to switch to */
if (my_current_task == NULL || my_current_task->next == NULL) {
    return;
}
首先进行一个简单的判断(判断当前的任务和接下来要被执行的任务是否为空)
/* schedule */
next = my_current_task->next;
prev = my_current_task;
if (next->state == 0) {     /* -1 unrunnable, 0 runnable, >0 stopped */
    my_current_task = next;
    printk(KERN_NOTICE ">>>switch %d to %d<<<\n", prev->pid, next->pid);
    /* switch to next process */
    asm volatile(
        "pushl %%ebp\n\t"       /* save ebp */
        "movl %%esp,%0\n\t"     /* save esp */
        "movl %2,%%esp\n\t"     /* restore esp */
        "movl $1f,%1\n\t"       /* save eip; $1f is the address of the next label 1 ahead */
        "pushl %3\n\t"
        "ret\n\t"               /* restore eip */
        "1:\t"                  /* next process start here */
        "popl %%ebp\n\t"
        : "=m" (prev->thread.sp), "=m" (prev->thread.ip)
        : "m" (next->thread.sp), "m" (next->thread.ip)
    );
}
在上面的代码中,next指针指向了当前任务的下一个任务,prev指针指向了当前的0号进程。由建立的进程链表知:task[0].next指向task[1],所以下一个被调度的进程应该是task[1],而此时task[1]的状态是-1,会执行else中的部分:
else {
    next->state = 0;
    my_current_task = next;
    printk(KERN_NOTICE ">>>switch %d to %d<<<\n", prev->pid, next->pid);
    /* switch to new process */
    asm volatile(
        "pushl %%ebp\n\t"       /* save ebp */
        "movl %%esp,%0\n\t"     /* save esp */
        "movl %2,%%esp\n\t"     /* restore esp */
        "movl %2,%%ebp\n\t"     /* restore ebp */
        "movl $1f,%1\n\t"       /* save eip */
        "pushl %3\n\t"
        "ret\n\t"               /* restore eip */
        /*
         * Resume point of prev. The original had no label here, so $1f
         * depended on a label 1 emitted by some other asm statement.
         * Defining it locally makes the saved eip independent of code
         * layout; the pop balances the pushl %ebp above.
         */
        "1:\t"
        "popl %%ebp\n\t"
        : "=m" (prev->thread.sp), "=m" (prev->thread.ip)
        : "m" (next->thread.sp), "m" (next->thread.ip)
    );
}
task[1]的状态被更改为0(可运行),当前任务被修改为task[1]。
此时便开始进行0号进程的现场保护工作,以便日后的调度。堆栈会保存ebp的值,同时将esp保存到0号进程的sp中。这是因为,当以后切换回0号进程的时候,需要通过0号进程保存的sp值找回它自己的进程堆栈。然后将task[1]的sp值设置到esp和ebp中,创建好了task[1]的执行堆栈。接着把标号1的地址保存到0号进程的ip中,作为它日后恢复执行的位置;再将task[1]执行任务的入口地址ip压栈并通过ret弹出到eip,完成对任务的执行入口设置。这时候ret后面的代码不会被立即执行,因为在修改eip后,cpu转去执行task[1]的my_process()了。此后各个进程就这样循环下去,利用时间片的统计,完成进程之间的切换。