参考:
1. Linux下1号进程的前世(kernel_init)今生(init进程)----Linux进程的管理与调度(六)
Linux内核在启动的最后阶段调用rest_init()(位于init/main.c),通过kernel_thread()创建两个内核线程:kernel_init和kthreadd;之后原来的启动主线程变成idle线程。
其中kernel_init内核线程转换为用户态1号进程init,原来的内核线程转换为idle内核线程。
/*
 * We need to finalize in a non-__init function, or else race conditions
 * between the root thread and the init thread may cause start_kernel to
 * be reaped by free_initmem before the root thread has proceeded to
 * cpu_idle.
 *
 * gcc-3.4 accidentally inlines this function, so use noinline.
 */

/*
 * Completed by rest_init() right after kthreadd_task has been looked up;
 * kernel_init() waits on it before continuing.
 */
static __initdata DECLARE_COMPLETION(kthreadd_done);

/*
 * rest_init - spawn the kernel_init and kthreadd threads, then go idle.
 *
 * Creates the two kernel threads, records the kthreadd task pointer,
 * signals kthreadd_done, calls schedule() once and finally enters
 * cpu_idle() on the calling (boot) thread.
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);

	/* Resolve the pid returned above to its task, under the RCU read lock. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();

	/* kthreadd_task is set: release kernel_init, which waits on this. */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	preempt_enable_no_resched();
	schedule();

	/* Call into cpu_idle with preempt disabled */
	preempt_disable();
	cpu_idle();
}
kernel_init继续完成系统初始化工作,并在最后阶段调用init_post();init_post()等待异步初始化完成并释放init内存,然后执行init程序,启动init进程。
linux到默认位置寻找init代码,大部分系统默认/sbin/init,若执行不成功,按以下顺序继续查找并执行:/etc/init, /bin/init, /bin/sh,若都不能找到,panic;
若能找到,并成功执行后,不会返回。
/*
 * kernel_init - body of the kernel thread created first by rest_init().
 * @unused: thread argument, not referenced in this function
 *
 * Waits for kthreadd to be set up, runs the remaining SMP and basic setup
 * steps, opens the initial console, decides whether an early-userspace
 * init is present, and then calls init_post().
 *
 * Returns 0 after init_post() returns.
 */
static int __init kernel_init(void * unused)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);
	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_HIGH_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);

	/* Record this task's pid in cad_pid. */
	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	do_basic_setup();

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		printk(KERN_WARNING "Warning: unable to open an initial console.\n");

	/*
	 * Duplicate descriptor 0 twice.
	 * NOTE(review): presumably the open above yields fd 0 and these two
	 * calls yield fds 1 and 2 (stdin/stdout/stderr) - confirm.
	 */
	(void) sys_dup(0);
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	/* Fall back to "/init" when no ramdisk command has been set. */
	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/*
	 * If that path is not accessible, clear the command (so init_post()
	 * skips it) and call prepare_namespace() instead.
	 */
	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */

	init_post();
	return 0;
}
/* This is a non __init function. Force it to be noinline otherwise gcc * makes it inline to init() and it becomes part of init.text section */ static noinline int init_post(void) { /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); current->signal->flags |= SIGNAL_UNKILLABLE; if (ramdisk_execute_command) { run_init_process(ramdisk_execute_command); printk(KERN_WARNING "Failed to execute %s\n", ramdisk_execute_command); } /* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are * trying to recover a really broken machine. */ if (execute_command) { run_init_process(execute_command); printk(KERN_WARNING "Failed to execute %s. Attempting " "defaults...\n", execute_command); } run_init_process("/sbin/init"); run_init_process("/etc/init"); run_init_process("/bin/init"); run_init_process("/bin/sh"); panic("No init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); }
执行init所用的函数为run_init_process(),它实际调用的是kernel_execve()。
/*
 * Argument vector passed to the init program. Slot 0 is overwritten with
 * the program path by run_init_process().
 */
static const char *argv_init[MAX_INIT_ARGS + 2] = {
	[0] = "init",
	[1] = NULL,
};

/* Environment passed to the init program. */
const char *envp_init[MAX_INIT_ENVS + 2] = {
	[0] = "HOME=/",
	[1] = "TERM=linux",
	[2] = NULL,
};

/*
 * run_init_process - try to execute @init_filename as the init program.
 * @init_filename: path of the program to execute
 *
 * Stores the path in argv_init[0] and calls kernel_execve() with
 * argv_init and envp_init. The result of kernel_execve() is not checked;
 * callers treat a return from this function as a failed attempt.
 */
static void run_init_process(const char *init_filename)
{
	const char **argv = argv_init;

	argv[0] = init_filename;
	kernel_execve(init_filename, argv, envp_init);
}
kernel_execve()加载并执行init程序,随后切换到用户态。
arch/arm/kernel/sys_arm.c
int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]) { struct pt_regs regs; int ret; memset(®s, 0, sizeof(struct pt_regs)); ret = do_execve(filename, (const char __user *const __user *)argv, (const char __user *const __user *)envp, ®s); if (ret < 0) goto out; /* * Save argc to the register structure for userspace. */ regs.ARM_r0 = ret; /* * We were successful. We won't be returning to our caller, but * instead to user space by manipulating the kernel stack. */ asm( "add r0, %0, %1\n\t" "mov r1, %2\n\t" "mov r2, %3\n\t" "bl memmove\n\t" /* copy regs to top of stack */ "mov r8, #0\n\t" /* not a syscall */ "mov r9, %0\n\t" /* thread structure */ "mov sp, r0\n\t" /* reposition stack pointer */ "b ret_to_user" : : "r" (current_thread_info()), "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) : "r0", "r1", "r2", "r3", "ip", "lr", "memory"); out: return ret; } EXPORT_SYMBOL(kernel_execve);
至此,此后所有的用户态进程都由用户态的init进程创建。