linux下1号进程的前世(kernel_init)今生(init)

时间:2022-01-07 15:48:47

参考:

1.  Linux下1号进程的前世(kernel_init)今生(init进程)----Linux进程的管理与调度(六)

2. linux挂载根文件系统过程

3. BusyBox init工作流程

4. kthreadd-linux下2号进程

linux内核在启动的最后阶段,由rest_init()(位于init/main.c)调用kernel_thread()创建两个内核线程:kernel_init和kthreadd,之后启动主线程自身变成idle线程。

其中kernel_init内核线程最终转换为用户态1号进程init,而原来的启动主线程则转换为idle内核线程。

/*
 * Completion used to order the two threads spawned by rest_init():
 * rest_init() signals it once kthreadd_task has been set, and
 * kernel_init() waits on it before doing anything else.
 */
static __initdata DECLARE_COMPLETION(kthreadd_done);

/*
 * rest_init() - spawn the kernel_init and kthreadd threads, then turn the
 * calling (boot) thread into the idle thread.
 *
 * We need to finalize in a non-__init function, or else race conditions
 * between the root thread and the init thread may cause start_kernel to
 * be reaped by free_initmem before the root thread has proceeded to
 * cpu_idle.
 *
 * gcc-3.4 accidentally inlines this function, so use noinline.
 */
static noinline void __init_refok rest_init(void)
{
    int pid;

    rcu_scheduler_starting();
    /*
     * We need to spawn init first so that it obtains pid 1, however
     * the init task will end up wanting to create kthreads, which, if
     * we schedule it before we create kthreadd, will OOPS.
     *
     * kernel_init() therefore blocks on kthreadd_done until the
     * complete() call below.
     */
    kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
    numa_default_policy();
    pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
    /* Resolve the kthreadd pid to its task under the RCU read lock. */
    rcu_read_lock();
    kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
    rcu_read_unlock();
    /* kthreadd_task is now set: release kernel_init(). */
    complete(&kthreadd_done);

    /*
     * The boot idle thread must execute schedule()
     * at least once to get things moving:
     */
    init_idle_bootup_task(current);
    preempt_enable_no_resched();
    schedule();

    /* Call into cpu_idle with preempt disabled */
    preempt_disable();
    cpu_idle();
}

kernel_init继续完成系统初始化工作,最后阶段调用init_post()。init_post()先等待所有异步初始化代码执行完毕,再释放init内存,然后执行init程序,开启init进程。

linux到默认位置寻找init代码,大部分系统默认/sbin/init,若执行不成功,按以下顺序继续查找并执行:/etc/init, /bin/init, /bin/sh,若都不能找到,panic;

若能找到,并成功执行后,不会返回。

/*
 * kernel_init() - entry point of the thread spawned first by rest_init().
 *
 * Waits for kthreadd to be set up, runs the remaining boot-time
 * initialization (SMP bring-up, do_basic_setup()), opens the console,
 * decides whether an early userspace init exists, and finally hands over
 * to init_post(), which tries to execute the init program.
 *
 * @unused: thread argument; not referenced in this function.
 *
 * Returns 0 after init_post() returns. NOTE(review): init_post() ends in
 * panic() when no init program could be run, so this return is presumably
 * never reached in practice - confirm.
 */
static int __init kernel_init(void * unused)
{
    /*
     * Wait until kthreadd is all set-up.
     * (rest_init() completes kthreadd_done after setting kthreadd_task.)
     */
    wait_for_completion(&kthreadd_done);
    /*
     * init can allocate pages on any node
     */
    set_mems_allowed(node_states[N_HIGH_MEMORY]);
    /*
     * init can run on any cpu.
     */
    set_cpus_allowed_ptr(current, cpu_all_mask);

    /* Record this task's pid in cad_pid. */
    cad_pid = task_pid(current);

    smp_prepare_cpus(setup_max_cpus);

    do_pre_smp_initcalls();
    lockup_detector_init();

    smp_init();
    sched_init_smp();


    do_basic_setup();

    /* Open the /dev/console on the rootfs, this should never fail */
    if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
        printk(KERN_WARNING "Warning: unable to open an initial console.\n");

    /*
     * Duplicate fd 0 twice; results are deliberately ignored.
     * NOTE(review): presumably this yields fds 1 and 2 pointing at the
     * console opened above - assumes that open returned fd 0.
     */
    (void) sys_dup(0);
    (void) sys_dup(0);
    /*
     * check if there is an early userspace init.  If yes, let it do all
     * the work
     */

    /* Default the early-userspace init path to "/init" when none is set. */
    if (!ramdisk_execute_command)
        ramdisk_execute_command = "/init";

    /*
     * If that path is not accessible, forget it and call
     * prepare_namespace() instead; init_post() then skips the ramdisk
     * command because it is NULL.
     */
    if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
        ramdisk_execute_command = NULL;
        prepare_namespace();
    }

    /*
     * Ok, we have completed the initial bootup, and
     * we're essentially up and running. Get rid of the
     * initmem segments and start the user-mode stuff..
     */

    init_post();
    return 0;
}
/*
 * init_post() - finish boot and try to execute the init program.
 *
 * This is a non __init function. Force it to be noinline otherwise gcc
 * makes it inline to its __init caller (kernel_init() here) and it becomes
 * part of init.text section, which this function itself frees via
 * free_initmem().
 *
 * Candidates are tried in this order: ramdisk_execute_command (if set),
 * execute_command (if set), then "/sbin/init", "/etc/init", "/bin/init"
 * and "/bin/sh". Control only reaches the next candidate when the previous
 * run_init_process() call came back, i.e. the exec failed. If every
 * candidate fails, the kernel panics.
 */
static noinline int init_post(void)
{
    /* need to finish all async __init code before freeing the memory */
    async_synchronize_full();
    free_initmem();
    mark_rodata_ro();
    system_state = SYSTEM_RUNNING;
    numa_default_policy();


    /* Flag the current task's signal struct as SIGNAL_UNKILLABLE. */
    current->signal->flags |= SIGNAL_UNKILLABLE;

    /* Early-userspace init selected by kernel_init(); NULL if unusable. */
    if (ramdisk_execute_command) {
        run_init_process(ramdisk_execute_command);
        printk(KERN_WARNING "Failed to execute %s\n",
                ramdisk_execute_command);
    }
    /*
     * We try each of these until one succeeds.
     *
     * The Bourne shell can be used instead of init if we are
     * trying to recover a really broken machine.
     */
    if (execute_command) {
        run_init_process(execute_command);
        printk(KERN_WARNING "Failed to execute %s.  Attempting "
                    "defaults...\n", execute_command);
    }
    /* Built-in default locations, in priority order. */
    run_init_process("/sbin/init");
    run_init_process("/etc/init");
    run_init_process("/bin/init");
    run_init_process("/bin/sh");

    /*
     * Nothing could be executed. No return statement follows;
     * NOTE(review): relies on panic() not returning - confirm.
     */
    panic("No init found.  Try passing init= option to kernel. "
          "See Linux Documentation/init.txt for guidance.");
}

执行init采用的函数为run_init_process(),实际调用的是kernel_execve()。

/*
 * Argument and environment vectors handed to the init program.
 * argv_init[0] starts as "init" and is overwritten with the actual path
 * by run_init_process(); envp_init starts with HOME=/ and TERM=linux.
 */
static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };

/*
 * run_init_process() - try to execute @init_filename as the init program.
 *
 * @init_filename: path of the program to execute; also stored as argv[0].
 *
 * The result of kernel_execve() is discarded: callers treat the mere fact
 * that this function returned as "the exec failed" and move on to the
 * next candidate.
 */
static void run_init_process(const char *init_filename)
{
    argv_init[0] = init_filename;
    kernel_execve(init_filename, argv_init, envp_init);
}

kernel_execve()执行init程序,并切换到用户态。

arch/arm/kernel/sys_arm.c

/*
 * kernel_execve() - execute a program from kernel context (ARM).
 *
 * @filename: path of the program to execute.
 * @argv:     argument vector, passed to do_execve().
 * @envp:     environment vector, passed to do_execve().
 *
 * Builds a zeroed pt_regs on the local stack and calls do_execve() with
 * it. On failure (negative result) that value is returned to the caller.
 * On success the function does not return to its caller: the register
 * frame is copied to the top of the kernel stack and control branches to
 * ret_to_user.
 */
int kernel_execve(const char *filename,
          const char *const argv[],
          const char *const envp[])
{
    struct pt_regs regs;
    int ret;

    memset(&regs, 0, sizeof(struct pt_regs));
    ret = do_execve(filename,
            (const char __user *const __user *)argv,
            (const char __user *const __user *)envp, &regs);
    if (ret < 0)
        goto out;

    /*
     * Save argc to the register structure for userspace.
     */
    regs.ARM_r0 = ret;

    /*
     * We were successful.  We won't be returning to our caller, but
     * instead to user space by manipulating the kernel stack.
     *
     * Operands: %0 = current_thread_info(),
     *           %1 = THREAD_START_SP - sizeof(regs),
     *           %2 = &regs, %3 = sizeof(regs).
     * r0 = %0 + %1 is the destination, r1 the source and r2 the length
     * for the memmove call; afterwards sp is moved to that destination
     * (r0) before branching to ret_to_user.
     * NOTE(review): this relies on memmove returning its destination
     * pointer in r0 - confirm against the ARM calling convention.
     */
    asm(    "add    r0, %0, %1\n\t"
        "mov    r1, %2\n\t"
        "mov    r2, %3\n\t"
        "bl memmove\n\t"    /* copy regs to top of stack */
        "mov    r8, #0\n\t" /* not a syscall */
        "mov    r9, %0\n\t" /* thread structure */
        "mov    sp, r0\n\t" /* reposition stack pointer */
        "b  ret_to_user"
        :
        : "r" (current_thread_info()),
          "Ir" (THREAD_START_SP - sizeof(regs)),
          "r" (&regs),
          "Ir" (sizeof(regs))
        : "r0", "r1", "r2", "r3", "ip", "lr", "memory");

    /* Failure path only: hand the negative do_execve() result back. */
 out:
    return ret;
}
EXPORT_SYMBOL(kernel_execve);

至此,以后的所有用户进程都由用户态的init进程创建。