Linux内核分析--理解进程调度时机、跟踪分析进程调度和进程切换的过程

ID：fuchen1994

姓名：江军

作业要求：

理解Linux系统中进程调度的时机，可以在内核代码中搜索schedule()函数，看都是哪里调用了schedule()，判断我们课程内容中的总结是否准确；
使用gdb跟踪分析一个schedule()函数，验证您对Linux系统进程调度与进程切换过程的理解；推荐在实验楼Linux虚拟机环境下完成实验。
特别关注并仔细分析switch_to中的汇编代码，理解进程上下文的切换机制，以及与中断上下文切换的关系；

实验过程：

进程调度的时机

中断处理过程（包括时钟中断、I/O中断、系统调用和异常）中，直接调用schedule()，或者返回用户态时根据need_resched标记调用schedule()；
内核线程可以直接调用schedule()进行进程切换，也可以在中断处理过程中进行调度，也就是说内核线程作为一类特殊的进程，既可以主动调度，也可以被动调度；
用户态进程无法实现主动调度，仅能通过陷入内核态后的某个时机点进行调度，即在中断处理过程中进行调度。

代码分析：

1.这部分是内核抢占时进入调度的入口（preempt_schedule），进入__schedule()后会先关闭内核抢占

#ifdef CONFIG_PREEMPT
2906/*
2907 * this is the entry point to schedule() from in-kernel preemption
2908 * off of preempt_enable. Kernel preemptions off return from interrupt
2909 * occur there and call schedule directly.
2910 */
2911asmlinkage __visible void __sched notrace preempt_schedule(void)
2912{
2913    /*
2914     * If there is a non-zero preempt_count or interrupts are disabled,
2915     * we do not want to preempt the current task. Just return..
2916     */
2917    if (likely(!preemptible()))
2918        return;
2919
2920    do {
2921        __preempt_count_add(PREEMPT_ACTIVE);
2922        __schedule();  //这个函数进入
2923        __preempt_count_sub(PREEMPT_ACTIVE);
2924
2925        /*
2926         * Check again in case we missed a preemption opportunity
2927         * between schedule and now.
2928         */
2929        barrier();
2930    } while (need_resched());
2931}

static void __sched __schedule(void)  
2771{
2772    struct task_struct *prev, *next;
2773    unsigned long *switch_count;
2774    struct rq *rq;
2775    int cpu;
2776
2777need_resched:
2778    preempt_disable();
2779    cpu = smp_processor_id();  //获取当前CPU的编号，下一行据此取得该CPU的运行队列rq
2780    rq = cpu_rq(cpu);
2781    rcu_note_context_switch(cpu);
2782    prev = rq->curr;  //保存当前进程current
2783
2784    schedule_debug(prev); //进入这个检查函数，判断当前是否处于不允许调度的原子上下文

static inline void schedule_debug(struct task_struct *prev)
2676{
2677#ifdef CONFIG_SCHED_STACK_END_CHECK
2678    BUG_ON(unlikely(task_stack_end_corrupted(prev)));
2679#endif
2680    /*
2681     * Test if we are atomic. Since do_exit() needs to call into
2682     * schedule() atomically, we ignore that path. Otherwise whine
2683     * if we are scheduling when we should not.
2684     */
2685    if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD))
2686        __schedule_bug(prev);
2687    rcu_sleep_check();
2688
2689    profile_hit(SCHED_PROFILING, __builtin_return_address(0));
2690
2691    schedstat_inc(this_rq(), sched_count);
2692}

if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { //检测prev：如果处于不可运行状态并且不是在内核态被抢占，就准备把它从运行队列上删除
2799        if (unlikely(signal_pending_state(prev->state, prev))) {  //检测prev：如果它有未处理的（非阻塞）信号，并且状态是TASK_INTERRUPTIBLE，就设置其状态为TASK_RUNNING，并把它留在runqueue中
2800            prev->state = TASK_RUNNING;
2801        } else {
2802            deactivate_task(rq, prev, DEQUEUE_SLEEP);  //否则就调用deactivate_task()函数，把prev从运行队列中移除
2803            prev->on_rq = 0;
2804
2805            /*
2806             * If a worker went to sleep, notify and ask workqueue
2807             * whether it wants to wake up a task to maintain
2808             * concurrency.
2809             */
2810            if (prev->flags & PF_WQ_WORKER) {
2811                struct task_struct *to_wakeup;
2812
2813                to_wakeup = wq_worker_sleeping(prev, cpu);
2814                if (to_wakeup)
2815                    try_to_wake_up_local(to_wakeup);
2816            }
2817        }
2818        switch_count = &prev->nvcsw;
2819    }

860void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
861{
862    if (task_contributes_to_load(p))
863        rq->nr_uninterruptible++;   //如果该进程计入系统负载，则把运行队列的不可中断进程计数加一
864
865    dequeue_task(rq, p, flags);  并且进入dequeue_task()函数
866}

845static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
846{
847    update_rq_clock(rq);
848    sched_info_dequeued(rq, p);
849    p->sched_class->dequeue_task(rq, p, flags);  //调用p所属调度类的dequeue_task方法，将p进程从当前运行队列上移除
850}

2.选择下一个要运行的进程

2698pick_next_task(struct rq *rq, struct task_struct *prev)
2699{
2700    const struct sched_class *class = &fair_sched_class;
2701    struct task_struct *p;
2702
2703    /*
2704     * Optimization: we know that if all tasks are in
2705     * the fair class we can call that function directly:
2706     */
2707    if (likely(prev->sched_class == class &&
2708           rq->nr_running == rq->cfs.h_nr_running)) {
2709        p = fair_sched_class.pick_next_task(rq, prev);
2710        if (unlikely(p == RETRY_TASK))
2711            goto again;
2712
2713        /* assumes fair_sched_class->next == idle_sched_class */
2714        if (unlikely(!p))
2715            p = idle_sched_class.pick_next_task(rq, prev);
2716
2717        return p;
2718    }

85const struct sched_class idle_sched_class = {
86    /* .next is NULL */
87    /* no enqueue/yield_task for idle tasks */
88
89    /* dequeue is not valid, we print a debug message there: */
90    .dequeue_task        = dequeue_task_idle,
91
92    .check_preempt_curr    = check_preempt_curr_idle,
93
94    .pick_next_task        = pick_next_task_idle,
95    .put_prev_task        = put_prev_task_idle,
96

2.GDB跟踪分析

这部分有点坑：用git无法把实验文件下载下来，所以暂时没法进行实验，等后期在本机上搭建好环境再补上。其实跟踪过程和前面的代码分析差不多，区别不大。

3.switch_to中的汇编代码分析，关注进程上下文切换机制，以及中断上下文切换的关系

schedule()函数选择一个新的进程来运行，并调用context_switch函数进行上下文的切换，context_switch内部再调用switch_to宏来完成关键的上下文切换
next = pick_next_task(rq, prev);//进程调度算法都封装这个函数内部
context_switch(rq, prev, next);//进程上下文切换
switch_to利用了prev和next两个参数：prev指向当前进程，next指向被调度的进程
31#define switch_to(prev, next, last)                    \
32do {                                 \
33  /*                              \
34   * Context-switching clobbers all registers, so we clobber  \
35   * them explicitly, via unused output variables.     \
36   * (EAX and EBP is not listed because EBP is saved/restored  \
37   * explicitly for wchan access and EAX is the return value of   \
38   * __switch_to())                     \
39   */                                \
40  unsigned long ebx, ecx, edx, esi, edi;                \
41                                  \
42  asm volatile("pushfl\n\t"      /* save    flags */   \ 
43           "pushl %%ebp\n\t"        /* save    EBP   */ \ 当前进程堆栈基址压栈
44           "movl %%esp,%[prev_sp]\n\t"  /* save    ESP   */ \ 将当前进程栈顶保存prev->thread.sp
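45           "movl %[next_sp],%%esp\n\t"  /* restore ESP   */ \ 将下一个进程的栈顶(next->thread.sp)加载到esp中，完成内核栈的切换
46           "movl $1f,%[prev_ip]\n\t"    /* save    EIP   */ \ 把标号1的地址保存为当前进程的eip(prev->thread.ip)，当前进程下次被调度时从标号1处继续执行
47           "pushl %[next_ip]\n\t"   /* restore EIP   */    \ 将下一个进程的eip(next->thread.ip)压栈，__switch_to返回时会从该地址继续执行next进程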
48           __switch_canary                   \
49           "jmp __switch_to\n"  /* regparm call  */ \ 
50           "1:\t"                        \
51           "popl %%ebp\n\t"     /* restore EBP   */    \ 
52           "popfl\n"         /* restore flags */  \ 恢复标志寄存器，之后继续执行被切换进来的进程
53                                  \
54           /* output parameters */                \
55           : [prev_sp] "=m" (prev->thread.sp),     \
56             [prev_ip] "=m" (prev->thread.ip),        \
57             "=a" (last),                 \
58                                  \
59             /* clobbered output registers: */     \
60             "=b" (ebx), "=c" (ecx), "=d" (edx),      \
61             "=S" (esi), "=D" (edi)             \
62                                       \
63             __switch_canary_oparam                \
64                                  \
65             /* input parameters: */                \
66           : [next_sp]  "m" (next->thread.sp),        \
67             [next_ip]  "m" (next->thread.ip),       \
68                                       \
69             /* regparm parameters for __switch_to(): */  \
70             [prev]     "a" (prev),              \
71             [next]     "d" (next)               \
72                                  \
73             __switch_canary_iparam                \
74                                  \
75           : /* reloaded segment registers */           \
76          "memory");                  \
77} while (0)

通过系统调用，用户空间的应用程序会进入内核空间，由内核代表该进程在内核空间运行，这就涉及到上下文的切换。用户空间和内核空间具有不同的地址映射以及通用或专用的寄存器组；用户空间的进程要传递很多变量、参数给内核，内核也要保存用户进程的一些寄存器、变量等，以便系统调用结束后回到用户空间继续执行。所谓进程上下文，就是一个进程在执行时CPU所有寄存器中的值、进程的状态以及堆栈中的内容。当内核需要切换到另一个进程时，它需要保存当前进程的所有状态，即保存当前进程的进程上下文，以便再次执行该进程时能够恢复切换时的状态，继续执行。

同理，硬件通过触发信号，导致内核调用中断处理程序，进入内核空间。这个过程中，硬件的一些变量和参数也要传递给内核，内核通过这些参数进行中断处理，中断上下文就可以理解为硬件传递过来的这些参数和内核需要保存的一些环境，主要是被中断的进程的环境。

Linux内核工作在进程上下文或者中断上下文。提供系统调用服务的内核代码代表发起系统调用的应用程序运行在进程上下文；另一方面，中断处理程序，异步运行在中断上下文。中断上下文和特定进程无关。

运行在进程上下文的内核代码是可以被抢占的（Linux2.6支持抢占）。但是一个中断上下文，通常都会始终占有CPU（当然中断可以嵌套，但我们一般不这样做），不可以被打断。正因为如此，运行在中断上下文的代码就要受一些限制，例如不能睡眠，也不能调用可能引起进程调度的函数。

秒客网

Linux内核分析--理解进程调度时机、跟踪分析进程调度和进程切换的过程

相关文章