如何创建新的Linux内核调度程序

Looking through the scheduler source code (2.6.34, kernel/sched.c), I can see how the "pluggable" schedulers are used, and I believe I understand the interface to be implemented. What I don't understand yet is how to get my code built into the kernel. At the very least, pointers to other sites would be appreciated.

通过调度程序源代码（2.6.34，kernel / sched.c），我可以看到如何使用“可插拔”调度程序，我相信我理解要实现的接口。我还不明白的是如何将我的代码内置到内核中。至少，赞赏其他网站的指针。

Right now, I'm grepping for SCHED_FIFO, SCHED_RR, and SCHED_NORMAL in the kernel source tree, so really I'm looking for a more insightful way to look at it :-)

现在，我正在内核源代码树中查找SCHED_FIFO，SCHED_RR和SCHED_NORMAL，所以我真的在寻找一种更有见地的方式来看待它:-)

EDIT: As some background, I'm very familiar with the FreeBSD scheduler (and the FreeBSD kernel in general), so I'm not looking for pointers on how to do process/thread level scheduling. I'm looking for a way to add my own scheduler alongside the normal linux schedulers (similar to SCHED_FIFO).

编辑：作为一些背景，我对FreeBSD调度程序（以及一般的FreeBSD内核）非常熟悉，所以我不是在寻找关于如何进行进程/线程级调度的指针。我正在寻找一种方法来添加我自己的调度程序和普通的linux调度程序（类似于SCHED_FIFO）。

EDIT #2: The BFS pointer below is a good start, but it still rips CFS out of the kernel. sched.c now looks like:

编辑＃2：下面的BFS指针是一个好的开始，但它仍然将CFS从内核中删除。 sched.c现在看起来像：

#ifdef CONFIG_SCHED_BFS
#include "sched_bfs.c"
#else
   // original sched.c 
#endif // CONFIG_SCHED_BFS

I'd love to see an answer or a pointer on how to do this a little better (ie, keep CFS, at least for right now).

我希望看到一个答案或指示如何更好地做到这一点（即保持CFS，至少目前为止）。

EDIT #3: I've answered my own question below, as I think I've figured it out.

编辑＃3：我已经在下面回答了我自己的问题，因为我认为我已经弄明白了。

3 个解决方案

#1

I've figured out the answer to my question, so I thought I'd add it here. Below is the patch that will add a new scheduler to the 2.6.34 vanilla kernel. Right now, I've only compiled the kernel. I fully expect running a system with this EXACT patch will cause it to crash -- so use at your own risk :-)

我已经找到了问题的答案，所以我想我会在这里添加它。下面是将为2.6.34 vanilla内核添加新调度程序的补丁。现在，我只编译了内核。我完全希望运行这个EXACT补丁的系统会导致它崩溃 - 所以使用风险自负:-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b7b81d..a2a2b21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -37,6 +37,7 @@
 #define SCHED_RR       2
 #define SCHED_BATCH        3
 /* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_NEW               4 /* Stealing from SCHED_ISO */
 #define SCHED_IDLE     5
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 #define SCHED_RESET_ON_FORK     0x40000000
diff --git a/init/Kconfig b/init/Kconfig
index eb77e8c..0055d26 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -23,6 +23,11 @@ config CONSTRUCTORS

 menu "General setup"

+config SCHED_NEW
+       bool "NEW cpu scheduler"
+       ---help---
+         Brand new scheduler 
+
 config EXPERIMENTAL
    bool "Prompt for development and/or incomplete code/drivers"
    ---help---
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f..588960d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1931,6 +1931,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)

 #include "sched_idletask.c"
 #include "sched_fair.c"
+#include "sched_new.c"
 #include "sched_rt.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
diff --git a/kernel/sched_new.c b/kernel/sched_new.c
new file mode 100644
index 0000000..c2e269e
--- /dev/null
+++ b/kernel/sched_new.c
@@ -0,0 +1,140 @@
+#ifdef CONFIG_SCHED_NEW
+
+/*
+ * Starting with a simple, 1 runq per cpu scheduler.  Don't care
+ * about fairness for right now.  Just get it up and running to 
+ * verify that we have the interface correct
+ */
+
+static void
+enqueue_task_new(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+{
+}
+
+static void dequeue_task_new(struct rq *rq, struct task_struct *p, int sleep)
+{
+}
+
+static void yield_task_new(struct rq *rq)
+{
+}
+
+static void check_preempt_curr_new(struct rq *rq, struct task_struct *p, int flags)
+{
+}
+
+static struct task_struct *pick_next_task_new(struct rq *rq)
+{
+}
+
+static void put_prev_task_new(struct rq *rq, struct task_struct *p)
+{
+}
+
+#ifdef CONFIG_SMP
+static int select_task_rq_new(struct task_struct *p, int sd_flag, int flags)
+{
+}
+static void pre_schedule_new(struct rq *rq, struct task_struct *prev)
+{
+}
+
+static void post_schedule_new(struct rq *rq)
+{
+}
+
+static void task_woken_new(struct rq *rq, struct task_struct *p)
+{
+}
+
+static void task_waking_new(struct rq *this_rq, struct task_struct *task)
+{
+}
+static void set_cpus_allowed_new(struct task_struct *p,
+               const struct cpumask *new_mask)
+{
+}
+/* Assumes rq->lock is held */
+static void rq_online_new(struct rq *rq)
+{
+}
+
+/* Assumes rq->lock is held */
+static void rq_offline_new(struct rq *rq)
+{
+}
+#endif /* COMFIG_SMP */
+
+static void set_curr_task_new(struct rq *rq)
+{
+}
+
+
+static void task_tick_new(struct rq *rq, struct task_struct *p, int queued)
+{
+} 
+
+static void task_fork_new(struct task_struct *p)
+{
+}
+static void switched_from_new(struct rq *rq, struct task_struct *p,
+              int running)
+{
+}
+static void switched_to_new(struct rq *this_rq, struct task_struct *task,
+               int running)
+{
+}
+static void prio_changed_new(struct rq *rq, struct task_struct *p,
+               int oldprio, int running)
+{
+}
+static unsigned int get_rr_interval_new(struct rq *rq, struct task_struct *task)
+{
+}
+
+
+
+static const struct sched_class new_sched_class = {
+   .next           = &fair_sched_class,
+   .enqueue_task       = enqueue_task_new,
+   .dequeue_task       = dequeue_task_new,
+   .yield_task     = yield_task_new,
+
+   .check_preempt_curr = check_preempt_curr_new,
+
+   .pick_next_task     = pick_next_task_new,
+   .put_prev_task      = put_prev_task_new,
+
+#ifdef CONFIG_SMP
+   .select_task_rq     = select_task_rq_new,
+
+   .pre_schedule       = pre_schedule_new,
+   .post_schedule      = post_schedule_new,
+
+   .task_waking            = task_waking_new,
+   .task_woken     = task_woken_new,
+
+   .set_cpus_allowed       = set_cpus_allowed_new,
+
+   .rq_online              = rq_online_new,
+   .rq_offline             = rq_offline_new,
+#endif
+
+   .set_curr_task          = set_curr_task_new,
+   .task_tick      = task_tick_new,
+   .task_fork              = task_fork_new,
+
+   .switched_from          = switched_from_new,
+   .switched_to        = switched_to_new,
+
+   .prio_changed       = prio_changed_new,
+
+   .get_rr_interval    = get_rr_interval_new,
+#ifdef CONFIG_FAIR_GROUP_SCHED
+   .moved_group            = NULL
+#endif
+};
+
+#endif /* CONFIG_SCHED_NEW */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b5b920a..aaf4beb 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1731,7 +1731,11 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 }

 static const struct sched_class rt_sched_class = {
+#ifdef CONFIG_SCHED_NEW
+   .next           = &new_sched_class,
+#else
    .next           = &fair_sched_class,
+#endif /* CONFIG_SCHED_NEW */
    .enqueue_task       = enqueue_task_rt,
    .dequeue_task       = dequeue_task_rt,
    .yield_task     = yield_task_rt,

#2

Embedded.com has a 3-part entry that walks through implementing a simple real-time scheduler:

Embedded.com有一个由3部分组成的条目，介绍如何实现一个简单的实时调度程序：

part 1
第1部分
part 2
第2部分
part 3
第3部分

Unlike the other answers, this one is created as a tutorial:

与其他答案不同，这个答案是作为教程创建的：

[...] in the literature we did not find documents that explain how to implement a new scheduling policy for Linux.

[...]在文献中我们没有找到解释如何为Linux实施新调度策略的文档。

[...]

[...]

In this document, we have presented in a [sic] depth description all steps required to implement a new scheduling policy.

在本文档中，我们以[原文]深度描述了实施新调度策略所需的所有步骤。

[...]

[...]

This is a simple implementation of that scheduling algorithm. However, advanced issues, like interruptions, timers and multiprocessor systems, just to mention some, are out of the scope of this article.

这是该调度算法的简单实现。但是，高级问题，如中断，计时器和多处理器系统，仅举几例，超出了本文的范围。

#3

Con Kolivas Brainfuck Scheduler. I just found this by googling. It may be an example to start from.

Con Kolivas Brainfuck Scheduler。我只是通过谷歌搜索找到了这个。这可能是一个例子。

#1

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b7b81d..a2a2b21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -37,6 +37,7 @@
 #define SCHED_RR       2
 #define SCHED_BATCH        3
 /* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_NEW               4 /* Stealing from SCHED_ISO */
 #define SCHED_IDLE     5
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 #define SCHED_RESET_ON_FORK     0x40000000
diff --git a/init/Kconfig b/init/Kconfig
index eb77e8c..0055d26 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -23,6 +23,11 @@ config CONSTRUCTORS

 menu "General setup"

+config SCHED_NEW
+       bool "NEW cpu scheduler"
+       ---help---
+         Brand new scheduler 
+
 config EXPERIMENTAL
    bool "Prompt for development and/or incomplete code/drivers"
    ---help---
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f..588960d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1931,6 +1931,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)

 #include "sched_idletask.c"
 #include "sched_fair.c"
+#include "sched_new.c"
 #include "sched_rt.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
diff --git a/kernel/sched_new.c b/kernel/sched_new.c
new file mode 100644
index 0000000..c2e269e
--- /dev/null
+++ b/kernel/sched_new.c
@@ -0,0 +1,140 @@
+#ifdef CONFIG_SCHED_NEW
+
+/*
+ * Starting with a simple, 1 runq per cpu scheduler.  Don't care
+ * about fairness for right now.  Just get it up and running to 
+ * verify that we have the interface correct
+ */
+
+static void
+enqueue_task_new(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+{
+}
+
+static void dequeue_task_new(struct rq *rq, struct task_struct *p, int sleep)
+{
+}
+
+static void yield_task_new(struct rq *rq)
+{
+}
+
+static void check_preempt_curr_new(struct rq *rq, struct task_struct *p, int flags)
+{
+}
+
+static struct task_struct *pick_next_task_new(struct rq *rq)
+{
+}
+
+static void put_prev_task_new(struct rq *rq, struct task_struct *p)
+{
+}
+
+#ifdef CONFIG_SMP
+static int select_task_rq_new(struct task_struct *p, int sd_flag, int flags)
+{
+}
+static void pre_schedule_new(struct rq *rq, struct task_struct *prev)
+{
+}
+
+static void post_schedule_new(struct rq *rq)
+{
+}
+
+static void task_woken_new(struct rq *rq, struct task_struct *p)
+{
+}
+
+static void task_waking_new(struct rq *this_rq, struct task_struct *task)
+{
+}
+static void set_cpus_allowed_new(struct task_struct *p,
+               const struct cpumask *new_mask)
+{
+}
+/* Assumes rq->lock is held */
+static void rq_online_new(struct rq *rq)
+{
+}
+
+/* Assumes rq->lock is held */
+static void rq_offline_new(struct rq *rq)
+{
+}
+#endif /* COMFIG_SMP */
+
+static void set_curr_task_new(struct rq *rq)
+{
+}
+
+
+static void task_tick_new(struct rq *rq, struct task_struct *p, int queued)
+{
+} 
+
+static void task_fork_new(struct task_struct *p)
+{
+}
+static void switched_from_new(struct rq *rq, struct task_struct *p,
+              int running)
+{
+}
+static void switched_to_new(struct rq *this_rq, struct task_struct *task,
+               int running)
+{
+}
+static void prio_changed_new(struct rq *rq, struct task_struct *p,
+               int oldprio, int running)
+{
+}
+static unsigned int get_rr_interval_new(struct rq *rq, struct task_struct *task)
+{
+}
+
+
+
+static const struct sched_class new_sched_class = {
+   .next           = &fair_sched_class,
+   .enqueue_task       = enqueue_task_new,
+   .dequeue_task       = dequeue_task_new,
+   .yield_task     = yield_task_new,
+
+   .check_preempt_curr = check_preempt_curr_new,
+
+   .pick_next_task     = pick_next_task_new,
+   .put_prev_task      = put_prev_task_new,
+
+#ifdef CONFIG_SMP
+   .select_task_rq     = select_task_rq_new,
+
+   .pre_schedule       = pre_schedule_new,
+   .post_schedule      = post_schedule_new,
+
+   .task_waking            = task_waking_new,
+   .task_woken     = task_woken_new,
+
+   .set_cpus_allowed       = set_cpus_allowed_new,
+
+   .rq_online              = rq_online_new,
+   .rq_offline             = rq_offline_new,
+#endif
+
+   .set_curr_task          = set_curr_task_new,
+   .task_tick      = task_tick_new,
+   .task_fork              = task_fork_new,
+
+   .switched_from          = switched_from_new,
+   .switched_to        = switched_to_new,
+
+   .prio_changed       = prio_changed_new,
+
+   .get_rr_interval    = get_rr_interval_new,
+#ifdef CONFIG_FAIR_GROUP_SCHED
+   .moved_group            = NULL
+#endif
+};
+
+#endif /* CONFIG_SCHED_NEW */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b5b920a..aaf4beb 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1731,7 +1731,11 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 }

 static const struct sched_class rt_sched_class = {
+#ifdef CONFIG_SCHED_NEW
+   .next           = &new_sched_class,
+#else
    .next           = &fair_sched_class,
+#endif /* CONFIG_SCHED_NEW */
    .enqueue_task       = enqueue_task_rt,
    .dequeue_task       = dequeue_task_rt,
    .yield_task     = yield_task_rt,

#2