// Given a TCB (task_struct), obtain its memory information, held in the
// mm_struct that is dedicated to describing the task's memory layout.
struct mm_struct *get_task_mm(struct task_struct *task)
{
    struct mm_struct *mm;

    task_lock(task);
    mm = task->mm;
    if (mm) {
        if (task->flags & PF_KTHREAD)
            mm = NULL;    /* kernel threads only borrow an mm, don't own one */
        else
            atomic_inc(&mm->mm_users);    /* take a user reference */
    }
    task_unlock(task);
    return mm;
}
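// A typical caller pairs get_task_mm() with mmput() (shown below), so the
// mm_users reference taken above is always dropped. A minimal sketch; the
// helper name report_task_vm is ours, not from the kernel:
static unsigned long report_task_vm(struct task_struct *task)
{
    struct mm_struct *mm = get_task_mm(task);
    unsigned long pages = 0;

    if (mm) {    /* NULL for kernel threads */
        pages = mm->total_vm;    /* total pages mapped */
        mmput(mm);    /* drop the mm_users reference */
    }
    return pages;
}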
// The TCB's dedicated memory descriptor
struct mm_struct {
    struct vm_area_struct *mmap;    /* head of the list of VMAs (linear regions) */
    struct rb_root mm_rb;           /* red-black tree of VMAs */
    u32 vmacache_seqnum;            /* per-process VMA cache sequence number */
#ifdef CONFIG_MMU
    /* method for searching the linear address space for a free range */
    unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
#endif
    unsigned long mmap_base;        /* base of the mmap area: address of the
                                     * first anonymous linear region */
    unsigned long mmap_legacy_base; /* base of the bottom-up allocation area */
    unsigned long task_size;        /* size of the task's VM space */
    unsigned long highest_vm_end;   /* highest vma end address */
    pgd_t *pgd;                     /* points to the page global directory */
    atomic_t mm_users;              /* secondary usage counter: users of the
                                     * address space */
    atomic_t mm_count;              /* primary usage counter: references to
                                     * this mm_struct itself */
    atomic_long_t nr_ptes;          /* number of page-table pages */
    int map_count;                  /* number of VMAs */
    spinlock_t page_table_lock;     /* protects page tables and some counters */
    struct rw_semaphore mmap_sem;
    struct list_head mmlist;        /* List of maybe swapped mm's. These
                                     * are globally strung together off
                                     * init_mm.mmlist, and are protected
                                     * by mmlist_lock */
    unsigned long hiwater_rss;      /* High-watermark of RSS usage */
    unsigned long hiwater_vm;       /* High-water virtual memory usage */
    unsigned long total_vm;         /* Total pages mapped */
    unsigned long locked_vm;        /* Pages that have PG_mlocked set */
    unsigned long pinned_vm;        /* Refcount permanently increased */
    unsigned long shared_vm;        /* Shared pages (files) */
    unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
    unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
    unsigned long def_flags;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long start_brk, brk, start_stack;
    unsigned long arg_start, arg_end, env_start, env_end;
    unsigned long saved_auxv[AT_VECTOR_SIZE];    /* for /proc/PID/auxv */
    /*
     * Special counters, in some configurations protected by the
     * page_table_lock, in other configurations by being atomic.
     */
    struct mm_rss_stat rss_stat;
    struct linux_binfmt *binfmt;
    cpumask_var_t cpu_vm_mask_var;
    mm_context_t context;           /* architecture-specific MM context */
    unsigned long flags;            /* Must use atomic bitops to access the bits */
    struct core_state *core_state;  /* coredumping support */
#ifdef CONFIG_AIO
    spinlock_t ioctx_lock;
    struct kioctx_table __rcu *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
    /*
     * "owner" points to a task that is regarded as the canonical
     * user/owner of this mm. All of the following must be true in
     * order for it to be changed:
     *
     * current == mm->owner
     * current->mm != mm
     * new_owner->mm == mm
     * new_owner->alloc_lock is held
     */
    struct task_struct __rcu *owner;
#endif
    /* store ref to file /proc/<pid>/exe symlink points to */
    struct file *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
    struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
    pgtable_t pmd_huge_pte;         /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
    struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
    /*
     * numa_next_scan is the next time that the PTEs will be marked
     * pte_numa. NUMA hinting faults will gather statistics and migrate
     * pages to new nodes if necessary.
     */
    unsigned long numa_next_scan;
    /* Restart point for scanning and setting pte_numa */
    unsigned long numa_scan_offset;
    /* numa_scan_seq prevents two threads setting pte_numa */
    int numa_scan_seq;
#endif
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
    /*
     * An operation with batched TLB flushing is going on. Anything that
     * can move process memory needs to flush the TLB when moving a
     * PROT_NONE or PROT_NUMA mapped page.
     */
    bool tlb_flush_pending;
#endif
    struct uprobes_state uprobes_state;
#ifdef CONFIG_X86_INTEL_MPX
    void __user *bd_addr;           /* address of the bounds directory */
#endif
};
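// The mmap list and the mm_rb tree index the same set of VMAs. A minimal
// sketch of walking them under mmap_sem, assuming a kernel of this era
// where VMAs are chained through vma->vm_next (dump_vmas is our own name):
static void dump_vmas(struct mm_struct *mm)
{
    struct vm_area_struct *vma;

    down_read(&mm->mmap_sem);    /* readers take mmap_sem shared */
    for (vma = mm->mmap; vma; vma = vma->vm_next)
        pr_info("vma %lx-%lx flags %lx\n",
                vma->vm_start, vma->vm_end, vma->vm_flags);
    up_read(&mm->mmap_sem);
}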
// Given a task's address space, drop one user reference; when the user
// count falls to 0, tear down and release the address space.
// mmput() decrements mm_users; might_sleep() notes that this function may
// block, which gives the scheduler an opportunity to switch tasks.
void mmput(struct mm_struct *mm)
{
    might_sleep();

    if (atomic_dec_and_test(&mm->mm_users)) {
        uprobe_clear_state(mm);
        exit_aio(mm);
        ksm_exit(mm);
        khugepaged_exit(mm);    /* must run before exit_mmap */
        exit_mmap(mm);
        set_mm_exe_file(mm, NULL);
        if (!list_empty(&mm->mmlist)) {
            spin_lock(&mmlist_lock);
            list_del(&mm->mmlist);
            spin_unlock(&mmlist_lock);
        }
        if (mm->binfmt)
            module_put(mm->binfmt->module);
        mmdrop(mm);
    }
}
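// Note the two-level teardown: mmput() handles mm_users (the address
// space), while the final mmdrop() releases the mm_struct itself once
// mm_count also reaches 0. For reference, mmdrop() in kernels of this
// era is roughly:
static inline void mmdrop(struct mm_struct *mm)
{
    if (unlikely(atomic_dec_and_test(&mm->mm_count)))
        __mmdrop(mm);    /* frees page tables and the mm_struct */
}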
// Given a pid descriptor (struct pid), return the PID namespace it was
// allocated in (its deepest level).
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
    struct pid_namespace *ns = NULL;

    if (pid)
        ns = pid->numbers[pid->level].ns;
    return ns;
}
// Given a pid descriptor, return its global PID number, stored in the nr
// field of the level-0 upid, i.e. the number seen from the initial namespace.
static inline pid_t pid_nr(struct pid *pid)
{
    pid_t nr = 0;

    if (pid)
        nr = pid->numbers[0].nr;
    return nr;
}
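// Both helpers above index the numbers[] array of struct pid: one upid per
// namespace level. For reference, the layout in kernels of this era is
// roughly:
struct upid {
    int nr;                          /* PID value seen in this namespace */
    struct pid_namespace *ns;        /* namespace the value is valid in */
    struct hlist_node pid_chain;     /* link in the PID hash */
};

struct pid {
    atomic_t count;                  /* reference count, see put_pid() */
    unsigned int level;              /* deepest namespace level */
    struct hlist_head tasks[PIDTYPE_MAX];    /* tasks using this pid */
    struct rcu_head rcu;
    struct upid numbers[1];          /* really level + 1 entries */
};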
//给一个进程描述符,得到这个进程的任务描述符TCB
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
lockdep_tasklist_lock_is_held());
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
return result;
}
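// pid_task() dereferences an RCU-protected list, so callers must hold
// rcu_read_lock(). A minimal sketch that resolves a PID number in the
// current namespace to a pinned task (lookup_task is our own name):
static struct task_struct *lookup_task(pid_t nr)
{
    struct task_struct *task;

    rcu_read_lock();
    task = pid_task(find_vpid(nr), PIDTYPE_PID);
    if (task)
        get_task_struct(task);    /* pin it before leaving RCU */
    rcu_read_unlock();
    return task;    /* caller does put_task_struct() when done */
}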
/* Drop one reference to a pid descriptor. If the count is already 1 (we
 * are the only remaining user) or the decrement brings it to 0, free the
 * struct pid back to its namespace's slab cache and release the namespace
 * reference; otherwise just decrement count.
 */
void put_pid(struct pid *pid)
{
    struct pid_namespace *ns;

    if (!pid)
        return;

    ns = pid->numbers[pid->level].ns;
    if ((atomic_read(&pid->count) == 1) ||
         atomic_dec_and_test(&pid->count)) {
        kmem_cache_free(ns->pid_cachep, pid);
        put_pid_ns(ns);
    }
}
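// The counterpart that takes a reference is get_pid(); in kernels of this
// era it is simply:
static inline struct pid *get_pid(struct pid *pid)
{
    if (pid)
        atomic_inc(&pid->count);
    return pid;
}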
// Given a TCB, return the PID namespace it is currently active in; this
// bridges the TCB and the PID namespace.
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
    return ns_of_pid(task_pid(tsk));
}
// Get the TGID of the TCB's thread group, i.e. the PID of the group
// leader as seen from namespace ns (0 if the pid is not visible there).
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
    return pid_nr_ns(task_tgid(tsk), ns);
}

static inline struct pid *task_tgid(struct task_struct *task)
{
    return task->group_leader->pids[PIDTYPE_PID].pid;
}
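// task_tgid_nr_ns() delegates to pid_nr_ns(), which returns the PID number
// at the namespace's level only if that level's upid really belongs to ns.
// For reference, in kernels of this era it is roughly:
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
    struct upid *upid;
    pid_t nr = 0;

    if (pid && ns->level <= pid->level) {
        upid = &pid->numbers[ns->level];
        if (upid->ns == ns)
            nr = upid->nr;
    }
    return nr;
}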