执行完/bin/echo之后,会调用do_exit,销毁子进程:
我们还是先从系统调用exit()说起。先来看exit()的实现:进入内核态后执行的是sys_exit。
/*
 * sys_exit - kernel-side entry for the exit() system call.
 * @error_code: status supplied by the caller; only the low 8 bits are kept.
 *
 * The low byte is shifted into bits 8..15 and handed to do_exit().
 * Nothing follows the do_exit() call, so no value is returned here.
 */
asmlinkage long sys_exit(int error_code)
{
	const int low_byte = error_code & 0xff;

	do_exit(low_byte << 8);
}
/*
 * do_exit - terminate the current task.
 * @code: value stored in tsk->exit_code (sys_exit passes
 *        (error_code & 0xff) << 8).
 *
 * Panics when called from interrupt context, for pid 0, or for pid 1.
 * Otherwise: marks the task PF_EXITING, deletes its real timer, drops its
 * mm, files, fs and signal-handler references, records the exit code,
 * calls exit_notify() and finally schedule().  Reaching BUG() means
 * schedule() came back, which is not supposed to happen.
 */
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())	/* an interrupt handler must not be killed */
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)		/* the idle task (pid 0) is not allowed to exit */
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)	/* init (pid 1) is not allowed to exit */
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	/*
	 * If the mm is shared, this only drops mm->mm_users.  If this task
	 * is the last user, the VMAs, page tables, page directory and the
	 * mm_struct itself are released (see mmput()).
	 */
	__exit_mm(tsk);

	lock_kernel();
	/*
	 * NOTE(review): presumably semaphore-related cleanup, judging by the
	 * name; the body is not shown here and the original walkthrough
	 * defers it to the IPC discussion.
	 */
	sem_exit();
	/* Shared: only files->count is decremented.  Sole owner: the files_struct is freed. */
	__exit_files(tsk);
	/* Shared: only fs->count is decremented.  Sole owner: the fs_struct is freed. */
	__exit_fs(tsk);
	/* Shared: only sig->count is decremented.  Sole owner: the signal_struct is freed. */
	exit_sighand(tsk);
	/* NOTE(review): described as an empty function by the original walkthrough; body not shown here. */
	exit_thread();

	if (current->leader)
		disassociate_ctty(1);

	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	/*
	 * Marks the current task TASK_ZOMBIE, signals its parent and hands
	 * its children over to a new parent.
	 */
	exit_notify();
	schedule();
	BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}
/*
 * __exit_mm - detach a task from its user address space.
 * @tsk: the exiting task.
 *
 * Always calls mm_release().  If tsk->mm is set, an extra mm_count
 * reference is taken first, tsk->mm is cleared under task_lock(),
 * enter_lazy_tlb() is called, and the task's mm_users reference is
 * dropped through mmput().
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	mm_release();
	if (mm) {
		/*
		 * Taken before mmput() below; presumably this keeps the
		 * mm_struct alive while it is still tsk->active_mm -- confirm.
		 */
		atomic_inc(&mm->mm_count);
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);	/* the key call: drops the mm_users reference */
	}
}
/*
 * mmput - drop one user reference on an address space.
 * @mm: address space whose mm_users count is decremented.
 *
 * When atomic_dec_and_lock() reports that this was the last user (in the
 * exit walkthrough mm_users is 1 on entry), the mm is unlinked from the
 * mm list, its mappings are torn down with exit_mmap() and the mm_count
 * reference is released with mmdrop().
 */
void mmput(struct mm_struct *mm)
{
	/* Other users remain: nothing more to do. */
	if (!atomic_dec_and_lock(&mm->mm_users, &mmlist_lock))
		return;

	/* mmlist_lock is released right after unlinking, before the teardown. */
	list_del(&mm->mmlist);
	spin_unlock(&mmlist_lock);

	/* Frees the vm_area_structs and clears the user page tables. */
	exit_mmap(mm);

	/* May free the page directory and the mm_struct itself. */
	mmdrop(mm);
}
/*
 * exit_mmap - release every mapping of an address space.
 * @mm: address space being torn down.
 *
 * Detaches the VMA list from @mm under page_table_lock, resets the
 * rss/total_vm/locked_vm counters, then walks the detached list: each
 * VMA is closed through its vm_ops (if any), its range is flushed and
 * zapped, its file reference is dropped, and the vm_area_struct is
 * returned to vm_area_cachep.  Finally the user part of the page tables
 * is cleared.
 */
void exit_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt;

	release_segments(mm);
	/* Take the whole VMA list private and empty the mm's lookup state. */
	spin_lock(&mm->page_table_lock);
	mpnt = mm->mmap;
	mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
	spin_unlock(&mm->page_table_lock);
	mm->rss = 0;
	mm->total_vm = 0;
	mm->locked_vm = 0;
	while (mpnt) {
		/* Everything needed from the VMA is read before it is freed. */
		struct vm_area_struct * next = mpnt->vm_next;
		unsigned long start = mpnt->vm_start;
		unsigned long end = mpnt->vm_end;
		unsigned long size = end - start;

		if (mpnt->vm_ops) {
			if (mpnt->vm_ops->close)
				mpnt->vm_ops->close(mpnt);
		}
		mm->map_count--;
		remove_shared_vm_struct(mpnt);
		flush_cache_range(mm, start, end);
		zap_page_range(mm, start, size);
		if (mpnt->vm_file)
			fput(mpnt->vm_file);
		kmem_cache_free(vm_area_cachep, mpnt);
		mpnt = next;
	}

	/* This is just debugging */
	if (mm->map_count)
		printk("exit_mmap: map count is %d\n", mm->map_count);

	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
}
/*
 * mmdrop - drop one mm_count reference.
 * @mm: address space descriptor.
 *
 * Hands @mm to __mmdrop() only when atomic_dec_and_test() reports that
 * the count reached zero.
 */
static inline void mmdrop(struct mm_struct * mm)
{
	if (!atomic_dec_and_test(&mm->mm_count))
		return;

	__mmdrop(mm);
}
/*
 * __mmdrop - final release of an mm_struct.
 * @mm: descriptor to destroy; must not be init_mm.
 *
 * Frees the page directory, destroys the context and then frees the
 * mm_struct itself, in that order.
 */
inline void __mmdrop(struct mm_struct *mm)
{
	/* init_mm must never be released. */
	if (mm == &init_mm) {
		BUG();
	}

	pgd_free(mm->pgd);
	destroy_context(mm);
	free_mm(mm);
}
回到do_exit,继续执行__exit_files,子进程自立门户,释放files_struct数据结构,代码如下:
/*
 * __exit_files - detach a task from its open-file table.
 * @tsk: the exiting task.
 *
 * Does nothing when tsk->files is already NULL.  Otherwise the pointer
 * is cleared under task_lock() and the task's reference is released via
 * put_files_struct().
 */
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct *table = tsk->files;

	if (!table)
		return;

	task_lock(tsk);
	tsk->files = NULL;
	task_unlock(tsk);

	put_files_struct(table);
}
void put_files_struct(struct files_struct *files) { if (atomic_dec_and_test(&files->count)) {//files->count为1 close_files(files); /* * Free the fd and fdset arrays if we expanded them. */ if (files->fd != &files->fd_array[0]) free_fd_array(files->fd, files->max_fds); if (files->max_fdset > __FD_SETSIZE) { free_fdset(files->open_fds, files->max_fdset); free_fdset(files->close_on_exec, files->max_fdset); } kmem_cache_free(files_cachep, files); } }
/*
 * __exit_fs - detach a task from its fs_struct.
 * @tsk: the exiting task.
 *
 * Does nothing when tsk->fs is already NULL.  Otherwise the pointer is
 * cleared under task_lock() and the task's reference is released via
 * __put_fs_struct().
 */
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct *fs_info = tsk->fs;

	if (!fs_info)
		return;

	task_lock(tsk);
	tsk->fs = NULL;
	task_unlock(tsk);

	__put_fs_struct(fs_info);
}
static inline void __put_fs_struct(struct fs_struct *fs) { /* No need to hold fs->lock if we are killing it */ if (atomic_dec_and_test(&fs->count)) {//fs->count为1 dput(fs->root); mntput(fs->rootmnt); dput(fs->pwd); mntput(fs->pwdmnt); if (fs->altroot) { dput(fs->altroot); mntput(fs->altrootmnt); } kmem_cache_free(fs_cachep, fs); } }
void exit_sighand(struct task_struct *tsk) { struct signal_struct * sig = tsk->sig; spin_lock_irq(&tsk->sigmask_lock); if (sig) { tsk->sig = NULL; if (atomic_dec_and_test(&sig->count))//sig->count为1 kmem_cache_free(sigact_cachep, sig); } tsk->sigpending = 0; flush_sigqueue(&tsk->pending); spin_unlock_irq(&tsk->sigmask_lock); }
/*
 * exit_notify - announce the death of the current task.
 *
 * Re-targets the original-parent pointer of current's children
 * (forget_original_parent), performs the orphaned-process-group checks,
 * possibly forces exit_signal to SIGCHLD, marks current TASK_ZOMBIE,
 * notifies the parent, and relinks every child onto its new parent's
 * child list.
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	/* Point p_opptr of every child of current at the reaper. */
	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	/*
	 * The task notified is p_pptr; forget_original_parent() only
	 * rewrote p_opptr (to the reaper).
	 */
	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}

	/* Let father know we died
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 *
	 */

	if(current->exit_signal != SIGCHLD &&
	    ( current->parent_exec_id != t->self_exec_id ||
	      current->self_exec_id != current->parent_exec_id)
	    && !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;	/* the parent will be sent SIGCHLD */

	/*
	 * This loop does two things:
	 *
	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *	as a result of our exiting, and if they have any stopped
	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	current->state = TASK_ZOMBIE;	/* the current task becomes a zombie */
	do_notify_parent(current, current->exit_signal);	/* signal the parent */
	while (current->p_cptr != NULL) {
		/* Unlink the first child from current's child list. */
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		/* p_pptr now agrees with p_opptr: both are the reaper. */
		p->p_pptr = p->p_opptr;
		/* Insert p at the head of the new parent's child list. */
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			/* tasklist_lock is dropped around the pgrp checks and signalling. */
			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp,SIGHUP,1);
				kill_pg(pgrp,SIGCONT,1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);
}
static inline void forget_original_parent(struct task_struct * father) { struct task_struct * p, *reaper; read_lock(&tasklist_lock); /* Next in our thread group */ reaper = next_thread(father); if (reaper == father) reaper = child_reaper;//init进程 for_each_task(p) { if (p->p_opptr == father) { /* We dont want people slaying init */ p->exit_signal = SIGCHLD; p->self_exec_id++; p->p_opptr = reaper;//其当前进程的子进程的父进程设置为init进程,这里设置的p_opptr if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0); } } read_unlock(&tasklist_lock); }
/*
 * do_notify_parent - tell a task's parent that the task changed state.
 * @tsk: the task that stopped or died.
 * @sig: signal to send (SIGCHLD in the exit walkthrough).
 *
 * Fills a siginfo with the pid, uid and CPU times of @tsk plus a
 * si_code/si_status pair derived from tsk->state and tsk->exit_code,
 * sends it to tsk->p_pptr and wakes that parent up.
 */
void do_notify_parent(struct task_struct *tsk, int sig)
{
	struct siginfo info;
	int why, status;

	info.si_signo = sig;
	info.si_errno = 0;
	info.si_pid = tsk->pid;
	info.si_uid = tsk->uid;

	/* FIXME: find out whether or not this is supposed to be c*time. */
	info.si_utime = tsk->times.tms_utime;
	info.si_stime = tsk->times.tms_stime;

	/* Default status: the low 7 bits of exit_code. */
	status = tsk->exit_code & 0x7f;
	why = SI_KERNEL;	/* shouldn't happen */
	switch (tsk->state) {
	case TASK_STOPPED:
		/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */
		if (tsk->ptrace & PT_PTRACED)
			why = CLD_TRAPPED;
		else
			why = CLD_STOPPED;
		break;

	default:
		/* Bit 0x80 set: dumped.  Low 7 bits non-zero: killed.  Otherwise a plain exit. */
		if (tsk->exit_code & 0x80)
			why = CLD_DUMPED;
		else if (tsk->exit_code & 0x7f)
			why = CLD_KILLED;
		else {
			why = CLD_EXITED;
			/* For a plain exit the status is the byte stored in bits 8 and up. */
			status = tsk->exit_code >> 8;
		}
		break;
	}
	info.si_code = why;
	info.si_status = status;

	send_sig_info(sig, &info, tsk->p_pptr);	/* deliver the signal to the parent */
	/*
	 * Wake the parent.  Per the original walkthrough, a parent sleeping
	 * in wait() is TASK_INTERRUPTIBLE and becomes TASK_RUNNING here;
	 * wake_up_parent() itself is not shown.
	 */
	wake_up_parent(tsk->p_pptr);
}
至此,进程的基本资源都已经释放了,但是当前进程的残骸仍旧占用着最低限度的资源,包括其task_struct数据结构和系统空间堆栈所在的两个页面。当前进程自己不释放这两个页面,就像人们自己并不在临终注销自己的户口一样,而是通知其父进程,让父进程料理后事。当前进程状态为 TASK_ZOMBIE,schedule时,无限延迟调度该进程。
下面,最后执行schedule,假设只有父进程和子进程,父进程的状态已经是TASK_RUNNING,切换到父进程继续执行。
#define switch_to(prev,next,last) do { \ asm volatile("pushl %%esi\n\t" \ //把esi存入现在进程prev的堆栈 "pushl %%edi\n\t" \ //把edi存入现在进程prev的堆栈 "pushl %%ebp\n\t" \ //把ebp存入现在进程prev的堆栈 "movl %%esp,%0\n\t" /* save ESP */ \ //现在进程prev的esp保存在prev->thread.esp "movl %3,%%esp\n\t" /* restore ESP */ \ //将要切换的进程next->thread.esp保存在esp中,堆栈已经切换了 "movl $1f,%1\n\t" /* save EIP */ \ //现在进程prev的eip(也就是"1:\t"地址)保存在prev->thread.eip "pushl %4\n\t" /* restore EIP */ \ //将要切换的进程next->thread.eip保存在eip中 "jmp __switch_to\n" \ //且不说__switch_to中干了些什么,当CPU执行到那里的ret指令时,由于是通过jmp指令转过去的,最后进入堆栈的next->thread.eip就变成了返回地址 "1:\t" \ //如果切换的不是子进程,next->thread.eip实际上就是上一次保存在prev->thread.eip,也就是这一行语句 "popl %%ebp\n\t" \ //由于堆栈已经切换过来,pop出的都是上面存入进程prev堆栈的内容 "popl %%edi\n\t" \ "popl %%esi\n\t" \ :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ "=b" (last) \ :"m" (next->thread.esp),"m" (next->thread.eip), \ "a" (prev), "d" (next), \ "b" (prev)); \ } while (0)父进程在sys_wait4等待,父进程从"1:\t"继续执行,继续执行 sys_wait4函数 。