Linux之mount流程分析

学习Linux已经有一段时间了，最近看了下mount这个系统调用的一些流程，把它用博客记录下来，方便自己以后查找，也可以给那些有需要的人提供一些帮助。

当在用户层或者启动脚本中调用mount函数，把一个设备用相应的文件系统挂载起来之后，我们就可以很方便地访问这个设备中的文件；在内核中，mount的入口函数位于fs/namespace.c：

  
 
 
/*
 * mount system call entry point.
 *
 * dev_name, dir_name, type and data are user-space pointers, so each one
 * is copied into kernel space before do_mount() is called with the
 * kernel copies.  The copies are released in reverse order of
 * acquisition; a failure part-way through releases only what was
 * already obtained.
 *
 * Returns the result of do_mount(), or the error from the first copy
 * step that failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	unsigned long options_page;
	char *dev_copy;
	char *dir_copy;
	char *type_copy;
	int err;

	/* Filesystem type string: nothing to undo if this fails. */
	err = copy_mount_string(type, &type_copy);
	if (err < 0)
		return err;

	/* Mount point path. */
	dir_copy = getname(dir_name);
	if (IS_ERR(dir_copy)) {
		err = PTR_ERR(dir_copy);
		goto free_type;
	}

	/* Device name. */
	err = copy_mount_string(dev_name, &dev_copy);
	if (err < 0)
		goto put_dir;

	/* Filesystem-specific mount options. */
	err = copy_mount_options(data, &options_page);
	if (err < 0)
		goto free_dev;

	err = do_mount(dev_copy, dir_copy, type_copy, flags,
		       (void *) options_page);

	free_page(options_page);
free_dev:
	kfree(dev_copy);
put_dir:
	putname(dir_copy);
free_type:
	kfree(type_copy);
	return err;
}

用户空间传递了dev_name、dir_name、type、flags和data五个参数到内核中，由于dev_name、dir_name、type和data四个参数都是指针，都指向用户空间的某区域，所以需要用特定的函数将这些数据从用户层拷贝到内核。

这个函数的主要实现都在do_mount函数中：

  
 
 
long do_mount(char *dev_name, char *dir_name, char *type_page,  
   
  
  
          unsigned long flags, void *data_page)  

   
  
  {  
   
  
  
    struct path path;  

   
  
  
    int retval = 0;  

   
  
  
    int mnt_flags = 0;  

   
  
   
   
  
  
    /* Discard magic */ 

   
  
  
    if ((flags & MS_MGC_MSK) == MS_MGC_VAL)  

   
  
          flags &= ~MS_MGC_MSK;  
   
  
   
   
  
  
    /* Basic sanity checks */ 

   
  
   
   
  
  
    if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))  

   
  
  
        return -EINVAL;  

   
  
   
   
  
  
    if (data_page)  

   
  
  
        ((char *)data_page)[PAGE_SIZE - 1] = 0;  

   
  
   
   
  
  
    /* ... and get the mountpoint */ 

   
  
      retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);  
   
  
  
    if (retval)  

   
  
  
        return retval;  

   
  
   
   
  
      retval = security_sb_mount(dev_name, &path,  
   
  
                     type_page, flags, data_page);  
   
  
  
    if (retval)  

   
  
  
        goto dput_out;  

   
  
   
   
  
  
    /* Default to relatime unless overriden */ 

   
  
  
    if (!(flags & MS_NOATIME))  

   
  
          mnt_flags |= MNT_RELATIME;  
   
  
   
   
  
  
    /* Separate the per-mountpoint flags */ 

   
  
  
    if (flags & MS_NOSUID)  

   
  
          mnt_flags |= MNT_NOSUID;  
   
  
  
    if (flags & MS_NODEV)  

   
  
          mnt_flags |= MNT_NODEV;  
   
  
  
    if (flags & MS_NOEXEC)  

   
  
          mnt_flags |= MNT_NOEXEC;  
   
  
  
    if (flags & MS_NOATIME)  

   
  
          mnt_flags |= MNT_NOATIME;  
   
  
  
    if (flags & MS_NODIRATIME)  

   
  
          mnt_flags |= MNT_NODIRATIME;  
   
  
  
    if (flags & MS_STRICTATIME)  

   
  
          mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);  
   
  
  
    if (flags & MS_RDONLY)  

   
  
          mnt_flags |= MNT_READONLY;  
   
  
   
   
  
      flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |  
   
  
             MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |  
   
  
             MS_STRICTATIME);  
   
  
   
   
  
  
    if (flags & MS_REMOUNT)  

   
  
          retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,  
   
  
                      data_page);  
   
  
  
    else if (flags & MS_BIND)  

   
  
          retval = do_loopback(&path, dev_name, flags & MS_REC);  
   
  
  
    else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))  

   
  
          retval = do_change_type(&path, flags);  
   
  
  
    else if (flags & MS_MOVE)  

   
  
          retval = do_move_mount(&path, dev_name);  
   
  
  
    else 

   
  
          retval = do_new_mount(&path, type_page, flags, mnt_flags,  
   
  
                        dev_name, data_page);  
   
  
  dput_out:  
   
  
      path_put(&path);  
   
  
  
    return retval;  

   
  
  }

前面都是对一些指针和标志位的判断与转换；函数kern_path根据给定的路径字符串查找出将要挂载到哪个目录，查找成功后会通过path这个指针带回查找的结果，之后用do_new_mount这个函数去进行下一步的挂载。

kern_path函数中只调用了函数do_path_lookup

  
 
 
static int do_path_lookup(int dfd, const char *name,  
   
  
  
                unsigned int flags, struct nameidata *nd)  

   
  
  {  
   
  
  
    int retval = path_init(dfd, name, flags, nd);  

   
  
  
    if (!retval)  

   
  
          retval = path_walk(name, nd);  
   
  
  
    if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&  

   
  
                  nd->path.dentry->d_inode))  
   
  
          audit_inode(name, nd->path.dentry);  
   
  
  
    if (nd->root.mnt) {  

   
  
          path_put(&nd->root);  
   
  
          nd->root.mnt = NULL;  
   
  
      }  
   
  
  
    return retval;  

   
  
  }

分为两部分看：第一部分调用path_init，用于初始化查找的根目录；第二部分在根目录的基础上对所给的字符串目录进行逐级查找。

先看path_init

  
 
 
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)  
   
  
  {  
   
  
  
    int retval = 0;  

   
  
  
    int fput_needed;  

   
  
  
    struct file *file;  

   
  
   
   
  
  
    nd->last_type = LAST_ROOT; /* if there are only slashes... */ 

   
  
      nd->flags = flags;  
   
  
      nd->depth = 0;  
   
  
      nd->root.mnt = NULL;  
   
  
   
   
  
  
    if (*name=='/') {  

   
  
          set_root(nd);  
   
  
          nd->path = nd->root;  
   
  
          path_get(&nd->root);  
   
  
  
    } else if (dfd == AT_FDCWD) {  

   
  
  
        struct fs_struct *fs = current->fs;  

   
  
          read_lock(&fs->lock);  
   
  
          nd->path = fs->pwd;  
   
  
          path_get(&fs->pwd);  
   
  
          read_unlock(&fs->lock);  
   
  
  
    } else {  

   
  
  
        struct dentry *dentry;  

   
  
   
   
  
          file = fget_light(dfd, &fput_needed);  
   
  
          retval = -EBADF;  
   
  
  
        if (!file)  

   
  
  
            goto out_fail;  

   
  
   
   
  
          dentry = file->f_path.dentry;  
   
  
   
   
  
          retval = -ENOTDIR;  
   
  
  
        if (!S_ISDIR(dentry->d_inode->i_mode))  

   
  
  
            goto fput_fail;  

   
  
   
   
  
          retval = file_permission(file, MAY_EXEC);  
   
  
  
        if (retval)  

   
  
  
            goto fput_fail;  

   
  
   
   
  
          nd->path = file->f_path;  
   
  
          path_get(&file->f_path);  
   
  
   
   
  
          fput_light(file, fput_needed);  
   
  
      }  
   
  
  
    return 0;  

   
  
   
   
  
  fput_fail:  
   
  
      fput_light(file, fput_needed);  
   
  
  out_fail:  
   
  
  
    return retval;  

   
  
  }

这个函数主要就是一个if...else if...else语句：如果第一个字符是'/'，则说明是绝对路径，从当前进程描述符的fs的root成员中得到根目录作为查找起点；如果dfd为AT_FDCWD，则以fs的pwd成员中保存的当前工作目录作为查找起点；否则以文件描述符dfd所对应的目录作为查找起点。

再来看path_walk

  
 
 
static int path_walk(const char *name, struct nameidata *nd)  
   
  
  {  
   
  
  
    struct path save = nd->path;  

   
  
  
    int result;  

   
  
   
   
  
      current->total_link_count = 0;  
   
  
   
   
  
  
    /* make sure the stuff we saved doesn't go away */ 

   
  
      path_get(&save);  
   
  
   
   
  
      result = link_path_walk(name, nd);  
   
  
  
    if (result == -ESTALE) {  

   
  
  
        /* nd->path had been dropped */ 

   
  
          current->total_link_count = 0;  
   
  
          nd->path = save;  
   
  
          path_get(&nd->path);  
   
  
          nd->flags |= LOOKUP_REVAL;  
   
  
          result = link_path_walk(name, nd);  
   
  
      }  
   
  
   
   
  
      path_put(&save);  
   
  
   
   
  
  
    return result;  

   
  
  }

path_walk函数的代码中只调用了link_path_walk

  
 
 
/*
 * Walk the path string @name one component at a time, starting from
 * nd->path and updating @nd as each component is resolved.
 *
 * Each iteration of the main loop:
 *   1. checks exec (search) permission on the current directory inode;
 *   2. cuts out the next component into the qstr "this" and hashes it;
 *   3. handles "." and ".." specially;
 *   4. otherwise calls do_lookup() and, if the resulting inode has a
 *      follow_link operation, do_follow_link().
 *
 * The final component is handled after the last_component label, where
 * LOOKUP_PARENT, trailing slashes and LOOKUP_DIRECTORY are honoured.
 *
 * Returns 0 on success.  On failure returns the error held in err
 * (-ENOENT, -ENOTDIR, -ESTALE, or whatever a helper returned).  Paths
 * that leave the loop via "break" drop nd->path; the "goto return_err"
 * paths after a failed do_follow_link() skip that path_put() --
 * NOTE(review): presumably do_follow_link() has already released
 * nd->path in that case; confirm against its implementation.
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;
	
	while (*name=='/')  /* skip leading '/' characters */
		name++;
	/* Nothing left after the slashes: only revalidation may be needed. */
	if (!*name)
		goto return_reval;

	inode = nd->path.dentry->d_inode;
	if (nd->depth)
		lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;  /* temporarily holds the component being looked up */
		unsigned int c;

		nd->flags |= LOOKUP_CONTINUE;
		err = exec_permission(inode);
		if (err)
			break;

		this.name = name;
		c = *(const unsigned char *)name;

		/* Advance name to the end of this component, hashing as we go. */
		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);  /* accumulate the hash value */
			c = *(const unsigned char *)name;
		} while (c && (c != '/'));
		this.len = name - (const char *) this.name;
		this.hash = end_name_hash(hash);

		/* remove trailing slashes? */
		if (!c)
			goto last_component;  /* end of string: handle the last component */
		while (*++name == '/');
		/* Only slashes followed: last component, with trailing slash. */
		if (!*name)
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;  /* ".." moves the current position to the parent directory */
				/* fallthrough */
			case 1:
				continue;  /* a single "." needs no handling */
		}
		/* This does the actual lookups.. */
		err = do_lookup(nd, &this, &next);  /* the real lookup function */
		if (err)
			break;

		/* A dentry without an inode means the component does not exist. */
		err = -ENOENT;
		inode = next.dentry->d_inode;
		if (!inode)
			goto out_dput;

		if (inode->i_op->follow_link) {
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->path.dentry->d_inode;
			if (!inode)
				break;
		} else
			path_to_nameidata(&next, nd);
		/* An intermediate component must support lookup. */
		err = -ENOTDIR; 
		if (!inode->i_op->lookup)
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
		/* Clear LOOKUP_CONTINUE iff it was previously unset */
		nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
		/* With LOOKUP_PARENT the last component is recorded, not looked up. */
		if (lookup_flags & LOOKUP_PARENT)
			goto lookup_parent;
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;
				/* fallthrough */
			case 1:
				goto return_reval;
		}
		err = do_lookup(nd, &this, &next);
		if (err)
			break;
		inode = next.dentry->d_inode;
		if (follow_on_final(inode, lookup_flags)) {
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			inode = nd->path.dentry->d_inode;
		} else
			path_to_nameidata(&next, nd);
		err = -ENOENT;
		if (!inode)
			break;
		if (lookup_flags & LOOKUP_DIRECTORY) {
			err = -ENOTDIR; 
			if (!inode->i_op->lookup)
				break;
		}
		goto return_base;
lookup_parent:
		/* Record the last component and classify it as normal, "." or "..". */
		nd->last = this;
		nd->last_type = LAST_NORM;
		if (this.name[0] != '.')
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == '.')
			nd->last_type = LAST_DOTDOT;
		else
			goto return_base;
return_reval:
		/*
		 * We bypassed the ordinary revalidation routines.
		 * We may need to check the cached dentry for staleness.
		 */
		if (nd->path.dentry && nd->path.dentry->d_sb &&
		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
			err = -ESTALE;
			/* Note: we do not d_invalidate() */
			if (!nd->path.dentry->d_op->d_revalidate(
					nd->path.dentry, nd))
				break;
		}
return_base:
		return 0;
out_dput:
		path_put_conditional(&next, nd);
		break;
	}
	/* Reached via "break": drop the reference held in nd->path. */
	path_put(&nd->path);
return_err:
	return err;
}

link_path_walk函数先把给定的路径字符串进行拆分，取出每级目录的名字，然后调用do_lookup函数在当前目录的基础上进行查找，直到查完整个字符串。do_lookup的代码如下：

  
 
 
/*
 * Look up the single component @name inside the directory nd->path.dentry
 * and return the result in @path.
 *
 * Order of attempts:
 *   1. __d_lookup() on the dentry cache without taking the directory
 *      mutex;
 *   2. on a miss (or failed revalidation), take dir->i_mutex, retry with
 *      d_lookup(), and if that also misses allocate a new dentry and
 *      call the directory inode's ->lookup() method.
 *
 * A cached dentry whose d_op provides d_revalidate is passed through
 * do_revalidate() before being used.  On success @path is filled in and
 * __follow_mount() moves it to whatever is mounted on that dentry.
 *
 * Returns 0 on success, a negative value from ->d_hash(), or the error
 * encoded in the dentry pointer (-ENOENT, -ENOMEM, or what
 * ->lookup()/do_revalidate() returned).
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,struct path *path)
{
	struct vfsmount *mnt = nd->path.mnt;
	struct dentry *dentry, *parent;
	struct inode *dir;
	/*
	 * See if the low-level filesystem might want
	 * to use its own hash..
	 */
	if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
		int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
		if (err < 0)
			return err;
	}

	/* Fast path: dentry cache lookup. */
	dentry = __d_lookup(nd->path.dentry, name);
	if (!dentry)
		goto need_lookup;
	if (dentry->d_op && dentry->d_op->d_revalidate)
		goto need_revalidate;
done:
	/* Success: hand back the dentry and step onto anything mounted on it. */
	path->mnt = mnt;
	path->dentry = dentry;
	__follow_mount(path);
	return 0;

need_lookup:
	parent = nd->path.dentry;
	dir = parent->d_inode;

	mutex_lock(&dir->i_mutex);
	/*
	 * First re-do the cached lookup just in case it was created
	 * while we waited for the directory semaphore..
	 *
	 * FIXME! This could use version numbering or similar to
	 * avoid unnecessary cache lookups.
	 *
	 * The "dcache_lock" is purely to protect the RCU list walker
	 * from concurrent renames at this point (we mustn't get false
	 * negatives from the RCU list walk here, unlike the optimistic
	 * fast walk).
	 *
	 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
	 */
	dentry = d_lookup(parent, name);
	if (!dentry) {
		struct dentry *new;

		/* Don't create child dentry for a dead directory. */
		dentry = ERR_PTR(-ENOENT);
		if (IS_DEADDIR(dir))
			goto out_unlock;

		new = d_alloc(parent, name);
		dentry = ERR_PTR(-ENOMEM);
		if (new) {
			/*
			 * Ask the filesystem.  A non-NULL return (a dentry or
			 * an error pointer) replaces "new", which is then
			 * dropped; NULL means "new" itself is the result.
			 */
			dentry = dir->i_op->lookup(dir, new, nd);
			if (dentry)
				dput(new);
			else
				dentry = new;
		}
out_unlock:
		mutex_unlock(&dir->i_mutex);
		if (IS_ERR(dentry))
			goto fail;
		goto done;
	}

	/*
	 * Uhhuh! Nasty case: the cache was re-populated while
	 * we waited on the semaphore. Need to revalidate.
	 */
	mutex_unlock(&dir->i_mutex);
	if (dentry->d_op && dentry->d_op->d_revalidate) {
		dentry = do_revalidate(dentry, nd);
		if (!dentry)
			dentry = ERR_PTR(-ENOENT);
	}
	if (IS_ERR(dentry))
		goto fail;
	goto done;

need_revalidate:
	/* A NULL result means the cached dentry is unusable: do a full lookup. */
	dentry = do_revalidate(dentry, nd);
	if (!dentry)
		goto need_lookup;
	if (IS_ERR(dentry))
		goto fail;
	goto done;

fail:
	return PTR_ERR(dentry);
}

do_lookup先调用__d_lookup进行查找，如果查找失败，再调用d_lookup。d_lookup内部其实还是调用__d_lookup函数，只是在此基础上使用顺序锁（seqlock）保护起来，以防止并发的重命名操作导致漏查；如果两次都查找失败，就新分配一个dentry，并调用父目录inode的lookup方法到具体的文件系统中去查找。函数的最后会调用__follow_mount，检查当前dentry上是否存在挂载点，若存在则用最上层挂载的根dentry和vfsmount对path重新赋值。__follow_mount的代码如下：

  
 
 
static int __follow_mount(struct path *path)  
   
  
  {  
   
  
  
    int res = 0;  

   
  
  
    while (d_mountpoint(path->dentry)) {  

   
  
  
        struct vfsmount *mounted = lookup_mnt(path);  

   
  
  
        if (!mounted)  

   
  
  
            break;  

   
  
          dput(path->dentry);  
   
  
  
        if (res)  

   
  
              mntput(path->mnt);  
   
  
          path->mnt = mounted;  
   
  
          path->dentry = dget(mounted->mnt_root);  
   
  
          res = 1;  
   
  
      }  
   
  
  
    return res;  

   
  
  }

再看下__d_lookup函数的实现：

  
 
 
/*
 * Search the dentry hash chain selected by (@parent, name->hash) for a
 * child of @parent whose name matches @name.
 *
 * The chain is walked under rcu_read_lock(); each candidate is cheaply
 * filtered by hash and parent, then rechecked under its d_lock before
 * the names are compared.  Names are compared with the parent's
 * ->d_compare() when it provides one, otherwise by length and memcmp().
 *
 * Returns the matching dentry with d_count incremented, or NULL if no
 * entry matched.
 */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_head *head = d_hash(parent,hash);
	struct dentry *found = NULL;
	struct hlist_node *node;
	struct dentry *dentry;

	rcu_read_lock();
	
	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
		struct qstr *qstr;

		/* Cheap unlocked filters before taking the per-dentry lock. */
		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;

		spin_lock(&dentry->d_lock);

		/*
		 * Recheck the dentry after taking the lock - d_move may have
		 * changed things.  Don't bother checking the hash because we're
		 * about to compare the whole name anyway.
		 */
		if (dentry->d_parent != parent)
			goto next;

		/* non-existing due to RCU? */
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		qstr = &dentry->d_name;
		if (parent->d_op && parent->d_op->d_compare) {
			/* A non-zero result from ->d_compare() means "no match". */
			if (parent->d_op->d_compare(parent, qstr, name))
				goto next;
		} else {  /* no d_compare method: a plain string match is enough for filesystems without special requirements */
			if (qstr->len != len)
				goto next;
			if (memcmp(qstr->name, str, len))
				goto next;
		}

		/* Match: take a reference while d_lock is still held. */
		atomic_inc(&dentry->d_count);
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}

__d_lookup函数会遍历父目录的hash表找出相匹配的子目录。

到这里整个挂载目录的查找就结束了，kern_path完成之后会通过path变量带回挂载点的dentry和父文件系统的vfsmount到do_mount函数中。

do_mount继续调用do_new_mount函数：do_new_mount分为两部分，第一部分是生成挂载所需的超级块等数据结构；第二部分用于将第一部分中生成的结构加入到内核的挂载树中去。

先看第一部分，通过do_kern_mount实现，do_kern_mount又调用了vfs_kern_mount：

  
 
 
struct vfsmount *  
   
  
  
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)  

   
  
  {  
   
  
  
    struct vfsmount *mnt;  

   
  
  
    char *secdata = NULL;  

   
  
  
    int error;  

   
  
   
   
  
  
    if (!type)  

   
  
  
        return ERR_PTR(-ENODEV);  

   
  
   
   
  
      error = -ENOMEM;  
   
  
      mnt = alloc_vfsmnt(name);  
   
  
  
    if (!mnt)  

   
  
  
        goto out;  

   
  
   
   
  
  
    if (flags & MS_KERNMOUNT)  

   
  
          mnt->mnt_flags = MNT_INTERNAL;  
   
  
   
   
  
  
    if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {  

   
  
          secdata = alloc_secdata();  
   
  
  
        if (!secdata)  

   
  
  
            goto out_mnt;  

   
  
   
   
  
          error = security_sb_copy_data(data, secdata);  
   
  
  
        if (error)  

   
  
  
            goto out_free_secdata;  

   
  
      }  
   
  
   
   
  
      error = type->get_sb(type, flags, name, data, mnt);  
   
  
  
    if (error < 0)  

   
  
  
        goto out_free_secdata;  

   
  
      BUG_ON(!mnt->mnt_sb);  
   
  
      WARN_ON(!mnt->mnt_sb->s_bdi);  
   
  
      mnt->mnt_sb->s_flags |= MS_BORN;  
   
  
   
   
  
      error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);  
   
  
  
    if (error)  

   
  
  
        goto out_sb;  

   
  
   
   
  
  
    /*  
   
  
       * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE  
   
  
       * but s_maxbytes was an unsigned long long for many releases. Throw  
   
  
       * this warning for a little while to try and catch filesystems that  
   
  
       * violate this rule. This warning should be either removed or  
   
  
       * converted to a BUG() in 2.6.34.  
   
  
       */ 
   
  
  
    WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 

   
  
  
        "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);  

   
  
   
   
  
      mnt->mnt_mountpoint = mnt->mnt_root;  
   
  
      mnt->mnt_parent = mnt;  
   
  
      up_write(&mnt->mnt_sb->s_umount);  
   
  
      free_secdata(secdata);  
   
  
  
    return mnt;  

   
  
  out_sb:  
   
  
      dput(mnt->mnt_root);  
   
  
      deactivate_locked_super(mnt->mnt_sb);  
   
  
  out_free_secdata:  
   
  
      free_secdata(secdata);  
   
  
  out_mnt:  
   
  
      free_vfsmnt(mnt);  
   
  
  out:  
   
  
  
    return ERR_PTR(error);  

   
  
  }

这部分的重点在于type->get_sb(type, flags, name, data, mnt); 调用特定文件系统的get_sb函数生成超级块对象和挂载点等数据结构。

第二部分的代码为函数do_add_mount

  
 
 
int do_add_mount(struct vfsmount *newmnt, struct path *path,  
   
  
  
         int mnt_flags, struct list_head *fslist)  

   
  
  {  
   
  
  
    int err;  

   
  
   
   
  
      mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);  
   
  
   
   
  
      down_write(&namespace_sem);  
   
  
  
    /* Something was mounted here while we slept */ 

   
  
  
    while (d_mountpoint(path->dentry) &&  

   
  
             follow_down(path))  
   
  
          ;  
   
  
      err = -EINVAL;  
   
  
  
    if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))  

   
  
  
        goto unlock;  

   
  
   
   
  
  
    /* Refuse the same filesystem on the same mount point */ 

   
  
      err = -EBUSY;  
   
  
  
    if (path->mnt->mnt_sb == newmnt->mnt_sb &&  

   
  
          path->mnt->mnt_root == path->dentry)  
   
  
  
        goto unlock;  

   
  
   
   
  
      err = -EINVAL;  
   
  
  
    if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))  

   
  
  
        goto unlock;  

   
  
   
   
  
      newmnt->mnt_flags = mnt_flags;  
   
  
  
    if ((err = graft_tree(newmnt, path)))  

   
  
  
        goto unlock;  

   
  
   
   
  
  
    if (fslist) /* add to the specified expiration list */ 

   
  
          list_add_tail(&newmnt->mnt_expire, fslist);  
   
  
   
   
  
      up_write(&namespace_sem);  
   
  
  
    return 0;  

   
  
   
   
  
  unlock:  
   
  
      up_write(&namespace_sem);  
   
  
      mntput(newmnt);  
   
  
  
    return err;  

   
  
  }

继续调用graft_tree

  
 
 
static int graft_tree(struct vfsmount *mnt, struct path *path)  
   
  
  {  
   
  
  
    int err;  

   
  
  
    if (mnt->mnt_sb->s_flags & MS_NOUSER)  

   
  
  
        return -EINVAL;  

   
  
   
   
  
  
    if (S_ISDIR(path->dentry->d_inode->i_mode) !=  

   
  
            S_ISDIR(mnt->mnt_root->d_inode->i_mode))  
   
  
  
        return -ENOTDIR;  

   
  
   
   
  
      err = -ENOENT;  
   
  
      mutex_lock(&path->dentry->d_inode->i_mutex);  
   
  
  
    if (cant_mount(path->dentry))  

   
  
  
        goto out_unlock;  

   
  
   
   
  
  
    if (!d_unlinked(path->dentry))  

   
  
  
        err = attach_recursive_mnt(mnt, path, NULL);  

   
  
  out_unlock:  
   
  
      mutex_unlock(&path->dentry->d_inode->i_mutex);  
   
  
  
    return err;  

   
  
  }

调用attach_recursive_mnt

  
 
 
static int attach_recursive_mnt(struct vfsmount *source_mnt,  
   
  
  
            struct path *path, struct path *parent_path)  

   
  
  {  
   
  
      LIST_HEAD(tree_list);  
   
  
  
    struct vfsmount *dest_mnt = path->mnt;  

   
  
  
    struct dentry *dest_dentry = path->dentry;  

   
  
  
    struct vfsmount *child, *p;  

   
  
  
    int err;  

   
  
   
   
  
  
    if (IS_MNT_SHARED(dest_mnt)) {  

   
  
  
        err = invent_group_ids(source_mnt, true);  

   
  
  
        if (err)  

   
  
  
            goto out;  

   
  
      }  
   
  
      err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);  
   
  
  
    if (err)  

   
  
  
        goto out_cleanup_ids;  

   
  
   
   
  
      spin_lock(&vfsmount_lock);  
   
  
   
   
  
  
    if (IS_MNT_SHARED(dest_mnt)) {  

   
  
  
        for (p = source_mnt; p; p = next_mnt(p, source_mnt))  

   
  
              set_mnt_shared(p);  
   
  
      }  
   
  
  
    if (parent_path) {  

   
  
          detach_mnt(source_mnt, parent_path);  
   
  
          attach_mnt(source_mnt, path);  
   
  
          touch_mnt_namespace(parent_path->mnt->mnt_ns);  
   
  
  
    } else {  

   
  
  
        mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);  

   
  
          commit_tree(source_mnt);  
   
  
      }  
   
  
   
   
  
      list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {  
   
  
          list_del_init(&child->mnt_hash);  
   
  
          commit_tree(child);  
   
  
      }  
   
  
      spin_unlock(&vfsmount_lock);  
   
  
  
    return 0;  

   
  
   
   
  
   out_cleanup_ids:  
   
  
  
    if (IS_MNT_SHARED(dest_mnt))  

   
  
          cleanup_group_ids(source_mnt, NULL);  
   
  
   out:  
   
  
  
    return err;  

   
  
  }

调用mnt_set_mountpoint

  
 
 
/*
 * Record that @child_mnt is mounted on @dentry inside the mount @mnt.
 *
 * Takes a reference on both @mnt and @dentry, stores them as the child's
 * parent mount and mountpoint, and bumps the dentry's mounted counter.
 */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	struct vfsmount *parent = mntget(mnt);
	struct dentry *mountpoint = dget(dentry);

	/* Parent filesystem (mount) of the child. */
	child_mnt->mnt_parent = parent;
	/* Dentry the child is mounted on. */
	child_mnt->mnt_mountpoint = mountpoint;
	/* One more mount sits on this dentry. */
	dentry->d_mounted += 1;
}

到这里整个mount的流程就分析完毕了，mount的流程可以分为以下几个步骤：

一、查找给定挂载目录中的目录项结构和挂载点；

二、通过设备节点和文件系统类型生成新挂载文件系统的超级块等结构；

三、将二中生成的结构连接到一中查找到的路径中

本文出自 “Linux” 博客，请务必保留此出处http://ywn7263.blog.51cto.com/7048508/1184859

秒客网

Linux之mount流程分析

相关文章