文件系统-- 标准路径名查找

转：http://blog.chinaunix.net/uid/12567959/frmd/13328.html

当进程要使用一个文件时，如open()、mkdir()、rename()或stat()等，就要首先进行路径名查找，即是将人类易于识别的字符串形式路径名，转换为一个文件在内核中的内部表示，也就是目录项、vfsmount和inode等。

执行这一任务的标准过程就是分析路径名并把它拆分成一个文件名序列。除了最后一个项以外，所有的文件名都必定是目录。如果路径名的第一个字符是“/”，例如：/usr/bin/tree，这个路径名就是一个绝对路径名，因此从current->fs->root（进程的根目录）所标识的目录开始搜索。否则，路径名就是一个相对路径，从current->fs->pwd（进程的当前目录）所标识的目录开始搜索。
在对初始目录（进程的根目录或进程的当前目录）的索引节点进行处理的过程中，代码要检查与文件名匹配的目录项，以获得相应的索引节点。然后，从缓存或磁盘读出那个索引节点所表示的目录文件，并检查与第二个名字匹配的目录项，以获得相应的索引节点。对于包含在路径中的每个名字，这个过程反复执行。
目录项高速缓存极大地加速了这一过程，因为它把最近最常使用的目录项对象保留在内存中。正如我们以前看到的，每个这样的对象使特定目录中的一个文件名与它相应的索引节点相联系。因此在很多情况下，路径名的分析可以避免从磁盘读取中间目录文件的内容。但是，事情并不像看起来那么简单，因为必须考虑如下的Unix和VFS文件系统的特点：
- 对每个目录的访问权必须进行检查，以验证是否允许进程读取这一目录的内容。
- 文件名可能是与任意一个路径名对应的符号链接；在这种情况下，分析必须扩展到那个路径名的所有分量。
- 符号链接可能导致循环引用；内核必须考虑这个可能性，并能在出现这种情况时将循环终止。
- 文件名可能是一个已挂载文件系统的挂载点。这种情况必须检测到，这样，查找操作必须延伸到新的文件系统。
- 路径名查找应该在发出系统调用的进程的命名空间中完成。由具有不同命名空间的两个进程使用的相同路径名，可能指定了不同的文件。
内核提供了在不同的条件下调用的用于路径名查找的函数，他们分别为path_lookup()、kern_path()、user_path_at()，

其定义如下：

fs/namei.c
/*
 * path_lookup - look up @name, storing the result in @nd.
 *
 * Thin wrapper: forwards to do_path_lookup() with AT_FDCWD as the
 * base-directory argument and returns its result unchanged.
 */
int path_lookup(const char *name, unsigned int flags,
           struct nameidata *nd)
{
    return do_path_lookup(AT_FDCWD, name, flags, nd);
}

/*
 * kern_path - look up @name and hand back only the resulting struct path.
 *
 * Runs do_path_lookup() with AT_FDCWD into a scratch nameidata.  On
 * success the located path is copied to *@path and 0 is returned; on
 * failure the error from do_path_lookup() is returned and *@path is left
 * untouched.
 */
int kern_path(const char *name, unsigned int flags, struct path *path)
{
    struct nameidata nd;
    int res;

    res = do_path_lookup(AT_FDCWD, name, flags, &nd);
    if (res)
        return res;

    /* Success: copy the result out of the scratch nameidata. */
    *path = nd.path;
    return 0;
}

/*
 * user_path_at - look up a user-space pathname relative to @dfd.
 *
 * getname() produces the kernel-side copy of @name; if that fails, its
 * error is returned.  LOOKUP_PARENT is not allowed here (BUG_ON).  The
 * copy is released with putname() as soon as do_path_lookup() returns.
 * On success *@path receives the result and 0 is returned; otherwise the
 * lookup error is returned and *@path is left untouched.
 */
int user_path_at(int dfd, const char __user *name, unsigned flags,
        struct path *path)
{
    struct nameidata nd;
    char *tmp;
    int err;

    tmp = getname(name);
    if (IS_ERR(tmp))
        return PTR_ERR(tmp);

    BUG_ON(flags & LOOKUP_PARENT);

    err = do_path_lookup(dfd, tmp, flags, &nd);
    putname(tmp);
    if (err)
        return err;

    *path = nd.path;
    return 0;
}

user_path_at()会首先调用getname(name)，将用户空间的路径名参数复制到内核空间的临时缓冲区中。他们最终都会调用do_path_lookup()函数来实际完成路径名的查找工作，这个函数接受四个参数：
dfd：使用的基目录；name：指向要解析的文件路径名的指针；flags：标志的值，表示将会怎样访问查找的文件；nd：nameidata数据结构的地址，这个结构存放了查找操作的结果。
可以使用的dfd定义如下，注释中的解释也比较清楚明白了：

include/linux/fcntl.h
/*
 * AT_* constants.  AT_FDCWD is the value the lookup helpers pass as the
 * dfd (base directory) argument; the 0x100/0x200/0x400 values are option
 * bits, per their own comments.
 */
#define AT_FDCWD         -100     /* Special value used to indicate openat should use the current working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100  /* Do not follow symbolic links.  */
#define AT_REMOVEDIR     0x200   /* Remove directory instead of
                                           unlinking file.  */
#define AT_SYMLINK_FOLLOW   0x400   /* Follow symbolic links.  */

do_path_lookup()返回时，查找的结果存放在参数nd指向的nameidata结构中，其定义如下：

include/linux/namei.h
/* State and result of one pathname lookup. */
struct nameidata {
    struct path   path; /* the path that was found */
/* last component of the pathname (used when LOOKUP_PARENT is set) */
    struct qstr   last;
    struct path   root; /* root path of the process */
    unsigned int  flags; /* lookup flags */
/* type of the last pathname component (used when LOOKUP_PARENT is set) */
    int    last_type;
    /*
     * Current symlink nesting level.  The original note says it must be
     * less than 6; NOTE(review): confirm against MAX_NESTED_LINKS, which
     * sizes the array below.
     */
    unsigned   depth;
    /* pathnames associated with the nested symbolic links */
    char *saved_names[MAX_NESTED_LINKS + 1];

    /* Intent data */
    union {
       struct open_intent open;
    } intent;  /* single-member union describing how the file will be accessed */
};

path字段会保存最后一个路径分量的信息（目录项对象和vfsmount对象）。这个字段“描述”由给定路径名表示的文件。
由于do_path_lookup()函数返回的nameidata结构中的目录项对象和vfsmount对象代表了查找操作的结果，因此在do_path_lookup()的调用者完成使用查找结果之前，这两个对象都不能被释放。因此，do_path_lookup()增加这两个对象引用计数器的值。如果调用者想释放这些对象，则调用path_put()函数，传递给它的参数就是path结构的地址。
flags字段存放查找操作中使用的某些标志的值，这些标志中的大部分可由调用者在do_path_lookup()的flags参数中进行设置：

include/linux/namei.h
/* If the last component is a symbolic link, interpret (follow) it */
#define LOOKUP_FOLLOW       1
/* The last component must be a directory */
#define LOOKUP_DIRECTORY 2
/* There are still file names left to examine in the pathname */
#define LOOKUP_CONTINUE     4
/* Look up the directory that contains the last component */
#define LOOKUP_PARENT       16
/* Do not trust the dentry contents; force a real lookup, i.e. search the entries of the parent directory */
#define LOOKUP_REVAL     64
/*
 * Intent data
 */
/* Intent: open a file */
#define LOOKUP_OPEN      0x0100
/* Intent: create the file (if it does not exist) */
#define LOOKUP_CREATE       0x0200
#define LOOKUP_EXCL      0x0400
#define LOOKUP_RENAME_TARGET    0x0800

下面，我们来详细查看do_path_lookup ()函数，do_path_lookup()定义如下：

fs/namei.c
/*
 * Returns 0 and nd will be valid on success; Returns error, otherwise.
 *
 * path_init() selects the starting point, then path_walk() resolves @name
 * from there.  A successful lookup that ends on a dentry with an inode is
 * reported through audit_inode() unless the audit context is a dummy one.
 * Any reference still recorded in nd->root is dropped before returning,
 * on both the success and the failure path.
 */
static int do_path_lookup(int dfd, const char *name,
              unsigned int flags, struct nameidata *nd)
{
    int retval = path_init(dfd, name, flags, nd);
    if (!retval)
       retval = path_walk(name, nd);
    if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
              nd->path.dentry->d_inode))
       audit_inode(name, nd->path.dentry);
    /* nd->root.mnt doubles as the "root reference is held" marker. */
    if (nd->root.mnt) {
       path_put(&nd->root);
       nd->root.mnt = NULL;
                                }
    return retval;
}

do_path_lookup ()执行如下操作：
1、调用path_init()来初始化nd参数的某些字段，path_init()定义如下：

fs/namei.c
/*
 * path_init - initialise @nd and choose the directory the walk starts from.
 *
 * Resets last_type, flags, depth and root.mnt, then sets nd->path (taking
 * a reference) to one of:
 *   - the root obtained through set_root(), when @name starts with '/';
 *   - current->fs->pwd, when @dfd is AT_FDCWD;
 *   - the path of the open file @dfd otherwise.
 *
 * Returns 0 on success.  In the descriptor case it returns -EBADF when
 * fget_light() yields no file, -ENOTDIR when the file is not a directory,
 * or the error from file_permission(file, MAY_EXEC).
 */
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
    int retval = 0;
    int fput_needed;
    struct file *file;

    nd->last_type = LAST_ROOT; /* if there are only slashes... */
    nd->flags = flags;
    nd->depth = 0;
    nd->root.mnt = NULL;

    if (*name=='/') {
       /* Absolute name: start at the root and take a reference for nd->path. */
       set_root(nd);
       nd->path = nd->root;
       path_get(&nd->root);
    } else if (dfd == AT_FDCWD) {
       /* Relative name: copy pwd and take the reference under fs->lock. */
       struct fs_struct *fs = current->fs;
       read_lock(&fs->lock);
       nd->path = fs->pwd;
       path_get(&fs->pwd);
       read_unlock(&fs->lock);
    } else {
       /* Relative to an open descriptor, which must be a searchable directory. */
       struct dentry *dentry;

       file = fget_light(dfd, &fput_needed);
       retval = -EBADF;
       if (!file)
           goto out_fail;

       dentry = file->f_path.dentry;

       retval = -ENOTDIR;
       if (!S_ISDIR(dentry->d_inode->i_mode))
           goto fput_fail;

       retval = file_permission(file, MAY_EXEC);
       if (retval)
           goto fput_fail;

       nd->path = file->f_path;
       path_get(&file->f_path);

       /* nd->path holds its own reference now; release the file. */
       fput_light(file, fput_needed);
    }
    return 0;

fput_fail:
    fput_light(file, fput_needed);
out_fail:
    return retval;
}

a. last_type字段置位LAST_ROOT（如果路径名是一个“/”或“/”序列，那么这是必需的）
b.把flags字段设置为参数flags的值。
c.把depth字段设为0。
d.判断路径名为绝对路径还是相对路径。若为绝对路径，则调用set_root(nd)将root字段设置为current->fs->root，并增加其引用计数；并将path字段同样设为nd->root，再次增加其引用计数。否则，若dfd为AT_FDCWD，则为相对于当前工作目录的路径名，将path字段设置为进程的当前路径current->fs->pwd，并增加该路径引用计数。否则，使用的基目录就是进程文件描述符表中的某个文件，此时dfd参数正是该目录文件的文件描述符：先检查它确实是目录（否则返回-ENOTDIR）且进程对它有执行（搜索）权限，然后将path字段设为该目录文件的路径并增加其引用计数。
总之，也就是将查找的路径名的基路径找到，并赋给path字段。

2、调用path_walk(name, nd)处理

fs/namei.c
/*
 * path_walk - resolve @name starting from the path already stored in @nd.
 *
 * Keeps a referenced copy of the starting path so the walk can be
 * repeated: when link_path_walk() returns -ESTALE the starting path is
 * restored, LOOKUP_REVAL is added to nd->flags and the walk is run once
 * more.  Returns the result of the last link_path_walk() call.
 */
static int path_walk(const char *name, struct nameidata *nd)
{
    struct path save = nd->path;
    int result;

    current->total_link_count = 0;

    /* make sure the stuff we saved doesn't go away */
    path_get(&save);

    result = link_path_walk(name, nd);
    if (result == -ESTALE) {
       /* nd->path had been dropped */
       current->total_link_count = 0;
       nd->path = save;
       path_get(&nd->path);
       nd->flags |= LOOKUP_REVAL;
       result = link_path_walk(name, nd);
    }

    /* Drop the extra reference taken on the saved starting path. */
    path_put(&save);

    return result;
}

path_walk(name, nd)执行如下操作：
a.将查找的路径名的基目录的路径保存在临时变量save中
b.将当前进程的total_link_count字段设置为0。
c.增加查找的路径名的基目录的路径的引用计数，确保保存的查找的路径名的基目录的路径不会消失。
d.调用link_path_walk()函数处理正在进行的查找操作：
result = link_path_walk(name, nd);
如果返回值为-ESTALE，则设置查找标志LOOKUP_REVAL并再次查找。
（这个函数是路径名查找操作的核心，随后有更详细说明。）
e.减少查找的路径名的基目录的路径的引用计数。

3、产生审计信息，即记录对于文件的访问。说明：2.6 Linux内核有用日志记录事件的能力，比如记录系统调用和文件访问。然后，管理员可以评审这些日志，确定可能存在的安全裂口，比如失败的登录尝试，或者用户对系统文件不成功的访问。这种功能称为Linux审计系统。

include/linux/audit.h
/*
 * audit_dummy_context - report whether the current task has no usable
 * audit context.
 *
 * Returns 1 when current->audit_context is NULL or when the int stored at
 * its start is non-zero; returns 0 otherwise.
 */
static inline int audit_dummy_context(void)
{
    void *ctx = current->audit_context;

    if (!ctx)
        return 1;
    return *(int *)ctx != 0;
}

/*
 * audit_inode - pass @name and @dentry to __audit_inode(), but only when
 * audit_dummy_context() reports a real (non-dummy) audit context.
 */
static inline void audit_inode(const char *name, const struct dentry *dentry) {
    if (unlikely(!audit_dummy_context()))
       __audit_inode(name, dentry);
}

4、若nd->root.mnt不为空（当路径名为绝对路径时，在path_init()设置），则减少对根路径的引用并设置nd->root.mnt为NULL。

路径名查找是由link_path_walk()函数执行的，它接收两个参数：

name：指向要解析的文件路径名的指针。
nd：nameidata数据结构的地址，这个结构存放了查找操作的结果。

当link_path_walk()返回时，结果参数nd指向的nameidata结构用与路径名查找操作有关的数据来填充：

/* State and result of one pathname lookup. */
struct nameidata {
	struct path	path;	/* the path that was found */
	struct qstr	last; /* last component of the pathname (used when LOOKUP_PARENT is set) */
	struct path	root;
	unsigned int	flags;/* lookup flags */
	int		last_type; /* type of the last pathname component (used when LOOKUP_PARENT is set) */
	/*
	 * Current symlink nesting level.  The original note says it must be
	 * less than 6; NOTE(review): confirm against MAX_NESTED_LINKS.
	 */
	unsigned	depth;
	char *saved_names[MAX_NESTED_LINKS + 1]; /* pathnames associated with the nested symbolic links */

	/* Intent data */
	union { /* single-member union describing how the file will be accessed */
		struct open_intent open;
	} intent;
};

flags字段存放查找操作中使用的某些标志的值，这些标志中的大部分可由调用者在path_lookup()的flags参数中进行设置：
LOOKUP_FOLLOW：如果最后一个分量是符号链接，则解释（追踪）它
LOOKUP_DIRECTORY：最后一个分量必须是目录
LOOKUP_CONTINUE：在路径名中还有文件名要检查
LOOKUP_PARENT：查找最后一个分量名所在的目录
LOOKUP_NOALT：不考虑模拟根目录（在80x86体系结构中没有用）
LOOKUP_OPEN：试图打开一个文件
LOOKUP_CREATE：试图创建一个文件（如果不存在）
LOOKUP_ACCESS：试图为一个文件检查用户的权限

我们来看一下link_path_walk的实现

/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 *
 * The name is consumed one '/'-separated component at a time.  Every
 * component except the last is handled in the first half of the loop
 * body; the last one is handled from last_component onwards (or from
 * last_with_slashes when the name ends in '/').
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;	/* working copy of the lookup flags */

	/* Skip any slashes in front of the first component. */
	while (*name=='/')
		name++;
	/* Nothing left after the slashes (empty string): finish via the revalidation check. */
	if (!*name)
		goto return_reval;

	/* inode of the directory the walk starts from */
	inode = nd->path.dentry->d_inode;
	/*
	 * Non-zero depth (symlink nesting level): replace lookup_flags with
	 * LOOKUP_FOLLOW plus whatever LOOKUP_CONTINUE bit nd->flags carries.
	 */
	if (nd->depth)
		lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

	/* At this point we know we have a real path component. */
	for(;;) {	/* one iteration per '/'-separated component */
		unsigned long hash;
		struct qstr this;	/* name, length and hash of the current component */
		unsigned int c;

		/* Note: the bit is set in nd->flags, not in the local lookup_flags. */
		nd->flags |= LOOKUP_CONTINUE;
		/* The directory reached so far must pass the exec-permission check. */
		err = exec_permission(inode);
		if (err)
			break;

		this.name = name;	/* the component starts here */
		c = *(const unsigned char *)name;

		hash = init_name_hash();
		do {	/* advance to the next '/' or NUL, hashing every character */
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != '/'));
		this.len = name - (const char *) this.name;
		this.hash = end_name_hash(hash);

		/* remove trailing slashes? */
		if (!c)	/* NUL right after the component: this is the last one */
			goto last_component;
		while (*++name == '/');	/* skip the separator and any repeated '/' */
		if (!*name)	/* only slashes followed: last component, handled as a directory */
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	/* "..": move to the parent via follow_dotdot(), then go on */
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;
				/* fallthrough */
			case 1:	/* ".": nothing to look up, continue with the next component */
				continue;
		}
		/* This does the actual lookups.. */
		err = do_lookup(nd, &this, &next);	/* result for this component is returned in next */
		if (err)
			break;

		/* A dentry without an inode cannot be an intermediate component. */
		err = -ENOENT;
		inode = next.dentry->d_inode;
		if (!inode)
			goto out_dput;

		if (inode->i_op->follow_link) {	/* inode has a follow_link method: resolve it via do_follow_link() */
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->path.dentry->d_inode;
			if (!inode)
				break;
		} else
			path_to_nameidata(&next, nd);	/* step nd->path onto the component just found */
		/* An intermediate component needs a lookup method, otherwise -ENOTDIR. */
		err = -ENOTDIR;
		if (!inode->i_op->lookup)
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		/* A trailing '/' forces symlink following and a directory check. */
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
		/* Clear LOOKUP_CONTINUE iff it was previously unset */
		nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
		if (lookup_flags & LOOKUP_PARENT)
			goto lookup_parent;
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	/* "..": move to the parent, then finish via the revalidation check */
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->path.dentry->d_inode;
				/* fallthrough */
			case 1:	/* ".": finish via the revalidation check */
				goto return_reval;
		}
		err = do_lookup(nd, &this, &next);
		if (err)
			break;
		inode = next.dentry->d_inode;
		/* follow_on_final() decides whether the final component is followed as a link. */
		if (follow_on_final(inode, lookup_flags)) {
			err = do_follow_link(&next, nd);
			if (err)
				goto return_err;
			inode = nd->path.dentry->d_inode;
		} else
			path_to_nameidata(&next, nd);
		err = -ENOENT;
		if (!inode)
			break;
		if (lookup_flags & LOOKUP_DIRECTORY) {
			err = -ENOTDIR;
			if (!inode->i_op->lookup)
				break;
		}
		goto return_base;
lookup_parent:
		/* LOOKUP_PARENT: record the last component and its type instead of looking it up. */
		nd->last = this;
		nd->last_type = LAST_NORM;
		if (this.name[0] != '.')
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == '.')
			nd->last_type = LAST_DOTDOT;
		else
			goto return_base;
return_reval:
		/*
		 * We bypassed the ordinary revalidation routines.
		 * We may need to check the cached dentry for staleness.
		 */
		if (nd->path.dentry && nd->path.dentry->d_sb &&
		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
			err = -ESTALE;
			/* Note: we do not d_invalidate() */
			if (!nd->path.dentry->d_op->d_revalidate(
					nd->path.dentry, nd))
				break;
		}
return_base:
		return 0;
out_dput:
		path_put_conditional(&next, nd);
		break;
	}
	/* Every break from the loop lands here: drop nd->path, then return err. */
	path_put(&nd->path);
return_err:
	return err;
}

link_path_walk()执行下列步骤：
1、用nd->flags初始化lookup_flags局部变量。
2、跳过路径名第一个分量前的任何斜杠（/）。
3、如果剩余的路径名为空，则返回0。没有改变nameidata结构数据，nd->path中存放将要查找的路径名的基路径。
4、把将要查找的路径名的基路径的inode地址存放在局部变量inode中，即初始化最近一个所解析分量的索引节点对象的地址为将要查找的路径名的基路径的inode地址。
5、如果nd描述符中的depth字段（即符号链接嵌套的当前级别）的值为正（大于0），则把lookup_flags局部变量重置为LOOKUP_FOLLOW标志，并保留nd->flags中已有的LOOKUP_CONTINUE位（这个跟符号链接查找相关）。
6、执行一个循环，把name参数中传递的路径名分解为分量（中间的“/”被当做文件名分隔符对待）；对于每个找到的分量，该函数：
a.设置nd->flags中的LOOKUP_CONTINUE标志（注意代码修改的是nd->flags，而不是lookup_flags局部变量）。
b. 执行exec_permission(inode)函数检查存放到索引节点中的最近那个所解析分量的许可权是否允许执行（在Unix中，只有目录是可执行的，它才可以被遍历）。exec_permission()函数定义如下：

/*
 * exec_permission - MAY_EXEC check used for every directory the walk
 * passes through.
 *
 * If the inode supplies its own ->permission method, that method decides
 * and a non-zero result is returned as is.  Otherwise
 * acl_permission_check() is consulted; a failure there is overridden when
 * the caller has CAP_DAC_OVERRIDE or CAP_DAC_READ_SEARCH.  Whenever
 * access is granted, the return value is that of
 * security_inode_permission(inode, MAY_EXEC).
 */
static int exec_permission(struct inode *inode)
{
	int ret;

	if (inode->i_op->permission) {
		/* The filesystem supplies its own permission method. */
		ret = inode->i_op->permission(inode, MAY_EXEC);
		if (ret)
			return ret;
	} else {
		/* Generic check first, then the capability override. */
		ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
		if (ret && !capable(CAP_DAC_OVERRIDE) &&
		    !capable(CAP_DAC_READ_SEARCH))
			return ret;
	}

	return security_inode_permission(inode, MAY_EXEC);
}

如果文件系统提供了inode->i_op->permission方法，则exec_permission()调用该例程执行EXEC权限检查，如果不允许执行则返回错误码，若允许，则调用security_inode_permission()，使用LSM的security_ops->inode_permission()方法来执行权限检查，并返回该方法的返回值。
inode->i_op->permission方法不存在，则调用acl_permission_check()执行基本的POSIX ACL权限检查，若通过检查，则调用security_inode_permission()，使用LSM的security_ops->inode_permission()方法来执行权限检查，并返回该方法的返回值。
若不通过，则执行权能检查，若同样不允许，则返回错误码。若允许，则调用security_inode_permission()，使用LSM的security_ops->inode_permission()方法来执行权限检查，并返回该方法的返回值。
如果最近所解析分量不允许执行，那么link_path_walk()跳出循环并返回一个错误码。
c. 考虑要解析的下一个分量（841行-851行）。从它的名字，函数为目录项高速缓存散列表计算一个32位的散列值。
注意，这里用到了目录项名字数据结构qstr：

include/linux/dcache.h
 /* Name of one pathname component: hash value, length and start of the text. */
 struct qstr {
         unsigned int hash;	/* hash of the name (link_path_walk stores end_name_hash() here) */
         unsigned int len;	/* number of characters in the component */
         const unsigned char *name;	/* first character of the component */
 };

当前目录分量存放到了指向qstr结构的this局部变量中。
散列表的32位散列值如下计算：

include/linux/dcache.h
 /* Starting value for a name hash. */
 #define init_name_hash()      0

 /*
  * Fold one more character @c into the running hash @prevhash.
  * Assumes roughly 4 bits of information per character.
  */
 static inline unsigned long
 partial_name_hash(unsigned long c, unsigned long prevhash)
 {
         unsigned long mixed = (c << 4) + (c >> 4);

         return 11 * (prevhash + mixed);
 }

 /* Finish a name hash by truncating it to the width of unsigned int. */
 static inline unsigned long end_name_hash(unsigned long hash)
 {
         unsigned int folded = (unsigned int) hash;

         return folded;
 }

d. 如果要解析的分量是原路径名中的最后一个分量，则跳到第last_component标号处去执行。
e. 如果“/”终止了要解析的分量名，则跳过“/”之后的任何尾部“/”。多么强大的处理路径名的能力啊，也就是说路径名中两个目录之间是可以插入多个“/”。这一步为解析下一个分量做准备。而如果在一连串的“/”之后没有内容了，则跳转到标号last_with_slashes处执行。这是最后一个分量的特殊情况，也就是它必须一个目录。同样在后面“link_path_walk()对于路径名最后一个分量的处理”部分说明
f. 如果分量名是一个“.”（单个圆点），则继续下一个分量（“.”指的是当前目录，因此，这个点在目录内没有什么效果）。
g.如果分量名是“..”（两个圆点），则尝试回到父目录。这里面有个重要的follow_dotdot(nd)函数：

/*
 * follow_dotdot - handle a ".." component by moving nd->path one level up.
 *
 * Stops without moving when nd->path already equals nd->root.  Inside a
 * mount it steps to the parent dentry; at the root of a mount it uses
 * follow_up() to cross to the mountpoint in the parent mount and tries
 * again.  follow_mount() is always applied to the resulting path.
 */
static __always_inline void follow_dotdot(struct nameidata *nd)
{
	set_root(nd);	/* set nd->root to the root path of the current process */

	while(1) {
		struct dentry *old = nd->path.dentry;

		/* Already at the root: ".." does not go any higher. */
		if (nd->path.dentry == nd->root.dentry &&
		    nd->path.mnt == nd->root.mnt) {
			break;
		}
		/* Not at the root of this mount: go to the parent dentry. */
		if (nd->path.dentry != nd->path.mnt->mnt_root) {
			/* rare case of legitimate dget_parent()... */
			nd->path.dentry = dget_parent(nd->path.dentry);
			dput(old);
			break;
		}
		/* At the root of a mount: cross to the parent mount, or stop if there is none. */
		if (!follow_up(&nd->path))
			break;
	}
	follow_mount(&nd->path);
}

(1)、首先，设置nd的root字段为当前进程的根路径。
(2)、如果最近解析的目录是进程的根目录（nd->path.dentry等于nd->root.dentry，而nd->path.mnt等于nd->root.mnt），那么再向上追踪是不允许的：在最近解析的分量上调用follow_mount()，继续下一个分量。
(3)、如果最近解析的目录不是nd->path.mnt文件系统的根目录（nd->path.dentry 不等于 nd->path.mnt->mnt_root，如果当前节点dentry 不等于当前节点vfsmount对象的根设备的dentry, 说明当前节点不是做为根节点被mount到其它设备上去的。在这里再来看vfsmount对象的mnt_mountpoint字段，它指向它挂载的目录的目录项，也就是原来的目录文件的信息)，那么必须回到父目录：把nd->path.dentry置为其父目录的目录项，其实也就是nd-> path.dentry-> d_parent在父目录上调用follow_mount(&nd->path) ，继续下一个分量。
(4)、如果最近解析的目录是nd->path.mnt文件系统的根目录，则调用函数follow_up(&nd->path)来处理，这个函数定义如下：

/*
 * follow_up - move @path from the root of a mount up to its mountpoint in
 * the parent mount.
 *
 * Returns 0 and leaves @path untouched when path->mnt is its own parent,
 * i.e. there is no mount above it.  Otherwise it takes references on the
 * parent mount and on the mountpoint dentry while holding vfsmount_lock,
 * drops the references @path held before, stores the new pair in @path
 * and returns 1.
 */
int follow_up(struct path *path)
{
	struct vfsmount *parent;
	struct dentry *mountpoint;
	spin_lock(&vfsmount_lock);
	parent = path->mnt->mnt_parent;
	if (parent == path->mnt) {
		spin_unlock(&vfsmount_lock);
		return 0;
	}
	/* Pin the new mount and dentry before the lock is released. */
	mntget(parent);
	mountpoint = dget(path->mnt->mnt_mountpoint);
	spin_unlock(&vfsmount_lock);
	/* Swap @path over to the parent mount, releasing the old references. */
	dput(path->dentry);
	path->dentry = mountpoint;
	mntput(path->mnt);
	path->mnt = parent;
	return 1;
}

如果这个文件系统没有被安装在其他文件系统之上（path->mnt->mnt_parent等于path->mnt），那么 path->mnt文件系统通常就是进程命名空间的根文件系统：在这种情况下，再向上追踪是不可能的，因此在最近解析的分量上调用 follow_mount()，继续下一个分量。（这种情况是不应该出现的，或者说这种情况应该是在follow_dotdot的步骤(2)中就已经检测出来的）。
如果这个文件系统被安装在其他文件系统之上，那么就需要文件系统交换。因此，把path->dentry置为path->mnt->mnt_mountpoint，且把path->mnt置为 path->mnt->mnt_parent，然后重新开始第6g步（几个文件系统可以挂载在同一个挂载点上，在挂载的时候，原来的那个目录文件的vfsmount对象和目录项信息被保存在新的vfsmount对象的mnt_parent和mnt_mountpoint字段中）。
最后来看follow_mount()， follow_mount()定义如下：

/*
 * follow_mount - descend through everything mounted on @path.
 *
 * While path->dentry is a mountpoint and lookup_mnt() finds a mount on
 * it, @path is switched to the root of that mount.  The references held
 * on the old dentry and mount are released, and the reference returned by
 * lookup_mnt() plus a new one on the mount root replace them.
 */
static void follow_mount(struct path *path)
{
	struct vfsmount *mounted;

	for (;;) {
		if (!d_mountpoint(path->dentry))
			return;

		mounted = lookup_mnt(path);
		if (!mounted)
			return;

		dput(path->dentry);
		mntput(path->mnt);
		path->mnt = mounted;
		path->dentry = dget(mounted->mnt_root);
	}
}

follow_mount()函数检查path ->dentry是否是某文件系统的挂载点（path-> dentry-> d_mounted的值大于0），如果不是，则直接退出。如果是，则调用lookup_mnt()，它的定义如下：

/*
 * lookup_mnt - find the mount attached at @path and take a reference on it.
 *
 * Calls __lookup_mnt() with dir == 1 under vfsmount_lock.  Returns the
 * mount with its reference count raised by mntget(), or NULL when
 * nothing is found.
 */
struct vfsmount *lookup_mnt(struct path *path)
{
	struct vfsmount *child_mnt;

	spin_lock(&vfsmount_lock);
	child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
	if (child_mnt)
		mntget(child_mnt);
	spin_unlock(&vfsmount_lock);

	return child_mnt;
}

/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir. If @dir is set return the first mount else return the last mount.
 *
 * The hash chain selected by hash(@mnt, @dentry) is walked forwards when
 * @dir is non-zero and backwards otherwise.  The first entry whose
 * mnt_parent and mnt_mountpoint match is returned, or NULL if the walk
 * gets back to the list head without a match.
 */
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *pos;

	for (pos = dir ? head->next : head->prev; pos != head;
	     pos = dir ? pos->next : pos->prev) {
		struct vfsmount *cand = list_entry(pos, struct vfsmount, mnt_hash);

		if (cand->mnt_parent == mnt && cand->mnt_mountpoint == dentry)
			return cand;
	}
	return NULL;
}

这里struct list_head *head = mount_hashtable + hash(mnt, dentry)，恰好与我们挂载时相对应，假如这个inode上面挂载了很多个，则在前面的while循环中会依次读取挂载的dentry,从最原始的创建该目录时候创建的dentry开始，一直到最后一次挂载创建的dentry结构，这里__lookup_mnt的两个参数会随着挂载的次数变化一直变化。

/*
 * hash - bucket index in the mount hash table for the pair (@mnt, @dentry).
 *
 * Both addresses are divided by L1_CACHE_BYTES and added together, the
 * sum is mixed with itself shifted right by HASH_SHIFT, and the result is
 * masked with HASH_SIZE - 1.
 */
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long sum;

	sum = (unsigned long)mnt / L1_CACHE_BYTES;
	sum += (unsigned long)dentry / L1_CACHE_BYTES;
	sum += sum >> HASH_SHIFT;

	return sum & (HASH_SIZE - 1);
}

对于一个vfsmount来说，哈希值是根据其父vfsmount对象的地址和挂载点地址来计算的。
follow_mount()函数就是要找到挂载在本路径上的文件系统，即vfsmount对象的地址和目录项对象地址。

h.分量名既不是“.”，也不是“..”，调用do_lookup(nd, &this, &next)（878行），得到与给定的父目录（nd->path）和文件名（要解析的路径名分量&this）相关的目录项对象，存放在结果参数next中。这个函数完成实际的查找，是link_path_walk()函数的核心。后面会有更详细的说明。
i.检查刚解析的分量是否指向一个符号链接（next.dentry->d_inode具有一个i_op->follow_link方法）。将在后面“符号链接的查找”有更详细的说明。如果是则调用do_follow_link(&next, nd)做相应的处理。
j.刚解析的分量不是指向一个符号链接调用path_to_nameidata(&next, nd)，把nd->path.dentry和nd->path.mnt分别置为next.dentry和next.mnt，然后继续路径名的下一个分量：

/*
 * path_to_nameidata - make nd->path refer to @path.
 *
 * Drops the dentry reference nd->path held.  The mount reference is
 * dropped and replaced only when the two mounts differ; when they are the
 * same, nd->path.mnt is left as it is.  No new references are taken here:
 * the pointers from @path are copied as they are.
 */
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
	dput(nd->path.dentry);
	if (nd->path.mnt != path->mnt) {
		mntput(nd->path.mnt);
		nd->path.mnt = path->mnt;
	}
	nd->path.dentry = path->dentry;
}

k. 检查刚解析的分量是否指向一个目录（next.dentry->d_inode具有一个自定义的i_op->lookup方法）。如果不是目录，则跳出循环并返回错误码-ENOTDIR，因为这个分量位于原路径名的中间，必须是目录；如果是目录，则continue继续处理路径名的下一个分量。主要的循环到此结束。

秒客网

文件系统-- 标准路径名查找

相关文章