继续上一篇的分析,应用层udev或者mdev获取到内核的事件,来创建设备文件的时候,实际就是调用mknod命令进行节点创建。我们可以模仿udev或者mdev,来手动创建这个节点。
1 /dev设备节点创建
我们在使用命令: mknod /dev/iTestDevice c $major 0 创建设备文件的时候,内核做了哪些事呢?可以肯定的一点是,内核肯定创建了一个 dentry,inode 结构体,并加入到系统里面,要不然在打开设备文件的时候,会因为lookup_fast()函数找不到相应的 inode,从而使得打开失败。也许你会说,lookup_fast()函数失败了,还有lookup_slow()函数呢?这里因为是特殊文件,情况有所不同,如果lookup_fast()函数失败了,那么就会导致打开失败,不会在lookup_slow()函数里面动态创建 inode,而创建inode的工作其实是在 mknod 系统调用里面完成的。下面来简单分析其具体过程。
首先通过 strace 来查看下系统调用的传入参数:
strace -o syscall mknod /dev/test c 250 0
结果如下:
...
mknod("/dev/test", S_IFCHR|0666, makedev(250, 0)) = 0
...
现在来看下内核里面关于 mknod 系统调用的定义,在 source/fs/namei.c 文件中
/* mknod(2): thin wrapper that forwards to mknodat(2), using AT_FDCWD so a
 * relative pathname is resolved against the current working directory. */
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
好了,来看 sys_mknodat 的定义:
/* mknodat(2), abridged: two steps matter here -
 * 1) user_path_create() resolves the parent directory and allocates a
 *    negative dentry for the last path component;
 * 2) vfs_mknod() asks the parent inode's filesystem to create the inode. */
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned, dev)
{
...
/* resolve the path and create the new (still negative) dentry */
dentry = user_path_create(dfd, filename, &path, lookup_flags);
...
switch (mode & S_IFMT) {
...
/* the inode is created here */
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(path.dentry->d_inode,dentry,mode,
new_decode_dev(dev));
break;
...
}
其实就两步:1,创建 dentry;2,创建 inode。我们在分析具体函数前先来分析一下/dev/iTestDevice这个目录,假设我们的rootfs使用的是ubi文件系统,则dev目录在ubi文件系统中,dev/目录下面挂载的是ramfs文件系统,所以在路径搜索的过程中,搜索到dev目录的时候,会切换到挂载的ramfs根文件夹,得到ramfs的root dentry和root inode。然后再在ramfs文件系统中创建子文件或者子文件夹。这个父目录的搜索切换过程这边不详细分析,感兴趣的可以参考下面这两篇文章:
/oqqYuJi12345678/article/details/101689334
/oqqYuJi12345678/article/details/101849978
先来看iTestDevice文件dentry的创建:
/* Copy the pathname in from user space, then delegate to kern_path_create()
 * which resolves the parent and allocates the child dentry. */
struct dentry *user_path_create(int dfd, const char __user *pathname,
struct path *path, unsigned int lookup_flags)
{
struct filename *tmp = getname(pathname);
struct dentry *res;
if (IS_ERR(tmp))
return ERR_CAST(tmp);
res = kern_path_create(dfd, tmp->name, path, lookup_flags);
putname(tmp);
return res;
}
核心函数为kern_path_create:
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct nameidata nd;
int err2;
int error;
bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
/*
* Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
* other flags passed in are ignored!
*/
lookup_flags &= LOOKUP_REVAL;
----------------------------------------------------------(1)
error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);
if (error)
return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /)
*/
if (nd.last_type != LAST_NORM)
goto out;
&= ~LOOKUP_PARENT;
|= LOOKUP_CREATE | LOOKUP_EXCL;
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write();
/*
* Do the final lookup.
*/
mutex_lock_nested(&->d_inode->i_mutex, I_MUTEX_PARENT);
------------------------------------------------------------(2)
dentry = lookup_hash(&nd);
if (IS_ERR(dentry))
goto unlock;
error = -EEXIST;
if (dentry->d_inode)
goto fail;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
if (unlikely(!is_dir && [])) {
error = -ENOENT;
goto fail;
}
if (unlikely(err2)) {
error = err2;
goto fail;
}
*path = ;
return dentry;
fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
mutex_unlock(&->d_inode->i_mutex);
if (!err2)
mnt_drop_write();
out:
path_put(&);
return dentry;
}
(1)do_path_lookup完成上层父目录的解析,而lookup_hash则完成子节点的解析。先来看一下do_path_lookup函数。
do_path_lookup
--------->filename_lookup
------------>path_lookupat
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
struct file *base = NULL;
struct path path;
int err;
err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(err))
return err;
current->total_link_count = 0;
err = link_path_walk(name, nd);//完成对父目录的搜索,获取父目录的dentry
if (!err && !(flags & LOOKUP_PARENT)) {//设置了LOOKUP_PARENT,这边不走
err = lookup_last(nd, &path);
while (err > 0) {
void *cookie;
struct path link = path;
err = may_follow_link(&link, nd);
if (unlikely(err))
break;
nd->flags |= LOOKUP_PARENT;
err = follow_link(&link, nd, &cookie);
if (err)
break;
err = lookup_last(nd, &path);
put_link(nd, &link, cookie);
}
}
if (!err)
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!can_lookup(nd->inode)) {
path_put(&nd->path);
err = -ENOTDIR;
}
}
if (base)
fput(base);
if (nd-> && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd-> = NULL;
}
return err;
}
该函数只完成对父目录的搜索,即对于/dev/iTestDevice这样的目录结构,只搜索/dev目录,最终得到ramfs的root dentry和inode。所以nd->path.dentry为ramfs文件系统root dentry。下面创建iTestDevice inode节点的时候会用到。
(2)lookup_hash函数完成iTestDevice节点dentry创建
/* Final-component lookup: nd->last is the last path component and
 * nd->path.dentry is the parent directory resolved above. */
static struct dentry *lookup_hash(struct nameidata *nd)
{
return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
}
/* Look the name up in the dcache under parent `base`; if not cached,
 * fall through to the filesystem's ->lookup via lookup_real(). */
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, unsigned int flags)
{
bool need_lookup;
struct dentry *dentry;
dentry = lookup_dcache(name, base, flags, &need_lookup);//allocates a new dentry here and sets need_lookup to true
if (!need_lookup)
return dentry;
return lookup_real(base->d_inode, dentry, flags);//filesystems like sysfs create the inode in here via the parent inode's op; ramfs does not - its inode is created later
}
有的文件系统调用lookup_real会创建inode节点,ramfs不会,看一下该函数:
/* Call the parent directory's ->lookup() for the new dentry. A non-NULL
 * return means the filesystem substituted its own dentry; use that instead. */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct dentry *old;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir))) {
dput(dentry);
return ERR_PTR(-ENOENT);
}
old = dir->i_op->lookup(dir, dentry, flags);
if (unlikely(old)) {
dput(dentry);
dentry = old;
}
return dentry;
}
dir->i_op->lookup到底是哪个函数呢,ramfs文件系统初始化的时候,挂载根目录的时候创建的root inode,其i_op操作集函数为:
/* inode operations installed on ramfs/tmpfs directory inodes; ->lookup is
 * the generic simple_lookup() and ->mknod is shmem_mknod() (used below). */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
.create = shmem_create,
.lookup = simple_lookup,
.link = shmem_link,
.unlink = shmem_unlink,
.symlink = shmem_symlink,
.mkdir = shmem_mkdir,
.rmdir = shmem_rmdir,
.mknod = shmem_mknod,
.rename = shmem_rename,
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
所以其lookup函数为:
/* Generic lookup for in-memory filesystems: just hash the dentry in as
 * negative (d_add with a NULL inode) - no inode is created here. */
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
static const struct dentry_operations simple_dentry_operations = {
.d_delete = simple_delete_dentry,
};
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, NULL);
return NULL;
}
可以看到该lookup函数确实没有创建新的inode。
好了下面看一下子节点inode的创建过程:
/* VFS entry point for node creation, abridged: after permission checks it
 * dispatches to the parent directory inode's ->mknod (shmem_mknod here). */
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
...
error = dir->i_op->mknod(dir, dentry, mode, dev);
...
return error;
}
这里调用了文件系统相关的函数:dir->i_op->mknod(),调用父目录的inode操作方法,这是ramfs根目录的i_op->mknod 函数,这个函数即是上面操作函数集合中的shmem_mknod:
/* ramfs/tmpfs ->mknod: allocate the inode via shmem_get_inode() and bind it
 * to the dentry with d_instantiate(). */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
int error = -ENOSPC;
inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
if (inode) {
... /* elided: security/ACL init and `error = 0;` on success */
d_instantiate(dentry, inode); /* roughly: dentry->d_inode = inode */
dget(dentry); /* Extra count - pin the dentry in core */
}
return error;
}
其中主要工作是在shmem_get_inode函数中完成:
/* Allocate and initialize a new shmem inode; for a device node (the default
 * switch case) init_special_inode() records the fops and device number. */
static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
{
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
if (shmem_reserve_inode(sb))
return NULL;
/* allocate the inode structure in kernel space */
inode = new_inode(sb);
if (inode) {
/* initialization of the various inode fields */
inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_generation = get_seconds();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
/***********************************************/
switch (mode & S_IFMT) {
default:
inode->i_op = &shmem_special_inode_operations;
init_special_inode(inode, mode, dev); /* the part we care most about */
break;
...
}
} else
shmem_free_inode(sb);
return inode;
}
可见在这个函数里面,首先通过new_inode函数在内核空间分配内存,这里不再详细展开。然后对各个成员变量进行初始化,这里我们也不感兴趣,最感兴趣的地方在init_special_inode函数里面:
/* For a character device inode, record the two key pieces of state:
 * the generic char-device fops (def_chr_fops) and the device number. */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_mode = mode;
if (S_ISCHR(mode)) {
inode->i_fop = &def_chr_fops; /* generic open path for all char devices */
inode->i_rdev = rdev; /* dev number used later to find the real cdev */
}
...
}
可见这里保存了两个重要的成员变量:文件操作函数集和设备号。而这个文件操作函数集是一个通用的操作集,所有字符驱动文件打开时都会调用,在这个函数里面,通过设备号来找到真正的该设备的文件操作函数集。先看这个 def_chr_fops 的定义:
/* Generic file_operations shared by every character-device inode; its
 * ->open locates the driver's real fops via the device number. */
const struct file_operations def_chr_fops = {
.open = chrdev_open,
.llseek = noop_llseek,
};
2 /dev设备节点打开
文件的打开流程,可以参考这篇文章:
/oqqYuJi12345678/article/details/101849978
从上面文章可以知道,在完成路径搜索以后,会调用do_dentry_open函数,在该函数里面,会最终会调用节点inode->i_fop函数集中的i_fop函数。对于字符设备,就是上一节讲的def_chr_fops函数集,其打开函数为chrdev_open:
/* Generic open for char devices: find the cdev from the device number
 * (first open caches it in inode->i_cdev), install the driver's fops on
 * the file, then call the driver's own ->open. */
static int chrdev_open(struct inode *inode, struct file *filp)
{
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
----------------------------------------------(1)
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
spin_unlock(&cdev_lock);
--------------------------------------------------(2)
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
---------------------------------------------------(3)
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
filp->f_op = fops_get(p->ops); /* from here on the file uses the driver's fops */
if (!filp->f_op)
goto out_cdev_put;
if (filp->f_op->open) {
--------------------------------------------------------------------(4)
ret = filp->f_op->open(inode, filp);
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}
(1)inode->i_cdev还没有设置,为空
(2)根据inode的设备号,找到设备的kobject,然后再根据kobject找到其cdev结构。
/* Scan the probe chain hashed by major number for the entry whose
 * [dev, dev+range) covers the requested device number; prefer the probe
 * with the smallest range, then call its ->get to obtain the kobject. */
struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
{
struct kobject *kobj;
struct probe *p;
unsigned long best = ~0UL;
retry:
mutex_lock(domain->lock);
for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
struct kobject *(*probe)(dev_t, int *, void *);
struct module *owner;
void *data;
if (p->dev > dev || p->dev + p->range - 1 < dev)
continue;
if (p->range - 1 >= best)
break;
if (!try_module_get(p->owner))
continue;
owner = p->owner;
data = p->data;
probe = p->get;
best = p->range - 1;
*index = dev - p->dev;
if (p->lock && p->lock(dev, data) < 0) {
module_put(owner);
continue;
}
mutex_unlock(domain->lock);
kobj = probe(dev, index, data);
/* Currently ->owner protects _only_ ->probe() itself. */
module_put(owner);
if (kobj)
return kobj;
goto retry;
}
mutex_unlock(domain->lock);
return NULL;
}
当找到对应的probe以后,调用其p->get函数,在下面的初始化里面我们知道该函数为 exact_match
/* probe->get callback installed by cdev_add(): data is the cdev itself,
 * so just return its embedded kobject. */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
struct cdev *p = data;
return &p->kobj;
}
通过exact_match获取其kobject
(3)把inode添加到cdev的list中,是不是意味着设备可以被打开几次?
(4)调用cdev的ops函数集进一步做打开操作,这个操作函数集才是真正的我们写字符驱动的时候注册的操作集函数。
关于上面的(2)和(4),其初始化是在字符设备注册的时候做的,下面来进一步分析。
2.1 字符设备初始化
从上一篇文章可以知道,字符设备的注册是通过调用register_chrdev函数,看看该函数具体做了什么事情:
/* Legacy registration helper: claims all 256 minors under `major`
 * (or a dynamically allocated major when major == 0). */
static inline int register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops)
{
return __register_chrdev(major, 0, 256, name, fops);
}
/* Reserve a device-number region, allocate a cdev, attach the driver's
 * fops to it, and publish it in cdev_map via cdev_add(). */
int __register_chrdev(unsigned int major, unsigned int baseminor,
unsigned int count, const char *name,
const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
int err = -ENOMEM;
//reserve the major number: if major is 0 one is allocated dynamically, otherwise the requested range is checked for validity
cd = __register_chrdev_region(major, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
cdev = cdev_alloc();
if (!cdev)
goto out2;
cdev->owner = fops->owner;
------------------------------------------------------------(1)
cdev->ops = fops;//the char device's operations - the core of the driver implementation
kobject_set_name(&cdev->kobj, "%s", name);
-------------------------------------------------------------(2)
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
goto out;
cd->cdev = cdev;
return major ? 0 : cd->major;
out:
kobject_put(&cdev->kobj);
out2:
kfree(__unregister_chrdev_region(cd->major, baseminor, count));
return err;
}
(1)上一节open函数中最终操作的字符设备open函数就是在这边注册的字符设备操作集
(2)cdev_add把cdev添加到cdev_map中:
/* Record the cdev's device-number range and insert it into the global
 * cdev_map so kobj_lookup() can find it at open time. */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
int error;
p->dev = dev;
p->count = count;
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
return error;
kobject_get(p->kobj.parent); /* pin the parent kobject for the cdev's lifetime */
return 0;
}
/* Insert `range` device numbers starting at `dev` into the probe hash:
 * one struct probe per distinct major, each chained into the bucket for
 * its major, kept sorted by ascending range. */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, struct module *module, kobj_probe_t *probe, int (*lock)(dev_t, void *), void *data)
{
unsigned n = MAJOR(dev+range-1) - MAJOR(dev) + 1;
unsigned index = MAJOR(dev);
unsigned i;
struct probe *p;
if (n > 255) /* n > 255 would exceed the size of the probes array in kobj_map */
n = 255;
p = kmalloc(sizeof(struct probe) * n, GFP_KERNEL); /* allocate n struct probe entries */
if(p == NULL)
return -ENOMEM;
for(i = 0; i < n; i++, p++) { /* initialize each probe from the function arguments */
p->owner = module;
p->get = probe; //used at open time to obtain the cdev's kobject
p->lock = lock;
p->dev = dev; //device number
p->range = range;
p->data = data; //stores the cdev; used later when the file is opened
}
mutex_lock(domain->lock);
for(i = 0, p-=n; i < n; i++, p++, index++) {
struct probe **s = &domain->probes[index % 255];//insert into the global map
while(*s && (*s)->range < range)
s = &(*s)->next;
p->next = *s;
*s = p;
}
mutex_unlock(domain->lock);
return 0;
}
dev_t的前12位为主设备号,后20位为次设备号。
n = MAJOR(dev + range - 1) - MAJOR(dev) + 1 表示设备号范围(dev, dev+range)中不同的主设备号的个数。
通常n的值为1。
从代码中的第二个for循环可以看出kobj_map中的probes数组中每个元素为一个struct probe链表的头指针。
每个链表中的probe对象有(MAJOR() % 255)值相同的关系。若主设备号小于255, 则每个链表中的probe都有相同的主设备号。
链表中的元素是按照range值从小到大排列的。
while循环即是找出该将p插入的位置。
该初始化过程也印证了上面设备的打开过程。