Linux Netlink通信机制详解（下）

这里我以路由中的netlink为例，看一下内核中的处理流程是怎么样的！在/kernel/net/core/rtnetlink.c文件中，有一个接收从用户空间过来的Netlink消息的函数。

/*
 * Input callback of the rtnetlink kernel socket.
 *
 * Repeatedly drains @sk's receive queue through netlink_run_queue(),
 * handing every message to rtnetlink_rcv_msg(), until netlink_run_queue()
 * leaves the pending counter at zero.  Each pass is bracketed by
 * rtnl_lock() / up(&rtnl_sem), and netdev_run_todo() is called after the
 * semaphore has been released.
 *
 * @len is not used.
 */
static void rtnetlink_rcv(struct sock *sk, int len)
{
	/* Must start at 0: netlink_run_queue() then loads the queue length. */
	unsigned int pending = 0;

	for (;;) {
		rtnl_lock();
		netlink_run_queue(sk, &pending, &rtnetlink_rcv_msg);
		up(&rtnl_sem);

		netdev_run_todo();

		/* Nothing left over from this pass: we are done. */
		if (!pending)
			break;
	}
}

上面的内核函数就是用来接收用户空间发来的路由相关Netlink消息的，当我们使用route命令添加一条路由时，就会调用该函数进行接收。该函数是在netlink初始化时注册的，注册代码同样位于rtnetlink.c文件中。

/*
 * Boot-time initialisation of rtnetlink.
 *
 * - sizes and allocates rta_buf so it can hold one attribute pointer for
 *   every attribute type up to the largest entry of rta_max[];
 * - creates the NETLINK_ROUTE kernel socket with rtnetlink_rcv() as its
 *   input callback;
 * - registers the netdevice notifier and installs link_rtnetlink_table
 *   for PF_UNSPEC and PF_PACKET.
 *
 * Panics if either the buffer or the socket cannot be set up.
 */
void __init rtnetlink_init(void)
{
	int idx;

	/* rtattr_max = largest value found in rta_max[] */
	rtattr_max = 0;
	for (idx = 0; idx < ARRAY_SIZE(rta_max); idx++) {
		if (rtattr_max < rta_max[idx])
			rtattr_max = rta_max[idx];
	}

	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
	if (rta_buf == NULL)
		panic("rtnetlink_init: cannot allocate rta_buf\n");

	/*
	 * Creating the kernel-side netlink socket is also where the route
	 * netlink receive function, rtnetlink_rcv, gets registered.
	 */
	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX,
				     rtnetlink_rcv, THIS_MODULE);
	if (!rtnl)
		panic("rtnetlink_init: cannot initialize rtnetlink\n");

	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
	register_netdevice_notifier(&rtnetlink_dev_notifier);

	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}

在netlink_kernel_create函数中，可以看到内核是如何设置用于接收用户空间消息的接收函数的：

struct sock *
netlink_kernel_create(int unit, unsignedint groups,
void (*input)(struct sock*sk, int len),
struct module *module)
{
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
if (!nl_table)
return NULL;
if (unit<0|| unit>=MAX_LINKS)
return NULL;
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit,&sock))
return NULL;
if (__netlink_create(sock, unit)< 0)
goto out_sock_release;
sk = sock->sk;
sk->sk_data_ready= netlink_data_ready;
if (input)
nlk_sk(sk)->data_ready= input;//设置内核接收Netlink消息的函数，这里就是前面的rtnetlink_rcv函数
if (netlink_insert(sk, 0))
goto out_sock_release;
nlk = nlk_sk(sk);//取得sock嵌入的netlink_sock结构体
nlk->flags|= NETLINK_KERNEL_SOCKET;
netlink_table_grab();
nl_table[unit].groups= groups < 32? 32 : groups;
nl_table[unit].module= module;
nl_table[unit].registered= 1;// 更新netlink_table结构体信息，每中协议对应一个netlink_
table结构
netlink_table_ungrab();
return sk;
out_sock_release:
sock_release(sock);
return NULL;
}

到此，从内核创建netlink到接收用户空间发送过来的消息，整个流程就清晰了。那么当我们添加一条新路由时，接收函数rtnetlink_rcv会在循环中通过netlink_run_queue从接收队列里逐个取出消息，并调用实际的接收处理函数，这里为rtnetlink_rcv_msg函数。

/**
 * netlink_run_queue - Process netlink receive queue.
 * @sk: Netlink socket containing the queue
 * @qlen: Place to store queue length upon entry
 * @cb: Callback function invoked for each netlink message found
 *
 * Processes as much as there was in the queue upon entry and invokes
 * a callback function for each netlink message found. The callback
 * function may refuse a message by returning a negative error code
 * but setting the error pointer to 0, in which case this function
 * returns with a qlen != 0.
 *
 * qlen must be initialized to 0 before the initial entry; afterwards
 * the function may be called repeatedly until qlen reaches 0.
 */
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
	struct sk_buff *cur;

	/*
	 * First entry (*qlen == 0), or a stale count larger than what is
	 * queued now: (re)load the budget from the current queue length.
	 */
	if (*qlen == 0 || *qlen > skb_queue_len(&sk->sk_receive_queue))
		*qlen = skb_queue_len(&sk->sk_receive_queue);

	while (*qlen) {
		cur = skb_dequeue(&sk->sk_receive_queue);

		if (netlink_rcv_skb(cur, cb)) {
			/*
			 * Processing stopped early.  A buffer that still has
			 * data goes back to the head of the queue and stays
			 * counted in *qlen; an exhausted one is freed and
			 * accounted for.
			 */
			if (cur->len) {
				skb_queue_head(&sk->sk_receive_queue, cur);
			} else {
				kfree_skb(cur);
				(*qlen)--;
			}
			return;
		}

		/* Buffer fully handled: drop it and move on to the next. */
		kfree_skb(cur);
		(*qlen)--;
	}
}

下面是rtnetlink_rcv_msg()函数的实现，它对netlink消息进行相应的处理。其中用到了一个数据结构：

struct rtnetlink_link *link; 其定义如下，它包含两个不同的处理函数指针（doit和dumpit）：

/*
 * Handlers for one rtnetlink message type.
 * Either pointer may be NULL when the corresponding operation is not
 * provided for that message type.
 */
struct rtnetlink_link
{
	/* Handles a single request message. */
	int (*doit)(struct sk_buff *, struct nlmsghdr *, void *attr);

	/* Handles a dump request (started through netlink_dump_start()). */
	int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
};
/*
 * Process one rtnetlink message.
 *
 * Validates the header of @nlh, picks the handler table for the address
 * family carried in the message, and dispatches to that table's dumpit
 * (dump requests) or doit (everything else) handler for the message type.
 *
 * Return value / *errp:
 *   - 0, *errp untouched: message is not a request, is a control message
 *     (type < RTM_BASE), or is too short to carry a struct rtgenmsg.
 *   - -1, *errp = -EINVAL / -EAFNOSUPPORT / -EPERM on the error paths.
 *   - -1, *errp = result of netlink_dump_start() on the dump path
 *     (including 0 when the dump was started).
 *   - otherwise the doit handler's result, which is also stored in *errp.
 */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int*errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
/* A control message: ignore them */
if (type< RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type> RTM_MAX)
goto err_inval;
/* From here on, type is a zero-based index into the handler tables. */
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len< NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family>= NPROTO){
*errp = -EAFNOSUPPORT;
return -1;
}
/*
 * Select the handler table registered for the family supplied by user
 * space; families without a table of their own use the PF_UNSPEC one.
 */
link_tab = rtnetlink_links[family];
if (link_tab== NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
/* Pick the entry for this message type (e.g. RTM_NEWROUTE). */
link =&link_tab[type];
/*
 * sz_idx (type / 4) indexes rtm_min[] and rta_max[]; kind (type % 4) is
 * the position inside that group of four message types.
 * NOTE(review): kind 2 is presumably the RTM_GET* operation - confirm
 * against the RTM_* definitions.
 */
sz_idx = type>>2;
kind = type&3;
/* Every kind except 2 must pass security_netlink_recv(). */
if (kind!= 2 && security_netlink_recv(skb)){
*errp = -EPERM;
return -1;
}
/* Dump request: handled by dumpit, never by doit. */
if (kind== 2 && nlh->nlmsg_flags&NLM_F_DUMP){
/* No family-specific dumpit: fall back to the PF_UNSPEC entry. */
if (link->dumpit== NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit== NULL)
goto err_inval;
if ((*errp= netlink_dump_start(rtnl, skb, nlh,
link->dumpit,NULL))!= 0){
return -1;
}
/* Dump started; *errp is 0 here and -1 is still returned. */
netlink_queue_skip(nlh, skb);
return -1;
}
/* Reset the attribute pointer table before parsing this message. */
memset(rta_buf, 0,(rtattr_max * sizeof(struct rtattr*)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len< min_len)
goto err_inval;
if (nlh->nlmsg_len> min_len) {
/* Attributes follow the (aligned) fixed-size part of the message. */
int attrlen = nlh->nlmsg_len- NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh+ NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)){
unsigned flavor = attr->rta_type;
/*
 * Type 0 is skipped; a type above rta_max[sz_idx] rejects the whole
 * message; otherwise the attribute is recorded at index type - 1.
 */
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta_buf[flavor-1]= attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
/* No family-specific doit: fall back to the PF_UNSPEC entry. */
if (link->doit== NULL)
link =&(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit== NULL)
goto err_inval;
/*
 * Invoke the registered handler with the parsed attribute table; for
 * RTM_NEWROUTE in the IPv6 table this is inet6_rtm_newroute().
 */
err =link->doit(skb, nlh,(void *)&rta_buf[0]);
*errp =err;
return err;
err_inval:
*errp =-EINVAL;
return -1;
}
int inet6_rtm_newroute(struct sk_buff*skb, struct nlmsghdr* nlh, void*arg)
{
struct rtmsg *r = NLMSG_DATA(nlh);
struct in6_rtmsg rtmsg;
if (inet6_rtm_to_rtmsg(r, arg,&rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg,&NETLINK_CB(skb));
}

inet6_rtm_newroute函数是通过下面的数组注册的，所以上面的link->doit(skb, nlh, (void *)&rta_buf[0])正是根据该数组中RTM_NEWROUTE对应的表项来调用它的。

/*
 * IPv6 rtnetlink handler table, indexed by (message type - RTM_BASE).
 * Entries not listed here are zero-initialised, i.e. have neither a
 * doit nor a dumpit handler.
 */
static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
	[RTM_GETLINK      - RTM_BASE] = { .dumpit = inet6_dump_ifinfo,   },
	[RTM_NEWADDR      - RTM_BASE] = { .doit   = inet6_rtm_newaddr,   },
	[RTM_DELADDR      - RTM_BASE] = { .doit   = inet6_rtm_deladdr,   },
	[RTM_GETADDR      - RTM_BASE] = { .dumpit = inet6_dump_ifaddr,   },
	[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
	[RTM_GETANYCAST   - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
	[RTM_NEWROUTE     - RTM_BASE] = { .doit   = inet6_rtm_newroute,  },
	[RTM_DELROUTE     - RTM_BASE] = { .doit   = inet6_rtm_delroute,  },
	/* The only entry that provides both a request and a dump handler. */
	[RTM_GETROUTE     - RTM_BASE] = { .doit   = inet6_rtm_getroute,
					  .dumpit = inet6_dump_fib,      },
};

秒客网

Linux Netlink通信机制详解（下）

相关文章