这篇是我临时加的,本来不打算放在整个内核协议栈分析的系列里的,但我现在觉得vlan还是蛮重要的,而且讨论vlan源码的文章很少,不知道我这篇算不算第一篇 :D
vlan的代码都在net/8021q/的内核目录下,首先我们来看8021q模块 (net/8021q/vlan.c)
vlan_proto_init , vlan_cleanup_module 是模块的init/exit函数,我们来看vlan_proto_init,vlan_cleanup_module基本就是反过来做一遍
/*
 * Module init function for the 802.1Q VLAN code.
 *
 * Performs the registration steps in a fixed order:
 * register_pernet_gen_device(), register_netdevice_notifier(),
 * vlan_gvrp_init(), vlan_netlink_init(), dev_add_pack() and
 * vlan_ioctl_set().
 *
 * Returns 0 on success.  If a step fails, the steps that already succeeded
 * are undone in reverse order through the err4/err3/err2 labels and the
 * negative error code of the failing step is returned.
 */
static int __init vlan_proto_init(void)
{
int err;
pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
pr_info("All bugs added by %s\n", vlan_buggyright);
/* Nothing has been registered yet, so a failure here has nothing to undo. */
err = register_pernet_gen_device(&vlan_net_id, &vlan_net_ops);
if (err < 0)
goto err0;
/* On failure only the pernet registration above has to be reverted (err2). */
err = register_netdevice_notifier(&vlan_notifier_block);
if (err < 0)
goto err2;
register_pernet_gen_device, register_netdevice_notifier是网络设备注册的常规流程
err = vlan_gvrp_init();
if (err < 0)
goto err3;
err = vlan_netlink_init();
if (err < 0)
goto err4;
/* Hook the packet handler described by vlan_packet_type into the stack. */
dev_add_pack(&vlan_packet_type);
dev_add_pack,把802.1q当做另一种协议来处理
/* Install vlan_ioctl_handler as the VLAN ioctl hook. */
vlan_ioctl_set(vlan_ioctl_handler);
把vlan_ioctl_handler注册为vconfig命令的handler
return 0;
/* Error unwinding: each label undoes one step and falls through to the next. */
err4:
vlan_gvrp_uninit();
err3:
unregister_netdevice_notifier(&vlan_notifier_block);
err2:
unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
err0:
return err;
}
vconfig配置vlan接口的参数被封装在vlan_ioctl_args中
/*
 * Argument block passed to the VLAN ioctl handler; the article describes it
 * as the container for the parameters vconfig supplies.
 *
 * NOTE(review): this looks like a user-visible layout, so field order and
 * sizes should be treated as fixed -- confirm against the exported header.
 */
struct vlan_ioctl_args {
int cmd; /* Should be one of the vlan_ioctl_cmds enum above. */
char device1[24]; /* Device name buffer (24 bytes); presumably the device the command acts on -- confirm. */
/* Command-specific argument; which member is meaningful presumably depends on cmd -- confirm against vlan_ioctl_handler. */
union {
char device2[24]; /* Second device name buffer (24 bytes). */
int VID; /* Presumably the VLAN id -- confirm. */
unsigned int skb_priority;
unsigned int name_type;
unsigned int bind_type;
unsigned int flag; /* Matches vlan_dev_info flags */
} u;
short vlan_qos;
};
vlan_ioctl_handler就是针对不同的vconfig的cmd参数有不同的行为,目前已知的cmd有:
SET_VLAN_INGRESS_PRIORITY_CMD
SET_VLAN_EGRESS_PRIORITY_CMD
SET_VLAN_FLAG_CMD
ADD_VLAN_CMD
DEL_VLAN_CMD
GET_VLAN_REALDEV_NAME_CMD
GET_VLAN_VID_CMD
对于添加vlan设备而言,最重要的无非是register_vlan_device咯
先提下vlan group的概念,我的理解是同一个物理设备上的vlan设备属于同一个vlan group,内核用全局哈希表struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]保存所有的vlan group,哈希表的大小为32,以net_device的ifindex为哈希值。
/*
 * Per-real-device bookkeeping for the VLAN devices stacked on top of it.
 * Groups are found with __vlan_find_group(real_dev) and created with
 * vlan_group_alloc(real_dev) in register_vlan_dev().
 */
struct vlan_group {
struct net_device *real_dev; /* The ethernet(like) device
* the vlan is attached to.
*/
unsigned int nr_vlans; /* Incremented by register_vlan_dev() for each VLAN device stored in the group. */
struct hlist_node hlist; /* linked list */
/*
 * Two-level table of VLAN devices: the first index is
 * vlan_id / VLAN_GROUP_ARRAY_PART_LEN, the second is
 * vlan_id % VLAN_GROUP_ARRAY_PART_LEN.  A first-level slot may be NULL
 * (see vlan_group_get_device()).
 */
struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
struct rcu_head rcu; /* Passed to call_rcu() when the group is freed. */
};
一个real_dev对应的vlan_group可以有多个vlan_device。vlan_group按vlan_id把所有的vlan_device放在一个二维的net_device指针数组里,即vlan_devices_arrays。该数组的大小是VLAN_GROUP_ARRAY_SPLIT_PARTS * VLAN_GROUP_ARRAY_PART_LEN:vlan_id / VLAN_GROUP_ARRAY_PART_LEN选出第一维的分片,vlan_id % VLAN_GROUP_ARRAY_PART_LEN是分片内的下标,这一点可以从vlan_group_get_device看出来
/*
 * Return the VLAN device stored for @vlan_id in group @vg.
 *
 * The id is split into a part index (vlan_id / VLAN_GROUP_ARRAY_PART_LEN)
 * and an offset inside that part (vlan_id % VLAN_GROUP_ARRAY_PART_LEN).
 * Returns NULL when the part has not been allocated; otherwise returns
 * whatever pointer is stored in the slot.
 */
static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
						       u16 vlan_id)
{
	struct net_device **part;

	part = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
	if (!part)
		return NULL;

	return part[vlan_id % VLAN_GROUP_ARRAY_PART_LEN];
}
下面来看register_vlan_device:
首先进行一系列的check,之后调用alloc_netdev创建vlan_device,这个函数我们之前的文章讨论过。这里创建的net_device之后会接一个线性空间,里面是一个vlan_dev_info结构。alloc_netdev会调用vlan_setup,vlan_setup的代码相当直观:
/*
 * Setup callback for a freshly allocated VLAN net_device (the article notes
 * it is invoked through alloc_netdev()).
 *
 * Starts from the generic Ethernet defaults applied by ether_setup() and then
 * overrides the VLAN-specific fields, so the overrides must stay after the
 * ether_setup() call.
 */
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
/* Mark the device as an 802.1Q VLAN device. */
dev->priv_flags |= IFF_802_1Q_VLAN;
/* Clear flags that ether_setup() may have left set. */
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
netdev_extended(dev)->ext_priv_flags &= ~IFF_TX_SKB_SHARING;
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
/* free_netdev() is installed as the device destructor. */
dev->destructor = free_netdev;
dev->ethtool_ops = &vlan_ethtool_ops;
/* Zero the ETH_ALEN-byte broadcast address. */
memset(dev->broadcast, 0, ETH_ALEN);
}
/*
 * net_device_ops table that vlan_setup() installs on VLAN devices.
 *
 * Both .ndo_set_rx_mode and .ndo_set_multicast_list are served by
 * vlan_dev_set_rx_mode().  The transmit hook here is the software tagging
 * path, vlan_dev_hard_start_xmit().
 */
static const struct net_device_ops vlan_netdev_ops = {
	.ndo_change_mtu		= vlan_dev_change_mtu,
	.ndo_init		= vlan_dev_init,
	.ndo_uninit		= vlan_dev_uninit,
	.ndo_open		= vlan_dev_open,
	.ndo_stop		= vlan_dev_stop,
	.ndo_start_xmit		= vlan_dev_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= vlan_dev_set_mac_address,
	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
	.ndo_do_ioctl		= vlan_dev_ioctl,
	.ndo_neigh_setup	= vlan_dev_neigh_setup,
	.ndo_get_stats		= vlan_dev_get_stats,
};	/* the terminating ';' was missing in the original listing */
与vlan_netdev_ops对应的是vlan_netdev_accel_ops:如果网卡有vlan acceleration功能(比如由网卡硬件自动给报文加上vlan头),dev->netdev_ops就会被设置为vlan_netdev_accel_ops
之后调用register_vlan_dev,该函数主要就是初始化对应的vlan_group->vlan_devices_arrays的数组成员,还有调用相应驱动的注册代码
/*
 * Register the VLAN device @dev on top of the real device recorded in its
 * vlan_dev_info.
 *
 * Looks up the vlan_group of the real device, allocating a new one (and
 * initialising the GVRP applicant) if none exists, reserves the slot for the
 * VLAN id, registers the net_device and finally stores it in the group.
 *
 * Returns 0 on success, -ENOBUFS if a new group cannot be allocated, or the
 * negative error of the step that failed.  A group allocated by this call
 * (ngrp != NULL) is torn down again on failure; a pre-existing group is left
 * untouched.
 */
int register_vlan_dev(struct net_device *dev)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct net_device *real_dev = vlan->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
u16 vlan_id = vlan->vlan_id;
/* ngrp stays NULL unless the group is created here; it drives the cleanup below. */
struct vlan_group *grp, *ngrp = NULL;
int err;
grp = __vlan_find_group(real_dev);
if (!grp) {
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
return -ENOBUFS;
err = vlan_gvrp_init_applicant(real_dev);
if (err < 0)
goto out_free_group;
}
先拿到real_dev对应的vlan_group,如果没有就调用vlan_group_alloc一个
err = vlan_group_prealloc_vid(grp, vlan_id);
if (err < 0)
goto out_uninit_applicant;
vlan_group_prealloc_vid用来初始化vlan_group->vlan_devices_arrays对应的哈希数组
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_applicant;
注册网络设备
/* Account for reference in struct vlan_dev_info */
dev_hold(real_dev);
vlan_transfer_operstate(real_dev, dev);
linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
/* So, got the sucker initialized, now lets place
* it into our local structure.
*/
vlan_group_set_device(grp, vlan_id, dev);
grp->nr_vlans++;
/* The driver is told about the group only when it was newly created here. */
if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
ops->ndo_vlan_rx_register(real_dev, ngrp);
/* The new VLAN id is passed to the driver when the real device has NETIF_F_HW_VLAN_FILTER. */
if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
调用驱动的ndo_vlan_rx_register, ndo_vlan_rx_add_vid初始化设备
return 0;
/* Error unwinding: only state created by this call (ngrp != NULL) is undone. */
out_uninit_applicant:
if (ngrp)
vlan_gvrp_uninit_applicant(real_dev);
out_free_group:
if (ngrp) {
/* NOTE(review): presumably vlan_group_alloc() linked the group into the hash list -- confirm. */
hlist_del_rcu(&ngrp->hlist);
/* Free the group, after all cpu's are done. */
call_rcu(&ngrp->rcu, vlan_rcu_free);
}
return err;
}
下面来研究下vlan_netdev_ops的操作:
vlan_dev_change_mtu,设置mtu,对于vlan设备而言mtu在vlan_dev_info->mtu中
vlan_dev_init,主要是设置dev->flags, dev->iflink, dev->state, dev->features, dev->dev_id, dev->gso_max_size,然后判断真实设备有没有NETIF_F_HW_VLAN_TX,如果设置了NETIF_F_HW_VLAN_TX,说明网卡可以自动处理802.1q的vlan头,因此上层无需考虑二层头的tci空间,直接有dev->hard_header_len = real_dev->hard_header_len,否则需要有dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;同样的根据有没有NETIF_F_HW_VLAN_TX,设置的dev->netdev_ops也不同,但vlan_netdev_accel_ops和vlan_netdev_ops的唯一差别只是在发送函数ndo_start_xmit上
vlan_dev_open,很多是和net_device打开重复的调用,代码很清晰不多说了,最重要的就是调用netif_carrier_on;同样的,vlan_dev_stop(即ndo_stop)最重要的就是调用netif_carrier_off
下面是vlan发送需要调用的两个重要函数:vlan_dev_hard_start_xmit,以及vlan_dev_hwaccel_hard_start_xmit
vlan_dev_hard_start_xmit
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
int i = skb_get_queue_mapping(skb);
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
unsigned int len;
int ret;
/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
*
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
unsigned int orig_headroom = skb_headroom(skb);
u16 vlan_tci;
这个分支用来加上vlan头,前提是要么veth->h_vlan_proto不是ETH_P_8021Q(0x8100),即报文此时还没有vlan头;要么vlan设备设置了VLAN_FLAG_REORDER_HDR标志
vlan_dev_info(dev)->cnt_encap_on_xmit++;
vlan_tci = vlan_dev_info(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_put_tag(skb, vlan_tci);
if (!skb) {
txq->tx_dropped++;
return NETDEV_TX_OK;
}
if (orig_headroom < VLAN_HLEN)
vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
}
skb->dev = vlan_dev_info(dev)->real_dev;
vlan_dev_info(dev)->real_dev才是真正发送的设备
len = skb->len;
ret = dev_queue_xmit(skb);
调用dev_queue_xmit发送skb
if (likely(ret == NET_XMIT_SUCCESS)) {
txq->tx_packets++;
txq->tx_bytes += len;
} else
txq->tx_dropped++;
return NETDEV_TX_OK;
}
vlan_dev_hwaccel_hard_start_xmit
/*
 * Transmit function for VLAN devices on the hardware-accelerated path.
 *
 * Builds the TCI from the device's VLAN id OR-ed with the value returned by
 * vlan_dev_get_egress_qos_mask(), hands it to __vlan_hwaccel_put_tag(),
 * retargets the skb to the real device and queues it with dev_queue_xmit().
 *
 * Always returns NETDEV_TX_OK; the result of dev_queue_xmit() is reflected
 * only in the per-queue tx_packets/tx_bytes/tx_dropped counters.
 */
static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
/* Statistics are kept on the VLAN device's own tx queue selected by the skb's queue mapping. */
int i = skb_get_queue_mapping(skb);
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
u16 vlan_tci;
unsigned int len;
int ret;
vlan_tci = vlan_dev_info(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
这里__vlan_hwaccel_put_tag只是生成了vlan_tci之后,简单放到skb->vlan_tci里面,给报文添加vlan报头交给网卡去做
/* From here on the skb belongs to the underlying real device. */
skb->dev = vlan_dev_info(dev)->real_dev;
/* Length is sampled before queuing; presumably the skb must not be touched after dev_queue_xmit() -- confirm. */
len = skb->len;
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS)) {
txq->tx_packets++;
txq->tx_bytes += len;
} else
txq->tx_dropped++;
return NETDEV_TX_OK;
}
对于接收报文而言,真实设备收到报文之后,如果报文的协议类型是ETH_P_8021Q,协议栈会调用vlan_proto_init里通过dev_add_pack注册的vlan_packet_type的处理函数vlan_skb_recv:
/*
 * Packet handler descriptor for 802.1Q frames: matches packets whose type is
 * ETH_P_8021Q (stored in big-endian form) and passes them to vlan_skb_recv().
 * It is added to the stack with dev_add_pack() in vlan_proto_init().
 */
static struct packet_type vlan_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_8021Q),
.func = vlan_skb_recv, /* VLAN receive method */
};
vlan_skb_recv代码很直观,不多说了