linux内核网络协议栈学习笔记:vlan

时间:2022-09-02 11:08:07

这篇是我临时加的,本来不打算放在整个内核协议栈分析的系列里的,但我现在觉得vlan还是蛮重要的,而且讨论vlan源码的文章很少,不知道我这篇算不算第一篇 :D

vlan的代码都在net/8021q/的内核目录下,首先我们来看8021q模块 (net/8021q/vlan.c)

vlan_proto_init , vlan_cleanup_module 是模块的init/exit函数,我们来看vlan_proto_init,vlan_cleanup_module基本就是反过来做一遍

static int __init vlan_proto_init(void)
{   
    int err;

    pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
    pr_info("All bugs added by %s\n", vlan_buggyright);
    
    err = register_pernet_gen_device(&vlan_net_id, &vlan_net_ops);
    if (err < 0)
        goto err0;
        
    err = register_netdevice_notifier(&vlan_notifier_block);
    if (err < 0)
        goto err2; 

register_pernet_gen_device注册vlan模块的per-net(每个网络命名空间一份)操作集vlan_net_ops,register_netdevice_notifier注册网络设备事件的通知回调vlan_notifier_block,这两步都是内核网络模块初始化时的常规流程

    err = vlan_gvrp_init();
    if (err < 0)
        goto err3;

    err = vlan_netlink_init();
    if (err < 0)
        goto err4;

    dev_add_pack(&vlan_packet_type);

dev_add_pack,把802.1q当做另一种协议来处理
    vlan_ioctl_set(vlan_ioctl_handler);

把vlan_ioctl_handler注册为vconfig命令的handler
    return 0;

err4:
    vlan_gvrp_uninit();
err3:
    unregister_netdevice_notifier(&vlan_notifier_block);
err2:
    unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
err0:
    return err;
}


vconfig配置vlan接口的参数被封装在vlan_ioctl_args中

/*
 * Argument block for the VLAN ioctl interface. Per the surrounding notes,
 * vconfig packs its parameters into this structure and the ioctl handler
 * dispatches on 'cmd'.
 */
struct vlan_ioctl_args {
    int cmd; /* Should be one of the vlan_ioctl_cmds enum above. */
    /* NOTE(review): presumably the name of the interface the command acts on -- confirm. */
    char device1[24];
        
        /*
         * Command-specific argument. NOTE(review): which member is
         * meaningful presumably depends on 'cmd' -- confirm against the
         * ioctl handler.
         */
        union {
        char device2[24];
        int VID;
        unsigned int skb_priority;
        unsigned int name_type;
        unsigned int bind_type;
        unsigned int flag; /* Matches vlan_dev_info flags */
        } u;
        
    short vlan_qos;       
};                


vlan_ioctl_handler就是针对不同的vconfig的cmd参数有不同的行为,目前已知的cmd有:

SET_VLAN_INGRESS_PRIORITY_CMD
SET_VLAN_EGRESS_PRIORITY_CMD
SET_VLAN_FLAG_CMD
ADD_VLAN_CMD
DEL_VLAN_CMD
GET_VLAN_REALDEV_NAME_CMD
GET_VLAN_VID_CMD

对于添加vlan设备而言,最重要的无非是register_vlan_device咯


先提下vlan group的概念,我的理解是同一个物理设备上的vlan设备属于同一个vlan group,内核用全局哈希表struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]保存所有的vlan group,哈希表的大小为32,以net_device的ifindex作为哈希的key。

/*
 * Groups the VLAN devices that are attached to one underlying real device.
 */
struct vlan_group {
    struct net_device   *real_dev; /* The ethernet(like) device
                        * the vlan is attached to.
                        */
    /* Incremented by register_vlan_dev() each time a VLAN device is added. */
    unsigned int        nr_vlans;
    struct hlist_node   hlist;  /* linked list */
    /*
     * VLAN devices of this group, stored in parts: the part is selected by
     * vlan_id / VLAN_GROUP_ARRAY_PART_LEN and the slot inside it by
     * vlan_id % VLAN_GROUP_ARRAY_PART_LEN (see vlan_group_get_device()).
     * A part pointer may be NULL.
     */
    struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
    /* Passed to call_rcu() so the group is freed via an RCU callback. */
    struct rcu_head     rcu;
};  

基于real_dev的vlan_group可以有多个vlan_device,vlan_group按vlan_id把所有的vlan_device放在一个二维的net_device指针数组里,即vlan_devices_arrays。该二维数组的大小是VLAN_GROUP_ARRAY_SPLIT_PARTS * VLAN_GROUP_ARRAY_PART_LEN,这一点可以从vlan_group_get_device看出来:

/*
 * Look up the VLAN device registered for @vlan_id in group @vg.
 *
 * The id is split into a part index (vlan_id / VLAN_GROUP_ARRAY_PART_LEN)
 * and a slot inside that part (vlan_id % VLAN_GROUP_ARRAY_PART_LEN).
 *
 * Returns the stored net_device pointer, or NULL when the part holding
 * this id is not present.
 */
static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
                               u16 vlan_id)
{
    struct net_device **part =
        vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];

    if (!part)
        return NULL;

    return part[vlan_id % VLAN_GROUP_ARRAY_PART_LEN];
}


下面来看register_vlan_device:

首先进行一系列的check,之后调用alloc_netdev创建vlan_device,这个函数我们之前的文章讨论过。这里创建的net_device之后会接一个线性空间,里面是一个vlan_dev_info结构。alloc_netdev会调用vlan_setup,vlan_setup的代码相当直观:    

/*
 * Setup callback for a newly allocated VLAN net_device.
 *
 * Starts from the generic Ethernet defaults and then applies the
 * VLAN-specific flags, queue length and operation tables.
 */
void vlan_setup(struct net_device *dev)
{
    /* Generic Ethernet defaults first; everything below overrides them. */
    ether_setup(dev);

    /* Set IFF_802_1Q_VLAN and clear IFF_XMIT_DST_RELEASE. */
    dev->priv_flags = (dev->priv_flags | IFF_802_1Q_VLAN) &
                      ~IFF_XMIT_DST_RELEASE;
    netdev_extended(dev)->ext_priv_flags &= ~IFF_TX_SKB_SHARING;
    dev->tx_queue_len = 0;

    /* Operation tables and the destructor used for this device. */
    dev->netdev_ops  = &vlan_netdev_ops;
    dev->ethtool_ops = &vlan_ethtool_ops;
    dev->destructor  = free_netdev;

    /* Zero the broadcast address that ether_setup() filled in. */
    memset(dev->broadcast, 0, ETH_ALEN);
}

static const struct net_device_ops vlan_netdev_ops = {
    .ndo_change_mtu     = vlan_dev_change_mtu,
    .ndo_init       = vlan_dev_init,
    .ndo_uninit     = vlan_dev_uninit,
    .ndo_open       = vlan_dev_open,
    .ndo_stop       = vlan_dev_stop,
    .ndo_start_xmit =  vlan_dev_hard_start_xmit,
    .ndo_validate_addr  = eth_validate_addr,
    .ndo_set_mac_address    = vlan_dev_set_mac_address,
    .ndo_set_rx_mode    = vlan_dev_set_rx_mode,
    .ndo_set_multicast_list = vlan_dev_set_rx_mode,
    .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
    .ndo_do_ioctl       = vlan_dev_ioctl,
    .ndo_neigh_setup    = vlan_dev_neigh_setup,
    .ndo_get_stats      = vlan_dev_get_stats,

}

与vlan_netdev_ops对应的是vlan_netdev_accel_ops,如果网卡有vlan acceleration功能(即网卡硬件可以自己给报文加上802.1q的vlan头,对应NETIF_F_HW_VLAN_TX),vlan设备的dev->netdev_ops就会被设置为vlan_netdev_accel_ops


之后调用register_vlan_dev,该函数主要就是初始化对应的vlan_group->vlan_devices_arrays的数组成员,还有调用相应驱动的注册代码

int register_vlan_dev(struct net_device *dev)
{       
    struct vlan_dev_info *vlan = vlan_dev_info(dev);
    struct net_device *real_dev = vlan->real_dev;
    const struct net_device_ops *ops = real_dev->netdev_ops;
    u16 vlan_id = vlan->vlan_id;
    struct vlan_group *grp, *ngrp = NULL;
    int err;
    
    grp = __vlan_find_group(real_dev);
    if (!grp) {
        ngrp = grp = vlan_group_alloc(real_dev);
        if (!grp)
            return -ENOBUFS;
        err = vlan_gvrp_init_applicant(real_dev);
        if (err < 0)
            goto out_free_group;
    }
先拿到real_dev对应的vlan_group,如果没有就调用vlan_group_alloc一个

    err = vlan_group_prealloc_vid(grp, vlan_id);
    if (err < 0)
        goto out_uninit_applicant;
vlan_group_prealloc_vid用来为vlan_id预先分配vlan_group->vlan_devices_arrays中对应的那一段子数组(注意这里是按vlan_id分段的数组,不是哈希表)

    err = register_netdevice(dev);
    if (err < 0)
        goto out_uninit_applicant;
注册网络设备

    /* Account for reference in struct vlan_dev_info */
    dev_hold(real_dev);

    vlan_transfer_operstate(real_dev, dev);
    linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */

    /* So, got the sucker initialized, now lets place
     * it into our local structure.
     */
    vlan_group_set_device(grp, vlan_id, dev);
    grp->nr_vlans++;

    if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
        ops->ndo_vlan_rx_register(real_dev, ngrp);
    if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
        ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
调用驱动的ndo_vlan_rx_register, ndo_vlan_rx_add_vid初始化设备

    return 0;

out_uninit_applicant:
    if (ngrp)
        vlan_gvrp_uninit_applicant(real_dev);
out_free_group:
    if (ngrp) {
        hlist_del_rcu(&ngrp->hlist);
        /* Free the group, after all cpu's are done. */
        call_rcu(&ngrp->rcu, vlan_rcu_free);
    }
    return err;
}


下面来研究下vlan_netdev_ops的操作:

vlan_dev_change_mtu,设置mtu,对于vlan设备而言mtu在vlan_dev_info->mtu中

vlan_dev_init,主要是设置dev->flags, dev->iflink, dev->state, dev->features, dev->dev_id, dev->gso_max_size,然后判断真实设备有没有NETIF_F_HW_VLAN_TX,如果设置了NETIF_F_HW_VLAN_TX,说明网卡可以自动处理802.1q的vlan头,因此上层无需考虑二层头的tci空间,直接有dev->hard_header_len = real_dev->hard_header_len,否则需要有dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;同样的根据有没有NETIF_F_HW_VLAN_TX,设置的dev->netdev_ops也不同,但vlan_netdev_accel_ops和vlan_netdev_ops的唯一差别只是在发送函数ndo_start_xmit上

vlan_dev_open,很多是和net_device打开重复的调用,代码很清晰不多说了,最重要的就是调用netif_carrier_on;同样的,vlan_dev_stop最重要的就是调用netif_carrier_off


下面是vlan发送需要调用的两个重要函数:vlan_dev_hard_start_xmit,以及vlan_dev_hwaccel_hard_start_xmit

vlan_dev_hard_start_xmit

static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
                        struct net_device *dev)
{
    int i = skb_get_queue_mapping(skb);
    struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
    struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
    unsigned int len;
    int ret;

    /* Handle non-VLAN frames if they are sent to us, for example by DHCP.
     *
     * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
     * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
     */
    if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
        vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
        unsigned int orig_headroom = skb_headroom(skb);
        u16 vlan_tci;
这个分支用来加上vlan头,前提是要么veth->h_vlan_proto != 0x8100(ETH_P_8021Q),此时报文还没有vlan头;要么vlan设备打上了VLAN_FLAG_REORDER_HDR这个flag


        vlan_dev_info(dev)->cnt_encap_on_xmit++;
        vlan_tci = vlan_dev_info(dev)->vlan_id;
        vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
        skb = __vlan_put_tag(skb, vlan_tci);
        if (!skb) {
            txq->tx_dropped++;
            return NETDEV_TX_OK;
        }       
        if (orig_headroom < VLAN_HLEN)
            vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
    }

    skb->dev = vlan_dev_info(dev)->real_dev;

vlan_dev_info(dev)->real_dev才是真正发送的设备
    len = skb->len;
    ret = dev_queue_xmit(skb);
调用dev_queue_xmit发送skb

    if (likely(ret == NET_XMIT_SUCCESS)) {
        txq->tx_packets++;
        txq->tx_bytes += len;
    } else
        txq->tx_dropped++;

    return NETDEV_TX_OK;
}


vlan_dev_hwaccel_hard_start_xmit

static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
                            struct net_device *dev)
{   
    int i = skb_get_queue_mapping(skb);
    struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
    u16 vlan_tci;
    unsigned int len;
    int ret;

    vlan_tci = vlan_dev_info(dev)->vlan_id;
    vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
    skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
这里__vlan_hwaccel_put_tag只是生成了vlan_tci之后,简单放到skb->vlan_tci里面,给报文添加vlan报头交给网卡去做

                             
    skb->dev = vlan_dev_info(dev)->real_dev;
    len = skb->len; 
    ret = dev_queue_xmit(skb);
   
    if (likely(ret == NET_XMIT_SUCCESS)) {
        txq->tx_packets++;
        txq->tx_bytes += len;
    } else
        txq->tx_dropped++;

    return NETDEV_TX_OK;
}


对于接收报文而言,在真实设备接收到报文之后,如果报文的协议类型是ETH_P_8021Q(即带vlan头的报文),协议栈会根据下面通过dev_add_pack注册的packet_type,再调用vlan_skb_recv来处理:

/*
 * Packet-type entry for the 802.1Q ethertype (ETH_P_8021Q, stored in
 * big-endian form). vlan_proto_init() registers it with dev_add_pack(),
 * with vlan_skb_recv as the receive handler.
 */
static struct packet_type vlan_packet_type __read_mostly = {
    .type = cpu_to_be16(ETH_P_8021Q),
    .func = vlan_skb_recv, /* VLAN receive method */
};

vlan_skb_recv代码很直观,不多说了