e1000 网卡使用NAPI

时间:2021-02-09 15:10:56
在e1000 网卡的probe函数中会调用netif_napi_add 来注册在第一次接收中断后的poll函数。
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
在e1000_setup_rx_resources 函数中会申请一个4k的DMA buffer
/**
 * e1000_setup_rx_resources - allocate one Rx descriptor ring
 * @adapter: board private structure
 * @rxdr: Rx descriptor ring to initialize
 *
 * Allocates the per-descriptor bookkeeping array with vzalloc() and the
 * hardware descriptor ring itself from DMA-coherent memory, with the ring
 * size rounded up to a 4K multiple.  Returns 0 on success, -ENOMEM on
 * allocation failure (the bookkeeping array is freed on the error path).
 */
static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
                    struct e1000_rx_ring *rxdr)
{
    struct pci_dev *pdev = adapter->pdev;
    int size, desc_len;

    /* One e1000_rx_buffer bookkeeping entry per hardware descriptor. */
    size = sizeof(struct e1000_rx_buffer) * rxdr->count;
    rxdr->buffer_info = vzalloc(size);
    if (!rxdr->buffer_info)
        return -ENOMEM;

    desc_len = sizeof(struct e1000_rx_desc);

    /* Round up to nearest 4K */

    rxdr->size = rxdr->count * desc_len;
    rxdr->size = ALIGN(rxdr->size, 4096);
/* dma_alloc_coherent() is the correct modern API here; the older
 * pci_alloc_consistent() is merely a deprecated wrapper around it. */
    rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
                    GFP_KERNEL);
    if (!rxdr->desc) {
/* NOTE(review): in the full driver source this label is also reached by a
 * goto from a later error path; only the fall-through is visible in this
 * excerpt, which makes the label look unused here. */
setup_rx_desc_die:
        vfree(rxdr->buffer_info);
        return -ENOMEM;
    }

    /* NOTE(review): likely redundant — dma_alloc_coherent() already
     * returns zeroed memory on modern kernels; kept as in the original. */
    memset(rxdr->desc, 0, rxdr->size);

    rxdr->next_to_clean = 0;
    rxdr->next_to_use = 0;
    rxdr->rx_skb_top = NULL;

    return 0;
}

在e1000_open->e1000_request_irq中会注册中断函数是e1000_intr
/**
 * e1000_request_irq - hook the device's legacy interrupt line
 * @adapter: board private structure
 *
 * Registers e1000_intr as a shared-IRQ handler on the PCI device's
 * interrupt, keyed by the netdev cookie.  Returns 0 on success or the
 * negative errno from request_irq().
 */
static int e1000_request_irq(struct e1000_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    int err;

    /* Legacy INTx may be shared with other devices on the same line. */
    err = request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
              netdev->name, netdev);
    if (err)
        e_err(probe, "Unable to allocate interrupt Error: %d\n", err);

    return err;
}
这样当第一次接收到rx的中断时,在e1000_intr 中通过napi_schedule_prep 来判断napi是否可以被调度
/**
 * e1000_intr - interrupt handler
 * @irq: interrupt number (unused)
 * @data: pointer to our net_device, as passed to request_irq()
 *
 * Reads (and thereby clears) the Interrupt Cause Read register, then hands
 * all further work to NAPI: if the NAPI context can be scheduled, reset the
 * per-poll byte/packet accounting and queue it for net_rx_action.
 *
 * Returns IRQ_NONE when ICR is zero — the handler is registered with
 * IRQF_SHARED, so a zero cause means the interrupt belongs to another
 * device on the shared line and must not be claimed.
 */
static irqreturn_t e1000_intr(int __always_unused irq, void *data)
{
    struct net_device *netdev = data;
    struct e1000_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    u32 icr = er32(ICR);

    /* Not our interrupt: shared-IRQ contract requires IRQ_NONE here. */
    if (unlikely(!icr))
        return IRQ_NONE;

    if (napi_schedule_prep(&adapter->napi)) {
        /* Fresh accounting window for the upcoming poll. */
        adapter->total_tx_bytes = 0;
        adapter->total_tx_packets = 0;
        adapter->total_rx_bytes = 0;
        adapter->total_rx_packets = 0;
        __napi_schedule(&adapter->napi);
    }

    return IRQ_HANDLED;
}
如果可以被调度的话,则调用__napi_schedule
/**
 * __napi_schedule - queue a NAPI context for polling on this CPU
 * @n: NAPI context to schedule
 *
 * Appends @n to the current CPU's softnet_data poll list and raises
 * NET_RX_SOFTIRQ, with local interrupts disabled around the per-CPU
 * list manipulation so an IRQ cannot race the same-CPU list.
 */
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    local_irq_restore(flags);
}
/* Caller must have local interrupts disabled.  Appends @napi to the
 * per-CPU poll list and raises NET_RX_SOFTIRQ so that net_rx_action
 * runs it on softirq return. */
static inline void ____napi_schedule(struct softnet_data *sd,
                     struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
这里会触发NET_RX_SOFTIRQ 软件中断
而在net_dev_init中会初始化软件中断的处理函数
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);
这里的处理函数就是net_rx_action
/**
 * net_rx_action - NET_RX_SOFTIRQ handler
 * @h: softirq action (unused)
 *
 * Drains this CPU's softnet_data poll list, invoking each scheduled
 * NAPI context's ->poll() via napi_poll() until either the list is
 * empty, the aggregate budget (netdev_budget) is spent, or the time
 * limit expires.  Any NAPI contexts that still have work are spliced
 * back onto the poll list and the softirq is re-raised.
 */
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    unsigned long time_limit = jiffies +
        usecs_to_jiffies(netdev_budget_usecs);
    int budget = netdev_budget;
    LIST_HEAD(list);
    LIST_HEAD(repoll);

    /* Steal the whole poll list so it can be walked with IRQs enabled. */
    local_irq_disable();
    list_splice_init(&sd->poll_list, &list);
    local_irq_enable();

    for (;;) {
        struct napi_struct *n;

        if (list_empty(&list)) {
            if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
                goto out;
            break;
        }

        n = list_first_entry(&list, struct napi_struct, poll_list);
/* Invoke the driver's poll routine (e1000_clean for e1000) for this
 * NAPI context; continue until the local list is drained. */
        budget -= napi_poll(n, &repoll);

        /* If softirq window is exhausted then punt.
         * Allow this to run for 2 jiffies since which will allow
         * an average latency of 1.5/HZ.
         */
        if (unlikely(budget <= 0 ||
                 time_after_eq(jiffies, time_limit))) {
            sd->time_squeeze++;
            break;
        }
    }

    local_irq_disable();

    /* Merge leftovers + repoll entries back in front of anything newly
     * scheduled, and re-raise the softirq if work remains. */
    list_splice_tail_init(&sd->poll_list, &list);
    list_splice_tail(&repoll, &list);
    list_splice(&list, &sd->poll_list);
    if (!list_empty(&sd->poll_list))
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);

    net_rps_action_and_irq_enable(sd);
out:
    __kfree_skb_flush();
}

/* Run one NAPI context's ->poll() callback with the netpoll lock held.
 *
 * NOTE(review): this excerpt is truncated — the function is declared to
 * return int but no return statement is visible, and `have` is never
 * passed to netpoll_poll_unlock().  In the full kernel source the tail of
 * this function handles the work==weight requeue onto @repoll, releases
 * the netpoll lock, and returns `work`. */
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
    void *have;
    int work, weight;

    /* Detach from the softirq's local list before polling. */
    list_del_init(&n->poll_list);

    have = netpoll_poll_lock(n);

    weight = n->weight;

    /* This NAPI_STATE_SCHED test is for avoiding a race
     * with netpoll's poll_napi().  Only the entity which
     * obtains the lock and sees NAPI_STATE_SCHED set will
     * actually make the ->poll() call.  Therefore we avoid
     * accidentally calling ->poll() when NAPI is not scheduled.
     */
    work = 0;
    if (test_bit(NAPI_STATE_SCHED, &n->state)) {
/* For e1000 this dispatches to the driver's poll routine, e1000_clean. */
        work = n->poll(n, weight);
        trace_napi_poll(n, work, weight);
    }

}

/**
 * e1000_clean - NAPI Rx polling callback
 * @napi: NAPI context embedded in the adapter
 * @budget: maximum number of Rx packets to process this poll
 *
 * Reaps completed Tx descriptors, then processes up to @budget received
 * packets.  If Tx cleanup was incomplete, work_done is forced to @budget
 * so NAPI keeps polling.  Returns the number of Rx packets processed.
 */
static int e1000_clean(struct napi_struct *napi, int budget)
{
    struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter,
                             napi);
    int tx_clean_complete = 0, work_done = 0;

    tx_clean_complete = e1000_clean_tx_irq(adapter, &adapter->tx_ring[0]);

    adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget);

    if (!tx_clean_complete)
        work_done = budget;

    /* If budget not fully consumed, exit the polling mode */
    if (work_done < budget) {
        if (likely(adapter->itr_setting & 3))
            e1000_set_itr(adapter);
/* napi_complete_done() removes this context from the poll list so the
 * device's interrupts are not left disabled any longer than needed;
 * hardware interrupts are then re-enabled. */
        napi_complete_done(napi, work_done);
        if (!test_bit(__E1000_DOWN, &adapter->flags))
            e1000_irq_enable(adapter);
    }

    return work_done;
}