【Linux4.1.12源码分析】协议栈gro收包之IP层处理

时间:2022-04-16 11:05:03

IP层的offload定义是ip_packet_offload

/*
 * Offload descriptor for IPv4: ties the ETH_P_IP EtherType to the
 * inet GSO segmentation callback and the inet GRO receive/complete
 * callbacks.
 */
static struct packet_offload ip_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.callbacks = {
.gso_segment = inet_gso_segment,
.gro_receive = inet_gro_receive,
.gro_complete = inet_gro_complete,
},
};


inet_gro_receive函数

/*
 * IPv4 GRO receive hook.
 *
 * Validates the IPv4 header of @skb, compares it against every packet
 * on the @head list that is still marked same_flow, updates the
 * same_flow / flush / flush_id state of those packets, and then hands
 * @skb to the transport-layer gro_receive callback.
 *
 * Returns whatever the transport-layer callback returned, or NULL when
 * the function bails out before reaching it. On every path the current
 * value of the local flush is OR-ed into NAPI_GRO_CB(skb)->flush; flush
 * starts at 1, so every early exit leaves the packet marked for flush.
 */
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
const struct iphdr *iph;
unsigned int hlen;
unsigned int off;
unsigned int id;
int flush = 1;
int proto;

off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
iph = skb_gro_header_fast(skb, off); // Fast-path fetch of the IP header; the original note says the header may sit in the linear area or in a frag
if (skb_gro_header_hard(skb, hlen)) {
iph = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!iph))
goto out;
}

proto = iph->protocol; // Transport-layer protocol number

rcu_read_lock();
ops = rcu_dereference(inet_offloads[proto]); // Look up the offload registered for that transport protocol
if (!ops || !ops->callbacks.gro_receive) // No usable offload: bail out with flush still 1, so the packet is not aggregated
goto out_unlock;

if (*(u8 *)iph != 0x45) // First header byte must be 0x45: version 4 and IHL 5 (20-byte header, no options); otherwise bail out
goto out_unlock;

if (unlikely(ip_fast_csum((u8 *)iph, 5))) // Header checksum over 5 words must come out as 0; otherwise bail out with flush still 1
goto out_unlock;

id = ntohl(*(__be32 *)&iph->id); // 32-bit read starting at the id field: IP ID in the high 16 bits, flags + fragment offset in the low 16 bits
flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); // Non-zero if the low 16 bits of the first header word (tot_len) differ from gro_len, or if any flag/fragment-offset bit other than DF is set; DF alone does not cause a flush
id >>= 16; // Keep only the 16-bit IP ID

for (p = *head; p; p = p->next) { // Walk the packets held on the GRO list
struct iphdr *iph2;

if (!NAPI_GRO_CB(p)->same_flow) // Already ruled out by an earlier check (per the original note, the MAC-level flow match); skip
continue;

iph2 = (struct iphdr *)(p->data + off); // IP header of the held packet, read directly from p->data at the same offset
/* The above works because, with the exception of the top
* (inner most) layer, we only aggregate pkts with the same
* hdr length so all the hdrs we'll need to verify will start
* at the same offset.
*/
if ((iph->protocol ^ iph2->protocol) | // IP-level flow match: transport protocol must be equal
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | // source address must be equal
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { // destination address must be equal
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}

/* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) | // Same flow, but a differing TTL, TOS or DF bit marks the held packet for flush
(iph->tos ^ iph2->tos) |
((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));

/* Save the IP ID check to be included later when we get to
* the transport layer so only the inner most IP ID is checked.
* This is because some GSO/TSO implementations do not
* correctly increment the IP ID for the outer hdrs.
*/
NAPI_GRO_CB(p)->flush_id =
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush; // Propagate the new packet's flush value to the held packet
}

NAPI_GRO_CB(skb)->flush |= flush; // Record the flush value on the new packet
skb_set_network_header(skb, off); // Set the network header offset so the IP header can be located later
/* The above will be needed by the transport layer if there is one
* immediately following this IP hdr.
*/

/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
*/
skb_gro_pull(skb, sizeof(*iph)); // Advance the GRO offset past the IP header to the layer-4 header
skb_set_transport_header(skb, skb_gro_offset(skb)); // Set the transport header offset to the new GRO offset

pp = ops->callbacks.gro_receive(head, skb); // Call the layer-4 offload's gro_receive

out_unlock:
rcu_read_unlock();

out:
NAPI_GRO_CB(skb)->flush |= flush; // OR the local flush into the packet again; on the bail-out paths this is what marks the packet for flush

return pp;
}

inet_gro_complete函数

/*
 * IPv4 GRO complete hook.
 *
 * Rewrites tot_len in the IP header found at @nhoff inside @skb to
 * cover skb->len - nhoff bytes, patches the header checksum for that
 * change, and then calls the transport-layer gro_complete callback
 * with the offset just past the fixed-size IP header.
 *
 * Returns the transport-layer callback's result, or -ENOSYS when no
 * offload with a gro_complete callback exists for the protocol.
 */
static int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); // Locate the IP header at nhoff
const struct net_offload *ops;
int proto = iph->protocol;
int err = -ENOSYS;

if (skb->encapsulation)
skb_set_inner_network_header(skb, nhoff); // Encapsulated packet: record nhoff as the inner network header

csum_replace2(&iph->check, iph->tot_len, newlen); // Patch the header checksum for the tot_len change
iph->tot_len = newlen; // Store the new total length in the IP header

rcu_read_lock();
ops = rcu_dereference(inet_offloads[proto]); // Look up the transport-layer offload
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;

/* Only need to add sizeof(*iph) to get to the next hdr below
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); // Call the transport layer's gro_complete

out_unlock:
rcu_read_unlock();

return err;
}

inet_gro_receive函数完成IP层的same_flow和flush判断,然后调用传输层的gro_receive;inet_gro_complete在聚合完成后更新IP头的tot_len字段并相应修正校验和,然后调用传输层的gro_complete。