sk_buff是Linux网络协议栈最重要的数据结构之一,该数据结构贯穿于整个数据包处理的流程。由于协议采用分层结构,上层向下层传递数据时需要增加包头,下层向上层传递数据时又需要去掉包头。sk_buff中保存了L2,L3,L4层的头指针,这样在层间传递时只需要对数据缓冲区改变头部信息,并调整sk_buff中的指针,而不需要拷贝数据,这样大大减少了内存拷贝的需要。
/**struct sk_buff {//介绍
*struct sk_buff - socket buffer
*@next: Next buffer in list
*@prev: Previous buffer in list
*@tstamp: Time we arrived
*@sk: Socket we are owned by
*@dev: Device we arrived on/are leaving by
*@cb: Control buffer. Free for use by every layer. Put private vars here
*@_skb_refdst: destination entry (with norefcount bit)
*@sp: the security path, used for xfrm
*@len: Length of actual data
*@data_len: Data length
*@mac_len: Length of link layer header
*@hdr_len: writable header length of cloned skb
*@csum: Checksum (must include start/offset pair)
*@csum_start: Offset from skb->head where checksumming should start
*@csum_offset: Offset from csum_start where checksum should be stored
*@priority: Packet queueing priority
*@local_df: allow local fragmentation
*@cloned: Head may be cloned (check refcnt to be sure)
*@ip_summed: Driver fed us an IP checksum
*@nohdr: Payload reference only, must not modify header
*@nfctinfo: Relationship of this skb to the connection
*@pkt_type: Packet class
*@fclone: skbuff clone status
*@ipvs_property: skbuff is owned by ipvs
*@peeked: this packet has been seen already, so stats have been
*done for it, don't do them again
*@nf_trace: netfilter packet trace flag
*@protocol: Packet protocol from driver
*@destructor: Destruct function
*@nfct: Associated connection, if any
*@nfct_reasm: netfilter conntrack re-assembly pointer
*@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
*@skb_iif: ifindex of device we arrived on
*@tc_index: Traffic control index
*@tc_verd: traffic control verdict
*@rxhash: the packet hash computed on receive
*@queue_mapping: Queue mapping for multiqueue devices
*@ndisc_nodetype: router type (from link layer)
*@ooo_okay: allow the mapping of a socket to a queue to be changed
*@l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
*ports.
*@wifi_acked_valid: wifi_acked was set
*@wifi_acked: whether frame was acked on wifi or not
*@no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
*@dma_cookie: a cookie to one of several possible DMA operations
*done by skb DMA functions
*@secmark: security marking
*@mark: Generic packet mark
*@dropcount: total number of sk_receive_queue overflows
*@vlan_tci: vlan tag control information
*@inner_transport_header: Inner transport layer header (encapsulation)
*@inner_network_header: Network layer header (encapsulation)
*@transport_header: Transport layer header
*@network_header: Network layer header
*@mac_header: Link layer header
*@tail: Tail pointer
*@end: End pointer
*@head: Head of buffer
*@data: Data head pointer
*@truesize: Buffer size
*@users: User count - see {datagram,tcp}.c
*/
struct sk_buff {
/* These two members must be first. */
struct sk_buff*next;
struct sk_buff*prev;
ktime_ttstamp;
struct sock*sk;
struct net_device*dev;
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
charcb[48] __aligned(8);
unsigned long_skb_refdst;
#ifdef CONFIG_XFRM
structsec_path*sp;
#endif
unsigned intlen,
data_len;
__u16mac_len,
hdr_len;
union {
__wsumcsum;
struct {
__u16csum_start;
__u16csum_offset;
};
};
__u32priority;
kmemcheck_bitfield_begin(flags1);
__u8local_df:1,
cloned:1,
ip_summed:2,
nohdr:1,
nfctinfo:3;
__u8pkt_type:3,
fclone:2,
ipvs_property:1,
peeked:1,
nf_trace:1;
kmemcheck_bitfield_end(flags1);
__be16protocol;
void(*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack*nfct;
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
struct sk_buff*nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info*nf_bridge;
#endif
intskb_iif;
__u32rxhash;
__u16vlan_tci;
#ifdef CONFIG_NET_SCHED
__u16tc_index;/* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16tc_verd;/* traffic control verdict */
#endif
#endif
__u16queue_mapping;
kmemcheck_bitfield_begin(flags2);
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8ndisc_nodetype:2;
#endif
__u8pfmemalloc:1;
__u8ooo_okay:1;
__u8l4_rxhash:1;
__u8wifi_acked_valid:1;
__u8wifi_acked:1;
__u8no_fcs:1;
__u8head_frag:1;
/* Encapsulation protocol and NIC drivers should use
* this flag to indicate to each other if the skb contains
* encapsulated packet or not and maybe use the inner packet
* headers if needed
*/
__u8encapsulation:1;
/* 7/9 bit hole (depending on ndisc_nodetype presence) */
kmemcheck_bitfield_end(flags2);
#ifdef CONFIG_NET_DMA
dma_cookie_tdma_cookie;
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32secmark;
#endif
union {
__u32mark;
__u32dropcount;
__u32reserved_tailroom;
};
sk_buff_data_tinner_transport_header;
sk_buff_data_tinner_network_header;
sk_buff_data_ttransport_header;
sk_buff_data_tnetwork_header;
sk_buff_data_tmac_header;
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_ttail;
sk_buff_data_tend;
unsigned char*head,
*data;
unsigned inttruesize;
atomic_tusers;
};
struct sk_buff *next, *prev; //双向链表指针
ktime_t tstamp; //时间戳
struct sock *sk; //对应于传输层,标示属于哪个socket ?
struct net_device *dev; //数据来自或者发送自哪个设备
char cb[48];//控制信息buffer,在每个层都可以用,并且目前为止足够大
unsigned int len; //实际总长度
unsigned int data_len; //数据的长度(也许是paged的data)
__u16 mac_len; //数据链路层头的长度
__u16 hdr_len; //writable header length of cloned skb
sk_buff_data_t transport_header; //传输层头指针
sk_buff_data_t network_header; //网络层头指针
sk_buff_data_t mac_header; //数据链路层头
unsigned char *head; //buffer 头
unsigned char *data; //数据头
sk_buff_data_t tail; //数据结尾
sk_buff_data_t end; //buffer 结尾
unsigned int truesize; //buffer 大小
cloned 是不是cloned
mark 数据包mark
destructor 销毁函数指针
pkt_type : 根据二层头确定的包信息
__be16 protocol : 三层协议 IP ARP 等,用于和全局数组ptype_base中的数据对比,该数组可以通过dev_add_pack()注册.
}
由于该结构将用于各个层,内核提供了一系列的sk_buff的操作函数
skb_put() 减小tailroom,buffer向后扩展
skb_push() 减小headroom,buffer向上扩张
skb_trim() cut buffer到一个长度
skb_pull 从数据头cut一定长度的数据
skb_reserve 增大headroom,减少tailroom,只能用于buffer为空时
skb_headroom headroom的大小
skb_tailroom tailroom的大小
alloc_skb() 分配一个sk_buff结构及buffer区域
kfree_skb() reference 减一,并且free skb和buffer如果不再有引用
dev_alloc_skb() 方便接收数据的sk_buff的分配函数
dev_kfree_skb()
skb_shinfo() 获得和sk_buff 一块分配的struct skb_shared_info
skb_clone() //复制sk_buff ,但是buffer不变
pskb_copy() //拷贝sk_buff和私有的头部,常用于需要修改sk_buff的头部时
skb_copy() //完全拷贝
skb_queue_head_init()
skb_queue_head()
skb_queue_tail()
skb_dequeue()
skb_dequeue_tail()
skb_queue_purge() //list 清空
skb_queue_walk() //遍历list用
在Linux2.6中,struct sk_buff承担了socket的输入输出的传输缓存的任务。
首先,还是先看struct socket的定义
/** |
那么现在跳转到struct sock的定义处。由于struct sock的定义过长,所以只展示一部分。
struct sock { |
struct sk_buff_head { |