Linux网络协议栈之驱动框架
作者:bullbat
网卡驱动可以以模块的方式加载,也可以在内核初始化的时候加载。我们选定e100系列的网卡来说明网卡驱动的一般框架。
网卡设备通用数据结构:
struct net_device
{
/*
* This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* the interface.
*/
/*网络设备名*/
char name[IFNAMSIZ];
/* device name hash chain */
/*根据网络设备名以散列表的形式组织到dev_name_head散列表中,这样就可以通过网络
设备名快速地定位到网络设备*/
struct hlist_node name_hlist;
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
*/
/*网络设备共享内存的起始和终止地址*/
unsignedlong mem_end; /* shared mem end */
unsignedlong mem_start; /* shared mem start */
/*网络接口I/O基地址,在探测设备时被初始化ifconfig命令可显示和修改
当前命令*/
unsignedlong base_addr; /* device I/O address */
/*分配给设备的中断号,一般在初始化设备时被初始化*/
unsignedint irq; /* device IRQ number */
/*
* Some hardware also needs these fields, but they are not
* part of the usual set specified in Space.c.
*/
/*指定在多端口设备上使用那个端口*/
unsignedchar if_port; /* Selectable AUI, TP,..*/
/*为设备分配的DMA通道*/
unsignedchar dma; /* DMA channel */
/*设备状态*/
unsignedlong state;
/*网络设备组织*/
struct net_device *next;
/*驱动程序的初始化函数*/
/* The device initialization function. Called only once. */
int (*init)(struct net_device *dev);
/* ------- Fields preinitialized in Space.c finish here ------- */
/* Net device features */
/*接口支持特性*/
unsignedlong features;
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024/* Device cannot handle VLAN packets */
#define NETIF_F_GSO 2048/* Enable software GSO. */
#define NETIF_F_LLTX 4096/* LockLess TX */
/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
#define NETIF_F_GSO_MASK 0xffff0000
#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
#define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO_ECN (SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
/*用于连接那些已经调度有数据报输出的网络设备指针*/
struct net_device *next_sched;
/* Interface index. Unique device identifier */
/*网络设备的索引号*/
int ifindex;
/*网络设备的唯一标识,主要用于虚拟隧道设备*/
int iflink;
/*提供给应用程序获得接口统计信息的接口*/
struct net_device_stats* (*get_stats)(struct net_device *dev);
/* List of functions to handle Wireless Extensions (instead of ioctl).
* See <net/iw_handler.h> for details. Jean II */
/*无线网相关*/
conststruct iw_handler_def * wireless_handlers;
/* Instance data managed by the core of Wireless Extensions. */
struct iw_public_data * wireless_data;
conststruct ethtool_ops *ethtool_ops;
/*
* This marks the end of the "visible" part of the structure. All
* fields hereafter are internal to the system, and may change at
* will (read: may be cleaned up at will).
*/
unsignedint flags; /* interface flags (a la BSD) */
/*记录当前网络设备IFF_PROMISC和IFF_ALLMULTI的状态,用来配合flags的设置*/
unsignedshort gflags;
unsignedshort priv_flags; /* Like 'flags' but invisible to userspace. */
unsignedshort padded; /* How much padding added by alloc_netdev() */
unsignedchar operstate; /* RFC2863 operstate */
unsignedchar link_mode; /* mapping policy to operstate */
unsigned mtu;/* interface MTU value */
unsignedshort type; /* interface hardware type */
unsignedshort hard_header_len; /* hardware hdr length */
struct net_device *master;/* Pointer to master device of a group,
* which this device is member of.
*/
/* Interface address info. */
/*MAC地址,通常初始化时从硬件中读出来*/
unsignedchar perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
unsignedchar addr_len; /* hardware address length */
unsignedshort dev_id; /* for shared network cards */
struct dev_mc_list *mc_list; /* Multicast mac addresses */
int mc_count; /* Number of installed mcasts */
/*设置网络设备混杂模式计数器*/
int promiscuity;
/*设置网络设备接收所有组播报的计数器,每次设置或是退出操作,该字段
都会相应的加或减1,为0时,网络设备才真正不再接收组播报*/
int allmulti;
/* Protocol specific pointers */
void *atalk_ptr; /* AppleTalk link */
void *ip_ptr; /* IPv4 specific data */
void *dn_ptr; /* DECnet specific data */
void *ip6_ptr; /* IPv6 specific data */
void *ec_ptr; /* Econet specific data */
void *ax25_ptr; /* AX.25 specific data */
/*
* Cache line mostly used on receive path (including eth_type_trans())
*/
/*该结构实例通过该字段连接到softnet_data的poll_list成员上*/
struct list_head poll_list ____cacheline_aligned_in_smp;
/* Link to poll list */
/*轮询模式操作接口*/
int (*poll) (struct net_device *dev,int *quota);
/*读取数据包的配额,动态变化,由netdev_budget初始化,每次从网络设备中读取数据包后,
会从中减去本次读取的数据包数,当该配额等于或小于0时,结束当前轮询等待下层轮询
这样即使某个网络设备有大量的数据包输入,也能保证其他网络设备能及时收到数据包
在输入时,遍历网络设备轮询队列,从选定的网络设备中读取数据包,一旦已经读取的数据
包的数量操作配额,即停止本次读取,将该网络设备移至网络设备轮询队列的队尾,等待
下次轮询*/
int quota;
/*数据包输入软中断中,单个网络读取数据包的配额*/
int weight;
unsignedlong last_rx; /* Time of last Rx */
/* Interface address info used in eth_type_trans() */
unsignedchar dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast
because most packets are unicast) */
unsignedchar broadcast[MAX_ADDR_LEN]; /* hw bcast add */
/*
* Cache line mostly used on queue transmit path (qdisc)
*/
/* device queue lock */
spinlock_t queue_lock ____cacheline_aligned_in_smp;
/*当前使用的根排队规则,配置的排队规则生效时由qdisc_sleeping设置*/
struct Qdisc *qdisc;
/*当前配置的排队规则,生效时将被设置到qdisc*/
struct Qdisc *qdisc_sleeping;
/*通过链表方式记录配置所在网络的所有排队规则*/
struct list_head qdisc_list;
/*可在设备发送队列中排队的最大数据包*/
unsignedlong tx_queue_len; /* Max frames per queue allowed */
/* Partially transmitted GSO packet. */
struct sk_buff *gso_skb;
/* ingress path synchronizer */
spinlock_t ingress_lock;
/*数据包输入的排队规则*/
struct Qdisc *qdisc_ingress;
/*
* One part is mostly used on xmit path (device)
*/
/* hard_start_xmit synchronizer */
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
/* cpu id of processor entered to hard_start_xmit or -1,
if nobody entered there.
*/
int xmit_lock_owner;
void *priv; /* pointer to private data */
/*驱动提供给上一层发送数据包的接口,在发送数据包时必定会调用该接口*/
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
/* These may be needed for future network-power-down code. */
unsignedlong trans_start; /* Time (in jiffies) of last Tx */
/*网络层确定传输已经超时,而调用驱动程序的tx_timeout接口的最短时间*/
int watchdog_timeo;/* used by dev_watchdog() */
/*用于检测网络设备处于正常的工作状态时,是否存在由于关闭队列功能
而导致发送超时的情况,一旦发生以上状况,就调用网络设备驱动的tx_timeout
接口处理*/
struct timer_list watchdog_timer;
/*
* refcnt is a very hot point, so align it on SMP
*/
/* Number of references to this device */
atomic_t refcnt ____cacheline_aligned_in_smp;
/* delayed register/unregister */
/*用来连接net_todo_list链表,包含已经注销即将结束的网络设备*/
struct list_head todo_list;
/* device index hash chain */
/*根据网络设备的索引,以散列表的形式组织到dev_index_hlist中*/
struct hlist_node index_hlist;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
} reg_state;
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
/* Called after last user reference disappears. */
void (*destructor)(struct net_device *dev);
/* Pointers to interface service routines. */
/*启用设备函数指针,完成那个注册所需的系统资源,打开硬件极其所有
设备*/
int (*open)(struct net_device *dev);
int (*stop)(struct net_device *dev);
#define HAVE_NETDEV_POLL
/*根据先前检测到的源和目标硬件地址创建硬件首部*/
int (*hard_header) (struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
void *daddr,
void *saddr,
unsigned len);
/*用来在传输包之前,ARP解析完成之后,重建硬件首部*/
int (*rebuild_header)(struct sk_buff *skb);
#define HAVE_MULTICAST
/*将组播地址列表更新到网络设备中*/
void (*set_multicast_list)(struct net_device *dev);
#define HAVE_SET_MAC_ADDR
/*修改硬件地址接口,需要网络设备支持该功能*/
int (*set_mac_address)(struct net_device *dev,
void *addr);
#define HAVE_PRIVATE_IOCTL
int (*do_ioctl)(struct net_device *dev,
struct ifreq *ifr, int cmd);
#define HAVE_SET_CONFIG
int (*set_config)(struct net_device *dev,
struct ifmap *map);
#define HAVE_HEADER_CACHE
/*根据ARP查询的结果填充hh_cache结构*/
int (*hard_header_cache)(struct neighbour *neigh,
struct hh_cache *hh);
void (*header_cache_update)(struct hh_cache *hh,
struct net_device *dev,
unsigned char * haddr);
#define HAVE_CHANGE_MTU
int (*change_mtu)(struct net_device *dev,int new_mtu);
#define HAVE_TX_TIMEOUT
void (*tx_timeout) (struct net_device *dev);
void (*vlan_rx_register)(struct net_device *dev,
struct vlan_group *grp);
void (*vlan_rx_add_vid)(struct net_device *dev,
unsigned short vid);
void (*vlan_rx_kill_vid)(struct net_device *dev,
unsigned short vid);
int (*hard_header_parse)(struct sk_buff *skb,
unsigned char *haddr);
/*设置邻居子系统相关的参数*/
int (*neigh_setup)(struct net_device *dev,struct neigh_parms *);
#ifdef CONFIG_NETPOLL
/*网络设备netpoll信息块*/
struct netpoll_info *npinfo;
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/*该函数在禁止中断的情况下,要求驱动程序以轮询模式在接口上查询事件*/
void (*poll_controller)(struct net_device *dev);
#endif
/* bridge stuff */
struct net_bridge_port *br_port;
/* class/net/name entry */
struct class_device class_dev;
/* space for optional statistics and wireless sysfs groups */
struct attribute_group *sysfs_groups[3];
};
网卡驱动的注册是在e100_init_module中完成的:
/*
 * Module initialization entry point for the e100 driver.
 *
 * Prints the driver description, version and copyright banner when the
 * NETIF_MSG_DRV bit is enabled by the `debug` level, then registers
 * e100_driver with the PCI core.
 *
 * Returns whatever pci_register_driver() returns.
 */
static int __init e100_init_module(void)
{
	if (((1 << debug) - 1) & NETIF_MSG_DRV) {
		printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
		printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
	}
	return pci_register_driver(&e100_driver);
}
可见,网卡驱动也就是和一般的PCI驱动编写一样。
staticstruct pci_driver e100_driver = {
.name = DRV_NAME,
.id_table = e100_id_table,
.probe = e100_probe,
.remove = __devexit_p(e100_remove),
#ifdef CONFIG_PM
/* Power Management hooks */
.suspend = e100_suspend,
.resume = e100_resume,
#endif
.shutdown = e100_shutdown,
.err_handler = &e100_err_handler,
};
如果网络设备驱动程序被编译进内核,则将在内核启动时被初始化;如果被编译成模块,则在运行时加载模块时被初始化。无论初始化在何时发生,由驱动程序控制的网络设备都会被注册。这种情形适用于所有的总线类型,无论是由总线体系结构还是由模块初始化代码调用注册函数,结果都是一样的。PCI设备驱动程序加载后,最终会执行pci_driver->probe()函数。我们看看e100网卡的驱动注册过程:
/*
 * PCI probe callback for e100.
 *
 * Allocates an Ethernet net_device with a struct nic as private data,
 * installs the driver's net_device hooks, enables and maps the PCI
 * device, resets the hardware, loads the MAC address from the EEPROM and
 * finally registers the network device.
 *
 * @pdev: PCI device being probed.
 * @ent:  matching entry of the driver's ID table; a non-zero driver_data
 *        sets the `ich` flag on the nic.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released through the err_out_* labels, in reverse
 * order of acquisition.
 */
static int __devinit e100_probe(struct pci_dev *pdev,
	const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct nic *nic;
	int err;

	/* Allocate the device structure (net_device plus private nic). */
	if (!(netdev = alloc_etherdev(sizeof(struct nic)))) {
		if (((1 << debug) - 1) & NETIF_MSG_PROBE)
			printk(KERN_ERR PFX "Etherdev alloc failed, abort.\n");
		return -ENOMEM;
	}

	/* Install the driver's net_device operations. */
	netdev->open = e100_open;
	netdev->stop = e100_close;
	/* e100's hard_start_xmit implementation: hands packets to the hardware. */
	netdev->hard_start_xmit = e100_xmit_frame;
	netdev->get_stats = e100_get_stats;
	netdev->set_multicast_list = e100_set_multicast_list;
	netdev->set_mac_address = e100_set_mac_address;
	netdev->change_mtu = e100_change_mtu;
	netdev->do_ioctl = e100_do_ioctl;
	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
	netdev->tx_timeout = e100_tx_timeout;
	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
	netdev->poll = e100_poll;
	netdev->weight = E100_NAPI_WEIGHT;
#ifdef CONFIG_NET_POLL_CONTROLLER
	/*
	 * Needed for netpoll packet reception: this hook simulates a device
	 * interrupt and runs the interrupt handling.
	 */
	netdev->poll_controller = e100_netpoll;
#endif
	/* Temporary name (the PCI name); replaced by "eth%d" before registration. */
	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	/* Fetch the private area (struct nic) set up by alloc_etherdev(). */
	nic = netdev_priv(netdev);
	/* Initialize the nic back-pointers and message level. */
	nic->netdev = netdev;
	nic->pdev = pdev;
	nic->msg_enable = (1 << debug) - 1;
	/* Store the net_device as the PCI device's driver data. */
	pci_set_drvdata(pdev, netdev);

	/* Initialize device before it's used by a driver. Ask low-level code
	 * to enable I/O and memory. Wake up the device if it was suspended.
	 * Beware, this function can fail. */
	if ((err = pci_enable_device(pdev))) {
		DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");
		goto err_out_free_dev;
	}

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		DPRINTK(PROBE, ERR, "Cannot find proper PCI device "
			"base address, aborting.\n");
		err = -ENODEV;
		goto err_out_disable_pdev;
	}

	/* Reserve the device's resources (I/O and memory regions). */
	if ((err = pci_request_regions(pdev, DRV_NAME))) {
		DPRINTK(PROBE, ERR, "Cannot obtain PCI resources, aborting.\n");
		goto err_out_disable_pdev;
	}

	/*
	 * DMA setup: request a 32-bit DMA mask; a non-zero return means no
	 * usable DMA configuration.
	 */
	if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
		DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
		goto err_out_free_res;
	}

	SET_MODULE_OWNER(netdev);
	SET_NETDEV_DEV(netdev, &pdev->dev);

	/* Map the control/status registers (BAR 0) into kernel memory. */
	nic->csr = ioremap(pci_resource_start(pdev, 0), sizeof(struct csr));
	if (!nic->csr) {
		DPRINTK(PROBE, ERR, "Cannot map device registers, aborting.\n");
		err = -ENOMEM;
		goto err_out_free_res;
	}

	if (ent->driver_data)
		nic->flags |= ich;
	else
		nic->flags &= ~ich;

	/* Initialize the remaining nic fields to their defaults. */
	e100_get_defaults(nic);

	/* locks must be initialized before calling hw_reset */
	spin_lock_init(&nic->cb_lock);
	spin_lock_init(&nic->cmd_lock);
	spin_lock_init(&nic->mdio_lock);

	/* Reset the device before pci_set_master() in case device is in some
	 * funky state and has an interrupt pending - hint: we don't have the
	 * interrupt handler registered yet. */
	/* Device reset, done by writing the relevant registers. */
	e100_hw_reset(nic);

	/* Enable the device as a PCI bus master. */
	pci_set_master(pdev);

	/* Initialize the two software timers. */
	init_timer(&nic->watchdog);
	nic->watchdog.function = e100_watchdog;
	nic->watchdog.data = (unsigned long)nic;
	init_timer(&nic->blink_timer);
	nic->blink_timer.function = e100_blink_led;
	nic->blink_timer.data = (unsigned long)nic;

	/* Initialize the work item used for transmit-timeout handling. */
	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);

	/*
	 * Allocate driver memory.
	 * NOTE(review): the original annotation says this comes from the
	 * DMA area; e100_alloc() is not shown here -- confirm.
	 */
	if ((err = e100_alloc(nic))) {
		DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
		goto err_out_iounmap;
	}

	/* Read the card's EEPROM, which holds the MAC address. */
	if ((err = e100_eeprom_load(nic)))
		goto err_out_free;

	/* Initialize the PHY information of the nic. */
	e100_phy_init(nic);

	memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
	memcpy(netdev->perm_addr, nic->eeprom, ETH_ALEN);
	/* Check that the MAC address read from the EEPROM is well-formed. */
	if (!is_valid_ether_addr(netdev->perm_addr)) {
		DPRINTK(PROBE, ERR, "Invalid MAC address from "
			"EEPROM, aborting.\n");
		err = -EAGAIN;
		goto err_out_free;
	}

	/* Wol magic packet can be enabled from eeprom */
	if ((nic->mac >= mac_82558_D101_A4) &&
	    (nic->eeprom[eeprom_id] & eeprom_id_wol))
		nic->flags |= wol_magic;

	/* ack any pending wake events, disable PME */
	/*
	 * The second argument of pci_enable_wake() names a power state and
	 * the third switches PME# generation on (1) or off (0) for that
	 * state.  PME# is the Power Management Event signal a device sends
	 * when it wants to change its power state; each state has its own
	 * enable switch.
	 */
	err = pci_enable_wake(pdev, 0, 0);
	if (err)
		DPRINTK(PROBE, ERR, "Error clearing wake event\n");

	/* Switch the device name to the "eth%d" template before registering. */
	strcpy(netdev->name, "eth%d");
	/* Register the network device. */
	if ((err = register_netdev(netdev))) {
		DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
		goto err_out_free;
	}

	DPRINTK(PROBE, INFO, "addr 0x%llx, irq %d, "
		"MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n",
		(unsigned long long)pci_resource_start(pdev, 0), pdev->irq,
		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);

	return 0;

	/* Error unwinding: each label releases one more resource. */
err_out_free:
	e100_free(nic);
err_out_iounmap:
	iounmap(nic->csr);
err_out_free_res:
	pci_release_regions(pdev);
err_out_disable_pdev:
	pci_disable_device(pdev);
err_out_free_dev:
	pci_set_drvdata(pdev, NULL);
	free_netdev(netdev);
	return err;
}
其辅助函数:
分配网络设备结构
/*
 * Allocate an Ethernet network device.
 *
 * @sizeof_priv: size of the driver-private area to allocate along with
 *               the net_device (for e100 this is sizeof(struct nic)).
 *
 * Delegates to alloc_netdev() with the "eth%d" name template and
 * ether_setup() as the setup routine, and returns its result unchanged.
 */
struct net_device *alloc_etherdev(int sizeof_priv)
{
	struct net_device *ethdev;

	ethdev = alloc_netdev(sizeof_priv, "eth%d", ether_setup);
	return ethdev;
}
/*
 * Allocate and minimally initialize a net_device with a private area.
 *
 * @sizeof_priv: size in bytes of the driver-private area.
 * @name:        device name (or name template) copied into dev->name;
 *               must be shorter than sizeof(dev->name).
 * @setup:       callback invoked on the new device to fill in defaults.
 *
 * Returns the aligned net_device pointer, or NULL when the allocation
 * fails.
 */
struct net_device *alloc_netdev(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *))
{
	struct net_device *dev;
	void *raw;
	int total;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	/*
	 * Ensure alignment of both the device and the private area: round
	 * the net_device size up to the alignment boundary, then add the
	 * private size plus slack for aligning the start of the block.
	 */
	total = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
	total += sizeof_priv + NETDEV_ALIGN_CONST;

	/* Zeroed allocation of the whole block. */
	raw = kzalloc(total, GFP_KERNEL);
	if (raw == NULL) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	/* Round the raw pointer up to the alignment boundary. */
	dev = (struct net_device *)
		(((long)raw + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	/* Remember how many bytes lie between the raw block and dev. */
	dev->padded = (char *)dev - (char *)raw;

	/* Point priv at the private area when one was requested. */
	if (sizeof_priv != 0)
		dev->priv = netdev_priv(dev);

	/* Let the caller-supplied routine initialize the device, then name it. */
	setup(dev);
	strcpy(dev->name, name);

	return dev;
}
/*
 * Fill in the Ethernet-generic fields of a net_device.
 *
 * Passed as the setup callback when an Ethernet device structure is
 * allocated (see alloc_etherdev).
 */
void ether_setup(struct net_device *dev)
{
	/* Link-layer parameters. */
	dev->type = ARPHRD_ETHER;
	dev->addr_len = ETH_ALEN;
	dev->hard_header_len = ETH_HLEN;
	dev->mtu = ETH_DATA_LEN;
	dev->tx_queue_len = 1000;	/* Ethernet wants good queues */
	dev->flags = IFF_BROADCAST | IFF_MULTICAST;

	/* Broadcast hardware address: all ETH_ALEN bytes set to 0xFF. */
	memset(dev->broadcast, 0xFF, ETH_ALEN);

	/* Generic Ethernet header handling. */
	dev->hard_header = eth_header;
	dev->hard_header_parse = eth_header_parse;
	dev->hard_header_cache = eth_header_cache;
	dev->header_cache_update = eth_header_cache_update;
	dev->rebuild_header = eth_rebuild_header;

	/* Default MTU and MAC-address change handlers. */
	dev->change_mtu = eth_change_mtu;
	dev->set_mac_address = eth_mac_addr;
}
注册网络设备的实际操作由register_netdev(netdev)调用register_netdevice()完成
/*
 * Register a network device with the network core.
 *
 * Initializes the device's locks, runs the optional dev->init hook,
 * validates the name, assigns an interface index, rejects duplicate
 * names, sanitizes inconsistent feature combinations, registers the
 * device in sysfs, links it into the device list and the name/index
 * hash tables, and notifies the netdev notifier chain.
 *
 * The caller must hold the RTNL lock (ASSERT_RTNL).
 *
 * Returns 0 on success or a negative errno (a positive return from
 * dev->init is converted to -EIO).
 */
int register_netdevice(struct net_device *dev)
{
struct hlist_head *head;
struct hlist_node *p;
int ret;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
/*
 * Marks this function as one that may sleep.
 * NOTE(review): the original annotation says this checks whether a
 * reschedule is needed and performs it (2.6 kernel preemption); the
 * definition of might_sleep() is not shown here -- confirm.
 */
might_sleep();
/* Initialize the individual fields of the device. */
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
#endif
dev->iflink = -1;
/* Init, if this function is available */
/* If the driver supplied an init hook, call it; any failure aborts. */
if (dev->init) {
ret = dev->init(dev);
if (ret) {
if (ret > 0)
ret = -EIO;
goto out;
}
}
/* Check that the name of the device being registered is valid. */
if (!dev_valid_name(dev->name)) {
ret = -EINVAL;
goto out;
}
/*
 * Assign the device a unique index; unless init set iflink, the
 * identifier used for virtual tunnel devices defaults to that index.
 */
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
/* Check for existence of name */
/*
 * Look up the hash bucket for this name and fail with -EEXIST if a
 * device with the same name is already in it.  The device itself is
 * added to the bucket further below.
 */
head = dev_name_hash(dev->name);
hlist_for_each(p, head) {
struct net_device *d
= hlist_entry(p,struct net_device, name_hlist);
if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
ret = -EEXIST;
goto out;
}
}
/* Fix illegal SG+CSUM combinations. */
/*
 * Scatter/gather I/O is only kept when the device also offers some
 * checksum feature; per the original annotation SG is useless without
 * hardware checksum support.
 */
if ((dev->features & NETIF_F_SG) &&
!(dev->features & NETIF_F_ALL_CSUM)) {
printk(KERN_NOTICE"%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
}
/* TSO requires that SG is present as well. */
/* TSO depends on scatter/gather I/O, so it is dropped when SG is absent. */
if ((dev->features & NETIF_F_TSO) &&
!(dev->features & NETIF_F_SG)) {
printk(KERN_NOTICE"%s: Dropping NETIF_F_TSO since no SG feature.\n",
dev->name);
dev->features &= ~NETIF_F_TSO;
}
/*
 * UFO needs both NETIF_F_HW_CSUM and scatter/gather I/O; it is dropped
 * when either one is missing.
 */
if (dev->features & NETIF_F_UFO) {
if (!(dev->features & NETIF_F_HW_CSUM)) {
printk(KERN_ERR"%s: Dropping NETIF_F_UFO since no "
"NETIF_F_HW_CSUM feature.\n",
dev->name);
dev->features &= ~NETIF_F_UFO;
}
if (!(dev->features & NETIF_F_SG)) {
printk(KERN_ERR"%s: Dropping NETIF_F_UFO since no "
"NETIF_F_SG feature.\n",
dev->name);
dev->features &= ~NETIF_F_UFO;
}
}
/*
 * nil rebuild_header routine,
 * that should be never called and used as just bug trap.
 */
/* Install the default rebuild_header hook when the driver set none. */
if (!dev->rebuild_header)
dev->rebuild_header = default_rebuild_header;
/* Register the device's information in the sysfs file system. */
ret = netdev_register_sysfs(dev);
if (ret)
goto out;
/* Mark the device state: registration has completed. */
dev->reg_state = NETREG_REGISTERED;
/*
 * Default initial state at registry is that the
 * device is present.
 */
/* Set the bit indicating the device is available to the system. */
set_bit(__LINK_STATE_PRESENT, &dev->state);
/*
 * Initialize the device's queueing discipline, then, under
 * dev_base_lock, append the device to the device list and add it to
 * the name and index hash tables, taking a reference on it.
 */
dev->next = NULL;
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
*dev_tail = dev;
dev_tail = &dev->next;
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
/* Notify every kernel module interested in device registration. */
raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
ret = 0;
out:
return ret;
}
PCI驱动中的其他函数意义同其名,我们看看挂起操作e100_suspend
/*
 * PCI suspend hook for e100.
 *
 * Quiesces the interface, saves PCI state, arms or disarms wake-up
 * (PME#) for D3hot/D3cold depending on the wake-on-LAN/ASF settings,
 * then disables the device, releases its IRQ and puts it into D3hot.
 *
 * @pdev:  PCI device being suspended.
 * @state: requested power-management message (not used by this body).
 *
 * Always returns 0.
 */
static int e100_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nic *nic = netdev_priv(netdev);

	/* If the interface is up, wait for receive polling to finish. */
	if (netif_running(netdev))
		netif_poll_disable(nic->netdev);
	/* Delete the timer that monitors the device's working state. */
	del_timer_sync(&nic->watchdog);
	/*
	 * Put the driver into a state where it cannot pass data and turn
	 * off the device's queue.
	 */
	netif_carrier_off(nic->netdev);
	netif_device_detach(netdev);

	pci_save_state(pdev);

	/*
	 * The second argument of pci_enable_wake() names a power state; it
	 * is called once for PCI_D3hot and once for PCI_D3cold so that the
	 * device may (or may not) raise PME# from either state.  PME# is the
	 * Power Management Event signal a device sends when it wants to
	 * change its power state; whether it may do so is controlled by a
	 * per-state switch.  The third argument is that switch: 1 enables,
	 * 0 disables.
	 */
	if ((nic->flags & wol_magic) | e100_asf(nic)) {
		pci_enable_wake(pdev, PCI_D3hot, 1);
		pci_enable_wake(pdev, PCI_D3cold, 1);
	} else {
		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);
	}

	/* Disable the device. */
	pci_disable_device(pdev);
	/* Release the interrupt line. */
	free_irq(pdev->irq, netdev);
	/* Set the PCI power state. */
	pci_set_power_state(pdev, PCI_D3hot);

	return 0;
}
这样,网络设备的驱动框架就搭建起来了:驱动程序在模块初始化函数中注册网卡的PCI驱动,在probe函数中注册网络设备,并初始化相关数据结构和函数指针。对于特定的网卡,需要特定的数据结构来保存信息,硬件相关的操作需要按照对应网卡的约定来实现。对于e100系列网卡,数据结构nic保存了该网卡的所有信息。另外,net_device中提供的函数指针在e100_probe中做了初始化,如e100_open,依据它们的名字我们可以猜到它们的意思和用途(e100_open做网卡的打开、启动、中断的注册等操作)。这里就不再深入了,如果对它们的实现细节感兴趣,需要参看该网卡的硬件手册。
后面我们在分析上层代码中会遇到一些操作特定网卡的函数指针,在这里就能找到其实现。