本文分析基于Linux Kernel 1.2.13
原创作品,转载请标明http://blog.csdn.net/yming0221/article/details/7547826
更多请查看专栏,地址http://blog.csdn.net/column/details/linux-kernel-net.html
作者:闫明
注:标题中的“(上)”、“(下)”表示分析过程所依据的数据包传递方向:“(上)”表示从底层向上层分析,“(下)”表示从上层向下层分析。
下面是发送数据的流程:
应用层发送数据包的入口函数是BSD socket层的sock_write()函数,在分析该函数之前,先分析下socket的创建,系统调用sys_socket()对应的BSD socket层函数为sock_socket()
sock_socket()函数
- /*
- * Perform the socket system call. we locate the appropriate
- * family, then create a fresh socket.
- *
- * family:   protocol family, matched against pops[i]->family
- * type:     socket type; must be SOCK_STREAM, SOCK_DGRAM,
- *           SOCK_SEQPACKET, SOCK_RAW or SOCK_PACKET
- * protocol: protocol number; must not be negative
- *
- * Returns the new file descriptor on success. On failure returns
- * -EINVAL (no matching family, bad type/protocol, or get_fd() failed),
- * -ENOSR (sock_alloc() returned NULL), or the negative value returned
- * by the family's create() operation.
- */
- static int sock_socket(int family, int type, int protocol)
- {
- int i, fd;
- struct socket *sock;
- struct proto_ops *ops;
- /* Locate the correct protocol family. */
- for (i = 0; i < NPROTO; ++i) //search pops[] for the matching protocol family
- {
- if (pops[i] == NULL) continue;
- if (pops[i]->family == family)
- break;
- }
- if (i == NPROTO) //no match found, return an error
- {
- return -EINVAL;
- }
- ops = pops[i];//point at the operations table of this protocol family
- /*
- * Check that this is a type that we know how to manipulate and
- * the protocol makes sense here. The family can still reject the
- * protocol later.
- */
- if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
- type != SOCK_SEQPACKET && type != SOCK_RAW &&
- type != SOCK_PACKET) || protocol < 0)
- return(-EINVAL);
- /*
- * Allocate the socket and allow the family to set things up. if
- * the protocol is 0, the family is instructed to select an appropriate
- * default.
- */
- if (!(sock = sock_alloc())) //obtain a socket; sock_alloc() has already done part of its initialization
- {
- printk("NET: sock_socket: no more sockets\n");
- return(-ENOSR); /* Was: EAGAIN, but we are out of
- system resources! */
- }
- sock->type = type;
- sock->ops = ops;
- if ((i = sock->ops->create(sock, protocol)) < 0) //call the family's create op; for the INET family this is inet_create(), which builds the INET-layer socket (struct sock)
- {
- sock_release(sock);
- return(i);
- }
- if ((fd = get_fd(SOCK_INODE(sock))) < 0) //allocate a file descriptor for the inode that belongs to this socket
- {
- sock_release(sock);
- return(-EINVAL);
- }
- return(fd);
- }
该函数的大体功能:
1、分配socket结构和sock结构,分别用于BSD socket层和INET socket层
2、分配inode和file结构,用于文件操作
3、返回文件描述符,供应用程序使用
其中初始化分配一个socket的方法如下:
- /*
- * Allocate a socket.
- *
- * Takes an empty inode from get_empty_inode() and uses the socket
- * embedded in it (inode->u.socket_i). The socket is returned in state
- * SS_UNCONNECTED with flags cleared and ops/data/conn/iconn/next/
- * fasync_list set to NULL; sockets_in_use is incremented.
- *
- * Returns the socket, or NULL if no inode could be obtained.
- */
- struct socket *sock_alloc(void)
- {
- struct inode * inode;
- struct socket * sock;
- inode = get_empty_inode();//get an empty inode
- if (!inode)
- return NULL;
- //fill in the relevant inode fields
- inode->i_mode = S_IFSOCK;
- inode->i_sock = 1;
- inode->i_uid = current->uid;
- inode->i_gid = current->gid;
- sock = &inode->u.socket_i;//the socket lives inside the inode, so inode and socket correspond one-to-one
- sock->state = SS_UNCONNECTED;
- sock->flags = 0;
- sock->ops = NULL;
- sock->data = NULL;
- sock->conn = NULL;
- sock->iconn = NULL;
- sock->next = NULL;
- sock->wait = &inode->i_wait;
- sock->inode = inode; /* "backlink": we could use pointer arithmetic instead */
- sock->fasync_list = NULL;
- sockets_in_use++;
- return sock;
- }
sock_alloc()执行完后,sock_socket()接着通过sock->ops->create()调用INET层的inet_create()函数
- /*
- * Create an inet socket.
- *
- * FIXME: Gcc would generate much better code if we set the parameters
- * up in in-memory structure order. Gcc68K even more so
- *
- * NOTE: this listing is abridged; the runs of dots mark code that the
- * article left out.
- *
- * Visible return values: -ENOBUFS if kmalloc() fails,
- * -EPROTONOSUPPORT for SOCK_DGRAM with a non-zero protocol other than
- * IPPROTO_UDP, -ESOCKTNOSUPPORT for an unhandled socket type, the
- * non-zero result of sk->prot->init() if it fails, otherwise 0.
- */
- //create the inet socket, i.e. the struct sock
- static int inet_create(struct socket *sock, int protocol)
- {
- struct sock *sk;
- struct proto *prot;
- int err;
- sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);//allocate memory for the sock
- if (sk == NULL)
- return(-ENOBUFS);
- sk->num = 0;
- sk->reuse = 0;
- switch(sock->type)
- {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- .................
- case SOCK_DGRAM:
- if (protocol && protocol != IPPROTO_UDP)
- {
- kfree_s((void *)sk, sizeof(*sk));
- return(-EPROTONOSUPPORT);
- }
- protocol = IPPROTO_UDP;
- sk->no_check = UDP_NO_CHECK;
- prot=&udp_prot;//point prot at the UDP protocol definition
- break;
- case SOCK_RAW:
- .........................
- break;
- case SOCK_PACKET:
- ..........................
- break;
- default:
- kfree_s((void *)sk, sizeof(*sk));
- return(-ESOCKTNOSUPPORT);
- }
- sk->socket = sock;//back-pointer: shows how sock and socket correspond
- .................
- sk->type = sock->type;
- sk->stamp.tv_sec=0;
- sk->protocol = protocol;
- sk->wmem_alloc = 0;
- sk->rmem_alloc = 0;
- sk->sndbuf = SK_WMEM_MAX;
- sk->rcvbuf = SK_RMEM_MAX;
- ......................................//initialization of the sock
- /* this is how many unacked bytes we will accept for this socket. */
- sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
- /* how many packets we should send before forcing an ack.
- if this is set to zero it is the same as sk->delay_acks = 0 */
- sk->max_ack_backlog = 0;
- sk->inuse = 0;
- sk->delay_acks = 0;
- skb_queue_head_init(&sk->write_queue);
- skb_queue_head_init(&sk->receive_queue);
- sk->mtu = 576;
- sk->prot = prot;
- sk->sleep = sock->wait;
- sk->daddr = 0;//remote address
- sk->saddr = 0 /* ip_my_addr() */;//local address
- sk->err = 0;
- sk->next = NULL;
- sk->pair = NULL;
- sk->send_tail = NULL;//tail of the send list
- sk->send_head = NULL;//head of the send list
- ..............................
- skb_queue_head_init(&sk->back_log);//initialize the doubly linked list
- ..................................
- sk->ip_tos=0;
- sk->ip_ttl=64;
- ...................................
- if (sk->num) //local port number is non-zero
- {
- /*
- * It assumes that any protocol which allows
- * the user to assign a number at socket
- * creation time automatically
- * shares.
- */
- put_sock(sk->num, sk);//add the sock to the sock table, keyed by its port number
- sk->dummy_th.source = ntohs(sk->num);
- }
- if (sk->prot->init) //the init function is NULL for UDP
- {
- err = sk->prot->init(sk);
- if (err != 0)
- {
- destroy_sock(sk);
- return(err);
- }
- }
- return(0);
- }
最后由get_fd()函数分配并返回文件描述符
- /*
- * Obtains the first available file descriptor and sets it up for use.
- *
- * inode: inode to attach to the new file structure; may be NULL, in
- *        which case no reference count is bumped.
- *
- * Returns the descriptor index on success, or -1 if no file structure
- * is available or every slot below NR_OPEN is already taken.
- */
- //build a file structure for the given inode and return its descriptor for use by the application
- static int get_fd(struct inode *inode)
- {
- int fd;
- struct file *file;
- /*
- * Find a file descriptor suitable for return to the user.
- */
- file = get_empty_filp();
- if (!file)
- return(-1);
- for (fd = 0; fd < NR_OPEN; ++fd)
- if (!current->files->fd[fd])
- break;
- if (fd == NR_OPEN)
- {
- file->f_count = 0; /* no free slot: hand the file structure back */
- return(-1);
- }
- FD_CLR(fd, &current->files->close_on_exec); /* clear this descriptor's close-on-exec bit */
- current->files->fd[fd] = file;
- file->f_op = &socket_file_ops;
- file->f_mode = 3;
- file->f_flags = O_RDWR;
- file->f_count = 1;
- file->f_inode = inode;
- if (inode)
- inode->i_count++; /* the file structure now references the inode */
- file->f_pos = 0;
- return(fd);
- }
下面开始正式看发送数据的最顶层函数--sock_write()函数
- /*
- * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is
- * readable by the user process.
- *
- * Returns -EBADF if no socket is found for the inode, -EINVAL if the
- * socket has SO_ACCEPTCON set or size is negative, 0 if size is 0, the
- * negative result of verify_area() if the check fails, and otherwise
- * whatever the socket's write operation returns.
- */
- static int sock_write(struct inode *inode, struct file *file, char *ubuf, int size)
- {
- struct socket *sock;
- int err;
- if (!(sock = socki_lookup(inode))) //look up the socket that corresponds to this inode
- {
- printk("NET: sock_write: can't find socket for inode!\n");
- return(-EBADF);
- }
- if (sock->flags & SO_ACCEPTCON)
- return(-EINVAL);
- if(size<0)
- return -EINVAL;
- if(size==0)
- return 0;
- if ((err=verify_area(VERIFY_READ,ubuf,size))<0)
- return err;
- return(sock->ops->write(sock, ubuf, size,(file->f_flags & O_NONBLOCK)));//for INET sockets this calls inet_write(); the last argument is the non-blocking flag
- }
inet_write()函数
- static int inet_write(struct socket *sock, char *ubuf, int size, int noblock)
- {
- return inet_send(sock,ubuf,size,noblock,0); //thin wrapper: forwards to inet_send() with flags = 0
- }
inet_send()函数
- static int inet_send(struct socket *sock, void *ubuf, int size, int noblock,
- unsigned flags)
- {
- struct sock *sk = (struct sock *) sock->data;//fetch the sock pointer stored in the socket
- if (sk->shutdown & SEND_SHUTDOWN) //sending has been shut down: signal SIGPIPE and fail with -EPIPE
- {
- send_sig(SIGPIPE, current, 1);
- return(-EPIPE);
- }
- if(sk->err) //a pending error on the sock is reported instead of sending
- return inet_error(sk);
- /* We may need to bind the socket. */
- if(inet_autobind(sk)!=0)//auto-assign a local port and add sk to the sock table under that port
- return(-EAGAIN);
- return(sk->prot->write(sk, (unsigned char *) ubuf, size, noblock, flags));//for UDP this calls udp_write()
- }
这样系统就会调用传输层(还是以UDP为例)的函数udp_write()来发送数据,这样数据就从应用层到了传输层。下篇分析传输层向网络层的数据传输。