3、套接字的实现
套接字最先是在UNIX的BSD版本实现的,所以也叫做BSD套接字,它隐藏了各个协议之间的差异,并向上提供统一的接口。Linux中实现套接字的基本结构:
3.1、BSD套接字
3.1.1、核心数据结构
为了实现BSD套接字,内核提供一个重要的数据结构struct socket,它的定义如下:
/*
 * struct socket - the BSD-level socket object.
 *
 * Carries the socket's state and type, its table of operations, and the
 * links to the associated file object and to the protocol-family specific
 * struct sock.
 */
struct socket {
socket_state state; // socket state (one of the socket_state values)
unsigned long flags;
struct proto_ops *ops; // set of operation functions for this socket
struct fasync_struct *fasync_list;
struct file *file;// every BSD socket has an inode; the file object is what associates the two
struct sock *sk; // internal socket structure, tied to the concrete protocol family (e.g. PF_INET)
wait_queue_head_t wait;
short type; // socket type, e.g. SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, SOCK_RDM, SOCK_SEQPACKET or SOCK_PACKET
unsigned char passcred;
};
/*
 * struct proto_ops - the set of BSD socket operation functions.
 *
 * Apart from the family number and the owner module pointer, every member
 * is a function pointer implementing one socket operation (release, bind,
 * connect, accept, sendmsg, recvmsg, ...). struct socket refers to such a
 * table through its ops member.
 */
struct proto_ops {
int family;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
struct sockaddr *myaddr,
int sockaddr_len);
int (*connect) (struct socket *sock,
struct sockaddr *vaddr,
int sockaddr_len, int flags);
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
struct socket *newsock, int flags);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
unsigned int (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
/* The option value (and, for getsockopt, its length) are __user pointers. */
int (*setsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
int (*sendmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len);
int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len,
int flags);
int (*mmap) (struct file *file, struct socket *sock,
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
};
/*
 * socket_state - connection state of a BSD socket.
 * The numeric values are spelled out; they run consecutively from 0.
 */
typedef enum {
	SS_FREE          = 0,	/* slot is not allocated */
	SS_UNCONNECTED   = 1,	/* not connected to any socket */
	SS_CONNECTING    = 2,	/* connection is being established */
	SS_CONNECTED     = 3,	/* connected to a socket */
	SS_DISCONNECTING = 4	/* connection is being torn down */
} socket_state;
3.1.2、BSD套接字初始化
/*
 * sock_init - initialise the BSD socket layer.
 *
 * Empties the protocol-family table, initialises the sock cache (and the
 * skbuff cache when SLAB_SKB is defined) and the inode cache, then
 * registers and mounts the sockfs filesystem. When CONFIG_NETFILTER is
 * defined, netfilter is initialised as the final step.
 */
void __init sock_init(void)
{
	int family = 0;

	/* No address (protocol) family is registered yet: clear every slot. */
	while (family < NPROTO) {
		net_families[family] = NULL;
		family++;
	}

	/* Allocate the sock SLAB cache. */
	sk_init();

#ifdef SLAB_SKB
	/* Allocate the skbuff SLAB cache. */
	skb_init();
#endif

	/* Set up the inode cache. */
	init_inodecache();

	/* Register the sockfs filesystem type ... */
	register_filesystem(&sock_fs_type);
	/* ... and mount it, remembering the mount in sock_mnt. */
	sock_mnt = kern_mount(&sock_fs_type);

	/*
	 * The real protocol initialisation is not done here; it is
	 * performed when do_initcalls is run.
	 */

#ifdef CONFIG_NETFILTER
	netfilter_init();
#endif
}
// net/socket.c
// Mount point of the sockfs filesystem (assigned from kern_mount() in sock_init)
static struct vfsmount *sock_mnt;
// The sockfs filesystem type
static struct file_system_type sock_fs_type = {
.name = "sockfs",
.get_sb = sockfs_get_sb,
.kill_sb = kill_anon_super,
};
// Address-family and protocol information: an array of NPROTO pointers
static struct net_proto_family *net_families[NPROTO];
sock_init在系统初始化时被调用:
3.1.3、BSD套接字的系统调用
实际上,在i386等体系结构上,Linux内核只提供了一个与套接字相关的系统调用,即sys_socketcall,应用程序的所有套接字调用都会映射到这个系统调用上,再由参数call区分具体的操作。
/*
 * sys_socketcall - single entry point that multiplexes the socket calls.
 *
 * @call: selector of the socket operation; must lie in 1..SYS_RECVMSG.
 * @args: user-space pointer to the array of arguments for that operation.
 *
 * Copies the arguments for the selected operation from user space and
 * forwards them to the matching sys_* function.
 *
 * Returns the result of the invoked function, -EINVAL when @call is out of
 * range, or -EFAULT when the arguments cannot be copied from user space.
 */
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long kargs[6];
	int ret = -EINVAL;

	if (call > SYS_RECVMSG || call < 1)
		return -EINVAL;

	/*
	 * Fetch the arguments from user space; nargs[call] is the copy length
	 * for this call. copy_from_user should be SMP safe.
	 */
	if (copy_from_user(kargs, args, nargs[call]))
		return -EFAULT;

	/* Dispatch to the function that implements the requested call. */
	switch (call) {
	case SYS_SOCKET:
		ret = sys_socket(kargs[0], kargs[1], kargs[2]);
		break;
	case SYS_BIND:
		ret = sys_bind(kargs[0], (struct sockaddr __user *)kargs[1],
			       kargs[2]);
		break;
	case SYS_CONNECT:
		ret = sys_connect(kargs[0], (struct sockaddr __user *)kargs[1],
				  kargs[2]);
		break;
	case SYS_LISTEN:
		ret = sys_listen(kargs[0], kargs[1]);
		break;
	case SYS_ACCEPT:
		ret = sys_accept(kargs[0], (struct sockaddr __user *)kargs[1],
				 (int __user *)kargs[2]);
		break;
	case SYS_GETSOCKNAME:
		ret = sys_getsockname(kargs[0],
				      (struct sockaddr __user *)kargs[1],
				      (int __user *)kargs[2]);
		break;
	case SYS_GETPEERNAME:
		ret = sys_getpeername(kargs[0],
				      (struct sockaddr __user *)kargs[1],
				      (int __user *)kargs[2]);
		break;
	case SYS_SOCKETPAIR:
		ret = sys_socketpair(kargs[0], kargs[1], kargs[2],
				     (int __user *)kargs[3]);
		break;
	case SYS_SEND:
		ret = sys_send(kargs[0], (void __user *)kargs[1], kargs[2],
			       kargs[3]);
		break;
	case SYS_SENDTO:
		ret = sys_sendto(kargs[0], (void __user *)kargs[1], kargs[2],
				 kargs[3], (struct sockaddr __user *)kargs[4],
				 kargs[5]);
		break;
	case SYS_RECV:
		ret = sys_recv(kargs[0], (void __user *)kargs[1], kargs[2],
			       kargs[3]);
		break;
	case SYS_RECVFROM:
		ret = sys_recvfrom(kargs[0], (void __user *)kargs[1], kargs[2],
				   kargs[3], (struct sockaddr __user *)kargs[4],
				   (int __user *)kargs[5]);
		break;
	case SYS_SHUTDOWN:
		ret = sys_shutdown(kargs[0], kargs[1]);
		break;
	case SYS_SETSOCKOPT:
		ret = sys_setsockopt(kargs[0], kargs[1], kargs[2],
				     (char __user *)kargs[3], kargs[4]);
		break;
	case SYS_GETSOCKOPT:
		ret = sys_getsockopt(kargs[0], kargs[1], kargs[2],
				     (char __user *)kargs[3],
				     (int __user *)kargs[4]);
		break;
	case SYS_SENDMSG:
		ret = sys_sendmsg(kargs[0], (struct msghdr __user *)kargs[1],
				  kargs[2]);
		break;
	case SYS_RECVMSG:
		ret = sys_recvmsg(kargs[0], (struct msghdr __user *)kargs[1],
				  kargs[2]);
		break;
	default:
		/* No matching call: ret still holds -EINVAL. */
		break;
	}

	return ret;
}
// include/asm/unistd.h
#define __NR_socketcall 102 // system call number of socketcall
下面来看一下sys_socket的实现:
Code
3.2、INET套接字
INET套接字就是支持 Internet 地址族的套接字,它位于TCP协议之上, BSD套接字之下,如下:
3.2.1、数据结构
Code
inet_init()函数:
Code
sock_register()函数:
Code
inet_create()函数
Code