首页 > 代码库 > Internet协议族
Internet协议族
Linux在2.6.32版本可支持37种协议族,每个协议族用一个net_proto_family结构实例来表示,在系统初始化时,以各协议族对应的协议族常量为下标,调用sock_register()将结构注册到全局数组net_families[NPROTO]。此外还有一个地址族的概念,地址族用地址族常量来标识,到目前为止,协议族常量和地址族常量是一一对应的,且值相同。
/include/linux/socket.h
/*
 * Address-family (AF_*) constants and their protocol-family (PF_*) aliases.
 * Every PF_* name below is defined as the AF_* name with the same suffix, so
 * the two sets always carry identical values.
 *
 * Note: values 27 and 28 are not assigned in this listing (AF_LLC is 26 and
 * the next entry, AF_CAN, is 29).
 */

/* Supported address families. */
#define AF_UNSPEC	0
#define AF_UNIX		1	/* Unix domain sockets */
#define AF_LOCAL	1	/* POSIX name for AF_UNIX */
#define AF_INET		2	/* Internet IP Protocol */
#define AF_AX25		3	/* Amateur Radio AX.25 */
#define AF_IPX		4	/* Novell IPX */
#define AF_APPLETALK	5	/* AppleTalk DDP */
#define AF_NETROM	6	/* Amateur Radio NET/ROM */
#define AF_BRIDGE	7	/* Multiprotocol bridge */
#define AF_ATMPVC	8	/* ATM PVCs */
#define AF_X25		9	/* Reserved for X.25 project */
#define AF_INET6	10	/* IP version 6 */
#define AF_ROSE		11	/* Amateur Radio X.25 PLP */
#define AF_DECnet	12	/* Reserved for DECnet project */
#define AF_NETBEUI	13	/* Reserved for 802.2LLC project*/
#define AF_SECURITY	14	/* Security callback pseudo AF */
#define AF_KEY		15	/* PF_KEY key management API */
#define AF_NETLINK	16
#define AF_ROUTE	AF_NETLINK /* Alias to emulate 4.4BSD */
#define AF_PACKET	17	/* Packet family */
#define AF_ASH		18	/* Ash */
#define AF_ECONET	19	/* Acorn Econet */
#define AF_ATMSVC	20	/* ATM SVCs */
#define AF_RDS		21	/* RDS sockets */
#define AF_SNA		22	/* Linux SNA Project (nutters!) */
#define AF_IRDA		23	/* IRDA sockets */
#define AF_PPPOX	24	/* PPPoX sockets */
#define AF_WANPIPE	25	/* Wanpipe API Sockets */
#define AF_LLC		26	/* Linux LLC */
#define AF_CAN		29	/* Controller Area Network */
#define AF_TIPC		30	/* TIPC sockets */
#define AF_BLUETOOTH	31	/* Bluetooth sockets */
#define AF_IUCV		32	/* IUCV sockets */
#define AF_RXRPC	33	/* RxRPC sockets */
#define AF_ISDN		34	/* mISDN sockets */
#define AF_PHONET	35	/* Phonet sockets */
#define AF_IEEE802154	36	/* IEEE802154 sockets */
#define AF_MAX		37	/* For now.. */

/* Protocol families, same as address families. */
#define PF_UNSPEC	AF_UNSPEC
#define PF_UNIX		AF_UNIX
#define PF_LOCAL	AF_LOCAL
#define PF_INET		AF_INET
#define PF_AX25		AF_AX25
#define PF_IPX		AF_IPX
#define PF_APPLETALK	AF_APPLETALK
#define PF_NETROM	AF_NETROM
#define PF_BRIDGE	AF_BRIDGE
#define PF_ATMPVC	AF_ATMPVC
#define PF_X25		AF_X25
#define PF_INET6	AF_INET6
#define PF_ROSE		AF_ROSE
#define PF_DECnet	AF_DECnet
#define PF_NETBEUI	AF_NETBEUI
#define PF_SECURITY	AF_SECURITY
#define PF_KEY		AF_KEY
#define PF_NETLINK	AF_NETLINK
#define PF_ROUTE	AF_ROUTE
#define PF_PACKET	AF_PACKET
#define PF_ASH		AF_ASH
#define PF_ECONET	AF_ECONET
#define PF_ATMSVC	AF_ATMSVC
#define PF_RDS		AF_RDS
#define PF_SNA		AF_SNA
#define PF_IRDA		AF_IRDA
#define PF_PPPOX	AF_PPPOX
#define PF_WANPIPE	AF_WANPIPE
#define PF_LLC		AF_LLC
#define PF_CAN		AF_CAN
#define PF_TIPC		AF_TIPC
#define PF_BLUETOOTH	AF_BLUETOOTH
#define PF_IUCV		AF_IUCV
#define PF_RXRPC	AF_RXRPC
#define PF_ISDN		AF_ISDN
#define PF_PHONET	AF_PHONET
#define PF_IEEE802154	AF_IEEE802154
#define PF_MAX		AF_MAX
对应不同的协议族,其传输层的结构和实现有着巨大的差异,因此其各自的套接口创建函数也会有很大区别,而通过net_proto_family结构屏蔽了这些区别,使得各个协议在初始化的时候,可以统一用sock_register()注册到net_families数组中。因此实际上net_proto_family结构提供了一个协议族到套接口创建之间的接口
net_proto_family
/*
 * Per-family registration record stored in net_families[] by
 * sock_register().
 */
struct net_proto_family {
	/* Family number; sock_register() uses it as the net_families[] index. */
	int family;
	/* Socket-creation callback supplied by the protocol family. */
	int (*create)(struct net *net, struct socket *sock, int protocol);
	/* Module associated with this family (presumably the provider; not used in this excerpt). */
	struct module *owner;
};

/**
 * sock_register - add a socket protocol handler
 * @ops: description of protocol
 *
 * This function is called by a protocol handler that wants to
 * advertise its address family, and have it linked into the
 * socket interface. The value ops->family corresponds to the
 * socket system call protocol family.
 *
 * Returns 0 when @ops was stored in net_families[], -ENOBUFS when
 * ops->family >= NPROTO, and -EEXIST when the slot for ops->family is
 * already occupied (the existing entry is left untouched).
 *
 * NOTE(review): the KERN_INFO "Registered" message below is printed after
 * the lock is dropped regardless of err, so it also appears on -EEXIST.
 * NOTE(review): ops->family is a signed int and only the upper bound is
 * checked here; a negative value is not rejected before indexing.
 */
int sock_register(const struct net_proto_family *ops)
{
	int err;

	/* Refuse families that do not fit in the net_families[] table. */
	if (ops->family >= NPROTO) {
		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
		return -ENOBUFS;
	}

	/* The slot test and the store are done together under net_family_lock. */
	spin_lock(&net_family_lock);
	if (net_families[ops->family])
		err = -EEXIST;
	else {
		net_families[ops->family] = ops;
		err = 0;
	}
	spin_unlock(&net_family_lock);

	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
	return err;
}
inet_protosw
inet_protosw结构在每次创建套接口时会用到,此结构也只在套接口层起作用
/* This is used to register socket interfaces for IP protocols. */
struct inet_protosw {
	/* List linkage; inet_register_protosw() chains entries into inetsw[type]. */
	struct list_head list;

	/* These two fields form the lookup key. */
	unsigned short type;		/* This is the 2nd argument to socket(2). */
	unsigned short protocol;	/* This is the L4 protocol number. */

	/* Operation tables bound to sockets of this (type, protocol) pair. */
	struct proto *prot;
	const struct proto_ops *ops;

	int capability;			/* Which (if any) capability do
					 * we need to use this socket
					 * interface?
					 */
	char no_check;			/* checksum on rcv/xmit/none? */
	unsigned char flags;		/* See INET_PROTOSW_* below. */
};
type
标识套接口的类型,对于Internet协议族共有三种类型SOCK_STREAM、SOCK_DGRAM和SOCK_RAW,与应用程序层创建套接口函数socket()的第二个参数type取值恰好对应
protocol
标识协议族中四层协议号,Internet协议族中的值包括IPPROTO_TCP、IPPROTO_UDP等
prot
套接口网络层接口。TCP为tcp_prot;UDP为udp_prot;原始套接口raw_prot。
ops
套接口传输层接口。TCP为inet_stream_ops;UDP为inet_dgram_ops;原始套接口inet_sockraw_ops。
capability
当大于零时,需要检验当前创建套接口的进程是否有这种能力。
flags
INET_PROTOSW_REUSE 标识端口是否能被重用
INET_PROTOSW_PERMANENT 标识此协议不能被替换或卸载
INET_PROTOSW_ICSK 标识是不是面向连接的套接口(inet_connection_sock)
inet_register_protosw()将inet_protosw实例注册到inetsw散列表中,而inet_unregister_protosw()可以将指定的inet_protosw实例从inetsw散列表中注销,但是注意:被标识为INET_PROTOSW_PERMANENT的inet_protosw实例不能重载或注销。
/*
 * inet_register_protosw - link a socket-interface entry into inetsw[]
 * @p: entry to register; p->type selects the list, p->protocol is the key
 *
 * Under inetsw_lock, the list inetsw[p->type] is scanned for an entry that
 * is flagged INET_PROTOSW_PERMANENT and has the same protocol number as @p.
 * If one exists the registration is refused with an error message.
 * Otherwise @p is inserted right after the last permanent entry seen (or
 * after the list head when there is none).
 *
 * Nothing is returned; both failure cases (p->type >= SOCK_MAX, or an
 * attempt to override a permanent protocol) are only reported via printk.
 */
void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	/* p->type indexes inetsw[], so it must be validated first. */
	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	answer = NULL;
	/* Insertion point defaults to the list head (no permanent entries). */
	last_perm = &inetsw[p->type];
	list_for_each(lh, &inetsw[p->type]) {
		answer = list_entry(lh, struct inet_protosw, list);

		/* Check only the non-wild match. */
		if (INET_PROTOSW_PERMANENT & answer->flags) {
			/* Leaving the loop here keeps answer non-NULL: a conflict. */
			if (protocol == answer->protocol)
				break;
			last_perm = lh;
		}

		/* Not a conflicting entry; clear so loop exit means "none found". */
		answer = NULL;
	}
	if (answer)
		goto out_permanent;

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry. This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	/* Single unlock point shared by the success and error paths. */
	spin_unlock_bh(&inetsw_lock);
	return;

out_permanent:
	printk(KERN_ERR "Attempt to override permanent protocol %d.\n", protocol);
	goto out;

out_illegal:
	printk(KERN_ERR "Ignoring attempt to register invalid socket type %d.\n", p->type);
	goto out;
}
net_protocol
net_protocol是一个非常重要的结构,定义了协议族中支持的传输层协议以及传输层的报文接收例程。此结构是网络层和传输层之间的桥梁,当网络数据报文从网络层流向传输层时,会调用此结构中的传输层协议数据报接收处理函数。
/* This is used to register protocols. */
struct net_protocol {
	/* Receive routine invoked for packets handed up to this protocol. */
	int (*handler)(struct sk_buff *skb);
	/* Error callback; takes the offending skb and a u32 info value. */
	void (*err_handler)(struct sk_buff *skb, u32 info);
	/* GSO hooks: pre-send check and segmentation of an outgoing skb. */
	int (*gso_send_check)(struct sk_buff *skb);
	struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features);
	/* GRO hooks (presumably receive-side aggregation; not described in this excerpt). */
	struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb);
	int (*gro_complete)(struct sk_buff *skb);
	/* Two single-bit flags. */
	unsigned int no_policy:1,
		     netns_ok:1;
};
int (*handler)(struct sk_buff *skb)
传输层协议数据报接收处理函数,当网络层接收IP数据报之后,根据IP数据报所指示传输层协议,调用对应传输层net_protocol结构的该例程接收报文。
void (*err_handler)(struct sk_buff *skb, u32 info)
在ICMP模块中接收到差错报文后,会解析差错报文,并根据差错报文中原始的IP首部,调用对应传输层的异常处理函数err_handler()。
int (*gso_send_check)(struct sk_buff *skb);
struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features);
GSO是网络设备支持传输层的一个功能
当GSO数据报输出时到达网络设备,如果网络设备不支持GSO的情况,则需要传输层对输出的数据报重新进行GSO分段和校验和的计算。因此需要网络层提供接口给设备层,能够访问到传输层的GSO分段和校验和的计算功能,对输出的数据报进行分段和执行校验和
unsigned int no_policy
标识在向传输层递交报文前是否跳过IPsec(xfrm)安全策略检查(而非策略路由);TCP和UDP将该位置1,策略检查由它们各自的接收例程自行完成
内核中为Internet协议定义了4个net_protocol结构实例-------icmp_protocol,udp_protocol,tcp_protocol,igmp_protocol,分别与ICMP、UDP、TCP、IGMP协议一一对应。在Internet协议族初始化时,调用inet_add_protocol()将它们注册到net_protocol结构指针数组inet_protos[MAX_INET_PROTOS]中,在系统运行过程中,随时可以用内核模块加载/卸载的方式,调用函数inet_add_protocol/inet_del_protocol将net_protocol结构实例注册到inet_protos[]数组中,或从中删除。
Internet协议族的初始化
Internet协议族的初始化函数为inet_init(),通过fs_initcall(inet_init),将inet_init()加到内核的初始化列表中,保证了此函数会在系统启动时被调用
/*
 * inet_init - boot-time initialisation of the Internet protocol family.
 *
 * Registers the tcp/udp/raw proto objects, announces the family to the
 * socket layer, adds the base net_protocol handlers, fills the inetsw[]
 * lists from inetsw_array, and then runs the per-subsystem init routines
 * (ARP, IP, TCP, UDP, UDP-Lite, ICMP, multicast routing, MIBs, proc,
 * fragments) before adding the IP packet type.
 *
 * Returns 0 on success. The only failures that are propagated are those of
 * the three proto_register() calls; in that case the protos registered so
 * far are unregistered again and the proto_register() error is returned.
 *
 * NOTE(review): the sock_register() result is explicitly discarded, and
 * inet_add_protocol()/ip_mr_init()/init_ipv4_mibs() failures are only
 * logged; icmp_init() failure panics.
 */
static int __init inet_init(void)
{
	/* Only referenced inside sizeof() by the BUILD_BUG_ON below. */
	struct sk_buff *dummy_skb;
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	/* Compile-time check that inet_skb_parm fits in the skb cb[] area. */
	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	/* Every inetsw[] list head must be initialised before entries are added. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;
	/*
	 * Error unwinding: the labels fall through, so a failure registering
	 * raw_prot unregisters udp_prot and then tcp_prot.
	 */
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}