iproute2 - ip 命令源码分析

时间:2024-03-12 13:38:02

 

参考文档

https://en.wikipedia.org/wiki/Iproute2

https://en.wikipedia.org/wiki/Netlink

 

查看Makefile

IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
    rtm_map.o iptunnel.o ip6tunnel.o tunnel.o ipneigh.o ipntable.o iplink.o \
    ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o iptuntap.o iptoken.o \
    ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \
    iplink_vlan.o link_veth.o link_gre.o iplink_can.o \
    iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o \
    iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
    link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
    iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
    iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o

RTMONOBJ=rtmon.o

 

make后输出

ip
    CC       ip.o
    CC       ipaddress.o
    CC       ipaddrlabel.o
    CC       iproute.o
    CC       iprule.o
    CC       ipnetns.o
    CC       rtm_map.o
    CC       iptunnel.o
    CC       ip6tunnel.o
    CC       tunnel.o
    CC       ipneigh.o
    CC       ipntable.o
    CC       iplink.o
    CC       ipmaddr.o
    CC       ipmonitor.o
    CC       ipmroute.o
    CC       ipprefix.o
    CC       iptuntap.o
    CC       iptoken.o
    CC       ipxfrm.o
    CC       xfrm_state.o
    CC       xfrm_policy.o
    CC       xfrm_monitor.o
    CC       iplink_vlan.o
    CC       link_veth.o
    CC       link_gre.o
    CC       iplink_can.o
    CC       iplink_macvlan.o
    CC       ipl2tp.o
    CC       link_vti.o
    CC       link_vti6.o
    CC       iplink_vxlan.o
    CC       tcp_metrics.o
    CC       iplink_ipoib.o
    CC       ipnetconf.o
    CC       link_ip6tnl.o
    CC       link_iptnl.o
    CC       link_gre6.o
    CC       iplink_bond.o
    CC       iplink_bond_slave.o
    CC       iplink_hsr.o
    CC       iplink_bridge.o
    CC       iplink_bridge_slave.o
    CC       ipfou.o
    CC       iplink_ipvlan.o
    CC       iplink_geneve.o
    CC       iplink_vrf.o
    CC       iproute_lwtunnel.o
    CC       ipmacsec.o
    CC       ipila.o
    LINK     ip
    CC       rtmon.o
    LINK     rtmon

 

ip.c 文件主要流程

// ip route add 127.0.0/8 via 172.16.17.2
int
main(int argc, char **argv) { char *basename; // 声明数组指针 char *batch_file = NULL; basename = strrchr(argv[0], \'/\'); // strrchr函数从后向前解析第一个输入参数 if (basename == NULL)         basename = argv[0]; else basename++; // basename 为第一个参数的值ip

argc = 6
**argv=[
"ip"
"route"
"add"
"127.0.0/8"
"via"
"172.16.17.2"
]

while 参数个数 > 1
将第2个参数(argv[1])赋值给opt, 判断是否是可选项
如果opt的值是 "--", 则 argc 减1、argv 后移一位(例如 argc 由 6 变为 5), 结束循环
 如果opt[0] != '-', 结束循环
如果opt[1] == '-', opt指针+1 (即 --option 按 -option 处理)
 
如果是 -loops 选项,调用 atoi函数
 或如果是 family 选项, preferred_family = read_family(argv[1]) 解析出的协议族 (结果为 AF_UNSPEC 时报错)
 或如果是 4 选项, preferred_family = AF_INET
或如果是 6 选项 preferred_family = AF_INET6
或如果是 0 选项 preferred_family = AF_PACKET;

或如果是 I 选项 preferred_family = AF_IPX;
或如果是 D 选项 preferred_family = AF_DECnet;
或如果是 M 选项 preferred_family = AF_MPLS;
或如果是 B 选项 preferred_family = AF_BRIDGE
或如果是 human 选项 ++human_readable ??
或如果是 iec 选项 ++use_iec; ??
或如果是 stats 选项 ++show_stats ??
或如果是 details 选项 ++show_details ??
或如果是 resolve 选项 ++resolve_hosts
或如果是 oneline 选项 ++oneline ??
或如果是 timestamp 选项 ++timestamp ??
或如果是 tshort 选项 timestamp 和 timestamp_short 都+1
或如果是 -Version 选项 打印SNAPSHOT 变量
或如果是 force 选项 ++force
或如果是 batch 选项 将下一个参数(argv[1])的值赋值给 指针 batch_file
或如果是 brief 选项 ++brief
或如果是 json 选项 ++json
或如果是 rcvbuf 选项 调用 get_unsigned(&size, argv[1], 0) 获得bufsize的大小
或如果是 color 选项 调用 enable_color() 函数
或如果是 help 选项 调用 usage() 函数
或如果是 netns 选项 调用 netns_switch(argv[1]) 函数, 返回非0则 exit(-1)
或如果是 all 选项 do_all = true
或者 打印错误信息
 
argc 参数个数减1, argv 指针后移一位(+1)

判断循环while循环参数个数是否大于1

执行 rtnl_open(&rth, 0) 函数 初始化信息

如果 argc 的参数 > 1时
do_cmd(argv[1], argc-1, argv+1)



while (argc > 1) { char *opt = argv[1]; if (strcmp(opt, "--") == 0) { argc--; argv++; break; } if (opt[0] != \'-\') break; if (opt[1] == \'-\') opt++; if (matches(opt, "-loops") == 0) { argc--; argv++; if (argc <= 1) usage(); max_flush_loops = atoi(argv[1]); } else if (matches(opt, "-family") == 0) { argc--; argv++; if (argc <= 1) usage(); if (strcmp(argv[1], "help") == 0) usage(); else preferred_family = read_family(argv[1]); if (preferred_family == AF_UNSPEC) invarg("invalid protocol family", argv[1]); } else if (strcmp(opt, "-4") == 0) { preferred_family = AF_INET; } else if (strcmp(opt, "-6") == 0) { preferred_family = AF_INET6; } else if (strcmp(opt, "-0") == 0) { preferred_family = AF_PACKET; } else if (strcmp(opt, "-I") == 0) { preferred_family = AF_IPX; } else if (strcmp(opt, "-D") == 0) { preferred_family = AF_DECnet; } else if (strcmp(opt, "-M") == 0) { preferred_family = AF_MPLS; } else if (strcmp(opt, "-B") == 0) { preferred_family = AF_BRIDGE; } else if (matches(opt, "-human") == 0 || matches(opt, "-human-readable") == 0) { ++human_readable; } else if (matches(opt, "-iec") == 0) { ++use_iec; } else if (matches(opt, "-stats") == 0 || matches(opt, "-statistics") == 0) { ++show_stats; } else if (matches(opt, "-details") == 0) { ++show_details; } else if (matches(opt, "-resolve") == 0) { ++resolve_hosts; } else if (matches(opt, "-oneline") == 0) { ++oneline; } else if (matches(opt, "-timestamp") == 0) { ++timestamp; } else if (matches(opt, "-tshort") == 0) { ++timestamp; ++timestamp_short; #if 0 } else if (matches(opt, "-numeric") == 0) { rtnl_names_numeric++; #endif } else if (matches(opt, "-Version") == 0) { printf("ip utility, iproute2-ss%s\n", SNAPSHOT); exit(0); } else if (matches(opt, "-force") == 0) { ++force; } else if (matches(opt, "-batch") == 0) { argc--; argv++; if (argc <= 1) usage(); batch_file = argv[1]; } else if (matches(opt, "-brief") == 0) { ++brief; } else if (matches(opt, "-json") == 0) { ++json; } else if (matches(opt, "-rcvbuf") == 
0) { unsigned int size; argc--; argv++; if (argc <= 1) usage(); if (get_unsigned(&size, argv[1], 0)) { fprintf(stderr, "Invalid rcvbuf size \'%s\'\n", argv[1]); exit(-1); } rcvbuf = size; } else if (matches(opt, "-color") == 0) { enable_color(); } else if (matches(opt, "-help") == 0) { usage(); } else if (matches(opt, "-netns") == 0) { NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); } else if (matches(opt, "-all") == 0) { do_all = true; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt); exit(-1); } argc--; argv++; } _SL_ = oneline ? "\\" : "\n"; if (json) check_if_color_enabled(); if (batch_file) return batch(batch_file); if (rtnl_open(&rth, 0) < 0) exit(1); if (strlen(basename) > 2) return do_cmd(basename+2, argc, argv); if (argc > 1) return do_cmd(argv[1], argc-1, argv+1); rtnl_close(&rth); usage(); }

 

初始化函数 rtnl_open(&rth, 0)

第一个参数  rth 的地址, rth 是 rtnl_handle 结构,定义在 include/libnetlink.h 头文件中

/* State kept for one rtnetlink socket; filled in by rtnl_open(). */
struct rtnl_handle {
    int            fd;    // socket descriptor obtained when the netlink socket is opened
    struct sockaddr_nl    local;  // local netlink address
    struct sockaddr_nl    peer;  // peer (remote end) netlink address
    __u32            seq;     // 32-bit sequence number
    __u32            dump;  // 32-bit field; its exact use is not shown in this excerpt
    int            proto;  // protocol number
    FILE               *dump_fp;  // FILE stream pointer (a stream handle, not a file name)
    int            flags; // flag bits
};

 

rtnl_open 函数是libnetlink函数库中的函数 man 3 libnetlink

#include <asm/types.h>
#include <libnetlink.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/* 打开一个rtnetlink socket接口,并且将状态信息保存到rtnl_handle数据结构中 */
int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
第一个参数: rtnl_handle 是即将要保存的状态信息
第二个参数: subscriptions 是要订阅的 rtnetlink 组播组位图(传 0 表示不订阅任何组播组)
成功返回0, 失败返回负整数

 

调用do_cmd函数

do_cmd(argv[1], argc-1, argv+1)

第一个参数 argv[1] 是 对象(OBJECT)名称

第二个参数 argc-1 是剩余参数个数, 第三个参数 argv+1 是指针后移

 

/*
 * Dispatch an "ip" object name to its handler.
 *
 * Walks the cmds[] table in order and calls the handler of the first entry
 * for which matches(argv0, entry->cmd) returns 0, passing it the argument
 * vector with one leading element dropped.
 *
 * argv0: object name typed by the user (e.g. "route").
 * argc:  number of entries in argv.
 * argv:  argument vector.
 *
 * Returns the negated return value of the selected handler, or EXIT_FAILURE
 * after printing a diagnostic to stderr when no table entry matches.
 */
static int do_cmd(const char *argv0, int argc, char **argv)
{
    const struct cmd *entry = cmds;

    /* The table is terminated by an entry whose cmd pointer is null. */
    while (entry->cmd) {
        if (matches(argv0, entry->cmd) == 0) {
            int rc = entry->func(argc - 1, argv + 1);

            return -rc;
        }
        entry++;
    }

    fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv0);
    return EXIT_FAILURE;
}

 

结构cmd命令

/*
 * Object-name dispatch table used by do_cmd().
 *
 * Each entry pairs an object name with the handler invoked for it.  The
 * table is scanned from the top and the first matching entry wins, so the
 * order of entries is significant and must be preserved.  The final
 * all-zero entry terminates the scan.
 */
static const struct cmd {
    const char *cmd;                    /* object name */
    int (*func)(int argc, char **argv); /* handler for that object */
} cmds[] = {
    { .cmd = "address",     .func = do_ipaddr },
    { .cmd = "addrlabel",   .func = do_ipaddrlabel },
    { .cmd = "maddress",    .func = do_multiaddr },
    { .cmd = "route",       .func = do_iproute },
    { .cmd = "rule",        .func = do_iprule },
    { .cmd = "neighbor",    .func = do_ipneigh },
    { .cmd = "neighbour",   .func = do_ipneigh },
    { .cmd = "ntable",      .func = do_ipntable },
    { .cmd = "ntbl",        .func = do_ipntable },
    { .cmd = "link",        .func = do_iplink },
    { .cmd = "l2tp",        .func = do_ipl2tp },
    { .cmd = "fou",         .func = do_ipfou },
    { .cmd = "ila",         .func = do_ipila },
    { .cmd = "macsec",      .func = do_ipmacsec },
    { .cmd = "tunnel",      .func = do_iptunnel },
    { .cmd = "tunl",        .func = do_iptunnel },
    { .cmd = "tuntap",      .func = do_iptuntap },
    { .cmd = "tap",         .func = do_iptuntap },
    { .cmd = "token",       .func = do_iptoken },
    { .cmd = "tcpmetrics",  .func = do_tcp_metrics },
    { .cmd = "tcp_metrics", .func = do_tcp_metrics },
    { .cmd = "monitor",     .func = do_ipmonitor },
    { .cmd = "xfrm",        .func = do_xfrm },
    { .cmd = "mroute",      .func = do_multiroute },
    { .cmd = "mrule",       .func = do_multirule },
    { .cmd = "netns",       .func = do_netns },
    { .cmd = "netconf",     .func = do_ipnetconf },
    { .cmd = "vrf",         .func = do_ipvrf },
    { .cmd = "sr",          .func = do_seg6 },
    { .cmd = "help",        .func = do_help },
    { 0 }
};

 

 

 

 

命令行一般格式

ip [ OPTIONS ] OBJECT { COMMAND [ ARGUMENTS ] }

 

 

一、OPTIONS 选项

说明:option 以 - 或 -- 开头

 

-V, -Version  打印版本信息且退出, 在SNAPSHOT.h头文件中定义

 

 

二、OBJECT 对象

OBJECT := { link | address | addrlabel | route | rule | neigh |

      ntable | tunnel | tuntap | maddress | mroute | mrule |

      monitor | xfrm | netns | l2tp | tcp_metrics | token | macsec }

 

 

link 对象表示网络设备

 

route 对象 表示对路由表进行操作

路由表选路流程:

 1. 根据前缀最长匹配

 2. 根据TOS字段匹配

 3. 根据优先级来匹配

 4. 如果还有,则第一条匹配

 

添加路由

ip route add 10.0.0/24 via 193.233.7.65
ip ro chg 10.0.0/24 via 193.233.7.65 dev dummy
ip route add nat 192.203.80.142 via 193.233.7.83

 

 

ipaddress.c 地址模块解析

 

函数一:

参数:
argc是整型变量,参数个数
**argv是参数列表
flush 是整型变量, 非0表示执行flush(清除地址), 0表示仅列出地址

全局变量

/*
 * File-scope selection criteria for address listing/flushing.  Populated
 * from the command line by ipaddr_list_or_flush() and consulted when
 * deciding which links and addresses to show or flush.
 */
static struct
{
  int ifindex;   // interface index; set from the "dev" name via ll_name_to_index()
  int family;    // address family; falls back to preferred_family when AF_UNSPEC
  int oneline;   // one-line output mode
  int showqueue; // show queue information; set to 1 by ipaddr_list_or_flush()
  inet_prefix pfx;      // prefix given with "to"; addresses are matched against it
  int scope, scopemask; // wanted scope and the mask of scope bits that are compared
  int flags, flagmask;  // wanted IFA_F_* flag values and the mask of flags that are compared
  int up;       // interface state: set to 1 by the "up" keyword
  char *label;          // pattern given with "label"; compared using fnmatch()
  int flushed;          // reset to 0 before each flush round; reported as the number of addresses deleted
  char *flushb;         // flush buffer (points at a local array while flushing)
  int flushp;           // position in flushb; starts at 0
  int flushe;           // size of flushb in bytes
  int group;            // value parsed from the "group" keyword by rtnl_group_a2n()
} filter;

 

extern struct rtnl_handle rth;

 

rtnl_handle 定义在库文件中

/*
 * Abridged view of the rtnetlink socket state; the fuller listing earlier
 * in this document also shows the proto, dump_fp and flags members.
 */
struct rtnl_handle
{
  int fd;                    // socket descriptor (an int, not a FILE pointer)
  struct sockaddr_nl local;  // local netlink address
  struct sockaddr_nl peer;   // peer (remote end) netlink address
  __u32 seq;                 // sequence number
  __u32 dump;                // 32-bit field; its exact use is not shown in this excerpt
};

 

调用方法
ipaddr_list_or_flush(0, NULL, 0);


static
int ipaddr_list_or_flush(int argc, char **argv, int flush) { struct nlmsg_list *linfo = NULL; struct nlmsg_list *ainfo = NULL; struct nlmsg_list *l, *n; char *filter_dev = NULL; int no_link = 0;
   // 第一步: 将filter结构初始化 ipaddr_reset_filter(oneline); filter.showqueue
= 1; if (filter.family == AF_UNSPEC) filter.family = preferred_family;
  // filter.group
= INIT_NETDEV_GROUP; if (flush) { if (argc <= 0) { fprintf(stderr, "Flush requires arguments.\n"); return -1; } if (filter.family == AF_PACKET) { fprintf(stderr, "Cannot flush link addresses.\n"); return -1; } } while (argc > 0) { if (strcmp(*argv, "to") == 0) { NEXT_ARG(); get_prefix(&filter.pfx, *argv, filter.family); if (filter.family == AF_UNSPEC) filter.family = filter.pfx.family; } else if (strcmp(*argv, "scope") == 0) { unsigned scope = 0; NEXT_ARG(); filter.scopemask = -1; if (rtnl_rtscope_a2n(&scope, *argv)) { if (strcmp(*argv, "all") != 0) invarg("invalid \"scope\"\n", *argv); scope = RT_SCOPE_NOWHERE; filter.scopemask = 0; } filter.scope = scope; } else if (strcmp(*argv, "up") == 0) { filter.up = 1; } else if (strcmp(*argv, "dynamic") == 0) { filter.flags &= ~IFA_F_PERMANENT; filter.flagmask |= IFA_F_PERMANENT; } else if (strcmp(*argv, "permanent") == 0) { filter.flags |= IFA_F_PERMANENT; filter.flagmask |= IFA_F_PERMANENT; } else if (strcmp(*argv, "secondary") == 0 || strcmp(*argv, "temporary") == 0) { filter.flags |= IFA_F_SECONDARY; filter.flagmask |= IFA_F_SECONDARY; } else if (strcmp(*argv, "primary") == 0) { filter.flags &= ~IFA_F_SECONDARY; filter.flagmask |= IFA_F_SECONDARY; } else if (strcmp(*argv, "tentative") == 0) { filter.flags |= IFA_F_TENTATIVE; filter.flagmask |= IFA_F_TENTATIVE; } else if (strcmp(*argv, "deprecated") == 0) { filter.flags |= IFA_F_DEPRECATED; filter.flagmask |= IFA_F_DEPRECATED; } else if (strcmp(*argv, "home") == 0) { filter.flags |= IFA_F_HOMEADDRESS; filter.flagmask |= IFA_F_HOMEADDRESS; } else if (strcmp(*argv, "nodad") == 0) { filter.flags |= IFA_F_NODAD; filter.flagmask |= IFA_F_NODAD; } else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; } else if (strcmp(*argv, "label") == 0) { NEXT_ARG(); filter.label = *argv; } else if (strcmp(*argv, "group") == 0) { NEXT_ARG(); if (rtnl_group_a2n(&filter.group, *argv)) invarg("Invalid \"group\" value\n", *argv); } 
else { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); } if (matches(*argv, "help") == 0) usage(); if (filter_dev) duparg2("dev", *argv); filter_dev = *argv; } argv++; argc--; } if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETLINK) < 0) { perror("Cannot send dump request"); exit(1); } if (rtnl_dump_filter(&rth, store_nlmsg, &linfo, NULL, NULL) < 0) { fprintf(stderr, "Dump terminated\n"); exit(1); } if (filter_dev) { filter.ifindex = ll_name_to_index(filter_dev); if (filter.ifindex <= 0) { fprintf(stderr, "Device \"%s\" does not exist.\n", filter_dev); return -1; } } if (flush) { int round = 0; char flushb[4096-512]; filter.flushb = flushb; filter.flushp = 0; filter.flushe = sizeof(flushb); while ((max_flush_loops == 0) || (round < max_flush_loops)) { const struct rtnl_dump_filter_arg a[3] = { { .filter = print_addrinfo_secondary, .arg1 = stdout, .junk = NULL, .arg2 = NULL }, { .filter = print_addrinfo_primary, .arg1 = stdout, .junk = NULL, .arg2 = NULL }, { .filter = NULL, .arg1 = NULL, .junk = NULL, .arg2 = NULL }, }; if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) { perror("Cannot send dump request"); exit(1); } filter.flushed = 0; if (rtnl_dump_filter_l(&rth, a) < 0) { fprintf(stderr, "Flush terminated\n"); exit(1); } if (filter.flushed == 0) { flush_done: if (show_stats) { if (round == 0) printf("Nothing to flush.\n"); else printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":""); } fflush(stdout); return 0; } round++; if (flush_update() < 0) return 1; if (show_stats) { printf("\n*** Round %d, deleting %d addresses ***\n", round, filter.flushed); fflush(stdout); } /* If we are flushing, and specifying primary, then we * want to flush only a single round. Otherwise, we\'ll * start flushing secondaries that were promoted to * primaries. */ if (!(filter.flags & IFA_F_SECONDARY) && (filter.flagmask & IFA_F_SECONDARY)) goto flush_done; } fprintf(stderr, "*** Flush remains incomplete after %d rounds. 
***\n", max_flush_loops); fflush(stderr); return 1; } if (filter.family != AF_PACKET) { if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) { perror("Cannot send dump request"); exit(1); } if (rtnl_dump_filter(&rth, store_nlmsg, &ainfo, NULL, NULL) < 0) { fprintf(stderr, "Dump terminated\n"); exit(1); } } if (filter.family && filter.family != AF_PACKET) { struct nlmsg_list **lp; lp=&linfo; if (filter.oneline) no_link = 1; while ((l=*lp)!=NULL) { int ok = 0; struct ifinfomsg *ifi = NLMSG_DATA(&l->h); struct nlmsg_list *a; for (a=ainfo; a; a=a->next) { struct nlmsghdr *n = &a->h; struct ifaddrmsg *ifa = NLMSG_DATA(n); if (ifa->ifa_index != ifi->ifi_index || (filter.family && filter.family != ifa->ifa_family)) continue; if ((filter.scope^ifa->ifa_scope)&filter.scopemask) continue; if ((filter.flags^ifa->ifa_flags)&filter.flagmask) continue; if (filter.pfx.family || filter.label) { struct rtattr *tb[IFA_MAX+1]; parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n)); if (!tb[IFA_LOCAL]) tb[IFA_LOCAL] = tb[IFA_ADDRESS]; if (filter.pfx.family && tb[IFA_LOCAL]) { inet_prefix dst; memset(&dst, 0, sizeof(dst)); dst.family = ifa->ifa_family; memcpy(&dst.data, RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_LOCAL])); if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen)) continue; } if (filter.label) { SPRINT_BUF(b1); const char *label; if (tb[IFA_LABEL]) label = RTA_DATA(tb[IFA_LABEL]); else label = ll_idx_n2a(ifa->ifa_index, b1); if (fnmatch(filter.label, label, 0) != 0) continue; } } ok = 1; break; } if (!ok) *lp = l->next; else lp = &l->next; } } for (l=linfo; l; l = n) { n = l->next; if (no_link || print_linkinfo(NULL, &l->h, stdout) == 0) { struct ifinfomsg *ifi = NLMSG_DATA(&l->h); if (filter.family != AF_PACKET) print_selected_addrinfo(ifi->ifi_index, ainfo, stdout); } fflush(stdout); free(l); } return 0; }