ovs-vswitchd.c的main函数最终会进入一个while无限循环,在这个循环中最重要的两个函数是bridge_run()和netdev_run()。
?
?
Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。
?
其中bridge_run就是初始化数据库中已经创建的虚拟网桥。
?
一、虚拟网桥的初始化bridge_run
?
bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run
?
static void bridge_run__(void)
{
    ……
    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }
}
|
?
int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);
?
struct ofproto_class的定义在ofproto-provider.h中,它的实现ofproto_dpif_class定义在ofproto-dpif.c中,下面列出的所有函数在ofproto-dpif.c这个文件中都有定义。
?
const struct ofproto_class ofproto_dpif_class = {
    init,
    enumerate_types,
    enumerate_names,
    del,
    port_open_type,
    type_run,
    type_wait,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    wait,
    NULL, /* get_memory_usage. */
    type_get_memory_usage,
    flush,
    query_tables,
    set_tables_version,
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    port_get_lacp_stats,
    NULL, /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_insert,
    rule_delete,
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    set_frag_handling,
    packet_out,
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_ipfix,
    set_cfm,
    cfm_status_changed,
    get_cfm_status,
    set_lldp,
    get_lldp_status,
    set_aa,
    aa_mapping_set,
    aa_mapping_unset,
    aa_vlan_get_queued,
    aa_vlan_get_queue_size,
    set_bfd,
    bfd_status_changed,
    get_bfd_status,
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    get_stp_port_stats,
    set_rstp,
    get_rstp_status,
    set_rstp_port,
    get_rstp_port_status,
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set__,
    mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_mcast_snooping,
    set_mcast_snooping_port,
    set_realdev,
    NULL, /* meter_get_features */
    NULL, /* meter_set */
    NULL, /* meter_get */
    NULL, /* meter_del */
    group_alloc, /* group_alloc */
    group_construct, /* group_construct */
    group_destruct, /* group_destruct */
    group_dealloc, /* group_dealloc */
    group_modify, /* group_modify */
    group_get_stats, /* group_get_stats */
    get_datapath_version, /* get_datapath_version */
};
|
?
在ofproto-provider.h中注释里是这样说的。
这里定义了四类数据结构
struct ofproto表示一个交换机
struct ofport表示交换机上的一个端口
struct rule表示交换机上的一条flow规则
struct ofgroup表示交换机上的一个OpenFlow group(组表项)
?
上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。
?
在这个函数中,会依次执行netflow, sflow, ipfix, stp, rstp, mac address learning等一系列功能的周期性处理(该函数在主循环的每一轮中都会被调用,而不只是在初始化时调用一次)。
?
bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。
?
在这个函数里面,对于每一个网桥,将网卡添加进去
HMAP_FOR_EACH (br, node, &all_bridges) {
    bridge_add_ports(br, &br->wanted_ports);
    shash_destroy(&br->wanted_ports);
}
|
?
static void bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)
{
    /* First add interfaces that request a particular port number. */
    bridge_add_ports__(br, wanted_ports, true);

    /* Then add interfaces that want automatic port number assignment.
     * We add these afterward to avoid accidentally taking a specifically
     * requested port number. */
    bridge_add_ports__(br, wanted_ports, false);
}
|
?
static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用
static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用
static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用
int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用
?
error = ofproto->ofproto_class->port_add(ofproto, netdev);
|
?
会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。
?
会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用
?
error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
|
?
会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,也即
static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,odp_port_t *port_nop)会调用
static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)
?
在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW
?
const char *name = netdev_vport_get_dpif_port(netdev,
                                              namebuf, sizeof namebuf);
struct dpif_netlink_vport request, reply;
struct nl_sock **socksp = NULL;

if (dpif->handlers) {
    socksp = vport_create_socksp(dpif, &error);
    if (!socksp) {
        return error;
    }
}

dpif_netlink_vport_init(&request);
request.cmd = OVS_VPORT_CMD_NEW;
request.dp_ifindex = dpif->dp_ifindex;
request.type = netdev_to_ovs_vport_type(netdev);

request.name = name;

upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
request.upcall_pids = upcall_pids;
error = dpif_netlink_vport_transact(&request, &reply, &buf);
|
?
?
二、虚拟网卡的初始化netdev_run()
?
void netdev_run(void)
    OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
{
    struct netdev_registered_class *rc;

    netdev_initialize();
    ovs_mutex_lock(&netdev_class_mutex);
    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run();
        }
    }
    ovs_mutex_unlock(&netdev_class_mutex);
}
|
?
依次循环调用netdev_classes中的每一个run。
?
对于不同类型的虚拟网卡,都有对应的netdev_class。
?
例如对于dpdk的网卡有
?
static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        NULL,
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_rxq_recv);
|
?
对于物理网卡,也需要有相应的netdev_class
?
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);
|
?
对于连接到KVM的tap网卡
const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);
|
?
对于internal类型的虚拟软网卡,比如与网桥同名的内部端口(注意veth pair在OVS中是作为system类型的网卡接入的)
const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL, /* get_features */
        netdev_internal_get_status);
|
?
其中NETDEV_LINUX_CLASS是一个宏,大部分回调函数由宏统一填写,只有名称、construct、get_stats、get_features、get_status这几项需要作为参数传入。
#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, \
                           GET_FEATURES, GET_STATUS) \
{ \
    NAME, \
    NULL, \
    netdev_linux_run, \
    netdev_linux_wait, \
 \
    netdev_linux_alloc, \
    CONSTRUCT, \
    netdev_linux_destruct, \
    netdev_linux_dealloc, \
    NULL, /* get_config */ \
    NULL, /* set_config */ \
    NULL, /* get_tunnel_config */ \
    NULL, /* build header */ \
    NULL, /* push header */ \
    NULL, /* pop header */ \
    NULL, /* get_numa_id */ \
    NULL, /* set_multiq */ \
 \
    netdev_linux_send, \
    netdev_linux_send_wait, \
 \
    netdev_linux_set_etheraddr, \
    netdev_linux_get_etheraddr, \
    netdev_linux_get_mtu, \
    netdev_linux_set_mtu, \
    netdev_linux_get_ifindex, \
    netdev_linux_get_carrier, \
    netdev_linux_get_carrier_resets, \
    netdev_linux_set_miimon_interval, \
    GET_STATS, \
 \
    GET_FEATURES, \
    netdev_linux_set_advertisements, \
 \
    netdev_linux_set_policing, \
    netdev_linux_get_qos_types, \
    netdev_linux_get_qos_capabilities, \
    netdev_linux_get_qos, \
    netdev_linux_set_qos, \
    netdev_linux_get_queue, \
    netdev_linux_set_queue, \
    netdev_linux_delete_queue, \
    netdev_linux_get_queue_stats, \
    netdev_linux_queue_dump_start, \
    netdev_linux_queue_dump_next, \
    netdev_linux_queue_dump_done, \
    netdev_linux_dump_queue_stats, \
 \
    netdev_linux_get_in4, \
    netdev_linux_set_in4, \
    netdev_linux_get_in6, \
    netdev_linux_add_router, \
    netdev_linux_get_next_hop, \
    GET_STATUS, \
    netdev_linux_arp_lookup, \
 \
    netdev_linux_update_flags, \
 \
    netdev_linux_rxq_alloc, \
    netdev_linux_rxq_construct, \
    netdev_linux_rxq_destruct, \
    netdev_linux_rxq_dealloc, \
    netdev_linux_rxq_recv, \
    netdev_linux_rxq_wait, \
    netdev_linux_rxq_drain, \
}
|
?
rc->class->run()调用的是netdev-linux.c下的netdev_linux_run
?
netdev_linux_run会通过netlink的socket接收网卡状态变化的通知,并据此更新对应网卡的状态。
?
error = nl_sock_recv(sock, &buf, false);
if (!error) {
    struct rtnetlink_change change;
    if (rtnetlink_parse(&buf, &change)) {
        struct netdev *netdev_ = netdev_from_name(change.ifname);
        if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
            ovs_mutex_lock(&netdev->mutex);
            netdev_linux_update(netdev, &change);
            ovs_mutex_unlock(&netdev->mutex);
        }
        netdev_close(netdev_);
    }
}
|
Openvswitch原理与代码分析(2): ovs-vswitchd的启动