首页 > 代码库 > Openvswitch原理与代码分析(2): ovs-vswitchd的启动

Openvswitch原理与代码分析(2): ovs-vswitchd的启动

ovs-vswitchd.c的main函数最终会进入一个while无限循环,在这个循环中,最重要的两个函数是bridge_run()和netdev_run()。

?

技术分享

?

Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。

?

其中bridge_run就是初始化数据库中已经创建的虚拟网桥。

?

一、虚拟网桥的初始化bridge_run

?

bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run

?

  1. static void
  2. bridge_run__(void)
  3. {
  4. ……
  5. ????/* Let each bridge do the work that it needs to do. */
  6. ????HMAP_FOR_EACH (br, node, &all_bridges) {
  7. ????????ofproto_run(br->ofproto);
  8. ????}
  9. }

?

int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);

?

ofproto_class这个结构体的定义在ofproto-provider.h中,它的一个实现ofproto_dpif_class定义在ofproto-dpif.c中,下面列出的所有函数,在ofproto-dpif.c这个文件中都有定义。

?

  1. const struct ofproto_class ofproto_dpif_class = {
  2. ????init,
  3. ????enumerate_types,
  4. ????enumerate_names,
  5. ????del,
  6. ????port_open_type,
  7. ????type_run,
  8. ????type_wait,
  9. ????alloc,
  10. ????construct,
  11. ????destruct,
  12. ????dealloc,
  13. ????run,
  14. ????wait,
  15. ????NULL, /* get_memory_usage. */
  16. ????type_get_memory_usage,
  17. ????flush,
  18. ????query_tables,
  19. ????set_tables_version,
  20. ????port_alloc,
  21. ????port_construct,
  22. ????port_destruct,
  23. ????port_dealloc,
  24. ????port_modified,
  25. ????port_reconfigured,
  26. ????port_query_by_name,
  27. ????port_add,
  28. ????port_del,
  29. ????port_get_stats,
  30. ????port_dump_start,
  31. ????port_dump_next,
  32. ????port_dump_done,
  33. ????port_poll,
  34. ????port_poll_wait,
  35. ????port_is_lacp_current,
  36. ????port_get_lacp_stats,
  37. ????NULL, /* rule_choose_table */
  38. ????rule_alloc,
  39. ????rule_construct,
  40. ????rule_insert,
  41. ????rule_delete,
  42. ????rule_destruct,
  43. ????rule_dealloc,
  44. ????rule_get_stats,
  45. ????rule_execute,
  46. ????set_frag_handling,
  47. ????packet_out,
  48. ????set_netflow,
  49. ????get_netflow_ids,
  50. ????set_sflow,
  51. ????set_ipfix,
  52. ????set_cfm,
  53. ????cfm_status_changed,
  54. ????get_cfm_status,
  55. ????set_lldp,
  56. ????get_lldp_status,
  57. ????set_aa,
  58. ????aa_mapping_set,
  59. ????aa_mapping_unset,
  60. ????aa_vlan_get_queued,
  61. ????aa_vlan_get_queue_size,
  62. ????set_bfd,
  63. ????bfd_status_changed,
  64. ????get_bfd_status,
  65. ????set_stp,
  66. ????get_stp_status,
  67. ????set_stp_port,
  68. ????get_stp_port_status,
  69. ????get_stp_port_stats,
  70. ????set_rstp,
  71. ????get_rstp_status,
  72. ????set_rstp_port,
  73. ????get_rstp_port_status,
  74. ????set_queues,
  75. ????bundle_set,
  76. ????bundle_remove,
  77. ????mirror_set__,
  78. ????mirror_get_stats__,
  79. ????set_flood_vlans,
  80. ????is_mirror_output_bundle,
  81. ????forward_bpdu_changed,
  82. ????set_mac_table_config,
  83. ????set_mcast_snooping,
  84. ????set_mcast_snooping_port,
  85. ????set_realdev,
  86. ????NULL, /* meter_get_features */
  87. ????NULL, /* meter_set */
  88. ????NULL, /* meter_get */
  89. ????NULL, /* meter_del */
  90. ????group_alloc, /* group_alloc */
  91. ????group_construct, /* group_construct */
  92. ????group_destruct, /* group_destruct */
  93. ????group_dealloc, /* group_dealloc */
  94. ????group_modify, /* group_modify */
  95. ????group_get_stats, /* group_get_stats */
  96. ????get_datapath_version, /* get_datapath_version */
  97. };

?

在ofproto-provider.h中注释里是这样说的。

这里定义了四类数据结构

struct ofproto表示一个交换机

struct ofport表示交换机上的一个端口

struct rule表示交换机上的一条flow规则

struct ofgroup表示一个flow规则组

?

上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。

?

在这个函数中,会进行netflow、sflow、ipfix、stp、rstp、mac address learning等一系列功能的初始化操作。

?

bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。

?

在这个函数里面,对于每一个网桥,将网卡添加进去

  1. HMAP_FOR_EACH (br, node, &all_bridges) {
  2. ????bridge_add_ports(br, &br->wanted_ports);
  3. ????shash_destroy(&br->wanted_ports);
  4. }

?

  1. static void
  2. bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)
  3. {
  4. ????/* First add interfaces that request a particular port number. */
  5. ????bridge_add_ports__(br, wanted_ports, true);
  6. ?
  7. ????/* Then add interfaces that want automatic port number assignment.
  8. ?????* We add these afterward to avoid accidentally taking a specifically
  9. ?????* requested port number. */
  10. ????bridge_add_ports__(br, wanted_ports, false);
  11. }

?

static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用

static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用

static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用

int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用

?

  1. error = ofproto->ofproto_class->port_add(ofproto, netdev);

?

会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。

?

会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用

?

  1. error = dpif->dpif_class->port_add(dpif, netdev, &port_no);

?

会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,也即

static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,odp_port_t *port_nop)会调用

static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)

?

在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW

?

  1. const char *name = netdev_vport_get_dpif_port(netdev,
  2. ??????????????????????????????????????????????????namebuf, sizeof namebuf);
  3. struct dpif_netlink_vport request, reply;
  4. struct nl_sock **socksp = NULL;
  5. ?
  6. if (dpif->handlers) {
  7. ????socksp = vport_create_socksp(dpif, &error);
  8. ????if (!socksp) {
  9. ????????return error;
  10. ????}
  11. }
  12. ?
  13. dpif_netlink_vport_init(&request);
  14. request.cmd = OVS_VPORT_CMD_NEW;
  15. request.dp_ifindex = dpif->dp_ifindex;
  16. request.type = netdev_to_ovs_vport_type(netdev);
  17. ?
  18. request.name = name;
  19. ?
  20. upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
  21. request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
  22. request.upcall_pids = upcall_pids;
  23. error = dpif_netlink_vport_transact(&request, &reply, &buf);

?

?

二、虚拟网卡的初始化netdev_run()

?

  1. void
  2. netdev_run(void)
  3. ????OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
  4. {
  5. ????struct netdev_registered_class *rc;
  6. ?
  7. ????netdev_initialize();
  8. ????ovs_mutex_lock(&netdev_class_mutex);
  9. ????HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
  10. ????????if (rc->class->run) {
  11. ????????????rc->class->run();
  12. ????????}
  13. ????}
  14. ????ovs_mutex_unlock(&netdev_class_mutex);
  15. }

?

依次循环调用netdev_classes中的每一个run。

?

对于不同类型的虚拟网卡,都有对应的netdev_class。

?

例如对于dpdk的网卡有

?

  1. static const struct netdev_class dpdk_class =
  2. ????NETDEV_DPDK_CLASS(
  3. ????????"dpdk",
  4. ????????NULL,
  5. ????????netdev_dpdk_construct,
  6. ????????netdev_dpdk_destruct,
  7. ????????netdev_dpdk_set_multiq,
  8. ????????netdev_dpdk_eth_send,
  9. ????????netdev_dpdk_get_carrier,
  10. ????????netdev_dpdk_get_stats,
  11. ????????netdev_dpdk_get_features,
  12. ????????netdev_dpdk_get_status,
  13. ????????netdev_dpdk_rxq_recv);

?

对于物理网卡,也需要有相应的netdev_class

?

  1. const struct netdev_class netdev_linux_class =
  2. ????NETDEV_LINUX_CLASS(
  3. ????????"system",
  4. ????????netdev_linux_construct,
  5. ????????netdev_linux_get_stats,
  6. ????????netdev_linux_get_features,
  7. ????????netdev_linux_get_status);

?

对于连接到KVM的tap网卡

  1. const struct netdev_class netdev_tap_class =
  2. ????NETDEV_LINUX_CLASS(
  3. ????????"tap",
  4. ????????netdev_linux_construct_tap,
  5. ????????netdev_tap_get_stats,
  6. ????????netdev_linux_get_features,
  7. ????????netdev_linux_get_status);

?

对于虚拟的软网卡,也即类型为internal的端口(例如与网桥同名的本地端口)

  1. const struct netdev_class netdev_internal_class =
  2. ????NETDEV_LINUX_CLASS(
  3. ????????"internal",
  4. ????????netdev_linux_construct,
  5. ????????netdev_internal_get_stats,
  6. ????????NULL, /* get_features */
  7. ????????netdev_internal_get_status);

?

其中NETDEV_LINUX_CLASS是一个宏,使用时只需要填写NAME、CONSTRUCT、GET_STATS、GET_FEATURES、GET_STATUS这五个参数,其余的函数指针都由宏统一填充,因而不需要每种网卡类型都把所有的字段填写一遍。

  1. #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS,
  2. ???????????????????????????GET_FEATURES, GET_STATUS)
  3. {
  4. ????NAME,
  5. ????????????????????????????????????????????????????????????????
  6. ????NULL,
  7. ????netdev_linux_run,
  8. ????netdev_linux_wait,
  9. ????????????????????????????????????????????????????????????????
  10. ????netdev_linux_alloc,
  11. ????CONSTRUCT,
  12. ????netdev_linux_destruct,
  13. ????netdev_linux_dealloc,
  14. ????NULL, /* get_config */
  15. ????NULL, /* set_config */
  16. ????NULL, /* get_tunnel_config */
  17. ????NULL, /* build header */
  18. ????NULL, /* push header */
  19. ????NULL, /* pop header */
  20. ????NULL, /* get_numa_id */
  21. ????NULL, /* set_multiq */
  22. ????????????????????????????????????????????????????????????????
  23. ????netdev_linux_send,
  24. ????netdev_linux_send_wait,
  25. ????????????????????????????????????????????????????????????????
  26. ????netdev_linux_set_etheraddr,
  27. ????netdev_linux_get_etheraddr,
  28. ????netdev_linux_get_mtu,
  29. ????netdev_linux_set_mtu,
  30. ????netdev_linux_get_ifindex,
  31. ????netdev_linux_get_carrier,
  32. ????netdev_linux_get_carrier_resets,
  33. ????netdev_linux_set_miimon_interval,
  34. ????GET_STATS,
  35. ????????????????????????????????????????????????????????????????
  36. ????GET_FEATURES,
  37. ????netdev_linux_set_advertisements,
  38. ????????????????????????????????????????????????????????????????
  39. ????netdev_linux_set_policing,
  40. ????netdev_linux_get_qos_types,
  41. ????netdev_linux_get_qos_capabilities,
  42. ????netdev_linux_get_qos,
  43. ????netdev_linux_set_qos,
  44. ????netdev_linux_get_queue,
  45. ????netdev_linux_set_queue,
  46. ????netdev_linux_delete_queue,
  47. ????netdev_linux_get_queue_stats,
  48. ????netdev_linux_queue_dump_start,
  49. ????netdev_linux_queue_dump_next,
  50. ????netdev_linux_queue_dump_done,
  51. ????netdev_linux_dump_queue_stats,
  52. ????????????????????????????????????????????????????????????????
  53. ????netdev_linux_get_in4,
  54. ????netdev_linux_set_in4,
  55. ????netdev_linux_get_in6,
  56. ????netdev_linux_add_router,
  57. ????netdev_linux_get_next_hop,
  58. ????GET_STATUS,
  59. ????netdev_linux_arp_lookup,
  60. ????????????????????????????????????????????????????????????????
  61. ????netdev_linux_update_flags,
  62. ????????????????????????????????????????????????????????????????
  63. ????netdev_linux_rxq_alloc,
  64. ????netdev_linux_rxq_construct,
  65. ????netdev_linux_rxq_destruct,
  66. ????netdev_linux_rxq_dealloc,
  67. ????netdev_linux_rxq_recv,
  68. ????netdev_linux_rxq_wait,
  69. ????netdev_linux_rxq_drain,
  70. }

?

rc->class->run()调用的是netdev-linux.c下的netdev_linux_run

?

netdev_linux_run会从netlink的socket中接收网卡状态变化的消息,解析之后找到对应的netdev,并且更新它的状态。

?

  1. error = nl_sock_recv(sock, &buf, false);
  2. if (!error) {
  3. ????struct rtnetlink_change change;
  4. ????if (rtnetlink_parse(&buf, &change)) {
  5. ????????struct netdev *netdev_ = netdev_from_name(change.ifname);
  6. ????????if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
  7. ???????????struct netdev_linux *netdev = netdev_linux_cast(netdev_);
  8. ???????????ovs_mutex_lock(&netdev->mutex);
  9. ???????????netdev_linux_update(netdev, &change);
  10. ???????????ovs_mutex_unlock(&netdev->mutex);
  11. ????????}
  12. ????????netdev_close(netdev_);
  13. ?????}
  14. }

Openvswitch原理与代码分析(2): ovs-vswitchd的启动