Divide-and-conquer security group beasts

In one specific compute node, the security group rules can be enormous quantity. This patch adds a step-by-step processing method to deal with the large number of the security group rules. And also changes or adds some LOG. Related-Bug: #1813703 Related-Bug: #1813704 Related-Bug: #1813707 Conflicts: neutron/common/constants.py Conflicts: neutron/agent/securitygroups_rpc.py neutron/common/constants.py neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py Conflicts: neutron/agent/common/ovs_lib.py neutron/common/constants.py Change-Id: I57bf27ec75cf848271c5a28b22beee12b8bd5faa (cherry picked from commit 6ac420df7e) (cherry picked from commit f5d110e15f) (cherry picked from commit 5424b9a68c)
2019-02-20 16:47:42 +08:00 · 2019-02-20 16:47:42 +08:00 · 51a7666533
parent 850cc4f977
commit 51a7666533
5 changed files with 39 additions and 12 deletions
--- a/neutron/agent/common/ovs_lib.py
+++ b/neutron/agent/common/ovs_lib.py
@ -31,6 +31,7 @@ from neutron._i18n import _
 from neutron.agent.common import ip_lib
 from neutron.agent.common import utils
 from neutron.agent.ovsdb import api as ovsdb_api
+from neutron.common import constants as common_constants
 from neutron.conf.agent import ovs_conf
 from neutron.plugins.common import constants as p_const
 from neutron.plugins.ml2.drivers.openvswitch.agent.common \
@ -408,7 +409,7 @@ class OVSBridge(BaseOVS):
                    msg = "cannot use 'strict' with 'add' action"
                    raise exceptions.InvalidInput(error_message=msg)

-        strict_param = ["--strict"] if strict else []
+        extra_param = ["--strict"] if strict else []

        if action == 'del' and {} in kwargs_list:
            # the 'del' case simplifies itself if kwargs_list has at least
@ -417,8 +418,12 @@ class OVSBridge(BaseOVS):
        else:
            flow_strs = [_build_flow_expr_str(kw, action, strict)
                         for kw in kwargs_list]
-            self.run_ofctl('%s-flows' % action, strict_param + ['-'],
-                           '\n'.join(flow_strs))
+            LOG.debug("Processing %d OpenFlow rules.", len(flow_strs))
+
+            step = common_constants.AGENT_RES_PROCESSING_STEP
+            for i in range(0, len(flow_strs), step):
+                self.run_ofctl('%s-flows' % action, extra_param + ['-'],
+                               '\n'.join(flow_strs[i:i + step]))

    def add_flow(self, **kwargs):
        self.do_action_flows('add', [kwargs])
--- a/neutron/agent/securitygroups_rpc.py
+++ b/neutron/agent/securitygroups_rpc.py
@ -23,6 +23,7 @@ import oslo_messaging

 from neutron.agent import firewall
 from neutron.api.rpc.handlers import securitygroups_rpc
+from neutron.common import constants as common_constants
 from neutron.conf.agent import securitygroups_rpc as sc_cfg


@ -132,12 +133,17 @@ class SecurityGroupAgentRpc(object):
        self._apply_port_filter(device_ids)

    def _apply_port_filter(self, device_ids, update_filter=False):
+        step = common_constants.AGENT_RES_PROCESSING_STEP
        if self.use_enhanced_rpc:
-            devices_info = self.plugin_rpc.security_group_info_for_devices(
-                self.context, list(device_ids))
-            devices = devices_info['devices']
-            security_groups = devices_info['security_groups']
-            security_group_member_ips = devices_info['sg_member_ips']
+            devices = {}
+            security_groups = {}
+            security_group_member_ips = {}
+            for i in range(0, len(device_ids), step):
+                devices_info = self.plugin_rpc.security_group_info_for_devices(
+                    self.context, list(device_ids)[i:i + step])
+                devices.update(devices_info['devices'])
+                security_groups.update(devices_info['security_groups'])
+                security_group_member_ips.update(devices_info['sg_member_ips'])
        else:
            devices = self.plugin_rpc.security_group_rules_for_devices(
                self.context, list(device_ids))
--- a/neutron/common/constants.py
+++ b/neutron/common/constants.py
@ -247,3 +247,15 @@ FLOATING_IP_HOST_NEEDS_BINDING = "FLOATING_IP_HOST_NEEDS_BINDING"
 # Possible types of values (e.g. in QoS rule types)
 VALUES_TYPE_CHOICES = "choices"
 VALUES_TYPE_RANGE = "range"
+
+# Number of resources for neutron agent side functions to deal
+# with large sets.
+# Setting this value does not count on special conditions, it is just a human
+# countable or scalable number. [1] gives us the method to test the scale
+# issue. And we have tested the value of 1000, 500, 200, 100. But for 100,
+# ovs-agent will have a lower timeout probability. And according to the
+# testing result, step size 100 can indeed cost about 10% much more time
+# than 500/1000. But such extra time looks inevitably needed to be sacrificed
+# for the restart success rate.
+# [1] http://paste.openstack.org/show/745685/
+AGENT_RES_PROCESSING_STEP = 100
--- a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py
+++ b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py
@ -1673,8 +1673,8 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
                                 port_info.get('updated', set()))
        need_binding_devices = []
        skipped_devices = set()
+        start = time.time()
        if devices_added_updated:
-            start = time.time()
            (skipped_devices, need_binding_devices,
            failed_devices['added']) = (
                self.treat_devices_added_or_updated(
@ -1699,6 +1699,10 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
        self._add_port_tag_info(need_binding_devices)
        self.sg_agent.setup_port_filters(added_ports,
                                         port_info.get('updated', set()))
+        LOG.info("process_network_ports - iteration:%(iter_num)d - "
+                 "agent port security group processed in %(elapsed).3f",
+                 {'iter_num': self.iter_num,
+                  'elapsed': time.time() - start})
        failed_devices['added'] |= self._bind_devices(need_binding_devices)

        if 'removed' in port_info and port_info['removed']:
--- a/neutron/tests/unit/agent/test_securitygroups_rpc.py
+++ b/neutron/tests/unit/agent/test_securitygroups_rpc.py
@ -1060,7 +1060,7 @@ class SecurityGroupAgentEnhancedRpcTestCase(
                                        mock.call.defer_apply(),
                                        mock.call.remove_port_filter(
                                            self.fake_device),
-                                        ])
+                                        ], any_order=True)

    def test_security_groups_rule_updated_enhanced_rpc(self):
        sg_list = ['fake_sgid1', 'fake_sgid3']
@ -1123,7 +1123,7 @@ class SecurityGroupAgentEnhancedRpcTestCase(
                 mock.call.update_port_filter(self.fake_device),
                 mock.call.process_trusted_ports([])]

-        self.firewall.assert_has_calls(calls)
+        self.firewall.assert_has_calls(calls, any_order=True)

    def test_refresh_firewall_devices_enhanced_rpc(self):
        self.agent.prepare_devices_filter(['fake_device'])
@ -1146,7 +1146,7 @@ class SecurityGroupAgentEnhancedRpcTestCase(
                 mock.call.update_port_filter(self.fake_device),
                 mock.call.process_trusted_ports([]),
                 ]
-        self.firewall.assert_has_calls(calls)
+        self.firewall.assert_has_calls(calls, any_order=True)

    def test_refresh_firewall_none_enhanced_rpc(self):
        self.agent.refresh_firewall([])