From bdd35405548c1d60072cd71ef648a724bf1d31d2 Mon Sep 17 00:00:00 2001
From: Slawek Kaplonski <skaplons@redhat.com>
Date: Fri, 1 Mar 2019 14:36:07 +0100
Subject: [PATCH] Restart all L3 agents in test that checks no packet loss in HA
 routers

In the fullstack test
test_l3_agent.test_ha_router_restart_agents_no_packet_lost,
restarts of L3 agents were done in 2 steps:
1. restart of all standby agents,
2. restart of all active agents.

It was done like that because of bugs [1] and [2].
Now that those bugs are fixed, let's change this test to
a "more probable" scenario: agents will be restarted
without checking which one is master and which is standby.
However, agents will be restarted one by one instead of being
restarted at (almost) exactly the same time.

Restarting all agents at the same time still caused some issues
in my local testing environment, but I suspect that this might be
a problem related to the nature of fullstack tests and to the
fact that 2 different "nodes" are in fact simulated by namespaces only.

[1] https://bugs.launchpad.net/neutron/+bug/1776459
[2] https://bugs.launchpad.net/neutron/+bug/1798475

Change-Id: I731211b56a57d44636e741009721522f67c12368
---
 neutron/tests/fullstack/base.py          |  5 +++--
 neutron/tests/fullstack/test_l3_agent.py | 23 ++---------------------
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/neutron/tests/fullstack/base.py b/neutron/tests/fullstack/base.py
index 6fe81a37c87..c45dea75fe7 100644
--- a/neutron/tests/fullstack/base.py
+++ b/neutron/tests/fullstack/base.py
@@ -97,12 +97,13 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
 
     def _assert_ping_during_agents_restart(
             self, agents, src_namespace, ips, restart_timeout=10,
-            ping_timeout=1, count=10):
+            ping_timeout=1, count=10, max_workers=None):
+        max_workers = max_workers or len(agents)
         with net_helpers.async_ping(
                 src_namespace, ips, timeout=ping_timeout,
                 count=count) as done:
             LOG.debug("Restarting agents")
-            executor = futures.ThreadPoolExecutor(max_workers=len(agents))
+            executor = futures.ThreadPoolExecutor(max_workers=max_workers)
             restarts = [agent.restart(executor=executor)
                         for agent in agents]
 
diff --git a/neutron/tests/fullstack/test_l3_agent.py b/neutron/tests/fullstack/test_l3_agent.py
index 9b7fc0c9196..21100711b00 100644
--- a/neutron/tests/fullstack/test_l3_agent.py
+++ b/neutron/tests/fullstack/test_l3_agent.py
@@ -318,19 +318,6 @@ class TestHAL3Agent(TestL3Agent):
             if self._get_keepalived_state(keepalived_state_file) == "master":
                 return keepalived_state_file
 
-    def _get_l3_agents_with_ha_state(self, l3_agents, router_id, ha_state):
-        found_agents = []
-        agents_hosting_router = self.client.list_l3_agent_hosting_routers(
-            router_id)['agents']
-        for agent in l3_agents:
-            agent_host = agent.neutron_cfg_fixture.get_host()
-            for agent_hosting_router in agents_hosting_router:
-                if (agent_hosting_router['host'] == agent_host and
-                        agent_hosting_router['ha_state'] == ha_state):
-                    found_agents.append(agent)
-                    break
-        return found_agents
-
     def test_keepalived_multiple_sighups_does_not_forfeit_mastership(self):
         """Setup a complete "Neutron stack" - both an internal and an external
            network+subnet, and a router connected to both.
@@ -410,16 +397,10 @@ class TestHAL3Agent(TestL3Agent):
         external_vm.block_until_ping(router_ip)
 
         l3_agents = [host.agents['l3'] for host in self.environment.hosts]
-        l3_standby_agents = self._get_l3_agents_with_ha_state(
-            l3_agents, router['id'], 'standby')
-        l3_active_agents = self._get_l3_agents_with_ha_state(
-            l3_agents, router['id'], 'active')
 
         self._assert_ping_during_agents_restart(
-            l3_standby_agents, external_vm.namespace, [router_ip], count=60)
-
-        self._assert_ping_during_agents_restart(
-            l3_active_agents, external_vm.namespace, [router_ip], count=60)
+            l3_agents, external_vm.namespace, [router_ip], count=60,
+            max_workers=1)
 
     def test_gateway_ip_changed(self):
         self._test_gateway_ip_changed()