From b12c5d5d12a83414ff27b9fab5362fb90231fa77 Mon Sep 17 00:00:00 2001
From: Alex Katz
Date: Mon, 23 Mar 2020 13:45:57 +0200
Subject: [PATCH] Test that DHCP lease is correctly served when DHCP agent is
 down

Changed flake8 to version 3.3.0 to support the Python 3.6 syntax
(f-strings) used by the new code.

Stop the agents and then reboot the VM. All the dnsmasq processes should
stay alive while the agents are down in order to serve the existing DHCP
leases. Because the dnsmasq processes are restarted right after the DHCP
agent comes back up, we need to make sure that we wait long enough for
the new dnsmasq processes to become available.

Change-Id: I06b8ad68657fef0d381fe4f9f2e85105c181d0c0
---
 linters-requirements.txt                      |   2 +-
 .../faults/agents/test_neutron_agents.py      | 156 +++++++++++++++---
 2 files changed, 132 insertions(+), 26 deletions(-)

diff --git a/linters-requirements.txt b/linters-requirements.txt
index aa08616d7..399ccedd1 100644
--- a/linters-requirements.txt
+++ b/linters-requirements.txt
@@ -1,7 +1,7 @@
 # pep8 and flake8 requirements

 ansible-lint==4.2.0 # MIT
-flake8==2.5.5 # MIT
+flake8==3.3.0 # MIT
 flake8-import-order==0.12 # LGPLv3
 mypy>=0.740 # MIT
 pylint>=1.9 # GPLv2
diff --git a/tobiko/tests/faults/agents/test_neutron_agents.py b/tobiko/tests/faults/agents/test_neutron_agents.py
index ef0731950..1c140c194 100644
--- a/tobiko/tests/faults/agents/test_neutron_agents.py
+++ b/tobiko/tests/faults/agents/test_neutron_agents.py
@@ -14,15 +14,22 @@
 # under the License.

 from __future__ import absolute_import

+import time
+
 import testtools

+from oslo_log import log
+
 import tobiko
 from tobiko.openstack import neutron
+from tobiko.openstack import nova
 from tobiko.openstack import stacks
 from tobiko.openstack import topology
 from tobiko.shell import ping
 from tobiko.shell import sh

+LOG = log.getLogger(__name__)
+

 class AgentTestMixin(object):

@@ -41,36 +48,107 @@ class AgentTestMixin(object):
             "sudo systemctl start %s" % service_name,
             ssh_client=agent_host.ssh_client)

-    def get_process_pids_for_resource(self, process_name, resource_id, agents):
+    def get_process_pids_for_resource(self, process_name, command_filter,
+                                      agents, timeout=120, interval=2):
+        '''Search for PIDs that match the criteria on the requested hosts'''
+
+        start_time = time.time()
         pids_per_agent = {}
+        LOG.debug(f'Search for {process_name} processes on {agents}')
         for agent in agents:
+            LOG.debug(f'Search for {process_name} process on {agent["host"]}')
             agent_host = topology.get_openstack_node(hostname=agent['host'])
-            processes_on_host = sh.list_processes(
-                command=process_name, ssh_client=agent_host.ssh_client)
-            pid = self.get_pid(
-                agent_host.ssh_client, resource_id, processes_on_host)
+            ssh_client = agent_host.ssh_client
+            pid = None  # reset per agent so a stale PID is never reused
+            time_left = start_time + timeout - time.time()
+            while time_left > 0:
+                pid = self.get_pid(ssh_client, command_filter, process_name)
+                if pid:
+                    pids_per_agent[agent['host']] = pid
+                    LOG.debug(f'{process_name} process has {pid} PID on '
+                              f'{agent["host"]} host')
+                    break
+                time_left = start_time + timeout - time.time()
+                LOG.debug(f'Retrying, time left: {time_left}')
+                time.sleep(interval)
             if not pid:
-                self.fail("%(process)s process for router: %(id)s "
-                          "not found on host %(host)s" % {
-                              'process': process_name,
-                              'id': resource_id,
-                              'host': agent['host']})
-            pids_per_agent[agent['host']] = pid
+                self.fail(f'No {process_name} process found on host '
+                          f'{agent["host"]} that matches {command_filter}')
         return pids_per_agent

-    def get_pid(self, ssh_client, resource_id, processes):
+    def get_pid(self, ssh_client, command_filter, process_name):
+        processes = sh.list_processes(command=process_name,
+                                      ssh_client=ssh_client)
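+        # sh.list_processes() filters by command name only, so read
+        # /proc/<pid>/cmdline to match command_filter (e.g. the network ID
+        # that appears in dnsmasq's arguments) against the full command
+        # line of each candidate process.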
         for process in processes:
-            cmdline_result = sh.execute(
-                "cat /proc/%s/cmdline" % process.pid, ssh_client=ssh_client)
-            if resource_id in cmdline_result.stdout:
-                return process.pid
+            try:
+                command = sh.execute(f'cat /proc/{process.pid}/cmdline',
+                                     ssh_client=ssh_client)
+                if command_filter in command.stdout:
+                    return process.pid
+                else:
+                    LOG.debug(f'{command_filter} has not been found in the '
+                              f'command details: {command.stdout}')
+            except sh.ShellCommandFailed:
+                LOG.debug(f'Process {process.pid} has been terminated right '
+                          f'after the process list has been collected')
         return None

+    def wait_processes_destroyed(
+            self, command_filter, pids_per_agent, timeout=120, interval=2):
+        '''Wait for processes to be terminated on hosts
+
+        Make sure that all processes from the list are terminated, or fail
+        otherwise. A tricky situation may occur when a different process
+        is spawned with the same PID, so each PID has to be checked against
+        `command_filter` as well.
+        '''
+
+        start_time = time.time()
+        LOG.debug(f'Waiting for processes to be finished: {pids_per_agent}')
+        for agent, pid in pids_per_agent.items():
+            host = topology.get_openstack_node(hostname=agent)
+            destroyed = False
+            time_left = start_time + timeout - time.time()
+            while time_left > 0:
+                LOG.debug(f'Check if {pid} has been terminated on {agent}')
+                if self.is_destroyed(pid, command_filter, host.ssh_client):
+                    destroyed = True
+                    break
+                time.sleep(interval)
+                time_left = start_time + timeout - time.time()
+                LOG.debug(f'Retrying, time left: {time_left}')
+            if not destroyed:
+                self.fail(f'Process {pid} has not been terminated within '
+                          f'{timeout} sec on {agent}')
+            else:
+                LOG.debug(f'Process {pid} has finished on {agent}')
+
+    def is_destroyed(self, pid, command_filter, shell):
+        '''Check if the process has been terminated'''
+
+        processes = sh.list_processes(ssh_client=shell)
+        process = processes.with_attributes(pid=pid)
+        process_destroyed = False
+        if not process:
+            LOG.debug(f'No PID {pid} has been found in the process list')
+            process_destroyed = True
+        else:
+            try:
+                command = sh.execute(f'cat /proc/{pid}/cmdline',
+                                     ssh_client=shell)
+                if command_filter not in command.stdout:
+                    LOG.debug(f'A different process with the same PID {pid} '
+                              f'exists')
+                    process_destroyed = True
+            except sh.ShellCommandFailed:
+                LOG.debug(f'Process {pid} has been terminated right after '
+                          f'the process list has been collected')
+                process_destroyed = True
+        return process_destroyed
+

 class DHCPAgentTest(testtools.TestCase, AgentTestMixin):

     #: Resources stack with Nova server to send messages to
-    stack = tobiko.required_setup_fixture(stacks.CirrosPeerServerStackFixture)
+    stack = tobiko.required_setup_fixture(stacks.CirrosServerStackFixture)

     def setUp(self):
         super(DHCPAgentTest, self).setUp()
@@ -89,14 +167,18 @@ class DHCPAgentTest(testtools.TestCase, AgentTestMixin):
             self.agent_service_name, self.stopped_agents)

     def test_stop_dhcp_agent(self):
+        '''Test that dnsmasq processes are not broken after DHCP agent restart
+
+        Dnsmasq processes should stay alive while the DHCP agent is turned
+        off and should then be restarted once the DHCP agent is back in the
+        active state.
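+
+        Restarting the agent respawns dnsmasq with new PIDs, so after the
+        agent is started again the test waits until the old processes are
+        gone and then looks the new ones up.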
+        '''
         network_dhcp_agents = neutron.list_dhcp_agent_hosting_network(
             self.stack.network)
         network_dnsmasq_pids = self.get_process_pids_for_resource(
             "dnsmasq", self.stack.network, network_dhcp_agents)
+
         self.stop_service_on_agents(
             self.agent_service_name, network_dhcp_agents)
-        # Now check if dnsmasq processes are still run and have got same pids
-        # like before dhcp agent's stop
         self.assertEqual(
             network_dnsmasq_pids,
             self.get_process_pids_for_resource(
@@ -104,13 +186,37 @@ class DHCPAgentTest(testtools.TestCase, AgentTestMixin):

         self.start_service_on_agents(
             self.agent_service_name, network_dhcp_agents)
+        self.wait_processes_destroyed(self.stack.network,
+                                      network_dnsmasq_pids)
+        self.get_process_pids_for_resource(
+            "dnsmasq", self.stack.network, network_dhcp_agents)

-        # And finally check if dnsmasq processes are still run and have got
-        # same pids like at the beginning of the test
-        self.assertEqual(
-            network_dnsmasq_pids,
-            self.get_process_pids_for_resource(
-                "dnsmasq", self.stack.network, network_dhcp_agents))
+    def test_dhcp_lease_served_when_dhcp_agent_down(self):
+        '''Test that DHCP lease is correctly served when DHCP agent is down
+
+        Make sure that the VM receives an IP address after a reboot that
+        happens while the DHCP agent is down. The VM should receive the
+        same IP address that was assigned to it before the reboot.
+        '''
+        ping.ping_until_received(
+            self.stack.ip_address).assert_replied()
+
+        network_dhcp_agents = neutron.list_dhcp_agent_hosting_network(
+            self.stack.network)
+        network_dnsmasq_pids = self.get_process_pids_for_resource(
+            "dnsmasq", self.stack.network, network_dhcp_agents)
+        self.stop_service_on_agents(
+            self.agent_service_name, network_dhcp_agents)
+
+        nova.shutoff_server(self.stack.resources.server.physical_resource_id)
+        nova.activate_server(self.stack.resources.server.physical_resource_id)
+        ping.ping_until_received(
+            self.stack.ip_address).assert_replied()
+
+        self.start_service_on_agents(
+            self.agent_service_name, network_dhcp_agents)
+        self.wait_processes_destroyed(self.stack.network,
+                                      network_dnsmasq_pids)
+        self.get_process_pids_for_resource(
+            "dnsmasq", self.stack.network, network_dhcp_agents)


 class L3AgentTest(testtools.TestCase, AgentTestMixin):
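
For reference, the retry pattern used by get_process_pids_for_resource()
and wait_processes_destroyed() boils down to the generic polling loop
sketched below. This is a minimal sketch; poll() and its names are
illustrative only and do not exist in tobiko.

    import time

    def poll(predicate, timeout=120, interval=2):
        # Call predicate() until it returns a truthy value or the timeout
        # expires; return that value, or None if the timeout is reached.
        start_time = time.time()
        while start_time + timeout - time.time() > 0:
            result = predicate()
            if result:
                return result
            time.sleep(interval)
        return None

For example, the dnsmasq PID lookup above is essentially
poll(lambda: self.get_pid(ssh_client, command_filter, process_name)).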