Move Neutron agents health check to OpenStack tests

Change-Id: Ifb73be8c2b41737384e63ff80ae632d23aba3f67
This commit is contained in:
Federico Ressi 2020-08-31 12:29:16 +02:00
parent 45e786a4f8
commit 55614f57d6
11 changed files with 142 additions and 54 deletions

View File

@ -20,9 +20,10 @@ from tobiko.openstack.neutron import _extension
from tobiko.openstack.neutron import _port
NeutronClientFixture = _client.NeutronClientFixture
ServiceUnavailable = _client.ServiceUnavailable
neutron_client = _client.neutron_client
get_neutron_client = _client.get_neutron_client
NeutronClientFixture = _client.NeutronClientFixture
find_network = _client.find_network
list_networks = _client.list_networks
find_subnet = _client.find_subnet

View File

@ -22,6 +22,9 @@ import tobiko
from tobiko.openstack import _client
ServiceUnavailable = neutronclient.exceptions.ServiceUnavailable
class NeutronClientFixture(_client.OpenstackClientFixture):
def init_client(self, session):

View File

@ -16,8 +16,10 @@
from __future__ import absolute_import
from tobiko.openstack.tests import _neutron
from tobiko.openstack.tests import _nova
test_neutron_agents_are_alive = _neutron.test_neutron_agents_are_alive
test_evacuable_server_creation = _nova.test_evacuable_server_creation
test_server_creation = _nova.test_server_creation

View File

@ -0,0 +1,45 @@
from __future__ import absolute_import
import json
from oslo_log import log
import tobiko
from tobiko.openstack import neutron
LOG = log.getLogger(__name__)
def test_neutron_agents_are_alive(timeout=300., interval=5.):
    """Check that Neutron has agents and that none is reported as dead.

    The agent list is polled through ``tobiko.retry`` so that a Neutron
    server, or some of its agents, still recovering after a disruption get a
    chance to come back before the check gives up.

    :param timeout: forwarded as ``timeout`` to ``tobiko.retry``
    :param interval: forwarded as ``interval`` to ``tobiko.retry``
    :returns: the agents returned by ``neutron.list_agents()`` once none of
        them matches ``alive=False``
    :raises: the current test case failure when Neutron reports no agents;
        whatever ``attempt.check_limits()`` raises when the service is still
        unavailable, or agents are still dead, once retry limits are reached
    """
    test_case = tobiko.get_test_case()
    for attempt in tobiko.retry(timeout=timeout, interval=interval):
        LOG.debug("Look for unhealthy Neutron agents...")
        try:
            # get Neutron agent list
            agents = neutron.list_agents()
        except neutron.ServiceUnavailable as ex:
            # Give up here when the retry limits have been reached
            attempt.check_limits()
            # retry because Neutron server could still be unavailable after
            # a disruption
            LOG.debug(f"Waiting for neutron service... ({ex})")
            continue  # Let retry

        if not agents:
            # Having no agents at all is not retried: fail immediately
            test_case.fail("Neutron has no agents")

        dead_agents = agents.with_items(alive=False)
        if dead_agents:
            # Report only the agents that are actually dead: dumping the
            # whole agent list would present healthy agents as unhealthy
            dead_agents_details = json.dumps(dead_agents, indent=4,
                                             sort_keys=True)
            try:
                test_case.fail("Unhealthy agent(s) found:\n"
                               f"{dead_agents_details}\n")
            except tobiko.FailureException:
                # Called while the failure is being handled; it is expected
                # to stop the retrying once limits have been reached
                attempt.check_limits()
                # retry because some Neutron agent could still be unavailable
                # after a disruption
                LOG.debug("Waiting for Neutron agents to get alive...\n"
                          f"{dead_agents_details}")
                continue

        LOG.debug(f"All {len(agents)} Neutron agents are alive.")
        return agents

View File

@ -9,7 +9,6 @@ from tobiko.tripleo import pacemaker
from tobiko.tripleo import processes
from tobiko.tripleo import containers
from tobiko.tripleo import nova
from tobiko.tripleo import neutron as neutron_ooo
from tobiko.tripleo import undercloud
from tobiko.tripleo import validations
@ -19,7 +18,7 @@ def overcloud_health_checks(passive_checks_only=False):
check_pacemaker_resources_health()
check_overcloud_processes_health()
nova.check_nova_services_health()
neutron_ooo.check_neutron_agents_health()
tests.test_neutron_agents_are_alive()
if not passive_checks_only:
# create a uniq stack
check_vm_create()

View File

@ -23,6 +23,7 @@ from tobiko import config
from tobiko.openstack import neutron
from tobiko.openstack import nova
from tobiko.openstack import stacks
from tobiko.openstack import tests
CONF = config.CONF
@ -190,3 +191,12 @@ class AgentTest(testtools.TestCase):
def test_skip_if_missing_agents_with_big_count(self):
    """Run ``test_skip_if_missing_agents`` requiring 1000000 agents.

    The helper is called with ``should_skip=True``.
    """
    huge_count = 1000000
    self.test_skip_if_missing_agents(count=huge_count, should_skip=True)
class NeutronAgentTest(testtools.TestCase):
    """Exercise the shared Neutron agents health check."""

    def test_neutron_agents_are_alive(self):
        """Run the health check and inspect the agents it returns."""
        agents = tests.test_neutron_agents_are_alive()
        alive_agents = agents.with_items(alive=True)
        # The check must return some agents, and at least one of them must
        # be reported as alive
        self.assertNotEqual([], agents)
        self.assertNotEqual([], alive_agents)

View File

@ -26,7 +26,6 @@ from tobiko import tripleo
from tobiko.tripleo import pacemaker
from tobiko.tripleo import services
from tobiko.tripleo import processes
from tobiko.tripleo import neutron
import tobiko
CONF = config.CONF
@ -144,14 +143,3 @@ class OvercloudProcessesTest(testtools.TestCase):
def test_overcloud_processes(self):
    """Assert ``basic_overcloud_processes_running`` is true.

    The attribute is read from a new ``OvercloudProcessesStatus``.
    """
    status = processes.OvercloudProcessesStatus()
    self.assertTrue(status.basic_overcloud_processes_running)
@tripleo.skip_if_missing_overcloud
class OvercloudNeutronAgentsTest(testtools.TestCase):
    """Assert that the Neutron agents are healthy.

    The check covers the agents across the overcloud nodes.
    """

    def test_neutron_agents_health(self):
        """Delegate to ``neutron.check_neutron_agents_health``."""
        neutron.check_neutron_agents_health()

View File

@ -0,0 +1,25 @@
# Copyright (c) 2020 Red Hat
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import testtools
from tobiko.openstack import tests
class NeutronAgentTest(testtools.TestCase):
    """Test case running the shared Neutron agents health check."""

    def test_agents_are_alive(self):
        """Call ``tests.test_neutron_agents_are_alive`` with its defaults."""
        # The value returned by the check is not used by this test
        tests.test_neutron_agents_are_alive()

View File

@ -0,0 +1,54 @@
# Copyright (c) 2020 Red Hat
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import mock
from tobiko.openstack import tests
from tobiko.tests.unit import openstack
class NeutronAgentTest(openstack.OpenstackTest):
    """Unit tests for ``tests.test_neutron_agents_are_alive``.

    The Neutron client is replaced by a mock, so each test only decides what
    ``list_agents`` returns.
    """

    def setUp(self):
        """Patch the Neutron client getter, keep the fake client, patch time.
        """
        super(NeutronAgentTest, self).setUp()
        get_neutron_client = self.patch_get_neutron_client()
        self.neutron_client = get_neutron_client.return_value
        self.patch_time()

    def patch_list_agents(self, *args, **kwargs):
        """Replace ``list_agents`` of the fake client with a ``MagicMock``.

        All arguments are forwarded to ``mock.MagicMock``.
        """
        self.neutron_client.list_agents = mock.MagicMock(*args, **kwargs)

    def test_neutron_agents_are_alive_when_healthy(self):
        """A single alive agent is returned unchanged by the check."""
        self.patch_list_agents(return_value=[{'alive': True}])
        agents = tests.test_neutron_agents_are_alive()
        self.assertEqual([{'alive': True}], agents)

    def test_neutron_agents_are_alive_when_no_agents(self):
        """An empty agent list makes the check fail with a fixed message."""
        self.patch_list_agents(return_value=[])
        ex = self.assertRaises(self.failureException,
                               tests.test_neutron_agents_are_alive)
        self.assertEqual('Neutron has no agents', str(ex))

    def test_neutron_agents_are_alive_when_unhealthy(self):
        """A dead agent makes the check fail listing the agent details."""
        self.patch_list_agents(return_value=[{'alive': False}])
        ex = self.assertRaises(self.failureException,
                               tests.test_neutron_agents_are_alive)
        # The details are produced by json.dumps(..., indent=4), so nested
        # levels are indented by 4 and 8 spaces in the expected message
        self.assertEqual("Unhealthy agent(s) found:\n"
                         "[\n"
                         "    {\n"
                         '        "alive": false\n'
                         "    }\n"
                         "]\n", str(ex))

View File

@ -1,39 +0,0 @@
from __future__ import absolute_import
import time
from neutronclient.common import exceptions as neutron_exc
from oslo_log import log
import tobiko
from tobiko.openstack import neutron
LOG = log.getLogger(__name__)
def check_neutron_agents_health(timeout=300, interval=5):
    """Fail when any Neutron agent is reported as not alive.

    The agent list is requested repeatedly while the Neutron server answers
    with ``ServiceUnavailable``, sleeping ``interval`` seconds between
    attempts, for at most ``timeout`` seconds.

    :param timeout: maximum time, in seconds, spent waiting for the server
    :param interval: pause, in seconds, between two attempts
    :raises: whatever ``tobiko.fail`` raises, either because the agent list
        could not be retrieved in time or because some agent is not alive
    """
    neutron_client = neutron.get_neutron_client()
    start = time.time()
    while time.time() - start < timeout:
        try:
            # get neutron agent list
            agents = neutron_client.list_agents()
        except neutron_exc.ServiceUnavailable:
            # retry in case neutron server was unavailable after disruption
            LOG.warning("neutron server was not available - retrying...")
            time.sleep(interval)
        else:
            LOG.info("neutron agents status retrieved")
            break
    else:
        # The loop ended without ever getting the agent list: report it
        # explicitly instead of hitting an unbound ``agents`` name below.
        # NOTE(review): relies on tobiko.fail raising -- confirm.
        tobiko.fail(
            'neutron server was still unavailable after {!s} seconds',
            timeout)

    failures = ['failed agent: {}\n\n'.format(agent)
                for agent in agents['agents']
                if not agent['alive']]
    if failures:
        tobiko.fail(
            'neutron agents are unhealthy:\n{!s}', '\n'.join(failures))
    else:
        LOG.info('All neutron agents are healthy!')