From 72575889c834b9e93923102e4f5ee82e1031276f Mon Sep 17 00:00:00 2001
From: Roman Safronov
Date: Thu, 10 Feb 2022 12:26:43 +0200
Subject: [PATCH] Validate network downtime during live migration

This patch adds the ability to measure network downtime during
the live migration process.

A fixture has been added to start and stop a background pinger
process and also read status from it. The downtime measurement
has 0.2 seconds granularity. In order to reduce overall traffic
the ping payload size is set to the minimal value.

Change-Id: I83c6a5d49f5d4da05deb677907e5048ecdd2242b
---
 ...uring-live-migration-5e8305be270de680.yaml |  9 ++
 tempest/common/utils/net_downtime.py          | 96 +++++++++++++++++++
 tempest/config.py                             |  6 ++
 .../test_network_advanced_server_ops.py       | 19 ++++
 4 files changed, 130 insertions(+)
 create mode 100644 releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
 create mode 100644 tempest/common/utils/net_downtime.py

diff --git a/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
new file mode 100644
index 0000000000..9f4abd12cc
--- /dev/null
+++ b/releasenotes/notes/measure-downtime-during-live-migration-5e8305be270de680.yaml
@@ -0,0 +1,9 @@
+---
+features:
+  - |
+    Added new module net_downtime including the fixture NetDowntimeMeter that
+    can be used to measure how long the connectivity with an IP is lost
+    during certain operations like a server live migration.
+    The configuration option allowed_network_downtime has been added with a
+    default value of 5.0 seconds, which would be the maximum time that
+    the connectivity downtime is expected to last.
diff --git a/tempest/common/utils/net_downtime.py b/tempest/common/utils/net_downtime.py
new file mode 100644
index 0000000000..9675ec83b8
--- /dev/null
+++ b/tempest/common/utils/net_downtime.py
@@ -0,0 +1,96 @@
+# Copyright 2022 OpenStack Foundation
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import signal
+import subprocess
+
+import fixtures
+
+from oslo_log import log
+
+
+LOG = log.getLogger(__name__)
+
+
+class NetDowntimeMeter(fixtures.Fixture):
+    """Measure connectivity loss to an IP with a background ``ping``.
+
+    The fixture starts ``ping`` when it is set up and terminates it on
+    cleanup. The downtime is estimated as the number of lost packets
+    multiplied by the ping interval.
+    """
+
+    def __init__(self, dest_ip, interval='0.2'):
+        """Store the ping target and interval.
+
+        :param dest_ip: IP address to ping.
+        :param interval: seconds between pings, passed to ``ping -i``.
+        """
+        super().__init__()
+        self.dest_ip = dest_ip
+        # Note: for intervals lower than 0.2 ping requires root privileges
+        self.interval = interval
+        self.ping_process = None
+
+    def _setUp(self):
+        """Start the pinger when the fixture is set up."""
+        self.start_background_pinger()
+
+    def start_background_pinger(self):
+        """Start the quiet background ping and register its cleanup."""
+        # -q prints only summaries, -s1 keeps the payload at one byte
+        cmd = ['ping', '-q', '-s1']
+        cmd.append('-i{}'.format(self.interval))
+        cmd.append(self.dest_ip)
+        LOG.debug("Starting background pinger to '{}' with interval {}".format(
+            self.dest_ip, self.interval))
+        self.ping_process = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        self.addCleanup(self.cleanup)
+
+    def cleanup(self):
+        """Stop the pinger, reap it and close its pipes."""
+        process, self.ping_process = self.ping_process, None
+        if process is None:
+            return
+        if process.poll() is None:
+            LOG.debug('Terminating background pinger with pid {}'.format(
+                process.pid))
+            process.terminate()
+        # communicate() waits for the child (no zombie is left behind) and
+        # closes the stdout/stderr pipes.
+        process.communicate()
+
+    def get_downtime(self):
+        """Return the downtime in seconds measured so far.
+
+        :returns: lost packets multiplied by the interval, or None when
+            the pinger is not running or its status cannot be parsed.
+        """
+        if self.ping_process is None:
+            LOG.warning('Background pinger is not running')
+            return None
+        # On SIGQUIT ping writes a one-line status report to stderr
+        self.ping_process.send_signal(signal.SIGQUIT)
+        # Example of the expected output:
+        # 264/274 packets, 3% loss
+        output = self.ping_process.stderr.readline().strip().decode('utf-8')
+        fields = output.split()
+        counters = fields[0].split('/') if fields else []
+        if len(counters) == 2 and all(c.isdigit() for c in counters):
+            succ, total = (int(c) for c in counters)
+            return (total - succ) * float(self.interval)
+        LOG.warning('Unexpected output obtained from the pinger: %s',
+                    output)
+        return None
diff --git a/tempest/config.py b/tempest/config.py
index ebde421a06..4098f32c5c 100644
--- a/tempest/config.py
+++ b/tempest/config.py
@@ -965,6 +965,12 @@ ValidationGroup = [
                default='ecdsa',
                help='Type of key to use for ssh connections. '
                     'Valid types are rsa, ecdsa'),
+    cfg.FloatOpt('allowed_network_downtime',
+                 default=5.0,
+                 help="Allowed VM network connection downtime during live "
+                      "migration, in seconds. "
+                      "When the measured downtime exceeds this value, an "
+                      "exception is raised."),
 ]
 
 volume_group = cfg.OptGroup(name='volume',
diff --git a/tempest/scenario/test_network_advanced_server_ops.py b/tempest/scenario/test_network_advanced_server_ops.py
index b48ac3c864..1c00212a01 100644
--- a/tempest/scenario/test_network_advanced_server_ops.py
+++ b/tempest/scenario/test_network_advanced_server_ops.py
@@ -15,7 +15,9 @@
 
 import testtools
 
+from oslo_log import log
 from tempest.common import utils
+from tempest.common.utils import net_downtime
 from tempest.common import waiters
 from tempest import config
 from tempest.lib import decorators
@@ -23,6 +25,8 @@ from tempest.scenario import manager
 
 CONF = config.CONF
 
+LOG = log.getLogger(__name__)
+
 
 class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
     """Check VM connectivity after some advanced instance operations executed:
@@ -252,6 +256,11 @@ class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
         block_migration = (CONF.compute_feature_enabled.
                            block_migration_for_live_migration)
         old_host = self.get_host_for_server(server['id'])
+
+        downtime_meter = net_downtime.NetDowntimeMeter(
+            floating_ip['floating_ip_address'])
+        self.useFixture(downtime_meter)
+
         self.admin_servers_client.live_migrate_server(
             server['id'], host=None, block_migration=block_migration,
             disk_over_commit=False)
@@ -261,6 +270,16 @@ class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
         new_host = self.get_host_for_server(server['id'])
         self.assertNotEqual(old_host, new_host, 'Server did not migrate')
 
+        downtime = downtime_meter.get_downtime()
+        self.assertIsNotNone(downtime)
+        LOG.debug("Downtime seconds measured with downtime_meter = %r",
+                  downtime)
+        allowed_downtime = CONF.validation.allowed_network_downtime
+        self.assertLessEqual(
+            downtime, allowed_downtime,
+            "Downtime of {} seconds is higher than expected '{}'".format(
+                downtime, allowed_downtime))
+
         self._wait_server_status_and_check_network_connectivity(
             server, keypair, floating_ip)
 