Validate network downtime during live migration
This patch adds the ability to measure network downtime during the live migration process. A fixture has been added to start and stop a background pinger process and also to read its status. The downtime measurement has a granularity of 0.2 seconds. In order to reduce overall traffic, the ping payload size is set to the minimal value. Change-Id: I83c6a5d49f5d4da05deb677907e5048ecdd2242b
This commit is contained in:
parent
569c7a89f5
commit
72575889c8
@ -0,0 +1,9 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Added new module net_downtime including the fixture NetDowntimeMeter that
|
||||
can be used to measure how long the connectivity with an IP is lost
|
||||
during certain operations like a server live migration.
|
||||
The configuration option allowed_network_downtime has been added with a
|
||||
default value of 5.0 seconds, which would be the maximum time that
|
||||
the connectivity downtime is expected to last.
|
63
tempest/common/utils/net_downtime.py
Normal file
63
tempest/common/utils/net_downtime.py
Normal file
@ -0,0 +1,63 @@
|
||||
# Copyright 2022 OpenStack Foundation
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import signal
|
||||
import subprocess
|
||||
|
||||
import fixtures
|
||||
|
||||
from oslo_log import log
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class NetDowntimeMeter(fixtures.Fixture):
    """Fixture measuring how long connectivity with an IP address is lost.

    While the fixture is set up, a background ``ping`` process sends one
    minimal-payload probe to ``dest_ip`` every ``interval`` seconds.
    ``get_downtime`` asks that process for its running counters and turns
    the number of unanswered probes into seconds, so the measurement has a
    granularity of ``interval`` seconds.
    """

    # Seconds granted to the pinger to exit after SIGTERM before it is killed.
    _TERMINATE_TIMEOUT = 5

    def __init__(self, dest_ip, interval='0.2'):
        """Store the ping parameters; the pinger itself starts in _setUp.

        :param dest_ip: IP address to be pinged.
        :param interval: Seconds between two consecutive pings. It is
            passed verbatim to ``ping -i`` and converted with ``float()``
            when the downtime is computed.
        """
        self.dest_ip = dest_ip
        # Note: for intervals lower than 0.2 ping requires root privileges
        self.interval = interval
        self.ping_process = None

    def _setUp(self):
        self.start_background_pinger()

    def start_background_pinger(self):
        """Spawn the background ping process and register its cleanup."""
        # -q: print only summary/status lines, -s1: one byte payload to
        # keep the generated traffic as low as possible.
        cmd = ['ping', '-q', '-s1',
               '-i{}'.format(self.interval),
               self.dest_ip]
        LOG.debug("Starting background pinger to '%s' with interval %s",
                  self.dest_ip, self.interval)
        self.ping_process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        self.addCleanup(self.cleanup)

    def cleanup(self):
        """Stop the background pinger and release its resources.

        Calling it when no pinger was started, or more than once, is a
        no-op.
        """
        process = self.ping_process
        self.ping_process = None
        if process is None:
            return
        if process.poll() is None:
            LOG.debug('Terminating background pinger with pid %s',
                      process.pid)
            process.terminate()
        # terminate() only sends the signal: the child still has to be
        # waited for (otherwise it stays as a zombie) and both pipes have
        # to be closed. communicate() does all of that.
        try:
            process.communicate(timeout=self._TERMINATE_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.kill()
            process.communicate()

    def get_downtime(self):
        """Return the connectivity downtime measured so far, in seconds.

        The pinger receives SIGQUIT and the first line it then writes to
        stderr is parsed as ``<received>/<sent> packets, ...``.

        :returns: ``(sent - received) * interval`` as a float, or ``None``
            when the pinger is not running or its output cannot be parsed
            (a warning is logged in both cases).
        """
        if self.ping_process is None:
            LOG.warning('The background pinger is not running, the '
                        'downtime cannot be measured')
            return None

        self.ping_process.send_signal(signal.SIGQUIT)
        # Example of the expected output:
        # 264/274 packets, 3% loss
        output = self.ping_process.stderr.readline().strip().decode('utf-8')
        fields = output.split()
        counters = fields[0].split('/') if fields else []
        if len(counters) == 2:
            try:
                succ, total = (int(counter) for counter in counters)
            except ValueError:
                # Non numeric counters: handled as unexpected output below.
                pass
            else:
                return (total - succ) * float(self.interval)

        LOG.warning('Unexpected output obtained from the pinger: %s',
                    output)
        return None
|
@ -965,6 +965,12 @@ ValidationGroup = [
|
||||
default='ecdsa',
|
||||
help='Type of key to use for ssh connections. '
|
||||
'Valid types are rsa, ecdsa'),
|
||||
cfg.IntOpt('allowed_network_downtime',
|
||||
default=5.0,
|
||||
help="Allowed VM network connection downtime during live "
|
||||
"migration, in seconds. "
|
||||
"When the measured downtime exceeds this value, an "
|
||||
"exception is raised."),
|
||||
]
|
||||
|
||||
volume_group = cfg.OptGroup(name='volume',
|
||||
|
@ -15,7 +15,9 @@
|
||||
|
||||
import testtools
|
||||
|
||||
from oslo_log import log
|
||||
from tempest.common import utils
|
||||
from tempest.common.utils import net_downtime
|
||||
from tempest.common import waiters
|
||||
from tempest import config
|
||||
from tempest.lib import decorators
|
||||
@ -23,6 +25,8 @@ from tempest.scenario import manager
|
||||
|
||||
CONF = config.CONF
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
|
||||
"""Check VM connectivity after some advanced instance operations executed:
|
||||
@ -252,6 +256,11 @@ class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
|
||||
block_migration = (CONF.compute_feature_enabled.
|
||||
block_migration_for_live_migration)
|
||||
old_host = self.get_host_for_server(server['id'])
|
||||
|
||||
downtime_meter = net_downtime.NetDowntimeMeter(
|
||||
floating_ip['floating_ip_address'])
|
||||
self.useFixture(downtime_meter)
|
||||
|
||||
self.admin_servers_client.live_migrate_server(
|
||||
server['id'], host=None, block_migration=block_migration,
|
||||
disk_over_commit=False)
|
||||
@ -261,6 +270,16 @@ class TestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
|
||||
new_host = self.get_host_for_server(server['id'])
|
||||
self.assertNotEqual(old_host, new_host, 'Server did not migrate')
|
||||
|
||||
downtime = downtime_meter.get_downtime()
|
||||
self.assertIsNotNone(downtime)
|
||||
LOG.debug("Downtime seconds measured with downtime_meter = %r",
|
||||
downtime)
|
||||
allowed_downtime = CONF.validation.allowed_network_downtime
|
||||
self.assertLess(
|
||||
downtime, allowed_downtime,
|
||||
"Downtime of {} seconds is higher than expected '{}'".format(
|
||||
downtime, allowed_downtime))
|
||||
|
||||
self._wait_server_status_and_check_network_connectivity(
|
||||
server, keypair, floating_ip)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user