Measure metadata downtime during live-migration
This patch extends test_server_connectivity_live_migration with a verification of the downtime that affects the metadata service during a live migration. A script runs in the background on the VM instance while the instance is live-migrated. Once the live migration completes, the script output reports the duration of the metadata outage, and the test compares that duration with a configured threshold.

Change-Id: Id1c58a2fec14ff268c22efc3551fa36f3480f1a0
commit b530c1ffb7 (parent 0e599c0d1f)
tempest/common/utils/net_downtime.py
@@ -22,6 +22,32 @@ from oslo_log import log
 
 LOG = log.getLogger(__name__)
 
+PASSED = 'PASSED'
+FAILED = 'FAILED'
+METADATA_SCRIPT_PATH = '/tmp/metadata_meter_script.sh'
+METADATA_RESULTS_PATH = '/tmp/metadata_meter.log'
+METADATA_PID_PATH = '/tmp/metadata_meter.pid'
+
+# /proc/uptime is used because it includes two decimals in cirros, while
+# `date +%s.%N` does not work in cirros (min granularity is seconds)
+METADATA_SCRIPT = """#!/bin/sh
+echo $$ > %(metadata_meter_pidfile)s
+old_time=$(cut -d" " -f1 /proc/uptime)
+while true; do
+    curl http://169.254.169.254/latest/meta-data/hostname 2>/dev/null | \
+        grep -q `hostname`
+    result=$?
+    new_time=$(cut -d" " -f1 /proc/uptime)
+    runtime=$(awk -v new=$new_time -v old=$old_time "BEGIN {print new-old}")
+    old_time=$new_time
+    if [ $result -eq 0 ]; then
+        echo "PASSED $runtime"
+    else
+        echo "FAILED $runtime"
+    fi
+    sleep %(interval)s
+done
+"""
+
 
 class NetDowntimeMeter(fixtures.Fixture):
     def __init__(self, dest_ip, interval='0.2'):
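A note on the clock choice in the script above: on a cirros guest, /proc/uptime reports the uptime with two decimals, while the BusyBox date applet cannot print sub-second timestamps, so reading the first field of /proc/uptime is the simplest sub-second clock available. Illustrative output, not from a real run:

    $ cut -d" " -f1 /proc/uptime
    2895.06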
@@ -61,3 +87,63 @@ class NetDowntimeMeter(fixtures.Fixture):
         else:
             LOG.warning('Unexpected output obtained from the pinger: %s',
                         output)
+
+
+class MetadataDowntimeMeter(fixtures.Fixture):
+    def __init__(self, ssh_client,
+                 interval='0.2', script_path=METADATA_SCRIPT_PATH,
+                 output_path=METADATA_RESULTS_PATH,
+                 pidfile_path=METADATA_PID_PATH):
+        self.ssh_client = ssh_client
+        self.interval = interval
+        self.script_path = script_path
+        self.output_path = output_path
+        self.pidfile_path = pidfile_path
+        self.pid = None
+
+    def _setUp(self):
+        self.addCleanup(self.cleanup)
+        self.upload_metadata_script()
+        self.run_metadata_script()
+
+    def upload_metadata_script(self):
+        metadata_script = METADATA_SCRIPT % {
+            'metadata_meter_pidfile': self.pidfile_path,
+            'interval': self.interval}
+        echo_cmd = "echo '{}' > {}".format(
+            metadata_script, self.script_path)
+        chmod_cmd = 'chmod +x {}'.format(self.script_path)
+        self.ssh_client.exec_command(';'.join((echo_cmd, chmod_cmd)))
+        LOG.debug('script created: %s', self.script_path)
+        LOG.debug(self.ssh_client.exec_command(
+            'cat {}'.format(self.script_path)))
+
+    def run_metadata_script(self):
+        self.ssh_client.exec_command('{} > {} &'.format(self.script_path,
+                                                        self.output_path))
+        self.pid = self.ssh_client.exec_command(
+            'cat {}'.format(self.pidfile_path)).strip()
+        LOG.debug('running metadata downtime meter script in background with '
+                  'PID = %s', self.pid)
+
+    def get_results(self):
+        output = self.ssh_client.exec_command(
+            'cat {}'.format(self.output_path))
+        results = {}
+        results['successes'] = output.count(PASSED)
+        results['failures'] = output.count(FAILED)
+        downtime = {PASSED: 0.0, FAILED: 0.0}
+        for line in output.splitlines():
+            key, value = line.strip().split()
+            downtime[key] += float(value)
+
+        results['downtime'] = downtime
+        LOG.debug('metadata downtime meter results: %r', results)
+        return results
+
+    def cleanup(self):
+        if self.pid:
+            self.ssh_client.exec_command('kill {}'.format(self.pid))
+            LOG.debug('killed metadata downtime script with PID %s', self.pid)
+        else:
+            LOG.debug('No metadata downtime script found')
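To make the parsing in get_results() concrete: the meter log contains one PASSED/FAILED line per probe, each carrying the seconds elapsed since the previous probe. With an illustrative log such as

    PASSED 0.27
    PASSED 0.26
    FAILED 0.31
    FAILED 0.29
    PASSED 0.26

get_results() returns {'successes': 3, 'failures': 2, 'downtime': {'PASSED': 0.79, 'FAILED': 0.60}}, and the cumulative 'FAILED' time is what the scenario test below compares against the configured threshold.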
tempest/config.py
@@ -964,6 +964,12 @@ ValidationGroup = [
                      "migration, in seconds. "
                      "When the measured downtime exceeds this value, an "
                      "exception is raised."),
+    cfg.FloatOpt('allowed_metadata_downtime',
+                 default=6.0,
+                 help="Allowed VM metadata connection downtime during live "
+                      "migration, in seconds. "
+                      "When the measured downtime exceeds this value, an "
+                      "exception is raised."),
 ]
 
 
 volume_group = cfg.OptGroup(name='volume',
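Since the new option is registered in ValidationGroup (read as CONF.validation.allowed_metadata_downtime in the test below), a deployment can override the default in tempest.conf; a minimal sketch, with the 6.0 value simply the default made explicit:

    [validation]
    allowed_metadata_downtime = 6.0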
tempest/scenario/test_network_advanced_server_ops.py
@@ -17,6 +17,7 @@ import testtools
 
 from oslo_log import log
 from tempest.common import utils
+from tempest.common.utils.linux import remote_client
 from tempest.common.utils import net_downtime
 from tempest.common import waiters
 from tempest import config
@@ -189,6 +190,12 @@ class BaseTestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
                 floating_ip['floating_ip_address'])
             self.useFixture(downtime_meter)
 
+            metadata_downtime_meter = net_downtime.MetadataDowntimeMeter(
+                remote_client.RemoteClient(floating_ip['floating_ip_address'],
+                                           CONF.validation.image_ssh_user,
+                                           pkey=keypair['private_key']))
+            self.useFixture(metadata_downtime_meter)
+
         migration_kwargs = {'host': None, 'block_migration': block_migration}
 
         # check if microversion is less than 2.25 because of
@@ -230,6 +237,18 @@ class BaseTestNetworkAdvancedServerOps(manager.NetworkScenarioTest):
                 "Downtime of {} seconds is higher than expected '{}'".format(
                     downtime, allowed_downtime))
 
+            metadata_downtime_results = metadata_downtime_meter.get_results()
+            self.assertGreater(metadata_downtime_results['successes'], 0)
+            LOG.debug("Metadata Downtime seconds measured = %r",
+                      metadata_downtime_results['downtime'])
+            allowed_metadata_downtime = CONF.validation.allowed_metadata_downtime
+            metadata_downtime_failed = \
+                metadata_downtime_results['downtime']['FAILED']
+            self.assertLessEqual(
+                metadata_downtime_failed, allowed_metadata_downtime,
+                "Metadata downtime: {} seconds is higher than expected: {}".format(
+                    metadata_downtime_failed, allowed_metadata_downtime))
+
     def _test_server_connectivity_cold_migration_revert(self, source_host=None,
                                                         dest_host=None):
         keypair = self.create_keypair(client=self.keypairs_client)
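With these changes applied, the metadata check runs as part of the existing live-migration scenario; one way to exercise it, assuming a multi-node deployment with live migration enabled:

    tempest run --regex test_server_connectivity_live_migration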