libvirt: support management of downtime during migration

Currently live migration runs with the default maximum downtime
setting defined by QEMU. This is often inadequate to allow
migration of large VMs to ever complete. Rather than trying to
invent a new policy for changing downtime in OpenStack, copy
the existing logic that has been successfully battle-tested by
the oVirt project in VDSM.

Note that setting the downtime step delay based on guest RAM size
is an inexact science, as RAM size is only one factor influencing
the success of migration. Just as important is the rate at which
the guest dirties data, but that depends on the guest workload,
which Nova has no visibility into. The bottleneck is the network,
which needs to be able to keep up with the rate at which the
guest dirties data. The greater the overall RAM size, the more
time is required to transfer the total guest memory. So for
larger guests, we need to allow more time for migration to
attempt to complete before increasing the max downtime. Scaling
the downtime step delay according to the overall guest RAM size
is a reasonable, albeit not foolproof, way to tune migration and
increase its chances of success.
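
To give a rough sense of the scaling with the defaults introduced
by this change: the step delay defaults to 75 seconds per GiB of
guest RAM, so an 8 GiB guest would wait 8 * 75 = 600 seconds
between each increase of the downtime limit, while a guest at the
2 GiB lower bound would wait only 150 seconds.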

This adds three host-level config parameters which admins can
use to control the base downtime value and the rate at which
the downtime limit is allowed to increase during migration.
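
As a rough standalone sketch of the step calculation this change
introduces (illustrative only: the function name below is made up
for the example, the real implementation is
LibvirtDriver._migration_downtime_steps in the diff that follows,
and the defaults mirror the new nova.conf options):

    def downtime_steps(data_gb, max_downtime=500, steps=10, delay=75):
        # max_downtime mirrors live_migration_downtime (milliseconds),
        # steps mirrors live_migration_downtime_steps, and delay
        # mirrors live_migration_downtime_delay (seconds per GiB).
        delay = int(delay * data_gb)
        offset = max_downtime / float(steps + 1)
        base = (max_downtime - offset) ** (1 / float(steps))
        for i in range(steps + 1):
            # (elapsed seconds at which to apply, downtime in ms)
            yield (int(delay * i), int(offset + base ** i))

    # e.g. list(downtime_steps(3.0)) gives 11 (elapsed, downtime) pairs
    # spaced 225 seconds apart, rising from ~46ms towards the 500ms max.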

Related-bug: #1429220
DocImpact: three new libvirt configuration parameters in
           nova.conf allow the administrator to control
           the maximum permitted downtime for migration,
           making migration more likely to complete for
           large VMs.
Change-Id: I1992ffe9d3b2ff8d436cf1c419af9a238a8fecd8
Author: Daniel P. Berrange
Date:   2015-03-06 16:12:28 +00:00
commit 07c7e5caf2 (parent bccd178461)
3 changed files with 223 additions and 12 deletions

@@ -591,6 +591,9 @@ class Domain(object):
                 error_code=VIR_ERR_INTERNAL_ERROR,
                 error_domain=VIR_FROM_QEMU)

+    def migrateSetMaxDowntime(self, downtime):
+        pass
+
     def attachDevice(self, xml):
         disk_info = _parse_disk_info(etree.fromstring(xml))
         disk_info['_attached'] = True

@@ -6225,6 +6225,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
         self.assertFalse(mock_exist.called)
         self.assertFalse(mock_shutil.called)

+    @mock.patch.object(time, "time")
     @mock.patch.object(time, "sleep",
                        side_effect=lambda x: eventlet.sleep(0))
     @mock.patch.object(host.DomainJobInfo, "for_domain")
@@ -6232,11 +6233,13 @@
     @mock.patch.object(fakelibvirt.Connection, "_mark_running")
     def _test_live_migration_monitoring(self,
                                         job_info_records,
+                                        time_records,
                                         expect_success,
                                         mock_running,
                                         mock_save,
                                         mock_job_info,
-                                        mock_sleep):
+                                        mock_sleep,
+                                        mock_time):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
         instance = objects.Instance(**self.test_instance)
         dom = fakelibvirt.Domain(drvr._get_connection(), "<domain/>", True)
@@ -6245,17 +6248,29 @@
         def fake_job_info(hostself):
             while True:
                 self.assertTrue(len(job_info_records) > 0)
-                rec = job_info_records.pop()
+                rec = job_info_records.pop(0)

                 if type(rec) == str:
                     if rec == "thread-finish":
                         finish_event.send()
                     elif rec == "domain-stop":
                         dom.destroy()
                 else:
+                    if len(time_records) > 0:
+                        time_records.pop(0)
                     return rec

+        def fake_time():
+            if len(time_records) > 0:
+                return time_records[0]
+            else:
+                return int(
+                    datetime.datetime(2001, 1, 20, 20, 1, 0)
+                    .strftime('%s'))
+
         mock_job_info.side_effect = fake_job_info
+        mock_time.side_effect = fake_time

         dest = mock.sentinel.migrate_dest
         migrate_data = mock.sentinel.migrate_data
@@ -6299,7 +6314,7 @@
                 type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
         ]

-        self._test_live_migration_monitoring(domain_info_records, True)
+        self._test_live_migration_monitoring(domain_info_records, [], True)

     def test_live_migration_monitor_success_race(self):
         # A normalish sequence but we're too slow to see the
@@ -6319,7 +6334,7 @@
                 type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
         ]

-        self._test_live_migration_monitoring(domain_info_records, True)
+        self._test_live_migration_monitoring(domain_info_records, [], True)

     def test_live_migration_monitor_failed(self):
         # A failed sequence where we see all the expected events
@@ -6337,7 +6352,7 @@
                 type=fakelibvirt.VIR_DOMAIN_JOB_FAILED),
         ]

-        self._test_live_migration_monitoring(domain_info_records, False)
+        self._test_live_migration_monitoring(domain_info_records, [], False)

     def test_live_migration_monitor_failed_race(self):
         # A failed sequence where we are too slow to see the
@@ -6356,7 +6371,7 @@
                 type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
         ]

-        self._test_live_migration_monitoring(domain_info_records, False)
+        self._test_live_migration_monitoring(domain_info_records, [], False)

     def test_live_migration_monitor_cancelled(self):
         # A cancelled sequence where we see all the events
@@ -6375,7 +6390,78 @@
                 type=fakelibvirt.VIR_DOMAIN_JOB_CANCELLED),
         ]

-        self._test_live_migration_monitoring(domain_info_records, False)
+        self._test_live_migration_monitoring(domain_info_records, [], False)
+
+    @mock.patch.object(fakelibvirt.virDomain, "migrateSetMaxDowntime")
+    @mock.patch.object(libvirt_driver.LibvirtDriver,
+                       "_migration_downtime_steps")
+    def test_live_migration_monitor_downtime(self, mock_downtime_steps,
+                                             mock_set_downtime):
+        # We've setup 4 fake downtime steps - first value is the
+        # time delay, second is the downtime value
+        downtime_steps = [
+            (90, 10),
+            (180, 50),
+            (270, 200),
+            (500, 300),
+        ]
+        mock_downtime_steps.return_value = downtime_steps
+
+        # Each one of these fake times is used for time.time()
+        # when a new domain_info_records entry is consumed.
+        # Times are chosen so that only the first 3 downtime
+        # steps are needed.
+        fake_times = [0, 1, 30, 95, 150, 200, 300]
+
+        # A normal sequence where see all the normal job states
+        domain_info_records = [
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
+            "thread-finish",
+            "domain-stop",
+            host.DomainJobInfo(
+                type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
+        ]
+
+        self._test_live_migration_monitoring(domain_info_records,
+                                             fake_times, True)
+
+        mock_set_downtime.assert_has_calls([mock.call(10),
+                                            mock.call(50),
+                                            mock.call(200)])
+
+    def test_live_migration_downtime_steps(self):
+        self.flags(live_migration_downtime=400, group='libvirt')
+        self.flags(live_migration_downtime_steps=10, group='libvirt')
+        self.flags(live_migration_downtime_delay=30, group='libvirt')
+
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+
+        steps = drvr._migration_downtime_steps(3.0)
+
+        self.assertEqual([
+            (0, 37),
+            (90, 38),
+            (180, 39),
+            (270, 42),
+            (360, 46),
+            (450, 55),
+            (540, 70),
+            (630, 98),
+            (720, 148),
+            (810, 238),
+            (900, 400),
+        ], list(steps))
+
     @mock.patch.object(utils, "spawn")
     @mock.patch.object(libvirt_driver.LibvirtDriver, "_live_migration_monitor")

@@ -112,6 +112,13 @@ libvirt = None

 LOG = logging.getLogger(__name__)

+# Downtime period in milliseconds
+LIVE_MIGRATION_DOWNTIME_MIN = 100
+# Step count
+LIVE_MIGRATION_DOWNTIME_STEPS_MIN = 3
+# Delay in seconds
+LIVE_MIGRATION_DOWNTIME_DELAY_MIN = 10
+
 libvirt_opts = [
     cfg.StrOpt('rescue_image_id',
                help='Rescue ami image. This will not be used if an image id '
@@ -160,6 +167,23 @@ libvirt_opts = [
     cfg.IntOpt('live_migration_bandwidth',
                default=0,
                help='Maximum bandwidth to be used during migration, in Mbps'),
+    cfg.IntOpt('live_migration_downtime',
+               default=500,
+               help='Maximum permitted downtime, in milliseconds, for live '
+                    'migration switchover. Will be rounded up to a minimum '
+                    'of %dms. Use a large value if guest liveness is '
+                    'unimportant.' % LIVE_MIGRATION_DOWNTIME_MIN),
+    cfg.IntOpt('live_migration_downtime_steps',
+               default=10,
+               help='Number of incremental steps to reach max downtime value. '
+                    'Will be rounded up to a minimum of %d steps' %
+                    LIVE_MIGRATION_DOWNTIME_STEPS_MIN),
+    cfg.IntOpt('live_migration_downtime_delay',
+               default=75,
+               help='Time to wait, in seconds, between each step increase '
+                    'of the migration downtime. Minimum delay is %d seconds. '
+                    'Value is per GiB of guest RAM, with lower bound of a '
+                    'minimum of 2 GiB' % LIVE_MIGRATION_DOWNTIME_DELAY_MIN),
     cfg.StrOpt('snapshot_image_format',
                choices=('raw', 'qcow2', 'vmdk', 'vdi'),
                help='Snapshot image format. Defaults to same as source image'),
@@ -5583,10 +5607,85 @@ class LibvirtDriver(driver.ComputeDriver):
         LOG.debug("Migration operation thread has finished",
                   instance=instance)

+    @staticmethod
+    def _migration_downtime_steps(data_gb):
+        '''Calculate downtime value steps and time between increases.
+
+        :param data_gb: total GB of RAM and disk to transfer
+
+        This looks at the total downtime steps and upper bound
+        downtime value and uses an exponential backoff. So initially
+        max downtime is increased by small amounts, and as time goes
+        by it is increased by ever larger amounts
+
+        For example, with 10 steps, 30 second step delay, 3 GB
+        of RAM and 400ms target maximum downtime, the downtime will
+        be increased every 90 seconds in the following progression:
+
+        - 0 seconds -> set downtime to 37ms
+        - 90 seconds -> set downtime to 38ms
+        - 180 seconds -> set downtime to 39ms
+        - 270 seconds -> set downtime to 42ms
+        - 360 seconds -> set downtime to 46ms
+        - 450 seconds -> set downtime to 55ms
+        - 540 seconds -> set downtime to 70ms
+        - 630 seconds -> set downtime to 98ms
+        - 720 seconds -> set downtime to 148ms
+        - 810 seconds -> set downtime to 238ms
+        - 900 seconds -> set downtime to 400ms
+
+        This allows the guest a good chance to complete migration
+        with a small downtime value.
+        '''
+        downtime = CONF.libvirt.live_migration_downtime
+        steps = CONF.libvirt.live_migration_downtime_steps
+        delay = CONF.libvirt.live_migration_downtime_delay
+
+        if downtime < LIVE_MIGRATION_DOWNTIME_MIN:
+            downtime = LIVE_MIGRATION_DOWNTIME_MIN
+        if steps < LIVE_MIGRATION_DOWNTIME_STEPS_MIN:
+            steps = LIVE_MIGRATION_DOWNTIME_STEPS_MIN
+        if delay < LIVE_MIGRATION_DOWNTIME_DELAY_MIN:
+            delay = LIVE_MIGRATION_DOWNTIME_DELAY_MIN
+        delay = int(delay * data_gb)
+
+        offset = downtime / float(steps + 1)
+        base = (downtime - offset) ** (1 / float(steps))
+
+        for i in range(steps + 1):
+            yield (int(delay * i), int(offset + base ** i))
+
+    def _live_migration_data_gb(self, instance):
+        '''Calculate total amount of data to be transferred
+
+        :param instance: the nova.objects.Instance being migrated
+
+        Calculates the total amount of data that needs to be
+        transferred during the live migration. The actual
+        amount copied will be larger than this, due to the
+        guest OS continuing to dirty RAM while the migration
+        is taking place. So this value represents the minimal
+        data size possible.
+
+        :returns: data size to be copied in GB
+        '''
+
+        ram_gb = instance.flavor.memory_mb * units.Mi / units.Gi
+        if ram_gb < 2:
+            ram_gb = 2
+
+        # TODO(berrange) calculate size of any disks when doing
+        # a block migration
+
+        return ram_gb
+
     def _live_migration_monitor(self, context, instance, dest, post_method,
                                 recover_method, block_migration,
                                 migrate_data, dom, finish_event):
+        data_gb = self._live_migration_data_gb(instance)
+        downtime_steps = list(self._migration_downtime_steps(data_gb))
+
         n = 0
+        start = time.time()
         while True:
             info = host.DomainJobInfo.for_domain(dom)

@@ -5637,6 +5736,34 @@
                     LOG.debug("Migration not running yet",
                               instance=instance)
             elif info.type == libvirt.VIR_DOMAIN_JOB_UNBOUNDED:
+                # Migration is still running
+                #
+                # This is where we wire up calls to change live
+                # migration status. eg change max downtime, cancel
+                # the operation, change max bandwidth
+                now = time.time()
+                elapsed = now - start
+
+                # See if we need to increase the max downtime. We
+                # ignore failures, since we'd rather continue trying
+                # to migrate
+                if (len(downtime_steps) > 0 and
+                    elapsed > downtime_steps[0][0]):
+                    downtime = downtime_steps.pop(0)
+                    LOG.info(_LI("Increasing downtime to %(downtime)dms "
+                                 "after %(waittime)d sec elapsed time"),
+                             {"downtime": downtime[1],
+                              "waittime": downtime[0]},
+                             instance=instance)
+
+                    try:
+                        dom.migrateSetMaxDowntime(downtime[1])
+                    except libvirt.libvirtError as e:
+                        LOG.warn(
+                            _LW("Unable to increase max downtime to %(time)d"
+                                "ms: %(e)s"),
+                            {"time": downtime[1], "e": e}, instance=instance)
+
                 # We loop every 500ms, so don't log on every
                 # iteration to avoid spamming logs for long
                 # running migrations. Just once every 5 secs
@@ -5675,11 +5802,6 @@
                              "remaining_memory": info.memory_remaining,
                              "total_memory": info.memory_total}, instance=instance)

-                # Migration is still running
-                #
-                # This is where we'd wire up calls to change live
-                # migration status. eg change max downtime, cancel
-                # the operation, change max bandwidth
                 n = n + 1
             elif info.type == libvirt.VIR_DOMAIN_JOB_COMPLETED:
                 # Migration is all done