Add live migration support
story: 2005585 Task: #30774 Change-Id: I5dc6db643900a6bfcc427b4b9ee23b5557b091a4 Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
This commit is contained in:
parent
a232dda364
commit
46e13601a5
|
@ -164,7 +164,7 @@ def remove_session(session_id):
|
|||
downloads = _download_get_all(session, session_id)
|
||||
if downloads:
|
||||
for download in downloads:
|
||||
download.delete(download)
|
||||
session.delete(download)
|
||||
|
||||
hosts = _hosts_get(session, session_id)
|
||||
if hosts:
|
||||
|
|
|
@ -69,6 +69,12 @@ opts = [
|
|||
cfg.StrOpt('local_cache_dir',
|
||||
default="/tmp",
|
||||
help="Local cache directory"),
|
||||
cfg.StrOpt('live_migration_retries',
|
||||
default=5,
|
||||
help="Number of live migration retries"),
|
||||
cfg.StrOpt('live_migration_wait_time',
|
||||
default=600,
|
||||
help="How long to wait live migration to be done"),
|
||||
]
|
||||
|
||||
CONF.register_opts(opts)
|
||||
|
|
|
@ -565,8 +565,11 @@ class Workflow(BaseWorkflow):
|
|||
self.notify_action_done(project, instance)
|
||||
elif instance.action == 'OWN_ACTION':
|
||||
pass
|
||||
elif instance.action == 'LIVE_MIGRATE':
|
||||
if not self.live_migrate_server(instance):
|
||||
return False
|
||||
self.notify_action_done(project, instance)
|
||||
else:
|
||||
# TBD LIVE_MIGRATE not supported
|
||||
raise Exception('%s: instance %s action '
|
||||
'%s not supported' %
|
||||
(self.session_id, instance.instance_id,
|
||||
|
@ -576,7 +579,7 @@ class Workflow(BaseWorkflow):
|
|||
def _wait_host_empty(self, host):
|
||||
hid = self.nova.hypervisors.search(host)[0].id
|
||||
vcpus_used_last = 0
|
||||
# wait 4min to get host empty
|
||||
# wait 4min to get host empty
|
||||
for j in range(48):
|
||||
hvisor = self.nova.hypervisors.get(hid)
|
||||
vcpus_used = hvisor.__getattr__('vcpus_used')
|
||||
|
@ -592,6 +595,75 @@ class Workflow(BaseWorkflow):
|
|||
LOG.info('%s host still not empty' % host)
|
||||
return False
|
||||
|
||||
def live_migrate_server(self, instance):
    """Live migrate one server and poll Nova until the outcome is known.

    The server is looked up by ``instance.instance_id`` and a live
    migration is requested. The server is then re-read about once per
    second; ``instance.state`` and ``instance.host`` are updated on every
    poll.

    A migration whose status is 'error' is requested again, after a delay
    that grows with each retry, up to ``self.conf.live_migration_retries``
    times. Each new request restarts the wait counter.

    :param instance: object providing ``instance_id``; its ``state`` and
                     ``host`` attributes are overwritten here.
    :returns: True once the server reports a host other than the one it
              started on. False if the server goes to 'error' state, the
              retries run out, the wait time
              (``self.conf.live_migration_wait_time`` polls) is used up,
              or any exception is raised after the initial server lookup.
    """
    server_id = instance.instance_id
    server = self.nova.servers.get(server_id)
    instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
    orig_host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
    LOG.info('live_migrate_server %s state %s host %s' % (server_id,
                                                          instance.state,
                                                          orig_host))
    orig_vm_state = instance.state
    last_vm_state = orig_vm_state
    last_vm_status = str(server.__dict__.get('status'))
    last_migration_status = "active"
    try:
        # The options may be delivered as strings (they are registered
        # as string options). Comparing a string with the integer
        # counters below would never match, so the loop would neither
        # time out nor stop retrying. Convert once, up front.
        wait_time = int(self.conf.live_migration_wait_time)
        max_retries = int(self.conf.live_migration_retries)
        server.live_migrate()
        waited = 0
        migrate_retries = 0
        # '<' instead of '!=' so the loop always terminates.
        while waited < wait_time:
            time.sleep(1)
            server = self.nova.servers.get(server_id)
            host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
            vm_status = str(server.__dict__.get('status'))
            instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
            instance.host = host
            if vm_status != last_vm_status:
                LOG.info('instance %s status changed: %s' % (server_id,
                                                             vm_status))
            if instance.state == 'error':
                LOG.error('instance %s live migration failed'
                          % server_id)
                return False
            elif orig_vm_state != instance.state:
                # Log a state change once, not on every poll.
                if instance.state != last_vm_state:
                    LOG.info('instance %s state changed: %s' %
                             (server_id, instance.state))
            elif host != orig_host:
                LOG.info('instance %s live migrated to host %s' %
                         (server_id, host))
                return True
            last_vm_state = instance.state
            # NOTE(review): assumes the first listed migration is the
            # most recent one for this server - confirm against Nova.
            migrations = self.nova.migrations.list(
                instance_uuid=server_id)
            # An empty list (record not visible yet) is not a failure;
            # just poll again.
            if migrations:
                migration = migrations[0]
                if migration.status == 'error':
                    if migrate_retries >= max_retries:
                        LOG.error('instance %s live migration failed '
                                  'after %d retries' %
                                  (server_id, max_retries))
                        return False
                    # When live migrate fails it can fail fast after
                    # calling. Nova needs time to be ready for the next
                    # live migration, so wait before retrying, and wait
                    # longer on each retry to give the migration a
                    # better chance to finally go through.
                    time.sleep(2 * (migrate_retries + 5))
                    LOG.info('instance %s live migration failed, retry'
                             % server_id)
                    server.live_migrate()
                    waited = 0
                    migrate_retries = migrate_retries + 1
                elif migration.status != last_migration_status:
                    LOG.info('instance %s live migration status '
                             'changed: %s'
                             % (server_id, migration.status))
                last_migration_status = migration.status
            waited = waited + 1
            last_vm_status = vm_status
        LOG.error('instance %s live migration did not finish in %ss, '
                  'state: %s' % (server_id, waited, instance.state))
        # Report the timeout explicitly instead of returning None.
        return False
    except Exception as e:
        LOG.error('server %s live migration failed, Exception=%s' %
                  (server_id, e))
        return False
|
||||
|
||||
def migrate_server(self, instance):
|
||||
# TBD this method should be enhanced for errors and to have failed
|
||||
# instance back to state active instead of error
|
||||
|
@ -599,7 +671,8 @@ class Workflow(BaseWorkflow):
|
|||
server = self.nova.servers.get(server_id)
|
||||
instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
|
||||
orig_host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
|
||||
LOG.info('server %s state %s host %s' % (server_id, instance.state,
|
||||
LOG.info('migrate_server %s state %s host %s' % (server_id,
|
||||
instance.state,
|
||||
orig_host))
|
||||
last_vm_state = instance.state
|
||||
retry_migrate = 2
|
||||
|
|
Loading…
Reference in New Issue