Add cold migration support

Add support for cold migration. You need to set up LXD by doing the following
in order to migrate a container from one host to another:

On each host:

lxc config set core.https_address [::]
lxc config set core.trust_password some-password

Then add every other host as a remote on each compute node:

lxc remote add <hostname> <hostname>

Signed-off-by: Chuck Short <chuck.short@canonical.com>
This commit is contained in:
Chuck Short
2015-09-19 20:37:17 -04:00
parent 698c7b19bc
commit adffbe2ea2
5 changed files with 136 additions and 42 deletions

View File

@@ -63,19 +63,17 @@ class LXDContainerConfig(object):
return config
def create_container(self, context, instance, image_meta, injected_files,
admin_password, network_info, block_device_info, rescue,
migrate):
admin_password, network_info, block_device_info, rescue):
LOG.debug('Creating container config')
container_config = self._create_container_config(context, instance, image_meta,
injected_files, admin_password, network_info,
block_device_info, rescue, migrate)
block_device_info, rescue)
return container_config
def _create_container_config(self, context, instance, image_meta, injected_files,
admin_password, network_info, block_device_info, rescue,
migrate):
admin_password, network_info, block_device_info, rescue):
name = instance.uuid
# Ensure the directory exists and is writable
@@ -233,7 +231,7 @@ class LXDContainerConfig(object):
def configure_container_net_device(self, instance, vif):
LOG.debug('Configure LXD network device')
container_config = self._get_container_config(instance, vif)
container_config = self.get_container_config(instance)
container_network_config = self.vif_driver.get_config(instance, vif)
@@ -247,21 +245,46 @@ class LXDContainerConfig(object):
'type': 'nic'})
return container_config
def _get_container_config(self, instance, network_info):
def configure_container_migrate(self, instance, container_ws):
    """Build the container configuration used to migrate an instance.

    Fetches the instance's current LXD configuration and adds the
    migration ``source`` section built from the websocket details.

    :param instance: instance being migrated
    :param container_ws: mapping with the ``operation``, ``control`` and
                         ``fs`` values consumed by configure_lxd_ws()
    :returns: the container configuration including the ``source`` entry
    """
    LOG.debug('Creating container config for migration.')
    container_config = self.get_container_config(instance)
    # configure_lxd_ws() already stores the migration details under the
    # 'source' key and returns the whole configuration. Adding its return
    # value under 'source' a second time would at best be a no-op and at
    # worst nest the configuration inside itself.
    return self.configure_lxd_ws(container_config, container_ws)
def configure_lxd_ws(self, container_config, container_ws):
    """Add the migration ``source`` section to a container configuration.

    The section points at the operation websocket on ``CONF.my_ip``
    (port 8443) and carries the ``control`` and ``fs`` secrets taken from
    ``container_ws``.

    :param container_config: container configuration to extend
    :param container_ws: mapping with ``operation``, ``control`` and
                         ``fs`` entries
    :returns: whatever add_config() returns for the updated configuration
    """
    websocket_url = 'wss://%s:8443/1.0/operations/%s/websocket' % (
        CONF.my_ip, container_ws['operation'])
    websocket_secrets = {
        "control": container_ws['control'],
        "fs": container_ws['fs'],
    }
    migration_source = {
        'base-image': '',
        "mode": "pull",
        "operation": websocket_url,
        "secrets": websocket_secrets,
        "type": "migration",
    }
    return self.add_config(container_config, 'source', migration_source)
def get_container_config(self, instance):
    """Fetch an instance's LXD configuration as an updatable config dict.

    Queries the container client for the current configuration of the
    container named after ``instance.uuid`` on ``instance.host`` and
    copies its ``config`` and ``devices`` sections (passed through
    ``self._convert``) into a freshly initialised configuration.

    :param instance: instance whose container configuration is fetched
    :returns: configuration with ``name``, ``profiles``, ``config`` and
              ``devices`` populated
    """
    LOG.debug('Fetching LXD configuration')
    container_update = self._init_container_config()

    container_old = self.container_client.client(
        'config', instance=instance.uuid,
        host=instance.host)
    container_config = self._convert(container_old['config'])
    container_devices = self._convert(container_old['devices'])

    container_update['name'] = instance.uuid
    container_update['profiles'] = [str(CONF.lxd.default_profile)]
    container_update['config'] = container_config
    container_update['devices'] = container_devices

    # pprint.pprint() writes to stdout and returns None, so the log entry
    # would just read "None"; pformat() returns the formatted string.
    LOG.debug(pprint.pformat(container_update))
    return container_update
def _get_network_device(self, instance):

View File

@@ -48,7 +48,7 @@ class LXDContainerImage(object):
if self.container_client.client('alias_defined',
instance=instance.image_ref,
host=instance.host):
host=instance.node):
return
lxd_image = self._get_lxd_image(image_meta)

View File

@@ -17,15 +17,19 @@ import pprint
from nova import exception
from nova import i18n
from nova import utils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import excutils
from nclxd.nova.virt.lxd import container_client
from nclxd.nova.virt.lxd import container_config
from nclxd.nova.virt.lxd import container_utils
from nclxd.nova.virt.lxd import container_ops
_ = i18n._
_LE = i18n._LE
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
@@ -37,6 +41,7 @@ class LXDContainerMigrate(object):
self.virtapi = virtapi
self.container_config = container_config.LXDContainerConfig()
self.container_client = container_client.LXDContainerClient()
self.container_utils = container_utils.LXDContainerUtils()
self.container_ops = container_ops.LXDContainerOperations(self.virtapi)
def migrate_disk_and_power_off(self, context, instance, dest,
@@ -45,12 +50,45 @@ class LXDContainerMigrate(object):
retry_interval=0):
LOG.debug("migrate_disk_and_power_off called", instance=instance)
try:
self.container_utils.container_stop(instance.uuid, instance)
container_ws = self.container_utils.container_migrate(instance.uuid,
instance)
container_config = (
self.container_config.configure_container_migrate(
instance, container_ws))
utils.spawn(
self.container_utils.container_init,
container_config, instance, dest)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.exception(_LE('Failed to migration container: %(e)s'),
{'e': e}, instance=instance)
# disk_info is not used
return ""
disk_info = {}
return disk_info
def confirm_migration(self, migration, instance, network_info):
    """Confirm a migration and remove the container from the source host.

    Checks that the container is defined on the destination host and only
    then destroys the copy left on the source host.

    :param migration: mapping providing ``source_compute`` and
                      ``dest_compute``
    :param instance: instance that was migrated
    :param network_info: unused here
    :raises Exception: any failure is logged and re-raised
    """
    LOG.debug("confirm_migration called", instance=instance)
    try:
        src_host = migration['source_compute']
        dst_host = migration['dest_compute']
        if not self.container_client.client('defined',
                                            instance=instance.uuid,
                                            host=dst_host):
            # No exception is active here, so log with error() rather
            # than exception(). The source container is deliberately
            # kept: destroying it while the destination has no copy
            # would lose the instance.
            # NOTE(review): consider raising here so the caller can mark
            # the migration as failed.
            LOG.error(_LE('Failed to migrate host'))
            return

        # _LI is not defined in this module's header; go through the
        # imported i18n module instead.
        LOG.info(i18n._LI('Succesfuly migrated instnace %(instance)s'),
                 {'instance': instance.uuid}, instance=instance)
        # Clean up the source only after the destination is confirmed.
        self.container_utils.container_destroy(instance.uuid, src_host)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE('Failed to confirm migration: %(e)s'),
                          {'e': ex}, instance=instance)
def finish_revert_migration(self, context, instance, network_info,
block_device_info=None, power_on=True):
LOG.debug("finish_revert_migration called", instance=instance)
@@ -60,6 +98,19 @@ class LXDContainerMigrate(object):
block_device_info=None, power_on=True):
LOG.debug("finish_migration called", instance=instance)
try:
container_config = self.container_config.get_container_config(instance)
LOG.debug(pprint.pprint(container_config))
self.container_ops.start_container(container_config, instance, network_info,
need_vif_plugged=True)
LOG.info(_LI('Succesfuly migrated instnace %(instance)s on %(host)s'),
{'instance': instance.uuid, 'host': migration['dest_compute']},
instance=instance)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.exception(_LE('Failed to confirm migration: %(e)s'),
{'e': ex}, instance=instance)
def live_migration(self, context, instance_ref, dest, post_method,
recover_method, block_migration=False,
migrate_data=None):

View File

@@ -73,7 +73,7 @@ class LXDContainerOperations(object):
def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None,
need_vif_plugged=True, rescue=False, host=None):
need_vif_plugged=True, rescue=False):
msg = ('Spawning container '
'network_info=%(network_info)s '
'image_meta=%(image_meta)s '
@@ -95,8 +95,7 @@ class LXDContainerOperations(object):
try:
self.create_container(context, instance, image_meta, injected_files, admin_password,
network_info, block_device_info, rescue, need_vif_plugged, host,
migrate=None)
network_info, block_device_info, rescue, need_vif_plugged)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.exception(_LE('Upload image failed: %(e)s'),
@@ -106,26 +105,26 @@ class LXDContainerOperations(object):
LOG.debug('Creation took %s seconds to boot.' % total)
def create_container(self, context, instance, image_meta, injected_files, admin_password,
network_info, block_device_info, rescue, need_vif_plugged, host, migrate):
if not host:
host = instance.host
network_info, block_device_info, rescue, need_vif_plugged):
if not self.container_client.client('defined', instance=instance.uuid, host=instance.host):
container_config = self.container_config.create_container(context, instance, image_meta,
injected_files, admin_password, network_info,
block_device_info, rescue, migrate)
block_device_info, rescue)
eventlet.spawn(self.container_utils.container_init,
container_config,
instance,
host).wait()
instance.host).wait()
self._start_container(container_config, instance, network_info, need_vif_plugged)
self.start_container(container_config, instance, network_info, need_vif_plugged)
def _start_container(self, container_config, instance, network_info, need_vif_plugged):
def start_container(self, container_config, instance, network_info, need_vif_plugged):
LOG.debug('Starting instance')
if self.container_client.client('running', instance=instance.uuid,
host=instance.host):
return
timeout = CONF.vif_plugging_timeout
# check to see if neutron is ready before
# doing anything else
@@ -169,7 +168,7 @@ class LXDContainerOperations(object):
def destroy(self, context, instance, network_info, block_device_info=None,
destroy_disks=True, migrate_data=None):
self.container_utils.container_destroy(instance.uuid, instance)
self.container_utils.container_destroy(instance.uuid, instance.host)
self.cleanup(context, instance, network_info, block_device_info)
def power_off(self, instance, timeout=0, retry_interval=0):
@@ -200,7 +199,7 @@ class LXDContainerOperations(object):
self.container_utils.container_stop(instance.uuid, instance)
self._container_local_copy(instance)
self.container_utils.container_destroy(instance.uuid, instance)
self.container_utils.container_destroy(instance.uuid, instance.host)
self.spawn(context, instance, image_meta, injected_files=None,
admin_password=None, network_info=network_info, block_device_info=None,
@@ -237,7 +236,7 @@ class LXDContainerOperations(object):
}
self.container_utils.container_move(old_name, container_config, instance)
self.container_utils.container_destroy(instance.uuid, instance)
self.container_utils.container_destroy(instance.uuid, instance.host)
def cleanup(self, context, instance, network_info, block_device_info=None,
destroy_disks=True, migrate_data=None, destroy_vifs=True):
@@ -260,6 +259,8 @@ class LXDContainerOperations(object):
LOG.debug('in console output')
console_log = self.container_dir.get_console_path(instance.uuid)
if not os.path.exists(console_log):
return
uid = pwd.getpwuid(os.getuid()).pw_uid
utils.execute('chown', '%s:%s' % (uid, uid),
console_log, run_as_root=True)

View File

@@ -62,7 +62,7 @@ class LXDContainerUtils(object):
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to start container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def container_stop(self, instance_name, instance):
LOG.debug('Container stop')
@@ -94,26 +94,26 @@ class LXDContainerUtils(object):
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to reboot container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def container_destroy(self, instance_name, instance):
def container_destroy(self, instance_name, host):
LOG.debug('Container destroy')
try:
if not self.container_client.client('defined', instance=instance_name,
host=instance.host):
host=host):
return
(state, data) = self.container_client.client('destroy', instance=instance_name,
host=instance.host)
host=host)
self.container_client.client('wait',
oid=data.get('operation').split('/')[3],
host=instance.host)
host=host)
LOG.info(_LI('Succesfully destroyed container %s'),
instance.uuid, instance=instance)
instance_name)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to destroy container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance_name, 'reason': ex})
def container_pause(self, instance_name, instance):
LOG.debug('Container pause')
@@ -130,7 +130,7 @@ class LXDContainerUtils(object):
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to pause container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def conatainer_unpause(self, instance_name, instance):
LOG.debug('Container unpause')
@@ -163,7 +163,7 @@ class LXDContainerUtils(object):
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to rename container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex}, host=instance.host)
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def container_copy(self, container_config, instance):
LOG.debug('Copying container')
@@ -174,6 +174,8 @@ class LXDContainerUtils(object):
operation_id = data.get('operation').split('/')[3]
self.container_client.client('wait', oid=operation_id,
host=instance.host)
LOG.info(_LI('Succesfully copied container %s'),
instance.uuid, instance=instance)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to rename container %(instance): %(reason)s'),
@@ -189,11 +191,25 @@ class LXDContainerUtils(object):
operation_id = data.get('operation').split('/')[3]
self.container_client.client('wait', oid=operation_id,
host=instance.host)
LOG.info(_LI('Succesfully renamed container %s'),
instance.uuid, instance=instance)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to rename container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def container_migrate(self, instance_name, instance):
    """Ask the container client to start migrating a container.

    :param instance_name: name of the container to migrate
    :param instance: instance object; ``instance.host`` selects the host
                     the request is sent to
    :returns: the result of the client's ``migrate`` call
    :raises Exception: any client failure is logged and re-raised
    """
    LOG.debug('Migrate contianer')
    try:
        migrate_result = self.container_client.client(
            'migrate',
            instance=instance_name,
            host=instance.host)
        # Log before returning; previously this statement sat after the
        # return and could never run.
        LOG.info(_LI('Succesfully migrated container %s'),
                 instance.uuid, instance=instance)
        return migrate_result
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # The original format string used '%(instance):' (missing the
            # 's' conversion) and described the failure as a rename.
            LOG.error(_LE('Failed to migrate container %(instance)s: '
                          '%(reason)s'),
                      {'instance': instance_name, 'reason': ex},
                      instance=instance)
def container_init(self, container_config, instance, host):
LOG.debug('Initializing container')
@@ -204,19 +220,22 @@ class LXDContainerUtils(object):
operation_id = data.get('operation').split('/')[3]
self.container_client.client('wait',
oid=operation_id,
host=instance.host)
host=host)
LOG.info(_LI('Succesfully created container %s'),
instance.uuid, instance=instance)
except Exception as ex:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Failed to create container %(instance)s: %(reason)s'),
{'instance': instance.uuid, 'reason': ex})
{'instance': instance.uuid, 'reason': ex}, instance=instance)
def _wait_for_state(self, operation_id, instance, power_state, host=None):
if not host:
host = instance.host
def _wait_for_state(self, operation_id, instance, power_state):
instance.refresh()
(state, data) = self.container_client.client('operation_info',
oid=operation_id,
host=instance.host)
host=host)
status_code = data['metadata']['status_code']
if status_code in [200, 202]:
LOG.debug('')