Add cold migration support
Add support for cold migration. To migrate a container from one host to another, LXD must first be set up as follows. On each host, run: lxc config set core.https_address [::] ; lxc config set core.trust_password some-password. Then add the hosts you have to each compute node: lxc remote add <hostname> <hostname>. Signed-off-by: Chuck Short <chuck.short@canonical.com>
This commit is contained in:
@@ -63,19 +63,17 @@ class LXDContainerConfig(object):
|
||||
return config
|
||||
|
||||
def create_container(self, context, instance, image_meta, injected_files,
|
||||
admin_password, network_info, block_device_info, rescue,
|
||||
migrate):
|
||||
admin_password, network_info, block_device_info, rescue):
|
||||
LOG.debug('Creating container config')
|
||||
|
||||
container_config = self._create_container_config(context, instance, image_meta,
|
||||
injected_files, admin_password, network_info,
|
||||
block_device_info, rescue, migrate)
|
||||
block_device_info, rescue)
|
||||
|
||||
return container_config
|
||||
|
||||
def _create_container_config(self, context, instance, image_meta, injected_files,
|
||||
admin_password, network_info, block_device_info, rescue,
|
||||
migrate):
|
||||
admin_password, network_info, block_device_info, rescue):
|
||||
|
||||
name = instance.uuid
|
||||
# Ensure the directory exists and is writable
|
||||
@@ -233,7 +231,7 @@ class LXDContainerConfig(object):
|
||||
|
||||
def configure_container_net_device(self, instance, vif):
|
||||
LOG.debug('Configure LXD network device')
|
||||
container_config = self._get_container_config(instance, vif)
|
||||
container_config = self.get_container_config(instance)
|
||||
|
||||
container_network_config = self.vif_driver.get_config(instance, vif)
|
||||
|
||||
@@ -247,21 +245,46 @@ class LXDContainerConfig(object):
|
||||
'type': 'nic'})
|
||||
return container_config
|
||||
|
||||
def _get_container_config(self, instance, network_info):
|
||||
def configure_container_migrate(self, instance, container_ws):
    """Build the container definition used to receive a migrated container.

    Fetches the instance's current LXD configuration and adds the
    migration ``source`` section derived from ``container_ws``.

    :param instance: nova instance whose container is being migrated
    :param container_ws: migration details passed on to
        ``configure_lxd_ws`` (which reads its 'operation', 'control' and
        'fs' entries)
    :returns: the container configuration including the ``source`` section
    """
    LOG.debug('Creating container config for migration.')
    container_config = self.get_container_config(instance)

    # configure_lxd_ws() already calls add_config(..., 'source', ...) and
    # returns the resulting configuration.  The previous code fed that
    # return value into a second add_config(..., 'source', ...), i.e. it
    # tried to store the whole configuration under its own 'source' key.
    # NOTE(review): add_config is not visible here -- confirm that it
    # returns the updated configuration, as its use here implies.
    return self.configure_lxd_ws(container_config, container_ws)
|
||||
|
||||
def configure_lxd_ws(self, container_config, container_ws):
    """Add the migration ``source`` section to a container configuration.

    The section tells LXD to pull the container over the websocket of the
    operation named in ``container_ws``, served on port 8443 of
    ``CONF.my_ip``.

    :param container_config: container configuration to extend
    :param container_ws: mapping providing the 'operation', 'control' and
        'fs' values of the migration
    :returns: the value returned by ``add_config`` for the 'source' key
    """
    operation_url = ('wss://%s:8443/1.0/operations/%s/websocket'
                     % (CONF.my_ip, container_ws['operation']))

    # Secrets used for the control and filesystem channels.
    migration_secrets = {
        "control": container_ws['control'],
        "fs": container_ws['fs'],
    }
    migration_source = {
        'base-image': '',
        "mode": "pull",
        "operation": operation_url,
        "secrets": migration_secrets,
        "type": "migration",
    }
    return self.add_config(container_config, 'source', migration_source)
|
||||
|
||||
def get_container_config(self, instance):
    """Rebuild a container definition from the config LXD currently holds.

    Starts from ``_init_container_config()``, queries the container client
    for the existing 'config' of ``instance.uuid`` on ``instance.host`` and
    copies its converted ``config`` and ``devices`` sections into the new
    definition.

    :param instance: nova instance; ``uuid`` names the container and
        ``host`` selects the LXD host to query
    :returns: the populated container definition with 'name', 'profiles',
        'config' and 'devices' set
    """
    LOG.debug('Fetching LXD configuration')
    container_update = self._init_container_config()

    container_old = self.container_client.client(
        'config', instance=instance.uuid,
        host=instance.host)

    container_config = self._convert(container_old['config'])
    container_devices = self._convert(container_old['devices'])

    container_update['name'] = instance.uuid
    container_update['profiles'] = [str(CONF.lxd.default_profile)]
    container_update['config'] = container_config
    container_update['devices'] = container_devices

    # pprint.pprint() writes to stdout and returns None, so the log record
    # would only contain "None"; pformat() returns the formatted text.
    LOG.debug(pprint.pformat(container_update))

    return container_update
|
||||
|
||||
def _get_network_device(self, instance):
|
||||
|
||||
@@ -48,7 +48,7 @@ class LXDContainerImage(object):
|
||||
|
||||
if self.container_client.client('alias_defined',
|
||||
instance=instance.image_ref,
|
||||
host=instance.host):
|
||||
host=instance.node):
|
||||
return
|
||||
|
||||
lxd_image = self._get_lxd_image(image_meta)
|
||||
|
||||
@@ -17,15 +17,19 @@ import pprint
|
||||
|
||||
from nova import exception
|
||||
from nova import i18n
|
||||
from nova import utils
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import excutils
|
||||
|
||||
from nclxd.nova.virt.lxd import container_client
|
||||
from nclxd.nova.virt.lxd import container_config
|
||||
from nclxd.nova.virt.lxd import container_utils
|
||||
from nclxd.nova.virt.lxd import container_ops
|
||||
|
||||
_ = i18n._
|
||||
_LE = i18n._LE
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
@@ -37,6 +41,7 @@ class LXDContainerMigrate(object):
|
||||
self.virtapi = virtapi
|
||||
self.container_config = container_config.LXDContainerConfig()
|
||||
self.container_client = container_client.LXDContainerClient()
|
||||
self.container_utils = container_utils.LXDContainerUtils()
|
||||
self.container_ops = container_ops.LXDContainerOperations(self.virtapi)
|
||||
|
||||
def migrate_disk_and_power_off(self, context, instance, dest,
|
||||
@@ -45,12 +50,45 @@ class LXDContainerMigrate(object):
|
||||
retry_interval=0):
|
||||
LOG.debug("migrate_disk_and_power_off called", instance=instance)
|
||||
|
||||
try:
|
||||
self.container_utils.container_stop(instance.uuid, instance)
|
||||
|
||||
container_ws = self.container_utils.container_migrate(instance.uuid,
|
||||
instance)
|
||||
container_config = (
|
||||
self.container_config.configure_container_migrate(
|
||||
instance, container_ws))
|
||||
utils.spawn(
|
||||
self.container_utils.container_init,
|
||||
container_config, instance, dest)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.exception(_LE('Failed to migration container: %(e)s'),
|
||||
{'e': e}, instance=instance)
|
||||
|
||||
# disk_info is not used
|
||||
return ""
|
||||
disk_info = {}
|
||||
return disk_info
|
||||
|
||||
def confirm_migration(self, migration, instance, network_info):
    """Confirm a cold migration and remove the container from the source.

    Checks that the container is defined on the destination host and only
    then destroys the copy left on the source host.

    :param migration: mapping with 'source_compute' and 'dest_compute'
    :param instance: nova instance that was migrated
    :param network_info: not used by this method
    :raises nova.exception.NovaException: when the container is not
        defined on the destination host; any other failure is logged and
        re-raised unchanged
    """
    LOG.debug("confirm_migration called", instance=instance)

    try:
        src_host = migration['source_compute']
        dst_host = migration['dest_compute']

        if not self.container_client.client('defined',
                                            instance=instance.uuid,
                                            host=dst_host):
            # Previously this was only logged (with LOG.exception outside
            # an except block), the success message was still emitted and
            # the source container was destroyed regardless.
            raise exception.NovaException(
                _('Container %(instance)s is not defined on destination '
                  'host %(host)s') % {'instance': instance.uuid,
                                      'host': dst_host})

        # Destroy the source copy only once the destination is verified.
        # Doing this in a ``finally`` clause removed the only remaining
        # copy after a failed migration, and referenced ``src_host`` even
        # when the lookup above had raised before binding it.
        self.container_utils.container_destroy(instance.uuid, src_host)

        # ``_LI`` is not among the module-level aliases visible next to
        # ``_`` and ``_LE``, so the marker is taken from ``i18n`` directly.
        LOG.info(i18n._LI('Succesfuly migrated instnace %(instance)s'),
                 {'instance': instance.uuid}, instance=instance)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE('Failed to confirm migration: %(e)s'),
                          {'e': ex}, instance=instance)
|
||||
|
||||
def finish_revert_migration(self, context, instance, network_info,
|
||||
block_device_info=None, power_on=True):
|
||||
LOG.debug("finish_revert_migration called", instance=instance)
|
||||
@@ -60,6 +98,19 @@ class LXDContainerMigrate(object):
|
||||
block_device_info=None, power_on=True):
|
||||
LOG.debug("finish_migration called", instance=instance)
|
||||
|
||||
try:
|
||||
container_config = self.container_config.get_container_config(instance)
|
||||
LOG.debug(pprint.pprint(container_config))
|
||||
self.container_ops.start_container(container_config, instance, network_info,
|
||||
need_vif_plugged=True)
|
||||
LOG.info(_LI('Succesfuly migrated instnace %(instance)s on %(host)s'),
|
||||
{'instance': instance.uuid, 'host': migration['dest_compute']},
|
||||
instance=instance)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.exception(_LE('Failed to confirm migration: %(e)s'),
|
||||
{'e': ex}, instance=instance)
|
||||
|
||||
def live_migration(self, context, instance_ref, dest, post_method,
|
||||
recover_method, block_migration=False,
|
||||
migrate_data=None):
|
||||
|
||||
@@ -73,7 +73,7 @@ class LXDContainerOperations(object):
|
||||
|
||||
def spawn(self, context, instance, image_meta, injected_files,
|
||||
admin_password, network_info=None, block_device_info=None,
|
||||
need_vif_plugged=True, rescue=False, host=None):
|
||||
need_vif_plugged=True, rescue=False):
|
||||
msg = ('Spawning container '
|
||||
'network_info=%(network_info)s '
|
||||
'image_meta=%(image_meta)s '
|
||||
@@ -95,8 +95,7 @@ class LXDContainerOperations(object):
|
||||
|
||||
try:
|
||||
self.create_container(context, instance, image_meta, injected_files, admin_password,
|
||||
network_info, block_device_info, rescue, need_vif_plugged, host,
|
||||
migrate=None)
|
||||
network_info, block_device_info, rescue, need_vif_plugged)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.exception(_LE('Upload image failed: %(e)s'),
|
||||
@@ -106,26 +105,26 @@ class LXDContainerOperations(object):
|
||||
LOG.debug('Creation took %s seconds to boot.' % total)
|
||||
|
||||
def create_container(self, context, instance, image_meta, injected_files, admin_password,
|
||||
network_info, block_device_info, rescue, need_vif_plugged, host, migrate):
|
||||
|
||||
if not host:
|
||||
host = instance.host
|
||||
|
||||
network_info, block_device_info, rescue, need_vif_plugged):
|
||||
if not self.container_client.client('defined', instance=instance.uuid, host=instance.host):
|
||||
container_config = self.container_config.create_container(context, instance, image_meta,
|
||||
injected_files, admin_password, network_info,
|
||||
block_device_info, rescue, migrate)
|
||||
block_device_info, rescue)
|
||||
|
||||
eventlet.spawn(self.container_utils.container_init,
|
||||
container_config,
|
||||
instance,
|
||||
host).wait()
|
||||
instance.host).wait()
|
||||
|
||||
self._start_container(container_config, instance, network_info, need_vif_plugged)
|
||||
self.start_container(container_config, instance, network_info, need_vif_plugged)
|
||||
|
||||
def _start_container(self, container_config, instance, network_info, need_vif_plugged):
|
||||
def start_container(self, container_config, instance, network_info, need_vif_plugged):
|
||||
LOG.debug('Starting instance')
|
||||
|
||||
if self.container_client.client('running', instance=instance.uuid,
|
||||
host=instance.host):
|
||||
return
|
||||
|
||||
timeout = CONF.vif_plugging_timeout
|
||||
# check to see if neutron is ready before
|
||||
# doing anything else
|
||||
@@ -169,7 +168,7 @@ class LXDContainerOperations(object):
|
||||
|
||||
def destroy(self, context, instance, network_info, block_device_info=None,
|
||||
destroy_disks=True, migrate_data=None):
|
||||
self.container_utils.container_destroy(instance.uuid, instance)
|
||||
self.container_utils.container_destroy(instance.uuid, instance.host)
|
||||
self.cleanup(context, instance, network_info, block_device_info)
|
||||
|
||||
def power_off(self, instance, timeout=0, retry_interval=0):
|
||||
@@ -200,7 +199,7 @@ class LXDContainerOperations(object):
|
||||
|
||||
self.container_utils.container_stop(instance.uuid, instance)
|
||||
self._container_local_copy(instance)
|
||||
self.container_utils.container_destroy(instance.uuid, instance)
|
||||
self.container_utils.container_destroy(instance.uuid, instance.host)
|
||||
|
||||
self.spawn(context, instance, image_meta, injected_files=None,
|
||||
admin_password=None, network_info=network_info, block_device_info=None,
|
||||
@@ -237,7 +236,7 @@ class LXDContainerOperations(object):
|
||||
}
|
||||
|
||||
self.container_utils.container_move(old_name, container_config, instance)
|
||||
self.container_utils.container_destroy(instance.uuid, instance)
|
||||
self.container_utils.container_destroy(instance.uuid, instance.host)
|
||||
|
||||
def cleanup(self, context, instance, network_info, block_device_info=None,
|
||||
destroy_disks=True, migrate_data=None, destroy_vifs=True):
|
||||
@@ -260,6 +259,8 @@ class LXDContainerOperations(object):
|
||||
LOG.debug('in console output')
|
||||
|
||||
console_log = self.container_dir.get_console_path(instance.uuid)
|
||||
if not os.path.exists(console_log):
|
||||
return
|
||||
uid = pwd.getpwuid(os.getuid()).pw_uid
|
||||
utils.execute('chown', '%s:%s' % (uid, uid),
|
||||
console_log, run_as_root=True)
|
||||
|
||||
@@ -62,7 +62,7 @@ class LXDContainerUtils(object):
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to start container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def container_stop(self, instance_name, instance):
|
||||
LOG.debug('Container stop')
|
||||
@@ -94,26 +94,26 @@ class LXDContainerUtils(object):
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to reboot container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def container_destroy(self, instance_name, instance):
|
||||
def container_destroy(self, instance_name, host):
|
||||
LOG.debug('Container destroy')
|
||||
try:
|
||||
if not self.container_client.client('defined', instance=instance_name,
|
||||
host=instance.host):
|
||||
host=host):
|
||||
return
|
||||
|
||||
(state, data) = self.container_client.client('destroy', instance=instance_name,
|
||||
host=instance.host)
|
||||
host=host)
|
||||
self.container_client.client('wait',
|
||||
oid=data.get('operation').split('/')[3],
|
||||
host=instance.host)
|
||||
host=host)
|
||||
LOG.info(_LI('Succesfully destroyed container %s'),
|
||||
instance.uuid, instance=instance)
|
||||
instance_name)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to destroy container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance_name, 'reason': ex})
|
||||
|
||||
def container_pause(self, instance_name, instance):
|
||||
LOG.debug('Container pause')
|
||||
@@ -130,7 +130,7 @@ class LXDContainerUtils(object):
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to pause container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def conatainer_unpause(self, instance_name, instance):
|
||||
LOG.debug('Container unpause')
|
||||
@@ -163,7 +163,7 @@ class LXDContainerUtils(object):
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to rename container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex}, host=instance.host)
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def container_copy(self, container_config, instance):
|
||||
LOG.debug('Copying container')
|
||||
@@ -174,6 +174,8 @@ class LXDContainerUtils(object):
|
||||
operation_id = data.get('operation').split('/')[3]
|
||||
self.container_client.client('wait', oid=operation_id,
|
||||
host=instance.host)
|
||||
LOG.info(_LI('Succesfully copied container %s'),
|
||||
instance.uuid, instance=instance)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to rename container %(instance): %(reason)s'),
|
||||
@@ -189,11 +191,25 @@ class LXDContainerUtils(object):
|
||||
operation_id = data.get('operation').split('/')[3]
|
||||
self.container_client.client('wait', oid=operation_id,
|
||||
host=instance.host)
|
||||
LOG.info(_LI('Succesfully renamed container %s'),
|
||||
instance.uuid, instance=instance)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to rename container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def container_migrate(self, instance_name, instance):
    """Ask LXD to start migrating a container and return the client reply.

    Issues a 'migrate' request for ``instance_name`` against
    ``instance.host`` through the container client.

    :param instance_name: name of the container to migrate
    :param instance: nova instance; ``instance.host`` selects the LXD host
        and the object is handed to the logger for context
    :returns: whatever the container client returns for 'migrate'
        (NOTE(review): consumers index it by 'operation', 'control' and
        'fs' -- confirm against the client implementation)
    :raises: any exception from the client, re-raised after being logged
    """
    LOG.debug('Migrate contianer')
    try:
        container_ws = self.container_client.client('migrate',
                                                    instance=instance_name,
                                                    host=instance.host)
        # Log before returning: the success message used to sit after the
        # ``return`` statement and could never be reached.
        LOG.info(_LI('Succesfully migrated container %s'),
                 instance.uuid, instance=instance)
        return container_ws
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # The old format string read '%(instance):' (no conversion
            # type), which cannot be interpolated, and it reported a
            # "rename" failure for a migrate operation.
            LOG.error(_LE('Failed to migrate container %(instance)s: '
                          '%(reason)s'),
                      {'instance': instance_name, 'reason': ex},
                      instance=instance)
|
||||
|
||||
def container_init(self, container_config, instance, host):
|
||||
LOG.debug('Initializing container')
|
||||
@@ -204,19 +220,22 @@ class LXDContainerUtils(object):
|
||||
operation_id = data.get('operation').split('/')[3]
|
||||
self.container_client.client('wait',
|
||||
oid=operation_id,
|
||||
host=instance.host)
|
||||
host=host)
|
||||
LOG.info(_LI('Succesfully created container %s'),
|
||||
instance.uuid, instance=instance)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.error(_LE('Failed to create container %(instance)s: %(reason)s'),
|
||||
{'instance': instance.uuid, 'reason': ex})
|
||||
{'instance': instance.uuid, 'reason': ex}, instance=instance)
|
||||
|
||||
def _wait_for_state(self, operation_id, instance, power_state, host=None):
|
||||
if not host:
|
||||
host = instance.host
|
||||
|
||||
def _wait_for_state(self, operation_id, instance, power_state):
|
||||
instance.refresh()
|
||||
(state, data) = self.container_client.client('operation_info',
|
||||
oid=operation_id,
|
||||
host=instance.host)
|
||||
host=host)
|
||||
status_code = data['metadata']['status_code']
|
||||
if status_code in [200, 202]:
|
||||
LOG.debug('')
|
||||
|
||||
Reference in New Issue
Block a user