From 0673aef9ff336b29a744cea02103ff992b50cbc6 Mon Sep 17 00:00:00 2001 From: Devananda van der Veen Date: Thu, 27 Dec 2012 17:19:21 -0800 Subject: [PATCH] Improve baremetal driver error handling Several improvements to baremetal driver are implemented in this patch. There is now significantly more error handling during spawn(). It also includes an addition to nova/tests/utils.py to provide additional sample information from get_test_network_info(). blueprint general-bare-metal-provisioning-framework Change-Id: I65d93051d7fcfd79f4d24d4ddb62fb1a55bee646 --- nova/tests/baremetal/test_driver.py | 2 +- nova/tests/utils.py | 7 +- nova/virt/baremetal/base.py | 29 +++--- nova/virt/baremetal/db/api.py | 4 + nova/virt/baremetal/db/sqlalchemy/api.py | 31 ++++++ nova/virt/baremetal/driver.py | 126 +++++++++++++---------- nova/virt/baremetal/fake.py | 39 ++----- nova/virt/baremetal/utils.py | 29 +++++- 8 files changed, 160 insertions(+), 107 deletions(-) diff --git a/nova/tests/baremetal/test_driver.py b/nova/tests/baremetal/test_driver.py index d060d3a5d934..4c4e01719ba4 100644 --- a/nova/tests/baremetal/test_driver.py +++ b/nova/tests/baremetal/test_driver.py @@ -159,7 +159,7 @@ class BareMetalDriverWithDBTestCase(bm_db_base.BMDBTestCase): db.bm_node_update(self.context, self.node['id'], {'id': 9876}) - self.assertRaises(exception.InstanceNotFound, + self.assertRaises(exception.NovaException, self.driver.spawn, **self.spawn_params) row = db.bm_node_get(self.context, 9876) diff --git a/nova/tests/utils.py b/nova/tests/utils.py index 9fabab5935d5..45d0d295b3aa 100644 --- a/nova/tests/utils.py +++ b/nova/tests/utils.py @@ -81,6 +81,7 @@ def get_test_network_info(count=1): fake_ip = '0.0.0.0/0' fake_ip_2 = '0.0.0.1/0' fake_ip_3 = '0.0.0.1/0' + fake_netmask = '255.255.255.255' fake_vlan = 100 fake_bridge_interface = 'eth0' network = {'bridge': fake, @@ -91,11 +92,13 @@ def get_test_network_info(count=1): 'injected': False} mapping = {'mac': fake, 'dhcp_server': fake, + 'dns': ['fake1', 'fake2'], 'gateway': fake, 'gateway_v6': fake, - 'ips': [{'ip': fake_ip}, {'ip': fake_ip}]} + 'ips': [{'ip': fake_ip, 'netmask': fake_netmask}, + {'ip': fake_ip, 'netmask': fake_netmask}]} if ipv6: - mapping['ip6s'] = [{'ip': fake_ip}, + mapping['ip6s'] = [{'ip': fake_ip, 'netmask': fake_netmask}, {'ip': fake_ip_2}, {'ip': fake_ip_3}] return [(network, mapping) for x in xrange(0, count)] diff --git a/nova/virt/baremetal/base.py b/nova/virt/baremetal/base.py index 97d4f8ca359e..cf7a33a0a087 100644 --- a/nova/virt/baremetal/base.py +++ b/nova/virt/baremetal/base.py @@ -21,27 +21,26 @@ from nova.virt.baremetal import baremetal_states class NodeDriver(object): - def define_vars(self, instance, network_info, block_device_info): + def __init__(self): + pass + + def cache_images(self, context, node, instance, **kwargs): raise NotImplementedError() - def create_image(self, var, context, image_meta, node, instance, - injected_files=None, admin_password=None): + def destroy_images(self, context, node, instance): raise NotImplementedError() - def destroy_images(self, var, context, node, instance): + def activate_bootloader(self, context, node, instance): raise NotImplementedError() - def activate_bootloader(self, var, context, node, instance, image_meta): + def deactivate_bootloader(self, context, node, instance): raise NotImplementedError() - def deactivate_bootloader(self, var, context, node, instance): - raise NotImplementedError() - - def activate_node(self, var, context, node, instance): + def activate_node(self, context, node, instance): """For operations after power on.""" raise NotImplementedError() - def deactivate_node(self, var, context, node, instance): + def deactivate_node(self, context, node, instance): """For operations before power off.""" raise NotImplementedError() @@ -52,16 +51,20 @@ class NodeDriver(object): class PowerManager(object): def __init__(self, **kwargs): + self.state = baremetal_states.DELETED pass def activate_node(self): - return baremetal_states.ACTIVE + self.state = baremetal_states.ACTIVE + return self.state def reboot_node(self): - return baremetal_states.ACTIVE + self.state = baremetal_states.ACTIVE + return self.state def deactivate_node(self): - return baremetal_states.DELETED + self.state = baremetal_states.DELETED + return self.state def is_power_on(self): """Returns True or False according as the node's power state""" diff --git a/nova/virt/baremetal/db/api.py b/nova/virt/baremetal/db/api.py index 3ff533c6cd99..206a59b4f539 100644 --- a/nova/virt/baremetal/db/api.py +++ b/nova/virt/baremetal/db/api.py @@ -98,6 +98,10 @@ def bm_node_update(context, bm_node_id, values): return IMPL.bm_node_update(context, bm_node_id, values) +def bm_node_set_uuid_safe(context, bm_node_id, uuid): + return IMPL.bm_node_set_uuid_safe(context, bm_node_id, uuid) + + def bm_pxe_ip_create(context, address, server_address): return IMPL.bm_pxe_ip_create(context, address, server_address) diff --git a/nova/virt/baremetal/db/sqlalchemy/api.py b/nova/virt/baremetal/db/sqlalchemy/api.py index abdb19fb76f7..36c66c24f8fe 100644 --- a/nova/virt/baremetal/db/sqlalchemy/api.py +++ b/nova/virt/baremetal/db/sqlalchemy/api.py @@ -150,6 +150,37 @@ def bm_node_update(context, bm_node_id, values): update(values) +@require_admin_context +def bm_node_set_uuid_safe(context, bm_node_id, values): + """Associate an instance to a node safely + + Associate an instance to a node only if that node is not yet assocated. + Allow the caller to set any other fields they require in the same + operation. For example, this is used to set the node's task_state to + BUILDING at the beginning of driver.spawn(). + + """ + if 'instance_uuid' not in values: + raise exception.NovaException(_( + "instance_uuid must be supplied to bm_node_set_uuid_safe")) + + session = get_session() + with session.begin(): + query = model_query(context, models.BareMetalNode, + session=session, read_deleted="no").\ + filter_by(id=bm_node_id) + + count = query.filter_by(instance_uuid=None).\ + update(values, synchronize_session=False) + if count != 1: + raise exception.NovaException(_( + "Failed to associate instance %(uuid)s to baremetal node " + "%(id)s.") % {'id': bm_node_id, + 'uuid': values['instance_uuid']}) + ref = query.first() + return ref + + @require_admin_context def bm_node_destroy(context, bm_node_id): model_query(context, models.BareMetalNode).\ diff --git a/nova/virt/baremetal/driver.py b/nova/virt/baremetal/driver.py index 217f36fed857..0bc9fec63535 100644 --- a/nova/virt/baremetal/driver.py +++ b/nova/virt/baremetal/driver.py @@ -56,7 +56,7 @@ opts = [ default='nova.virt.baremetal.pxe.PXE', help='Baremetal driver back-end (pxe or tilera)'), cfg.StrOpt('power_manager', - default='nova.virt.baremetal.ipmi.Ipmi', + default='nova.virt.baremetal.ipmi.IPMI', help='Baremetal power management method'), cfg.StrOpt('tftp_root', default='/tftpboot', @@ -93,14 +93,16 @@ def _get_baremetal_node_by_instance_uuid(instance_uuid): return node -def _update_baremetal_state(context, node, instance, state): - instance_uuid = None - if instance: - instance_uuid = instance['uuid'] - db.bm_node_update(context, node['id'], - {'instance_uuid': instance_uuid, - 'task_state': state, - }) +def _update_state(context, node, instance, state): + """Update the node state in baremetal DB + + If instance is not supplied, reset the instance_uuid field for this node. + + """ + values = {'task_state': state} + if not instance: + values['instance_uuid'] = None + db.bm_node_update(context, node['id'], values) def get_power_manager(**kwargs): @@ -174,57 +176,70 @@ class BareMetalDriver(driver.ComputeDriver): def spawn(self, context, instance, image_meta, injected_files, admin_password, network_info=None, block_device_info=None): - nodename = instance.get('node') - if not nodename: - raise exception.NovaException(_("Baremetal node id not supplied" - " to driver")) - node = db.bm_node_get(context, nodename) - if node['instance_uuid']: - raise exception.NovaException(_("Baremetal node %s already" - " in use") % nodename) - # TODO(deva): split this huge try: block into manageable parts + node_id = instance.get('node') + if not node_id: + raise exception.NovaException(_( + "Baremetal node id not supplied to driver")) + + # NOTE(deva): this db method will raise an exception if the node is + # already in use. We call it here to ensure no one else + # allocates this node before we begin provisioning it. + node = db.bm_node_set_uuid_safe(context, node_id, + {'instance_uuid': instance['uuid'], + 'task_state': baremetal_states.BUILDING}) + pm = get_power_manager(node=node, instance=instance) + try: - _update_baremetal_state(context, node, instance, - baremetal_states.BUILDING) - - var = self.driver.define_vars(instance, network_info, - block_device_info) - self._plug_vifs(instance, network_info, context=context) - self.firewall_driver.setup_basic_filtering(instance, network_info) - self.firewall_driver.prepare_instance_filter(instance, - network_info) + self.firewall_driver.setup_basic_filtering( + instance, network_info) + self.firewall_driver.prepare_instance_filter( + instance, network_info) + self.firewall_driver.apply_instance_filter( + instance, network_info) - self.driver.create_image(var, context, image_meta, node, - instance, - injected_files=injected_files, - admin_password=admin_password) - self.driver.activate_bootloader(var, context, node, - instance, image_meta) - pm = get_power_manager(node=node, instance=instance) - state = pm.activate_node() - - _update_baremetal_state(context, node, instance, state) - - self.driver.activate_node(var, context, node, instance) - self.firewall_driver.apply_instance_filter(instance, network_info) - - block_device_mapping = driver.block_device_info_get_mapping( - block_device_info) + block_device_mapping = driver.\ + block_device_info_get_mapping(block_device_info) for vol in block_device_mapping: connection_info = vol['connection_info'] mountpoint = vol['mount_device'] - self.attach_volume(connection_info, instance['name'], - mountpoint) + self.attach_volume( + connection_info, instance['name'], mountpoint) - pm.start_console() + try: + image_info = self.driver.cache_images( + context, node, instance, + admin_password=admin_password, + image_meta=image_meta, + injected_files=injected_files, + network_info=network_info, + ) + try: + self.driver.activate_bootloader(context, node, instance) + except Exception, e: + self.driver.deactivate_bootloader(context, node, instance) + raise e + except Exception, e: + self.driver.destroy_images(context, node, instance) + raise e except Exception, e: - # TODO(deva): add tooling that can revert a failed spawn - _update_baremetal_state(context, node, instance, - baremetal_states.ERROR) + # TODO(deva): do network and volume cleanup here raise e + else: + # NOTE(deva): pm.activate_node should not raise exceptions. + # We check its success in "finally" block + pm.activate_node() + pm.start_console() + finally: + if pm.state != baremetal_states.ACTIVE: + pm.state = baremetal_states.ERROR + try: + _update_state(context, node, instance, pm.state) + except exception.DBError, e: + LOG.warning(_("Failed to update state record for " + "baremetal node %s") % instance['uuid']) def reboot(self, instance, network_info, reboot_type, block_device_info=None): @@ -232,7 +247,7 @@ class BareMetalDriver(driver.ComputeDriver): ctx = nova_context.get_admin_context() pm = get_power_manager(node=node, instance=instance) state = pm.reboot_node() - _update_baremetal_state(ctx, node, instance, state) + _update_state(ctx, node, instance, state) def destroy(self, instance, network_info, block_device_info=None): ctx = nova_context.get_admin_context() @@ -246,10 +261,7 @@ class BareMetalDriver(driver.ComputeDriver): % instance['uuid']) return - var = self.driver.define_vars(instance, network_info, - block_device_info) - - self.driver.deactivate_node(var, ctx, node, instance) + self.driver.deactivate_node(ctx, node, instance) pm = get_power_manager(node=node, instance=instance) @@ -267,9 +279,9 @@ class BareMetalDriver(driver.ComputeDriver): mountpoint = vol['mount_device'] self.detach_volume(connection_info, instance['name'], mountpoint) - self.driver.deactivate_bootloader(var, ctx, node, instance) + self.driver.deactivate_bootloader(ctx, node, instance) - self.driver.destroy_images(var, ctx, node, instance) + self.driver.destroy_images(ctx, node, instance) # stop firewall self.firewall_driver.unfilter_instance(instance, @@ -277,7 +289,7 @@ class BareMetalDriver(driver.ComputeDriver): self._unplug_vifs(instance, network_info) - _update_baremetal_state(ctx, node, None, state) + _update_state(ctx, node, None, state) def power_off(self, instance): """Power off the specified instance.""" diff --git a/nova/virt/baremetal/fake.py b/nova/virt/baremetal/fake.py index 920eb0c10e11..16f599d534e9 100644 --- a/nova/virt/baremetal/fake.py +++ b/nova/virt/baremetal/fake.py @@ -21,33 +21,25 @@ from nova.virt.baremetal import base from nova.virt.firewall import NoopFirewallDriver -def get_baremetal_nodes(): - return FakeDriver() - - class FakeDriver(base.NodeDriver): - def define_vars(self, instance, network_info, block_device_info): - return {} - - def create_image(self, var, context, image_meta, node, instance, - injected_files=None, admin_password=None): + def cache_images(self, context, node, instance, **kwargs): pass - def destroy_images(self, var, context, node, instance): + def destroy_images(self, context, node, instance): pass - def activate_bootloader(self, var, context, node, instance, image_meta): + def activate_bootloader(self, context, node, instance): pass - def deactivate_bootloader(self, var, context, node, instance): + def deactivate_bootloader(self, context, node, instance): pass - def activate_node(self, var, context, node, instance): + def activate_node(self, context, node, instance): """For operations after power on.""" pass - def deactivate_node(self, var, context, node, instance): + def deactivate_node(self, context, node, instance): """For operations before power off.""" pass @@ -57,23 +49,8 @@ class FakeDriver(base.NodeDriver): class FakePowerManager(base.PowerManager): - def activate_node(self): - return baremetal_states.ACTIVE - - def reboot_node(self): - return baremetal_states.ACTIVE - - def deactivate_node(self): - return baremetal_states.DELETED - - def is_power_on(self): - return True - - def start_console(self): - pass - - def stop_console(self): - pass + def __init__(self, **kwargs): + super(FakePowerManager, self).__init__(**kwargs) class FakeFirewallDriver(NoopFirewallDriver): diff --git a/nova/virt/baremetal/utils.py b/nova/virt/baremetal/utils.py index e34ca60f3aa7..902dda9e887c 100644 --- a/nova/virt/baremetal/utils.py +++ b/nova/virt/baremetal/utils.py @@ -18,9 +18,9 @@ import os from nova.openstack.common import log as logging +from nova.virt.disk import api as disk_api from nova.virt.libvirt import utils as libvirt_utils - LOG = logging.getLogger(__name__) @@ -30,8 +30,31 @@ def cache_image(context, target, image_id, user_id, project_id): user_id, project_id) +def inject_into_image(image, key, net, metadata, admin_password, + files, partition, use_cow=False): + try: + disk_api.inject_data(image, key, net, metadata, admin_password, + files, partition, use_cow) + except Exception as e: + LOG.warn(_("Failed to inject data into image %(image)s. " + "Error: %(e)s") % locals()) + + def unlink_without_raise(path): try: - libvirt_utils.file_delete(path) + os.unlink(path) except OSError: - LOG.exception(_("failed to unlink %s") % path) + LOG.exception(_("Failed to unlink %s") % path) + + +def write_to_file(path, contents): + with open(path, 'w') as f: + f.write(contents) + + +def create_link_without_raise(source, link): + try: + os.symlink(source, link) + except OSError: + LOG.exception(_("Failed to create symlink from %(source)s to %(link)s") + % locals())