Improve baremetal driver error handling

This patch implements several improvements to the baremetal driver.
There is now significantly more error handling during spawn().

It also includes an addition to nova/tests/utils.py to provide
additional sample information from get_test_network_info().

blueprint general-bare-metal-provisioning-framework

Change-Id: I65d93051d7fcfd79f4d24d4ddb62fb1a55bee646
This commit is contained in:
Devananda van der Veen 2012-12-27 17:19:21 -08:00
parent 4690b501b5
commit 0673aef9ff
8 changed files with 160 additions and 107 deletions

View File

@ -159,7 +159,7 @@ class BareMetalDriverWithDBTestCase(bm_db_base.BMDBTestCase):
db.bm_node_update(self.context, self.node['id'], db.bm_node_update(self.context, self.node['id'],
{'id': 9876}) {'id': 9876})
self.assertRaises(exception.InstanceNotFound, self.assertRaises(exception.NovaException,
self.driver.spawn, **self.spawn_params) self.driver.spawn, **self.spawn_params)
row = db.bm_node_get(self.context, 9876) row = db.bm_node_get(self.context, 9876)

View File

@ -81,6 +81,7 @@ def get_test_network_info(count=1):
fake_ip = '0.0.0.0/0' fake_ip = '0.0.0.0/0'
fake_ip_2 = '0.0.0.1/0' fake_ip_2 = '0.0.0.1/0'
fake_ip_3 = '0.0.0.1/0' fake_ip_3 = '0.0.0.1/0'
fake_netmask = '255.255.255.255'
fake_vlan = 100 fake_vlan = 100
fake_bridge_interface = 'eth0' fake_bridge_interface = 'eth0'
network = {'bridge': fake, network = {'bridge': fake,
@ -91,11 +92,13 @@ def get_test_network_info(count=1):
'injected': False} 'injected': False}
mapping = {'mac': fake, mapping = {'mac': fake,
'dhcp_server': fake, 'dhcp_server': fake,
'dns': ['fake1', 'fake2'],
'gateway': fake, 'gateway': fake,
'gateway_v6': fake, 'gateway_v6': fake,
'ips': [{'ip': fake_ip}, {'ip': fake_ip}]} 'ips': [{'ip': fake_ip, 'netmask': fake_netmask},
{'ip': fake_ip, 'netmask': fake_netmask}]}
if ipv6: if ipv6:
mapping['ip6s'] = [{'ip': fake_ip}, mapping['ip6s'] = [{'ip': fake_ip, 'netmask': fake_netmask},
{'ip': fake_ip_2}, {'ip': fake_ip_2},
{'ip': fake_ip_3}] {'ip': fake_ip_3}]
return [(network, mapping) for x in xrange(0, count)] return [(network, mapping) for x in xrange(0, count)]

View File

@ -21,27 +21,26 @@ from nova.virt.baremetal import baremetal_states
class NodeDriver(object): class NodeDriver(object):
def define_vars(self, instance, network_info, block_device_info): def __init__(self):
pass
def cache_images(self, context, node, instance, **kwargs):
raise NotImplementedError() raise NotImplementedError()
def create_image(self, var, context, image_meta, node, instance, def destroy_images(self, context, node, instance):
injected_files=None, admin_password=None):
raise NotImplementedError() raise NotImplementedError()
def destroy_images(self, var, context, node, instance): def activate_bootloader(self, context, node, instance):
raise NotImplementedError() raise NotImplementedError()
def activate_bootloader(self, var, context, node, instance, image_meta): def deactivate_bootloader(self, context, node, instance):
raise NotImplementedError() raise NotImplementedError()
def deactivate_bootloader(self, var, context, node, instance): def activate_node(self, context, node, instance):
raise NotImplementedError()
def activate_node(self, var, context, node, instance):
"""For operations after power on.""" """For operations after power on."""
raise NotImplementedError() raise NotImplementedError()
def deactivate_node(self, var, context, node, instance): def deactivate_node(self, context, node, instance):
"""For operations before power off.""" """For operations before power off."""
raise NotImplementedError() raise NotImplementedError()
@ -52,16 +51,20 @@ class NodeDriver(object):
class PowerManager(object): class PowerManager(object):
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.state = baremetal_states.DELETED
pass pass
def activate_node(self): def activate_node(self):
return baremetal_states.ACTIVE self.state = baremetal_states.ACTIVE
return self.state
def reboot_node(self): def reboot_node(self):
return baremetal_states.ACTIVE self.state = baremetal_states.ACTIVE
return self.state
def deactivate_node(self): def deactivate_node(self):
return baremetal_states.DELETED self.state = baremetal_states.DELETED
return self.state
def is_power_on(self): def is_power_on(self):
"""Returns True or False according as the node's power state""" """Returns True or False according as the node's power state"""

View File

@ -98,6 +98,10 @@ def bm_node_update(context, bm_node_id, values):
return IMPL.bm_node_update(context, bm_node_id, values) return IMPL.bm_node_update(context, bm_node_id, values)
def bm_node_set_uuid_safe(context, bm_node_id, uuid):
    """Forward to IMPL.bm_node_set_uuid_safe() and return its result.

    All three arguments are passed through positionally and unchanged.

    NOTE(review): despite its name, ``uuid`` looks like it carries a dict
    of column values rather than a bare UUID string -- the sqlalchemy
    implementation names this argument ``values`` and requires an
    'instance_uuid' key in it. Confirm before relying on the name.
    """
    result = IMPL.bm_node_set_uuid_safe(context, bm_node_id, uuid)
    return result
def bm_pxe_ip_create(context, address, server_address): def bm_pxe_ip_create(context, address, server_address):
return IMPL.bm_pxe_ip_create(context, address, server_address) return IMPL.bm_pxe_ip_create(context, address, server_address)

View File

@ -150,6 +150,37 @@ def bm_node_update(context, bm_node_id, values):
update(values) update(values)
@require_admin_context
def bm_node_set_uuid_safe(context, bm_node_id, values):
    """Associate an instance with a node only if the node is unassociated.

    The update is applied to the BareMetalNode row whose id is
    ``bm_node_id`` (queried with read_deleted="no") and whose
    instance_uuid is still None, so a node that already has an instance
    is never overwritten. Any additional keys in ``values`` are written
    in the same UPDATE; for example, driver.spawn() uses this to set the
    node's task_state to BUILDING while claiming it.

    :param context: request context (admin context is required by the
                    decorator)
    :param bm_node_id: id of the baremetal node to claim
    :param values: dict of column values to write; it must contain the
                   key 'instance_uuid'
    :returns: the first row matching ``bm_node_id`` after the update
    :raises NovaException: if 'instance_uuid' is missing from ``values``
                           or if the update did not affect exactly one row
    """
    if 'instance_uuid' not in values:
        raise exception.NovaException(_(
            "instance_uuid must be supplied to bm_node_set_uuid_safe"))

    session = get_session()
    with session.begin():
        node_query = model_query(
            context, models.BareMetalNode,
            session=session, read_deleted="no",
        ).filter_by(id=bm_node_id)

        # Restricting the UPDATE to rows with no instance_uuid is what makes
        # the association "safe": an already-claimed node matches zero rows.
        unclaimed = node_query.filter_by(instance_uuid=None)
        updated_rows = unclaimed.update(values, synchronize_session=False)
        if updated_rows != 1:
            raise exception.NovaException(_(
                "Failed to associate instance %(uuid)s to baremetal node "
                "%(id)s.") % {'id': bm_node_id,
                              'uuid': values['instance_uuid']})
        node = node_query.first()
    return node
@require_admin_context @require_admin_context
def bm_node_destroy(context, bm_node_id): def bm_node_destroy(context, bm_node_id):
model_query(context, models.BareMetalNode).\ model_query(context, models.BareMetalNode).\

View File

@ -56,7 +56,7 @@ opts = [
default='nova.virt.baremetal.pxe.PXE', default='nova.virt.baremetal.pxe.PXE',
help='Baremetal driver back-end (pxe or tilera)'), help='Baremetal driver back-end (pxe or tilera)'),
cfg.StrOpt('power_manager', cfg.StrOpt('power_manager',
default='nova.virt.baremetal.ipmi.Ipmi', default='nova.virt.baremetal.ipmi.IPMI',
help='Baremetal power management method'), help='Baremetal power management method'),
cfg.StrOpt('tftp_root', cfg.StrOpt('tftp_root',
default='/tftpboot', default='/tftpboot',
@ -93,14 +93,16 @@ def _get_baremetal_node_by_instance_uuid(instance_uuid):
return node return node
def _update_baremetal_state(context, node, instance, state): def _update_state(context, node, instance, state):
instance_uuid = None """Update the node state in baremetal DB
if instance:
instance_uuid = instance['uuid'] If instance is not supplied, reset the instance_uuid field for this node.
db.bm_node_update(context, node['id'],
{'instance_uuid': instance_uuid, """
'task_state': state, values = {'task_state': state}
}) if not instance:
values['instance_uuid'] = None
db.bm_node_update(context, node['id'], values)
def get_power_manager(**kwargs): def get_power_manager(**kwargs):
@ -174,57 +176,70 @@ class BareMetalDriver(driver.ComputeDriver):
def spawn(self, context, instance, image_meta, injected_files, def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None): admin_password, network_info=None, block_device_info=None):
nodename = instance.get('node')
if not nodename:
raise exception.NovaException(_("Baremetal node id not supplied"
" to driver"))
node = db.bm_node_get(context, nodename)
if node['instance_uuid']:
raise exception.NovaException(_("Baremetal node %s already"
" in use") % nodename)
# TODO(deva): split this huge try: block into manageable parts node_id = instance.get('node')
if not node_id:
raise exception.NovaException(_(
"Baremetal node id not supplied to driver"))
# NOTE(deva): this db method will raise an exception if the node is
# already in use. We call it here to ensure no one else
# allocates this node before we begin provisioning it.
node = db.bm_node_set_uuid_safe(context, node_id,
{'instance_uuid': instance['uuid'],
'task_state': baremetal_states.BUILDING})
pm = get_power_manager(node=node, instance=instance)
try: try:
_update_baremetal_state(context, node, instance,
baremetal_states.BUILDING)
var = self.driver.define_vars(instance, network_info,
block_device_info)
self._plug_vifs(instance, network_info, context=context) self._plug_vifs(instance, network_info, context=context)
self.firewall_driver.setup_basic_filtering(instance, network_info) self.firewall_driver.setup_basic_filtering(
self.firewall_driver.prepare_instance_filter(instance, instance, network_info)
network_info) self.firewall_driver.prepare_instance_filter(
instance, network_info)
self.firewall_driver.apply_instance_filter(
instance, network_info)
self.driver.create_image(var, context, image_meta, node, block_device_mapping = driver.\
instance, block_device_info_get_mapping(block_device_info)
injected_files=injected_files,
admin_password=admin_password)
self.driver.activate_bootloader(var, context, node,
instance, image_meta)
pm = get_power_manager(node=node, instance=instance)
state = pm.activate_node()
_update_baremetal_state(context, node, instance, state)
self.driver.activate_node(var, context, node, instance)
self.firewall_driver.apply_instance_filter(instance, network_info)
block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)
for vol in block_device_mapping: for vol in block_device_mapping:
connection_info = vol['connection_info'] connection_info = vol['connection_info']
mountpoint = vol['mount_device'] mountpoint = vol['mount_device']
self.attach_volume(connection_info, instance['name'], self.attach_volume(
mountpoint) connection_info, instance['name'], mountpoint)
pm.start_console() try:
image_info = self.driver.cache_images(
context, node, instance,
admin_password=admin_password,
image_meta=image_meta,
injected_files=injected_files,
network_info=network_info,
)
try:
self.driver.activate_bootloader(context, node, instance)
except Exception, e:
self.driver.deactivate_bootloader(context, node, instance)
raise e
except Exception, e:
self.driver.destroy_images(context, node, instance)
raise e
except Exception, e: except Exception, e:
# TODO(deva): add tooling that can revert a failed spawn # TODO(deva): do network and volume cleanup here
_update_baremetal_state(context, node, instance,
baremetal_states.ERROR)
raise e raise e
else:
# NOTE(deva): pm.activate_node should not raise exceptions.
# We check its success in "finally" block
pm.activate_node()
pm.start_console()
finally:
if pm.state != baremetal_states.ACTIVE:
pm.state = baremetal_states.ERROR
try:
_update_state(context, node, instance, pm.state)
except exception.DBError, e:
LOG.warning(_("Failed to update state record for "
"baremetal node %s") % instance['uuid'])
def reboot(self, instance, network_info, reboot_type, def reboot(self, instance, network_info, reboot_type,
block_device_info=None): block_device_info=None):
@ -232,7 +247,7 @@ class BareMetalDriver(driver.ComputeDriver):
ctx = nova_context.get_admin_context() ctx = nova_context.get_admin_context()
pm = get_power_manager(node=node, instance=instance) pm = get_power_manager(node=node, instance=instance)
state = pm.reboot_node() state = pm.reboot_node()
_update_baremetal_state(ctx, node, instance, state) _update_state(ctx, node, instance, state)
def destroy(self, instance, network_info, block_device_info=None): def destroy(self, instance, network_info, block_device_info=None):
ctx = nova_context.get_admin_context() ctx = nova_context.get_admin_context()
@ -246,10 +261,7 @@ class BareMetalDriver(driver.ComputeDriver):
% instance['uuid']) % instance['uuid'])
return return
var = self.driver.define_vars(instance, network_info, self.driver.deactivate_node(ctx, node, instance)
block_device_info)
self.driver.deactivate_node(var, ctx, node, instance)
pm = get_power_manager(node=node, instance=instance) pm = get_power_manager(node=node, instance=instance)
@ -267,9 +279,9 @@ class BareMetalDriver(driver.ComputeDriver):
mountpoint = vol['mount_device'] mountpoint = vol['mount_device']
self.detach_volume(connection_info, instance['name'], mountpoint) self.detach_volume(connection_info, instance['name'], mountpoint)
self.driver.deactivate_bootloader(var, ctx, node, instance) self.driver.deactivate_bootloader(ctx, node, instance)
self.driver.destroy_images(var, ctx, node, instance) self.driver.destroy_images(ctx, node, instance)
# stop firewall # stop firewall
self.firewall_driver.unfilter_instance(instance, self.firewall_driver.unfilter_instance(instance,
@ -277,7 +289,7 @@ class BareMetalDriver(driver.ComputeDriver):
self._unplug_vifs(instance, network_info) self._unplug_vifs(instance, network_info)
_update_baremetal_state(ctx, node, None, state) _update_state(ctx, node, None, state)
def power_off(self, instance): def power_off(self, instance):
"""Power off the specified instance.""" """Power off the specified instance."""

View File

@ -21,33 +21,25 @@ from nova.virt.baremetal import base
from nova.virt.firewall import NoopFirewallDriver from nova.virt.firewall import NoopFirewallDriver
def get_baremetal_nodes():
return FakeDriver()
class FakeDriver(base.NodeDriver): class FakeDriver(base.NodeDriver):
def define_vars(self, instance, network_info, block_device_info): def cache_images(self, context, node, instance, **kwargs):
return {}
def create_image(self, var, context, image_meta, node, instance,
injected_files=None, admin_password=None):
pass pass
def destroy_images(self, var, context, node, instance): def destroy_images(self, context, node, instance):
pass pass
def activate_bootloader(self, var, context, node, instance, image_meta): def activate_bootloader(self, context, node, instance):
pass pass
def deactivate_bootloader(self, var, context, node, instance): def deactivate_bootloader(self, context, node, instance):
pass pass
def activate_node(self, var, context, node, instance): def activate_node(self, context, node, instance):
"""For operations after power on.""" """For operations after power on."""
pass pass
def deactivate_node(self, var, context, node, instance): def deactivate_node(self, context, node, instance):
"""For operations before power off.""" """For operations before power off."""
pass pass
@ -57,23 +49,8 @@ class FakeDriver(base.NodeDriver):
class FakePowerManager(base.PowerManager): class FakePowerManager(base.PowerManager):
def activate_node(self): def __init__(self, **kwargs):
return baremetal_states.ACTIVE super(FakePowerManager, self).__init__(**kwargs)
def reboot_node(self):
return baremetal_states.ACTIVE
def deactivate_node(self):
return baremetal_states.DELETED
def is_power_on(self):
return True
def start_console(self):
pass
def stop_console(self):
pass
class FakeFirewallDriver(NoopFirewallDriver): class FakeFirewallDriver(NoopFirewallDriver):

View File

@ -18,9 +18,9 @@
import os import os
from nova.openstack.common import log as logging from nova.openstack.common import log as logging
from nova.virt.disk import api as disk_api
from nova.virt.libvirt import utils as libvirt_utils from nova.virt.libvirt import utils as libvirt_utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -30,8 +30,31 @@ def cache_image(context, target, image_id, user_id, project_id):
user_id, project_id) user_id, project_id)
def inject_into_image(image, key, net, metadata, admin_password,
                      files, partition, use_cow=False):
    """Call disk_api.inject_data() and log, rather than raise, on failure.

    Every argument is forwarded unchanged, in the same order, to
    disk_api.inject_data(). Any Exception raised by that call is caught
    and reported through LOG.warn, so an injection failure does not
    propagate to the caller.
    """
    try:
        disk_api.inject_data(image, key, net, metadata, admin_password,
                             files, partition, use_cow)
    except Exception as e:
        # Deliberately best-effort: record the failure and carry on.
        message = _("Failed to inject data into image %(image)s. "
                    "Error: %(e)s")
        LOG.warn(message % {'image': image, 'e': e})
def unlink_without_raise(path): def unlink_without_raise(path):
try: try:
libvirt_utils.file_delete(path) os.unlink(path)
except OSError: except OSError:
LOG.exception(_("failed to unlink %s") % path) LOG.exception(_("Failed to unlink %s") % path)
def write_to_file(path, contents):
    """Write ``contents`` to the file at ``path``.

    The file is opened in 'w' mode, so an existing file is truncated and
    a missing one is created. The file is closed on exit from the
    ``with`` block, including when the write raises.
    """
    with open(path, 'w') as out_file:
        out_file.write(contents)
def create_link_without_raise(source, link):
    """Create a symlink at ``link`` pointing to ``source``.

    An OSError from os.symlink() is caught and logged with LOG.exception
    instead of being raised to the caller. Other exception types are not
    caught here.
    """
    try:
        os.symlink(source, link)
    except OSError:
        message = _("Failed to create symlink from %(source)s to %(link)s")
        LOG.exception(message % {'source': source, 'link': link})