Improve baremetal driver error handling

Several improvements to baremetal driver are implemented in this patch.
There is now significantly more error handling during spawn().

It also includes an addition to nova/tests/utils.py to provide
additional sample information from get_test_network_info().

blueprint general-bare-metal-provisioning-framework

Change-Id: I65d93051d7fcfd79f4d24d4ddb62fb1a55bee646
This commit is contained in:
Devananda van der Veen 2012-12-27 17:19:21 -08:00
parent 4690b501b5
commit 0673aef9ff
8 changed files with 160 additions and 107 deletions

View File

@ -159,7 +159,7 @@ class BareMetalDriverWithDBTestCase(bm_db_base.BMDBTestCase):
db.bm_node_update(self.context, self.node['id'],
{'id': 9876})
self.assertRaises(exception.InstanceNotFound,
self.assertRaises(exception.NovaException,
self.driver.spawn, **self.spawn_params)
row = db.bm_node_get(self.context, 9876)

View File

@ -81,6 +81,7 @@ def get_test_network_info(count=1):
fake_ip = '0.0.0.0/0'
fake_ip_2 = '0.0.0.1/0'
fake_ip_3 = '0.0.0.1/0'
fake_netmask = '255.255.255.255'
fake_vlan = 100
fake_bridge_interface = 'eth0'
network = {'bridge': fake,
@ -91,11 +92,13 @@ def get_test_network_info(count=1):
'injected': False}
mapping = {'mac': fake,
'dhcp_server': fake,
'dns': ['fake1', 'fake2'],
'gateway': fake,
'gateway_v6': fake,
'ips': [{'ip': fake_ip}, {'ip': fake_ip}]}
'ips': [{'ip': fake_ip, 'netmask': fake_netmask},
{'ip': fake_ip, 'netmask': fake_netmask}]}
if ipv6:
mapping['ip6s'] = [{'ip': fake_ip},
mapping['ip6s'] = [{'ip': fake_ip, 'netmask': fake_netmask},
{'ip': fake_ip_2},
{'ip': fake_ip_3}]
return [(network, mapping) for x in xrange(0, count)]

View File

@ -21,27 +21,26 @@ from nova.virt.baremetal import baremetal_states
class NodeDriver(object):
def define_vars(self, instance, network_info, block_device_info):
def __init__(self):
pass
def cache_images(self, context, node, instance, **kwargs):
raise NotImplementedError()
def create_image(self, var, context, image_meta, node, instance,
injected_files=None, admin_password=None):
def destroy_images(self, context, node, instance):
raise NotImplementedError()
def destroy_images(self, var, context, node, instance):
def activate_bootloader(self, context, node, instance):
raise NotImplementedError()
def activate_bootloader(self, var, context, node, instance, image_meta):
def deactivate_bootloader(self, context, node, instance):
raise NotImplementedError()
def deactivate_bootloader(self, var, context, node, instance):
raise NotImplementedError()
def activate_node(self, var, context, node, instance):
def activate_node(self, context, node, instance):
"""For operations after power on."""
raise NotImplementedError()
def deactivate_node(self, var, context, node, instance):
def deactivate_node(self, context, node, instance):
"""For operations before power off."""
raise NotImplementedError()
@ -52,16 +51,20 @@ class NodeDriver(object):
class PowerManager(object):
def __init__(self, **kwargs):
self.state = baremetal_states.DELETED
pass
def activate_node(self):
return baremetal_states.ACTIVE
self.state = baremetal_states.ACTIVE
return self.state
def reboot_node(self):
return baremetal_states.ACTIVE
self.state = baremetal_states.ACTIVE
return self.state
def deactivate_node(self):
return baremetal_states.DELETED
self.state = baremetal_states.DELETED
return self.state
def is_power_on(self):
"""Returns True or False according as the node's power state"""

View File

@ -98,6 +98,10 @@ def bm_node_update(context, bm_node_id, values):
return IMPL.bm_node_update(context, bm_node_id, values)
def bm_node_set_uuid_safe(context, bm_node_id, uuid):
return IMPL.bm_node_set_uuid_safe(context, bm_node_id, uuid)
def bm_pxe_ip_create(context, address, server_address):
return IMPL.bm_pxe_ip_create(context, address, server_address)

View File

@ -150,6 +150,37 @@ def bm_node_update(context, bm_node_id, values):
update(values)
@require_admin_context
def bm_node_set_uuid_safe(context, bm_node_id, values):
"""Associate an instance to a node safely
Associate an instance to a node only if that node is not yet assocated.
Allow the caller to set any other fields they require in the same
operation. For example, this is used to set the node's task_state to
BUILDING at the beginning of driver.spawn().
"""
if 'instance_uuid' not in values:
raise exception.NovaException(_(
"instance_uuid must be supplied to bm_node_set_uuid_safe"))
session = get_session()
with session.begin():
query = model_query(context, models.BareMetalNode,
session=session, read_deleted="no").\
filter_by(id=bm_node_id)
count = query.filter_by(instance_uuid=None).\
update(values, synchronize_session=False)
if count != 1:
raise exception.NovaException(_(
"Failed to associate instance %(uuid)s to baremetal node "
"%(id)s.") % {'id': bm_node_id,
'uuid': values['instance_uuid']})
ref = query.first()
return ref
@require_admin_context
def bm_node_destroy(context, bm_node_id):
model_query(context, models.BareMetalNode).\

View File

@ -56,7 +56,7 @@ opts = [
default='nova.virt.baremetal.pxe.PXE',
help='Baremetal driver back-end (pxe or tilera)'),
cfg.StrOpt('power_manager',
default='nova.virt.baremetal.ipmi.Ipmi',
default='nova.virt.baremetal.ipmi.IPMI',
help='Baremetal power management method'),
cfg.StrOpt('tftp_root',
default='/tftpboot',
@ -93,14 +93,16 @@ def _get_baremetal_node_by_instance_uuid(instance_uuid):
return node
def _update_baremetal_state(context, node, instance, state):
instance_uuid = None
if instance:
instance_uuid = instance['uuid']
db.bm_node_update(context, node['id'],
{'instance_uuid': instance_uuid,
'task_state': state,
})
def _update_state(context, node, instance, state):
"""Update the node state in baremetal DB
If instance is not supplied, reset the instance_uuid field for this node.
"""
values = {'task_state': state}
if not instance:
values['instance_uuid'] = None
db.bm_node_update(context, node['id'], values)
def get_power_manager(**kwargs):
@ -174,57 +176,70 @@ class BareMetalDriver(driver.ComputeDriver):
def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None):
nodename = instance.get('node')
if not nodename:
raise exception.NovaException(_("Baremetal node id not supplied"
" to driver"))
node = db.bm_node_get(context, nodename)
if node['instance_uuid']:
raise exception.NovaException(_("Baremetal node %s already"
" in use") % nodename)
# TODO(deva): split this huge try: block into manageable parts
node_id = instance.get('node')
if not node_id:
raise exception.NovaException(_(
"Baremetal node id not supplied to driver"))
# NOTE(deva): this db method will raise an exception if the node is
# already in use. We call it here to ensure no one else
# allocates this node before we begin provisioning it.
node = db.bm_node_set_uuid_safe(context, node_id,
{'instance_uuid': instance['uuid'],
'task_state': baremetal_states.BUILDING})
pm = get_power_manager(node=node, instance=instance)
try:
_update_baremetal_state(context, node, instance,
baremetal_states.BUILDING)
var = self.driver.define_vars(instance, network_info,
block_device_info)
self._plug_vifs(instance, network_info, context=context)
self.firewall_driver.setup_basic_filtering(instance, network_info)
self.firewall_driver.prepare_instance_filter(instance,
network_info)
self.firewall_driver.setup_basic_filtering(
instance, network_info)
self.firewall_driver.prepare_instance_filter(
instance, network_info)
self.firewall_driver.apply_instance_filter(
instance, network_info)
self.driver.create_image(var, context, image_meta, node,
instance,
injected_files=injected_files,
admin_password=admin_password)
self.driver.activate_bootloader(var, context, node,
instance, image_meta)
pm = get_power_manager(node=node, instance=instance)
state = pm.activate_node()
_update_baremetal_state(context, node, instance, state)
self.driver.activate_node(var, context, node, instance)
self.firewall_driver.apply_instance_filter(instance, network_info)
block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)
block_device_mapping = driver.\
block_device_info_get_mapping(block_device_info)
for vol in block_device_mapping:
connection_info = vol['connection_info']
mountpoint = vol['mount_device']
self.attach_volume(connection_info, instance['name'],
mountpoint)
self.attach_volume(
connection_info, instance['name'], mountpoint)
pm.start_console()
try:
image_info = self.driver.cache_images(
context, node, instance,
admin_password=admin_password,
image_meta=image_meta,
injected_files=injected_files,
network_info=network_info,
)
try:
self.driver.activate_bootloader(context, node, instance)
except Exception, e:
self.driver.deactivate_bootloader(context, node, instance)
raise e
except Exception, e:
self.driver.destroy_images(context, node, instance)
raise e
except Exception, e:
# TODO(deva): add tooling that can revert a failed spawn
_update_baremetal_state(context, node, instance,
baremetal_states.ERROR)
# TODO(deva): do network and volume cleanup here
raise e
else:
# NOTE(deva): pm.activate_node should not raise exceptions.
# We check its success in "finally" block
pm.activate_node()
pm.start_console()
finally:
if pm.state != baremetal_states.ACTIVE:
pm.state = baremetal_states.ERROR
try:
_update_state(context, node, instance, pm.state)
except exception.DBError, e:
LOG.warning(_("Failed to update state record for "
"baremetal node %s") % instance['uuid'])
def reboot(self, instance, network_info, reboot_type,
block_device_info=None):
@ -232,7 +247,7 @@ class BareMetalDriver(driver.ComputeDriver):
ctx = nova_context.get_admin_context()
pm = get_power_manager(node=node, instance=instance)
state = pm.reboot_node()
_update_baremetal_state(ctx, node, instance, state)
_update_state(ctx, node, instance, state)
def destroy(self, instance, network_info, block_device_info=None):
ctx = nova_context.get_admin_context()
@ -246,10 +261,7 @@ class BareMetalDriver(driver.ComputeDriver):
% instance['uuid'])
return
var = self.driver.define_vars(instance, network_info,
block_device_info)
self.driver.deactivate_node(var, ctx, node, instance)
self.driver.deactivate_node(ctx, node, instance)
pm = get_power_manager(node=node, instance=instance)
@ -267,9 +279,9 @@ class BareMetalDriver(driver.ComputeDriver):
mountpoint = vol['mount_device']
self.detach_volume(connection_info, instance['name'], mountpoint)
self.driver.deactivate_bootloader(var, ctx, node, instance)
self.driver.deactivate_bootloader(ctx, node, instance)
self.driver.destroy_images(var, ctx, node, instance)
self.driver.destroy_images(ctx, node, instance)
# stop firewall
self.firewall_driver.unfilter_instance(instance,
@ -277,7 +289,7 @@ class BareMetalDriver(driver.ComputeDriver):
self._unplug_vifs(instance, network_info)
_update_baremetal_state(ctx, node, None, state)
_update_state(ctx, node, None, state)
def power_off(self, instance):
"""Power off the specified instance."""

View File

@ -21,33 +21,25 @@ from nova.virt.baremetal import base
from nova.virt.firewall import NoopFirewallDriver
def get_baremetal_nodes():
return FakeDriver()
class FakeDriver(base.NodeDriver):
def define_vars(self, instance, network_info, block_device_info):
return {}
def create_image(self, var, context, image_meta, node, instance,
injected_files=None, admin_password=None):
def cache_images(self, context, node, instance, **kwargs):
pass
def destroy_images(self, var, context, node, instance):
def destroy_images(self, context, node, instance):
pass
def activate_bootloader(self, var, context, node, instance, image_meta):
def activate_bootloader(self, context, node, instance):
pass
def deactivate_bootloader(self, var, context, node, instance):
def deactivate_bootloader(self, context, node, instance):
pass
def activate_node(self, var, context, node, instance):
def activate_node(self, context, node, instance):
"""For operations after power on."""
pass
def deactivate_node(self, var, context, node, instance):
def deactivate_node(self, context, node, instance):
"""For operations before power off."""
pass
@ -57,23 +49,8 @@ class FakeDriver(base.NodeDriver):
class FakePowerManager(base.PowerManager):
def activate_node(self):
return baremetal_states.ACTIVE
def reboot_node(self):
return baremetal_states.ACTIVE
def deactivate_node(self):
return baremetal_states.DELETED
def is_power_on(self):
return True
def start_console(self):
pass
def stop_console(self):
pass
def __init__(self, **kwargs):
super(FakePowerManager, self).__init__(**kwargs)
class FakeFirewallDriver(NoopFirewallDriver):

View File

@ -18,9 +18,9 @@
import os
from nova.openstack.common import log as logging
from nova.virt.disk import api as disk_api
from nova.virt.libvirt import utils as libvirt_utils
LOG = logging.getLogger(__name__)
@ -30,8 +30,31 @@ def cache_image(context, target, image_id, user_id, project_id):
user_id, project_id)
def inject_into_image(image, key, net, metadata, admin_password,
files, partition, use_cow=False):
try:
disk_api.inject_data(image, key, net, metadata, admin_password,
files, partition, use_cow)
except Exception as e:
LOG.warn(_("Failed to inject data into image %(image)s. "
"Error: %(e)s") % locals())
def unlink_without_raise(path):
try:
libvirt_utils.file_delete(path)
os.unlink(path)
except OSError:
LOG.exception(_("failed to unlink %s") % path)
LOG.exception(_("Failed to unlink %s") % path)
def write_to_file(path, contents):
with open(path, 'w') as f:
f.write(contents)
def create_link_without_raise(source, link):
try:
os.symlink(source, link)
except OSError:
LOG.exception(_("Failed to create symlink from %(source)s to %(link)s")
% locals())