Improve baremetal driver error handling
Several improvements to baremetal driver are implemented in this patch. There is now significantly more error handling during spawn(). It also includes an addition to nova/tests/utils.py to provide additional sample information from get_test_network_info(). blueprint general-bare-metal-provisioning-framework Change-Id: I65d93051d7fcfd79f4d24d4ddb62fb1a55bee646
This commit is contained in:
parent
4690b501b5
commit
0673aef9ff
|
@ -159,7 +159,7 @@ class BareMetalDriverWithDBTestCase(bm_db_base.BMDBTestCase):
|
|||
db.bm_node_update(self.context, self.node['id'],
|
||||
{'id': 9876})
|
||||
|
||||
self.assertRaises(exception.InstanceNotFound,
|
||||
self.assertRaises(exception.NovaException,
|
||||
self.driver.spawn, **self.spawn_params)
|
||||
|
||||
row = db.bm_node_get(self.context, 9876)
|
||||
|
|
|
@ -81,6 +81,7 @@ def get_test_network_info(count=1):
|
|||
fake_ip = '0.0.0.0/0'
|
||||
fake_ip_2 = '0.0.0.1/0'
|
||||
fake_ip_3 = '0.0.0.1/0'
|
||||
fake_netmask = '255.255.255.255'
|
||||
fake_vlan = 100
|
||||
fake_bridge_interface = 'eth0'
|
||||
network = {'bridge': fake,
|
||||
|
@ -91,11 +92,13 @@ def get_test_network_info(count=1):
|
|||
'injected': False}
|
||||
mapping = {'mac': fake,
|
||||
'dhcp_server': fake,
|
||||
'dns': ['fake1', 'fake2'],
|
||||
'gateway': fake,
|
||||
'gateway_v6': fake,
|
||||
'ips': [{'ip': fake_ip}, {'ip': fake_ip}]}
|
||||
'ips': [{'ip': fake_ip, 'netmask': fake_netmask},
|
||||
{'ip': fake_ip, 'netmask': fake_netmask}]}
|
||||
if ipv6:
|
||||
mapping['ip6s'] = [{'ip': fake_ip},
|
||||
mapping['ip6s'] = [{'ip': fake_ip, 'netmask': fake_netmask},
|
||||
{'ip': fake_ip_2},
|
||||
{'ip': fake_ip_3}]
|
||||
return [(network, mapping) for x in xrange(0, count)]
|
||||
|
|
|
@ -21,27 +21,26 @@ from nova.virt.baremetal import baremetal_states
|
|||
|
||||
class NodeDriver(object):
|
||||
|
||||
def define_vars(self, instance, network_info, block_device_info):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def cache_images(self, context, node, instance, **kwargs):
|
||||
raise NotImplementedError()
|
||||
|
||||
def create_image(self, var, context, image_meta, node, instance,
|
||||
injected_files=None, admin_password=None):
|
||||
def destroy_images(self, context, node, instance):
|
||||
raise NotImplementedError()
|
||||
|
||||
def destroy_images(self, var, context, node, instance):
|
||||
def activate_bootloader(self, context, node, instance):
|
||||
raise NotImplementedError()
|
||||
|
||||
def activate_bootloader(self, var, context, node, instance, image_meta):
|
||||
def deactivate_bootloader(self, context, node, instance):
|
||||
raise NotImplementedError()
|
||||
|
||||
def deactivate_bootloader(self, var, context, node, instance):
|
||||
raise NotImplementedError()
|
||||
|
||||
def activate_node(self, var, context, node, instance):
|
||||
def activate_node(self, context, node, instance):
|
||||
"""For operations after power on."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def deactivate_node(self, var, context, node, instance):
|
||||
def deactivate_node(self, context, node, instance):
|
||||
"""For operations before power off."""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
@ -52,16 +51,20 @@ class NodeDriver(object):
|
|||
class PowerManager(object):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.state = baremetal_states.DELETED
|
||||
pass
|
||||
|
||||
def activate_node(self):
|
||||
return baremetal_states.ACTIVE
|
||||
self.state = baremetal_states.ACTIVE
|
||||
return self.state
|
||||
|
||||
def reboot_node(self):
|
||||
return baremetal_states.ACTIVE
|
||||
self.state = baremetal_states.ACTIVE
|
||||
return self.state
|
||||
|
||||
def deactivate_node(self):
|
||||
return baremetal_states.DELETED
|
||||
self.state = baremetal_states.DELETED
|
||||
return self.state
|
||||
|
||||
def is_power_on(self):
|
||||
"""Returns True or False according as the node's power state"""
|
||||
|
|
|
@ -98,6 +98,10 @@ def bm_node_update(context, bm_node_id, values):
|
|||
return IMPL.bm_node_update(context, bm_node_id, values)
|
||||
|
||||
|
||||
def bm_node_set_uuid_safe(context, bm_node_id, uuid):
|
||||
return IMPL.bm_node_set_uuid_safe(context, bm_node_id, uuid)
|
||||
|
||||
|
||||
def bm_pxe_ip_create(context, address, server_address):
|
||||
return IMPL.bm_pxe_ip_create(context, address, server_address)
|
||||
|
||||
|
|
|
@ -150,6 +150,37 @@ def bm_node_update(context, bm_node_id, values):
|
|||
update(values)
|
||||
|
||||
|
||||
@require_admin_context
|
||||
def bm_node_set_uuid_safe(context, bm_node_id, values):
|
||||
"""Associate an instance to a node safely
|
||||
|
||||
Associate an instance to a node only if that node is not yet associated.
|
||||
Allow the caller to set any other fields they require in the same
|
||||
operation. For example, this is used to set the node's task_state to
|
||||
BUILDING at the beginning of driver.spawn().
|
||||
|
||||
"""
|
||||
if 'instance_uuid' not in values:
|
||||
raise exception.NovaException(_(
|
||||
"instance_uuid must be supplied to bm_node_set_uuid_safe"))
|
||||
|
||||
session = get_session()
|
||||
with session.begin():
|
||||
query = model_query(context, models.BareMetalNode,
|
||||
session=session, read_deleted="no").\
|
||||
filter_by(id=bm_node_id)
|
||||
|
||||
count = query.filter_by(instance_uuid=None).\
|
||||
update(values, synchronize_session=False)
|
||||
if count != 1:
|
||||
raise exception.NovaException(_(
|
||||
"Failed to associate instance %(uuid)s to baremetal node "
|
||||
"%(id)s.") % {'id': bm_node_id,
|
||||
'uuid': values['instance_uuid']})
|
||||
ref = query.first()
|
||||
return ref
|
||||
|
||||
|
||||
@require_admin_context
|
||||
def bm_node_destroy(context, bm_node_id):
|
||||
model_query(context, models.BareMetalNode).\
|
||||
|
|
|
@ -56,7 +56,7 @@ opts = [
|
|||
default='nova.virt.baremetal.pxe.PXE',
|
||||
help='Baremetal driver back-end (pxe or tilera)'),
|
||||
cfg.StrOpt('power_manager',
|
||||
default='nova.virt.baremetal.ipmi.Ipmi',
|
||||
default='nova.virt.baremetal.ipmi.IPMI',
|
||||
help='Baremetal power management method'),
|
||||
cfg.StrOpt('tftp_root',
|
||||
default='/tftpboot',
|
||||
|
@ -93,14 +93,16 @@ def _get_baremetal_node_by_instance_uuid(instance_uuid):
|
|||
return node
|
||||
|
||||
|
||||
def _update_baremetal_state(context, node, instance, state):
|
||||
instance_uuid = None
|
||||
if instance:
|
||||
instance_uuid = instance['uuid']
|
||||
db.bm_node_update(context, node['id'],
|
||||
{'instance_uuid': instance_uuid,
|
||||
'task_state': state,
|
||||
})
|
||||
def _update_state(context, node, instance, state):
|
||||
"""Update the node state in baremetal DB
|
||||
|
||||
If instance is not supplied, reset the instance_uuid field for this node.
|
||||
|
||||
"""
|
||||
values = {'task_state': state}
|
||||
if not instance:
|
||||
values['instance_uuid'] = None
|
||||
db.bm_node_update(context, node['id'], values)
|
||||
|
||||
|
||||
def get_power_manager(**kwargs):
|
||||
|
@ -174,57 +176,70 @@ class BareMetalDriver(driver.ComputeDriver):
|
|||
|
||||
def spawn(self, context, instance, image_meta, injected_files,
|
||||
admin_password, network_info=None, block_device_info=None):
|
||||
nodename = instance.get('node')
|
||||
if not nodename:
|
||||
raise exception.NovaException(_("Baremetal node id not supplied"
|
||||
" to driver"))
|
||||
node = db.bm_node_get(context, nodename)
|
||||
if node['instance_uuid']:
|
||||
raise exception.NovaException(_("Baremetal node %s already"
|
||||
" in use") % nodename)
|
||||
|
||||
# TODO(deva): split this huge try: block into manageable parts
|
||||
node_id = instance.get('node')
|
||||
if not node_id:
|
||||
raise exception.NovaException(_(
|
||||
"Baremetal node id not supplied to driver"))
|
||||
|
||||
# NOTE(deva): this db method will raise an exception if the node is
|
||||
# already in use. We call it here to ensure no one else
|
||||
# allocates this node before we begin provisioning it.
|
||||
node = db.bm_node_set_uuid_safe(context, node_id,
|
||||
{'instance_uuid': instance['uuid'],
|
||||
'task_state': baremetal_states.BUILDING})
|
||||
pm = get_power_manager(node=node, instance=instance)
|
||||
|
||||
try:
|
||||
_update_baremetal_state(context, node, instance,
|
||||
baremetal_states.BUILDING)
|
||||
|
||||
var = self.driver.define_vars(instance, network_info,
|
||||
block_device_info)
|
||||
|
||||
self._plug_vifs(instance, network_info, context=context)
|
||||
|
||||
self.firewall_driver.setup_basic_filtering(instance, network_info)
|
||||
self.firewall_driver.prepare_instance_filter(instance,
|
||||
network_info)
|
||||
self.firewall_driver.setup_basic_filtering(
|
||||
instance, network_info)
|
||||
self.firewall_driver.prepare_instance_filter(
|
||||
instance, network_info)
|
||||
self.firewall_driver.apply_instance_filter(
|
||||
instance, network_info)
|
||||
|
||||
self.driver.create_image(var, context, image_meta, node,
|
||||
instance,
|
||||
injected_files=injected_files,
|
||||
admin_password=admin_password)
|
||||
self.driver.activate_bootloader(var, context, node,
|
||||
instance, image_meta)
|
||||
pm = get_power_manager(node=node, instance=instance)
|
||||
state = pm.activate_node()
|
||||
|
||||
_update_baremetal_state(context, node, instance, state)
|
||||
|
||||
self.driver.activate_node(var, context, node, instance)
|
||||
self.firewall_driver.apply_instance_filter(instance, network_info)
|
||||
|
||||
block_device_mapping = driver.block_device_info_get_mapping(
|
||||
block_device_info)
|
||||
block_device_mapping = driver.\
|
||||
block_device_info_get_mapping(block_device_info)
|
||||
for vol in block_device_mapping:
|
||||
connection_info = vol['connection_info']
|
||||
mountpoint = vol['mount_device']
|
||||
self.attach_volume(connection_info, instance['name'],
|
||||
mountpoint)
|
||||
self.attach_volume(
|
||||
connection_info, instance['name'], mountpoint)
|
||||
|
||||
pm.start_console()
|
||||
try:
|
||||
image_info = self.driver.cache_images(
|
||||
context, node, instance,
|
||||
admin_password=admin_password,
|
||||
image_meta=image_meta,
|
||||
injected_files=injected_files,
|
||||
network_info=network_info,
|
||||
)
|
||||
try:
|
||||
self.driver.activate_bootloader(context, node, instance)
|
||||
except Exception, e:
|
||||
self.driver.deactivate_bootloader(context, node, instance)
|
||||
raise e
|
||||
except Exception, e:
|
||||
self.driver.destroy_images(context, node, instance)
|
||||
raise e
|
||||
except Exception, e:
|
||||
# TODO(deva): add tooling that can revert a failed spawn
|
||||
_update_baremetal_state(context, node, instance,
|
||||
baremetal_states.ERROR)
|
||||
# TODO(deva): do network and volume cleanup here
|
||||
raise e
|
||||
else:
|
||||
# NOTE(deva): pm.activate_node should not raise exceptions.
|
||||
# We check its success in "finally" block
|
||||
pm.activate_node()
|
||||
pm.start_console()
|
||||
finally:
|
||||
if pm.state != baremetal_states.ACTIVE:
|
||||
pm.state = baremetal_states.ERROR
|
||||
try:
|
||||
_update_state(context, node, instance, pm.state)
|
||||
except exception.DBError, e:
|
||||
LOG.warning(_("Failed to update state record for "
|
||||
"baremetal node %s") % instance['uuid'])
|
||||
|
||||
def reboot(self, instance, network_info, reboot_type,
|
||||
block_device_info=None):
|
||||
|
@ -232,7 +247,7 @@ class BareMetalDriver(driver.ComputeDriver):
|
|||
ctx = nova_context.get_admin_context()
|
||||
pm = get_power_manager(node=node, instance=instance)
|
||||
state = pm.reboot_node()
|
||||
_update_baremetal_state(ctx, node, instance, state)
|
||||
_update_state(ctx, node, instance, state)
|
||||
|
||||
def destroy(self, instance, network_info, block_device_info=None):
|
||||
ctx = nova_context.get_admin_context()
|
||||
|
@ -246,10 +261,7 @@ class BareMetalDriver(driver.ComputeDriver):
|
|||
% instance['uuid'])
|
||||
return
|
||||
|
||||
var = self.driver.define_vars(instance, network_info,
|
||||
block_device_info)
|
||||
|
||||
self.driver.deactivate_node(var, ctx, node, instance)
|
||||
self.driver.deactivate_node(ctx, node, instance)
|
||||
|
||||
pm = get_power_manager(node=node, instance=instance)
|
||||
|
||||
|
@ -267,9 +279,9 @@ class BareMetalDriver(driver.ComputeDriver):
|
|||
mountpoint = vol['mount_device']
|
||||
self.detach_volume(connection_info, instance['name'], mountpoint)
|
||||
|
||||
self.driver.deactivate_bootloader(var, ctx, node, instance)
|
||||
self.driver.deactivate_bootloader(ctx, node, instance)
|
||||
|
||||
self.driver.destroy_images(var, ctx, node, instance)
|
||||
self.driver.destroy_images(ctx, node, instance)
|
||||
|
||||
# stop firewall
|
||||
self.firewall_driver.unfilter_instance(instance,
|
||||
|
@ -277,7 +289,7 @@ class BareMetalDriver(driver.ComputeDriver):
|
|||
|
||||
self._unplug_vifs(instance, network_info)
|
||||
|
||||
_update_baremetal_state(ctx, node, None, state)
|
||||
_update_state(ctx, node, None, state)
|
||||
|
||||
def power_off(self, instance):
|
||||
"""Power off the specified instance."""
|
||||
|
|
|
@ -21,33 +21,25 @@ from nova.virt.baremetal import base
|
|||
from nova.virt.firewall import NoopFirewallDriver
|
||||
|
||||
|
||||
def get_baremetal_nodes():
|
||||
return FakeDriver()
|
||||
|
||||
|
||||
class FakeDriver(base.NodeDriver):
|
||||
|
||||
def define_vars(self, instance, network_info, block_device_info):
|
||||
return {}
|
||||
|
||||
def create_image(self, var, context, image_meta, node, instance,
|
||||
injected_files=None, admin_password=None):
|
||||
def cache_images(self, context, node, instance, **kwargs):
|
||||
pass
|
||||
|
||||
def destroy_images(self, var, context, node, instance):
|
||||
def destroy_images(self, context, node, instance):
|
||||
pass
|
||||
|
||||
def activate_bootloader(self, var, context, node, instance, image_meta):
|
||||
def activate_bootloader(self, context, node, instance):
|
||||
pass
|
||||
|
||||
def deactivate_bootloader(self, var, context, node, instance):
|
||||
def deactivate_bootloader(self, context, node, instance):
|
||||
pass
|
||||
|
||||
def activate_node(self, var, context, node, instance):
|
||||
def activate_node(self, context, node, instance):
|
||||
"""For operations after power on."""
|
||||
pass
|
||||
|
||||
def deactivate_node(self, var, context, node, instance):
|
||||
def deactivate_node(self, context, node, instance):
|
||||
"""For operations before power off."""
|
||||
pass
|
||||
|
||||
|
@ -57,23 +49,8 @@ class FakeDriver(base.NodeDriver):
|
|||
|
||||
class FakePowerManager(base.PowerManager):
|
||||
|
||||
def activate_node(self):
|
||||
return baremetal_states.ACTIVE
|
||||
|
||||
def reboot_node(self):
|
||||
return baremetal_states.ACTIVE
|
||||
|
||||
def deactivate_node(self):
|
||||
return baremetal_states.DELETED
|
||||
|
||||
def is_power_on(self):
|
||||
return True
|
||||
|
||||
def start_console(self):
|
||||
pass
|
||||
|
||||
def stop_console(self):
|
||||
pass
|
||||
def __init__(self, **kwargs):
|
||||
super(FakePowerManager, self).__init__(**kwargs)
|
||||
|
||||
|
||||
class FakeFirewallDriver(NoopFirewallDriver):
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
import os
|
||||
|
||||
from nova.openstack.common import log as logging
|
||||
from nova.virt.disk import api as disk_api
|
||||
from nova.virt.libvirt import utils as libvirt_utils
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -30,8 +30,31 @@ def cache_image(context, target, image_id, user_id, project_id):
|
|||
user_id, project_id)
|
||||
|
||||
|
||||
def inject_into_image(image, key, net, metadata, admin_password,
|
||||
files, partition, use_cow=False):
|
||||
try:
|
||||
disk_api.inject_data(image, key, net, metadata, admin_password,
|
||||
files, partition, use_cow)
|
||||
except Exception as e:
|
||||
LOG.warn(_("Failed to inject data into image %(image)s. "
|
||||
"Error: %(e)s") % locals())
|
||||
|
||||
|
||||
def unlink_without_raise(path):
|
||||
try:
|
||||
libvirt_utils.file_delete(path)
|
||||
os.unlink(path)
|
||||
except OSError:
|
||||
LOG.exception(_("failed to unlink %s") % path)
|
||||
LOG.exception(_("Failed to unlink %s") % path)
|
||||
|
||||
|
||||
def write_to_file(path, contents):
|
||||
with open(path, 'w') as f:
|
||||
f.write(contents)
|
||||
|
||||
|
||||
def create_link_without_raise(source, link):
|
||||
try:
|
||||
os.symlink(source, link)
|
||||
except OSError:
|
||||
LOG.exception(_("Failed to create symlink from %(source)s to %(link)s")
|
||||
% locals())
|
||||
|
|
Loading…
Reference in New Issue