Merge "ironic: check fresh data when sync_power_state doesn't line up" into stable/rocky
This commit is contained in:
commit
16c5931488
|
@ -7605,6 +7605,53 @@ class ComputeManager(manager.Manager):
|
|||
# silently ignore.
|
||||
pass
|
||||
|
||||
def _stop_unexpected_shutdown_instance(self, context, vm_state,
|
||||
db_instance, orig_db_power_state,
|
||||
vm_power_state):
|
||||
# this is an exceptional case; make sure our data is up
|
||||
# to date before slamming through a power off
|
||||
# TODO(jroll) remove the check for TypeError here;
|
||||
# it's only here to be able to backport this without
|
||||
# breaking out-of-tree drivers.
|
||||
try:
|
||||
vm_instance = self.driver.get_info(db_instance,
|
||||
use_cache=False)
|
||||
vm_power_state = vm_instance.state
|
||||
except TypeError:
|
||||
LOG.warning("Your virt driver appears to not support the "
|
||||
"'use_cache' parameter to the 'get_info' method; "
|
||||
"please update your virt driver.")
|
||||
|
||||
# if it still looks off, go ahead and call stop()
|
||||
if vm_power_state in (power_state.SHUTDOWN,
|
||||
power_state.CRASHED):
|
||||
|
||||
LOG.warning("Instance shutdown by itself. Calling the "
|
||||
"stop API. Current vm_state: %(vm_state)s, "
|
||||
"current task_state: %(task_state)s, "
|
||||
"original DB power_state: %(db_power_state)s, "
|
||||
"current VM power_state: %(vm_power_state)s",
|
||||
{'vm_state': vm_state,
|
||||
'task_state': db_instance.task_state,
|
||||
'db_power_state': orig_db_power_state,
|
||||
'vm_power_state': vm_power_state},
|
||||
instance=db_instance)
|
||||
try:
|
||||
# Note(maoy): here we call the API instead of
|
||||
# brutally updating the vm_state in the database
|
||||
# to allow all the hooks and checks to be performed.
|
||||
if db_instance.shutdown_terminate:
|
||||
self.compute_api.delete(context, db_instance)
|
||||
else:
|
||||
self.compute_api.stop(context, db_instance)
|
||||
except Exception:
|
||||
# Note(maoy): there is no need to propagate the error
|
||||
# because the same power_state will be retrieved next
|
||||
# time and retried.
|
||||
# For example, there might be another task scheduled.
|
||||
LOG.exception("error during stop() in sync_power_state.",
|
||||
instance=db_instance)
|
||||
|
||||
def _sync_instance_power_state(self, context, db_instance, vm_power_state,
|
||||
use_slave=False):
|
||||
"""Align instance power state between the database and hypervisor.
|
||||
|
@ -7674,31 +7721,9 @@ class ComputeManager(manager.Manager):
|
|||
# The only rational power state should be RUNNING
|
||||
if vm_power_state in (power_state.SHUTDOWN,
|
||||
power_state.CRASHED):
|
||||
LOG.warning("Instance shutdown by itself. Calling the "
|
||||
"stop API. Current vm_state: %(vm_state)s, "
|
||||
"current task_state: %(task_state)s, "
|
||||
"original DB power_state: %(db_power_state)s, "
|
||||
"current VM power_state: %(vm_power_state)s",
|
||||
{'vm_state': vm_state,
|
||||
'task_state': db_instance.task_state,
|
||||
'db_power_state': orig_db_power_state,
|
||||
'vm_power_state': vm_power_state},
|
||||
instance=db_instance)
|
||||
try:
|
||||
# Note(maoy): here we call the API instead of
|
||||
# brutally updating the vm_state in the database
|
||||
# to allow all the hooks and checks to be performed.
|
||||
if db_instance.shutdown_terminate:
|
||||
self.compute_api.delete(context, db_instance)
|
||||
else:
|
||||
self.compute_api.stop(context, db_instance)
|
||||
except Exception:
|
||||
# Note(maoy): there is no need to propagate the error
|
||||
# because the same power_state will be retrieved next
|
||||
# time and retried.
|
||||
# For example, there might be another task scheduled.
|
||||
LOG.exception("error during stop() in sync_power_state.",
|
||||
instance=db_instance)
|
||||
self._stop_unexpected_shutdown_instance(
|
||||
context, vm_state, db_instance, orig_db_power_state,
|
||||
vm_power_state)
|
||||
elif vm_power_state == power_state.SUSPENDED:
|
||||
LOG.warning("Instance is suspended unexpectedly. Calling "
|
||||
"the stop API.", instance=db_instance)
|
||||
|
|
|
@ -1778,10 +1778,14 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
power_state.RUNNING)
|
||||
mock_refresh.assert_called_once_with(use_slave=False)
|
||||
|
||||
@mock.patch.object(fake_driver.FakeDriver, 'get_info')
|
||||
@mock.patch.object(objects.Instance, 'refresh')
|
||||
@mock.patch.object(objects.Instance, 'save')
|
||||
def test_sync_instance_power_state_running_stopped(self, mock_save,
|
||||
mock_refresh):
|
||||
mock_refresh,
|
||||
mock_get_info):
|
||||
mock_get_info.return_value = hardware.InstanceInfo(
|
||||
state=power_state.SHUTDOWN)
|
||||
instance = self._get_sync_instance(power_state.RUNNING,
|
||||
vm_states.ACTIVE)
|
||||
self.compute._sync_instance_power_state(self.context, instance,
|
||||
|
@ -1789,11 +1793,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(instance.power_state, power_state.SHUTDOWN)
|
||||
mock_refresh.assert_called_once_with(use_slave=False)
|
||||
self.assertTrue(mock_save.called)
|
||||
mock_get_info.assert_called_once_with(instance, use_cache=False)
|
||||
|
||||
def _test_sync_to_stop(self, power_state, vm_state, driver_power_state,
|
||||
def _test_sync_to_stop(self, vm_power_state, vm_state, driver_power_state,
|
||||
stop=True, force=False, shutdown_terminate=False):
|
||||
instance = self._get_sync_instance(
|
||||
power_state, vm_state, shutdown_terminate=shutdown_terminate)
|
||||
vm_power_state, vm_state, shutdown_terminate=shutdown_terminate)
|
||||
|
||||
with test.nested(
|
||||
mock.patch.object(objects.Instance, 'refresh'),
|
||||
|
@ -1801,7 +1806,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
mock.patch.object(self.compute.compute_api, 'stop'),
|
||||
mock.patch.object(self.compute.compute_api, 'delete'),
|
||||
mock.patch.object(self.compute.compute_api, 'force_stop'),
|
||||
) as (mock_refresh, mock_save, mock_stop, mock_delete, mock_force):
|
||||
mock.patch.object(self.compute.driver, 'get_info')
|
||||
) as (mock_refresh, mock_save, mock_stop, mock_delete, mock_force,
|
||||
mock_get_info):
|
||||
mock_get_info.return_value = hardware.InstanceInfo(
|
||||
state=driver_power_state)
|
||||
|
||||
self.compute._sync_instance_power_state(self.context, instance,
|
||||
driver_power_state)
|
||||
if shutdown_terminate:
|
||||
|
@ -1811,6 +1821,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
mock_force.assert_called_once_with(self.context, instance)
|
||||
else:
|
||||
mock_stop.assert_called_once_with(self.context, instance)
|
||||
if (vm_state == vm_states.ACTIVE and
|
||||
vm_power_state in (power_state.SHUTDOWN,
|
||||
power_state.CRASHED)):
|
||||
mock_get_info.assert_called_once_with(instance,
|
||||
use_cache=False)
|
||||
mock_refresh.assert_called_once_with(use_slave=False)
|
||||
self.assertTrue(mock_save.called)
|
||||
|
||||
|
|
|
@ -1243,6 +1243,34 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
|||
mock_gbiu.assert_called_once_with(instance.uuid,
|
||||
fields=ironic_driver._NODE_FIELDS)
|
||||
|
||||
@mock.patch.object(ironic_driver.IronicDriver, '_get_node_list')
|
||||
@mock.patch.object(objects.InstanceList, 'get_uuids_by_host')
|
||||
@mock.patch.object(objects.ServiceList, 'get_all_computes_by_hv_type')
|
||||
@mock.patch.object(FAKE_CLIENT.node, 'get_by_instance_uuid')
|
||||
def test_get_info_skip_cache(self, mock_gbiu, mock_svc_by_hv,
|
||||
mock_uuids_by_host, mock_get_node_list):
|
||||
properties = {'memory_mb': 512, 'cpus': 2}
|
||||
power_state = ironic_states.POWER_ON
|
||||
node = _get_cached_node(
|
||||
instance_uuid=self.instance_uuid, properties=properties,
|
||||
power_state=power_state)
|
||||
|
||||
mock_gbiu.return_value = node
|
||||
mock_svc_by_hv.return_value = []
|
||||
mock_get_node_list.return_value = [node]
|
||||
|
||||
# ironic_states.POWER_ON should be mapped to
|
||||
# nova_states.RUNNING
|
||||
instance = fake_instance.fake_instance_obj('fake-context',
|
||||
uuid=self.instance_uuid)
|
||||
mock_uuids_by_host.return_value = [instance.uuid]
|
||||
result = self.driver.get_info(instance, use_cache=False)
|
||||
self.assertEqual(hardware.InstanceInfo(state=nova_states.RUNNING),
|
||||
result)
|
||||
# verify we hit the ironic API for fresh data
|
||||
mock_gbiu.assert_called_once_with(instance.uuid,
|
||||
fields=ironic_driver._NODE_FIELDS)
|
||||
|
||||
@mock.patch.object(ironic_driver.IronicDriver, '_get_node_list')
|
||||
@mock.patch.object(objects.InstanceList, 'get_uuids_by_host')
|
||||
@mock.patch.object(objects.ServiceList, 'get_all_computes_by_hv_type')
|
||||
|
|
|
@ -158,10 +158,15 @@ class ComputeDriver(object):
|
|||
"""
|
||||
pass
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Get the current status of an instance.
|
||||
|
||||
:param instance: nova.objects.instance.Instance object
|
||||
:param use_cache: boolean to indicate if the driver should be allowed
|
||||
to use cached data to return instance status.
|
||||
This only applies to drivers which cache instance
|
||||
state information. For drivers that do not use a
|
||||
cache, this parameter can be ignored.
|
||||
:returns: An InstanceInfo object
|
||||
"""
|
||||
# TODO(Vek): Need to pass context in for access to auth_token
|
||||
|
|
|
@ -343,7 +343,7 @@ class FakeDriver(driver.ComputeDriver):
|
|||
raise exception.InterfaceDetachFailed(
|
||||
instance_uuid=instance.uuid)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
if instance.uuid not in self.instances:
|
||||
raise exception.InstanceNotFound(instance_id=instance.uuid)
|
||||
i = self.instances[instance.uuid]
|
||||
|
|
|
@ -176,7 +176,7 @@ class HyperVDriver(driver.ComputeDriver):
|
|||
"""Cleanup after instance being destroyed by Hypervisor."""
|
||||
self.unplug_vifs(instance, network_info)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
return self._vmops.get_info(instance)
|
||||
|
||||
def attach_volume(self, context, connection_info, instance, mountpoint,
|
||||
|
|
|
@ -888,15 +888,30 @@ class IronicDriver(virt_driver.ComputeDriver):
|
|||
return node
|
||||
return _sync_node_from_cache()
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Get the current state and resource usage for this instance.
|
||||
|
||||
If the instance is not found this method returns (a dictionary
|
||||
with) NOSTATE and all resources == 0.
|
||||
|
||||
:param instance: the instance object.
|
||||
:param use_cache: boolean to indicate if the driver should be allowed
|
||||
to use cached data to return instance status.
|
||||
If false, pull fresh data from ironic.
|
||||
:returns: an InstanceInfo object
|
||||
"""
|
||||
def _fetch_from_ironic(self, instance):
|
||||
try:
|
||||
node = self._validate_instance_and_node(instance)
|
||||
return hardware.InstanceInfo(
|
||||
state=map_power_state(node.power_state))
|
||||
except exception.InstanceNotFound:
|
||||
return hardware.InstanceInfo(
|
||||
state=map_power_state(ironic_states.NOSTATE))
|
||||
|
||||
if not use_cache:
|
||||
return _fetch_from_ironic(self, instance)
|
||||
|
||||
# we should already have a cache for our nodes, refreshed on every
|
||||
# RT loop. but if we don't have a cache, generate it.
|
||||
if not self.node_cache:
|
||||
|
@ -907,11 +922,7 @@ class IronicDriver(virt_driver.ComputeDriver):
|
|||
break
|
||||
else:
|
||||
# if we can't find the instance, fall back to ironic
|
||||
try:
|
||||
node = self._validate_instance_and_node(instance)
|
||||
except exception.InstanceNotFound:
|
||||
return hardware.InstanceInfo(
|
||||
state=map_power_state(ironic_states.NOSTATE))
|
||||
return _fetch_from_ironic(self, instance)
|
||||
|
||||
return hardware.InstanceInfo(state=map_power_state(node.power_state))
|
||||
|
||||
|
|
|
@ -5443,7 +5443,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
{'xml': xml}, instance=instance)
|
||||
return xml
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Retrieve information from libvirt for a specific instance.
|
||||
|
||||
If a libvirt error is encountered during lookup, we might raise a
|
||||
|
@ -5451,6 +5451,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
libvirt error is.
|
||||
|
||||
:param instance: nova.objects.instance.Instance object
|
||||
:param use_cache: unused in this driver
|
||||
:returns: An InstanceInfo object
|
||||
"""
|
||||
guest = self._host.get_guest(instance)
|
||||
|
|
|
@ -115,10 +115,11 @@ class PowerVMDriver(driver.ComputeDriver):
|
|||
{'op': op, 'display_name': instance.display_name,
|
||||
'name': instance.name}, instance=instance)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Get the current status of an instance.
|
||||
|
||||
:param instance: nova.objects.instance.Instance object
|
||||
:param use_cache: unused in this driver
|
||||
:returns: An InstanceInfo object.
|
||||
"""
|
||||
return vm.get_vm_info(self.adapter, instance)
|
||||
|
|
|
@ -517,7 +517,7 @@ class VMwareVCDriver(driver.ComputeDriver):
|
|||
"""Poll for rebooting instances."""
|
||||
self._vmops.poll_rebooting_instances(timeout, instances)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Return info about the VM instance."""
|
||||
return self._vmops.get_info(instance)
|
||||
|
||||
|
|
|
@ -369,7 +369,7 @@ class XenAPIDriver(driver.ComputeDriver):
|
|||
"""Unplug VIFs from networks."""
|
||||
self._vmops.unplug_vifs(instance, network_info)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
"""Return data about VM instance."""
|
||||
return self._vmops.get_info(instance)
|
||||
|
||||
|
|
|
@ -122,7 +122,7 @@ class ZVMDriver(driver.ComputeDriver):
|
|||
def get_available_nodes(self, refresh=False):
|
||||
return self._hypervisor.get_available_nodes(refresh=refresh)
|
||||
|
||||
def get_info(self, instance):
|
||||
def get_info(self, instance, use_cache=True):
|
||||
_guest = guest.Guest(self._hypervisor, instance)
|
||||
return _guest.get_info()
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes a race condition that could allow a newly created Ironic
|
||||
instance to be powered off after deployment, without letting
|
||||
the user power it back on.
|
||||
upgrade:
|
||||
- |
|
||||
Adds a ``use_cache`` parameter to the virt driver ``get_info``
|
||||
method. Out of tree drivers should add support for this
|
||||
parameter.
|
Loading…
Reference in New Issue