diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 660785e1adf4..83463db6a425 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -193,6 +193,10 @@ timeout_opts = [ default=0, help="Automatically confirm resizes after N seconds. " "Set to 0 to disable."), + cfg.IntOpt("shutdown_timeout", + default=60, + help="Total amount of time to wait in seconds for an instance " + "to perform a clean shutdown."), ] running_deleted_opts = [ @@ -568,6 +572,11 @@ class ComputeManager(manager.Manager): target = messaging.Target(version='3.32') + # How long to wait in seconds before re-issuing a shutdown + # signal to an instance during power off. The overall + # time to wait is set by CONF.shutdown_timeout. + SHUTDOWN_RETRY_INTERVAL = 10 + def __init__(self, compute_driver=None, *args, **kwargs): """Load configuration options and connect to the hypervisor.""" self.virtapi = ComputeVirtAPI(self) @@ -2186,6 +2195,25 @@ class ComputeManager(manager.Manager): instance=instance) self._set_instance_error_state(context, instance) + def _get_power_off_values(self, context, instance, clean_shutdown): + """Get the timing configuration for powering down this instance.""" + if clean_shutdown: + timeout = compute_utils.get_value_from_system_metadata(instance, + key='image_os_shutdown_timeout', type=int, + default=CONF.shutdown_timeout) + retry_interval = self.SHUTDOWN_RETRY_INTERVAL + else: + timeout = 0 + retry_interval = 0 + + return timeout, retry_interval + + def _power_off_instance(self, context, instance, clean_shutdown=True): + """Power off an instance on this host.""" + timeout, retry_interval = self._get_power_off_values(context, + instance, clean_shutdown) + self.driver.power_off(instance, timeout, retry_interval) + def _shutdown_instance(self, context, instance, bdms, requested_networks=None, notify=True, try_deallocate_networks=True): @@ -2377,14 +2405,14 @@ class ComputeManager(manager.Manager): @reverts_task_state @wrap_instance_event 
@wrap_instance_fault - def stop_instance(self, context, instance): + def stop_instance(self, context, instance, clean_shutdown=True): """Stopping an instance on this host.""" @utils.synchronized(instance.uuid) def do_stop_instance(): self._notify_about_instance_usage(context, instance, "power_off.start") - self.driver.power_off(instance) + self._power_off_instance(context, instance, clean_shutdown) current_power_state = self._get_power_state(context, instance) instance.power_state = current_power_state instance.vm_state = vm_states.STOPPED diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 775698c482f0..6a00c138979e 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -219,6 +219,25 @@ def get_image_metadata(context, image_api, image_id_or_uri, instance): return utils.get_image_from_system_metadata(system_meta) +def get_value_from_system_metadata(instance, key, type, default): + """Get a value of a specified type from image metadata. + + @param instance: The instance object + @param key: The name of the property to get + @param type: The python type the value is be returned as + @param default: The value to return if key is not set or not the right type + """ + value = instance.system_metadata.get(key, default) + try: + return type(value) + except ValueError: + LOG.warning(_LW("Metadata value %(value)s for %(key)s is not of " + "type %(type)s. 
Using default value %(default)s."), + {'value': value, 'key': key, 'type': type, + 'default': default}, instance=instance) + return default + + def notify_usage_exists(notifier, context, instance_ref, current_period=False, ignore_missing_network_data=True, system_metadata=None, extra_usage_info=None): diff --git a/nova/tests/api/ec2/test_cloud.py b/nova/tests/api/ec2/test_cloud.py index db8a7177d819..34f16a718899 100644 --- a/nova/tests/api/ec2/test_cloud.py +++ b/nova/tests/api/ec2/test_cloud.py @@ -2539,7 +2539,8 @@ class CloudTestCase(test.TestCase): self.stubs.Set(fake_virt.FakeDriver, 'power_on', fake_power_on) - def fake_power_off(self, instance): + def fake_power_off(self, instance, + shutdown_timeout, shutdown_attempts): virt_driver['powered_off'] = True self.stubs.Set(fake_virt.FakeDriver, 'power_off', fake_power_off) diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 8991a7ab6067..5d24778bcf88 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -2134,7 +2134,8 @@ class ComputeTestCase(BaseTestCase): called = {'power_off': False} - def fake_driver_power_off(self, instance): + def fake_driver_power_off(self, instance, + shutdown_timeout, shutdown_attempts): called['power_off'] = True self.stubs.Set(nova.virt.fake.FakeDriver, 'power_off', diff --git a/nova/tests/compute/test_compute_utils.py b/nova/tests/compute/test_compute_utils.py index a9d13b00bb70..9d841f592e3a 100644 --- a/nova/tests/compute/test_compute_utils.py +++ b/nova/tests/compute/test_compute_utils.py @@ -724,6 +724,28 @@ class ComputeGetImageMetadataTestCase(test.TestCase): self.assertThat(expected, matchers.DictMatches(image_meta)) +class ComputeUtilsGetValFromSysMetadata(test.TestCase): + + def test_get_value_from_system_metadata(self): + instance = fake_instance.fake_instance_obj('fake-context') + system_meta = {'int_val': 1, + 'int_string': '2', + 'not_int': 'Nope'} + instance.system_metadata = system_meta + + 
result = compute_utils.get_value_from_system_metadata( + instance, 'int_val', int, 0) + self.assertEqual(1, result) + + result = compute_utils.get_value_from_system_metadata( + instance, 'int_string', int, 0) + self.assertEqual(2, result) + + result = compute_utils.get_value_from_system_metadata( + instance, 'not_int', int, 0) + self.assertEqual(0, result) + + class ComputeUtilsGetNWInfo(test.TestCase): def test_instance_object_none_info_cache(self): inst = fake_instance.fake_instance_obj('fake-context', diff --git a/nova/tests/virt/libvirt/test_driver.py b/nova/tests/virt/libvirt/test_driver.py index b1b80437a08d..f8ae1616f681 100644 --- a/nova/tests/virt/libvirt/test_driver.py +++ b/nova/tests/virt/libvirt/test_driver.py @@ -6193,6 +6193,82 @@ class LibvirtConnTestCase(test.TestCase, conn._hard_reboot(self.context, instance, network_info, block_device_info) + def _test_clean_shutdown(self, seconds_to_shutdown, + timeout, retry_interval, + shutdown_attempts, succeeds): + info_tuple = ('fake', 'fake', 'fake', 'also_fake') + shutdown_count = [] + + def count_shutdowns(): + shutdown_count.append("shutdown") + + # Mock domain + mock_domain = self.mox.CreateMock(libvirt.virDomain) + + mock_domain.info().AndReturn( + (libvirt_driver.VIR_DOMAIN_RUNNING,) + info_tuple) + mock_domain.shutdown().WithSideEffects(count_shutdowns) + + retry_countdown = retry_interval + for x in xrange(min(seconds_to_shutdown, timeout)): + mock_domain.info().AndReturn( + (libvirt_driver.VIR_DOMAIN_RUNNING,) + info_tuple) + if retry_countdown == 0: + mock_domain.shutdown().WithSideEffects(count_shutdowns) + retry_countdown = retry_interval + else: + retry_countdown -= 1 + + if seconds_to_shutdown < timeout: + mock_domain.info().AndReturn( + (libvirt_driver.VIR_DOMAIN_SHUTDOWN,) + info_tuple) + + self.mox.ReplayAll() + + def fake_lookup_by_name(instance_name): + return mock_domain + + def fake_create_domain(**kwargs): + self.reboot_create_called = True + + conn = 
libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + instance = {"name": "instancename", "id": "instanceid", + "uuid": "875a8070-d0b9-4949-8b31-104d125c9a64"} + self.stubs.Set(conn, '_lookup_by_name', fake_lookup_by_name) + self.stubs.Set(conn, '_create_domain', fake_create_domain) + result = conn._clean_shutdown(instance, timeout, retry_interval) + + self.assertEqual(succeeds, result) + self.assertEqual(shutdown_attempts, len(shutdown_count)) + + def test_clean_shutdown_first_time(self): + self._test_clean_shutdown(seconds_to_shutdown=2, + timeout=5, + retry_interval=3, + shutdown_attempts=1, + succeeds=True) + + def test_clean_shutdown_with_retry(self): + self._test_clean_shutdown(seconds_to_shutdown=4, + timeout=5, + retry_interval=3, + shutdown_attempts=2, + succeeds=True) + + def test_clean_shutdown_failure(self): + self._test_clean_shutdown(seconds_to_shutdown=6, + timeout=5, + retry_interval=3, + shutdown_attempts=2, + succeeds=False) + + def test_clean_shutdown_no_wait(self): + self._test_clean_shutdown(seconds_to_shutdown=6, + timeout=0, + retry_interval=3, + shutdown_attempts=1, + succeeds=False) + def test_resume(self): dummyxml = ("instance-0000000a" "" diff --git a/nova/tests/virt/test_ironic_api_contracts.py b/nova/tests/virt/test_ironic_api_contracts.py index b63a8dd632d9..730ba942a255 100644 --- a/nova/tests/virt/test_ironic_api_contracts.py +++ b/nova/tests/virt/test_ironic_api_contracts.py @@ -103,7 +103,7 @@ class IronicAPIContractsTestCase(test.NoDBTestCase): self._check_method(driver.ComputeDriver.power_off, "ComputeDriver.power_off", - ['self', 'instance']) + ['self', 'instance', 'timeout', 'retry_interval']) self._check_method(driver.ComputeDriver.power_on, "ComputeDriver.power_on", diff --git a/nova/virt/baremetal/driver.py b/nova/virt/baremetal/driver.py index ec99d4de45d4..7f94a9862381 100644 --- a/nova/virt/baremetal/driver.py +++ b/nova/virt/baremetal/driver.py @@ -407,8 +407,9 @@ class BareMetalDriver(driver.ComputeDriver): 
"""Cleanup after instance being destroyed.""" pass - def power_off(self, instance, node=None): + def power_off(self, instance, timeout=0, retry_interval=0, node=None): """Power off the specified instance.""" + # TODO(PhilDay): Add support for timeout (clean shutdown) if not node: node = _get_baremetal_node_by_instance_uuid(instance['uuid']) pm = get_power_manager(node=node, instance=instance) diff --git a/nova/virt/driver.py b/nova/virt/driver.py index b64459e2ad19..ea05ba2878ec 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -589,10 +589,13 @@ class ComputeDriver(object): # TODO(Vek): Need to pass context in for access to auth_token raise NotImplementedError() - def power_off(self, instance): + def power_off(self, instance, timeout=0, retry_interval=0): """Power off the specified instance. :param instance: nova.objects.instance.Instance + :param timeout: time to wait for GuestOS to shutdown + :param retry_interval: How often to signal guest while + waiting for it to shutdown """ raise NotImplementedError() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 5ffcbf8f367f..ce7f194caeee 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -184,7 +184,7 @@ class FakeDriver(driver.ComputeDriver): block_device_info=None): pass - def power_off(self, instance): + def power_off(self, instance, shutdown_timeout=0, shutdown_attempts=0): pass def power_on(self, context, instance, network_info, block_device_info): diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 5b666cfa1228..61d750ee3bcd 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -109,7 +109,8 @@ class HyperVDriver(driver.ComputeDriver): def resume(self, context, instance, network_info, block_device_info=None): self._vmops.resume(instance) - def power_off(self, instance): + def power_off(self, instance, timeout=0, retry_interval=0): + # TODO(PhilDay): Add support for timeout (clean shutdown) self._vmops.power_off(instance) def power_on(self, 
context, instance, network_info, diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 1675a92c4a37..b5ecf741325a 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -2182,8 +2182,85 @@ class LibvirtDriver(driver.ComputeDriver): dom = self._lookup_by_name(instance['name']) dom.resume() - def power_off(self, instance): + def _clean_shutdown(self, instance, timeout, retry_interval): + """Attempt to shutdown the instance gracefully. + + :param instance: The instance to be shutdown + :param timeout: How long to wait in seconds for the instance to + shutdown + :param retry_interval: How often in seconds to signal the instance + to shutdown while waiting + + :returns: True if the shutdown succeeded + """ + + # List of states that represent a shutdown instance + SHUTDOWN_STATES = [power_state.SHUTDOWN, + power_state.CRASHED] + + try: + dom = self._lookup_by_name(instance["name"]) + except exception.InstanceNotFound: + # If the instance has gone then we don't need to + # wait for it to shutdown + return True + + (state, _max_mem, _mem, _cpus, _t) = dom.info() + state = LIBVIRT_POWER_STATE[state] + if state in SHUTDOWN_STATES: + LOG.info(_LI("Instance already shutdown."), + instance=instance) + return True + + LOG.debug("Shutting down instance from state %s", state, + instance=instance) + dom.shutdown() + retry_countdown = retry_interval + + for sec in six.moves.range(timeout): + + dom = self._lookup_by_name(instance["name"]) + (state, _max_mem, _mem, _cpus, _t) = dom.info() + state = LIBVIRT_POWER_STATE[state] + + if state in SHUTDOWN_STATES: + LOG.info(_LI("Instance shutdown successfully after %d " + "seconds."), sec, instance=instance) + return True + + # Note(PhilD): We can't assume that the Guest was able to process + # any previous shutdown signal (for example it may + # have still been starting up, so within the overall + # timeout we re-trigger the shutdown every + # retry_interval + if retry_countdown == 0: + 
retry_countdown = retry_interval + # Instance could shutdown at any time, in which case we + # will get an exception when we call shutdown + try: + LOG.debug("Instance in state %s after %d seconds - " + "resending shutdown", state, sec, + instance=instance) + dom.shutdown() + except libvirt.libvirtError: + # Assume this is because it's now shutdown, so loop + # one more time to clean up. + LOG.debug("Ignoring libvirt exception from shutdown " + "request.", instance=instance) + continue + else: + retry_countdown -= 1 + + time.sleep(1) + + LOG.info(_LI("Instance failed to shutdown in %d seconds."), + timeout, instance=instance) + return False + + def power_off(self, instance, timeout=0, retry_interval=0): """Power off the specified instance.""" + if timeout: + self._clean_shutdown(instance, timeout, retry_interval) self._destroy(instance) def power_on(self, context, instance, network_info, diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index 0adb52de97d1..2bbf8698b01d 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -494,8 +494,9 @@ class VMwareVCDriver(driver.ComputeDriver): _vmops = self._get_vmops_for_compute_node(instance.node) _vmops.unrescue(instance) - def power_off(self, instance): + def power_off(self, instance, timeout=0, retry_interval=0): """Power off the specified instance.""" + # TODO(PhilDay): Add support for timeout (clean shutdown) _vmops = self._get_vmops_for_compute_node(instance['node']) _vmops.power_off(instance) diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index 9cd6fd4be2d5..5ec51ce89863 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -299,8 +299,9 @@ class XenAPIDriver(driver.ComputeDriver): """Unrescue the specified instance.""" self._vmops.unrescue(instance) - def power_off(self, instance): + def power_off(self, instance, timeout=0, retry_interval=0): """Power off the specified instance.""" + # TODO(PhilDay): Add support for 
timeout (clean shutdown) self._vmops.power_off(instance) def power_on(self, context, instance, network_info,