diff --git a/releasenotes/notes/resize-timeout-0f6ede6ec9b644b7.yaml b/releasenotes/notes/resize-timeout-0f6ede6ec9b644b7.yaml new file mode 100644 index 000000000..60b693320 --- /dev/null +++ b/releasenotes/notes/resize-timeout-0f6ede6ec9b644b7.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Timeout of VM resize operations can be configured by applying the + `migration_max_retries` and `migration_interval` in the `nova` + section used for migrations. Default values are 180 retries and 5 seconds. +fixes: + - | + Fixed the issue when resize actions failed when the resize took more than + 120 seconds. After this patch, the default timeout is 900 seconds (15 minutes) + which should be a reasonable value for most OpenStack installations. diff --git a/watcher/common/nova_helper.py b/watcher/common/nova_helper.py index c9c9bbdb3..0cdb67aae 100644 --- a/watcher/common/nova_helper.py +++ b/watcher/common/nova_helper.py @@ -261,7 +261,7 @@ class NovaHelper(object): "cold migration for instance %s failed", instance_id) return False - def resize_instance(self, instance_id, flavor, retry=120): + def resize_instance(self, instance_id, flavor, retry=None, interval=None): """This method resizes given instance with specified flavor. This method uses the Nova built-in resize() @@ -272,12 +272,18 @@ class NovaHelper(object): :param instance_id: the unique id of the instance to resize. :param flavor: the name or ID of the flavor to resize to. 
+ :param retry: maximum number of retries before giving up + :param interval: interval in seconds between retries """ LOG.debug( "Trying a resize of instance %(instance)s to " "flavor '%(flavor)s'", {'instance': instance_id, 'flavor': flavor}) + # Use config defaults if not provided in method parameters + retry = retry or CONF.nova.migration_max_retries + interval = interval or CONF.nova.migration_interval + # Looking for the instance to resize instance = self.find_instance(instance_id) @@ -311,7 +317,7 @@ class NovaHelper(object): and retry: instance = self.nova.servers.get(instance.id) LOG.debug('Waiting the resize of %s to %s', instance, flavor_id) - time.sleep(1) + time.sleep(interval) retry -= 1 instance_status = getattr(instance, 'status') diff --git a/watcher/tests/common/test_nova_helper.py b/watcher/tests/common/test_nova_helper.py index e8eae4748..3c522612b 100644 --- a/watcher/tests/common/test_nova_helper.py +++ b/watcher/tests/common/test_nova_helper.py @@ -325,6 +325,91 @@ class TestNovaHelper(base.TestCase): self.flavor_name) self.assertFalse(is_success) + @mock.patch.object(nova_helper.NovaHelper, 'confirm_resize') + def test_watcher_resize_instance_retry_success( + self, mock_confirm_resize, mock_glance, mock_cinder, + mock_neutron, mock_nova): + """Test that resize_instance uses config timeout by default""" + nova_util = nova_helper.NovaHelper() + server = self.fake_server(self.instance_uuid) + server.status = 'RESIZING' + setattr(server, 'OS-EXT-STS:vm_state', 'resizing') + + resized_server = copy.deepcopy(server) + resized_server.status = 'VERIFY_RESIZE' + setattr(resized_server, 'OS-EXT-STS:vm_state', 'resized') + + # This means instance will be found as VERIFY_RESIZE in second retry + nova_util.nova.servers.get.side_effect = (server, server, + resized_server) + + mock_confirm_resize.return_value = True + + self.flags(migration_max_retries=20, migration_interval=4, + group='nova') + # Resize will succeed because status changes to VERIFY_RESIZE 
+ is_success = nova_util.resize_instance( + self.instance_uuid, self.flavor_name + ) + + # Should succeed + self.assertTrue(is_success) + # It will sleep 2 times because it will be found as VERIFY_RESIZE in + # the second retry + self.assertEqual(2, self.mock_sleep.call_count) + # Verify all sleep calls used 4 second interval + for call in self.mock_sleep.call_args_list: + self.assertEqual(call[0][0], 4) + + def test_watcher_resize_instance_retry_default( + self, mock_glance, mock_cinder, mock_neutron, mock_nova): + """Test that resize_instance uses config timeout by default""" + nova_util = nova_helper.NovaHelper() + server = self.fake_server(self.instance_uuid) + server.status = 'RESIZING' + setattr(server, 'OS-EXT-STS:vm_state', 'resizing') + + nova_util.nova.servers.get.side_effect = server + + # Resize will timeout because status never changes + is_success = nova_util.resize_instance( + self.instance_uuid, self.flavor_name + ) + + # Should fail due to timeout + self.assertFalse(is_success) + # With default migration_max_retries and migration_interval, should + # sleep 180 times for 5 seconds + self.assertEqual(180, self.mock_sleep.call_count) + # Verify sleep calls used 5 second interval + for call in self.mock_sleep.call_args_list: + self.assertEqual(call[0][0], 5) + + def test_watcher_resize_instance_retry_custom( + self, mock_glance, mock_cinder, mock_neutron, mock_nova): + """Test that resize_instance respects custom config retry values""" + nova_util = nova_helper.NovaHelper() + server = self.fake_server(self.instance_uuid) + server.status = 'RESIZING' + setattr(server, 'OS-EXT-STS:vm_state', 'resizing') + + # Set config to custom values to ensure custom values are used + self.flags(migration_max_retries=10, + migration_interval=3, group='nova') + + is_success = nova_util.resize_instance( + self.instance_uuid, self.flavor_name + ) + + # Should fail due to timeout + self.assertFalse(is_success) + # It should sleep migration_max_retries times with 
migration_interval + # seconds + self.assertEqual(10, self.mock_sleep.call_count) + # Verify all sleep calls used migration_interval + for call in self.mock_sleep.call_args_list: + self.assertEqual(call[0][0], 3) + @mock.patch.object(time, 'sleep', mock.Mock()) def test_live_migrate_instance(self, mock_glance, mock_cinder, mock_neutron, mock_nova):