Make VM resize timeout configurable with migration defaults

This patch applies the same approach, configuration parameters and
default values as the ones applied to VM migrations in [1].

Note that, internally, VM resizes are treated by nova very similarly to
migrations, so it makes sense to reuse the same parameters and default
values.

Related-Bug: #2131663

[1] https://review.opendev.org/c/openstack/watcher/+/967693

Change-Id: Ic81147e19f86d4a8efbecb539b4b83674e79e646
Signed-off-by: Alfredo Moralejo <amoralej@redhat.com>
This commit is contained in:
Alfredo Moralejo
2025-11-27 09:35:24 +01:00
parent 13d73e9b4e
commit e427fa68a3
3 changed files with 103 additions and 2 deletions

View File

@@ -0,0 +1,11 @@
---
features:
- |
The timeout of VM resize operations can now be configured using the
`migration_max_retries` and `migration_interval` options in the `nova`
section, which are also used for migrations. The default values are
180 retries and an interval of 5 seconds.
fixes:
- |
Fixed an issue where resize actions failed when the resize took more than
120 seconds. After this patch, the default timeout is 900 seconds (15 minutes),
which should be a reasonable value for most OpenStack installations.

View File

@@ -241,7 +241,7 @@ class NovaHelper:
"cold migration for instance %s failed", instance_id)
return False
def resize_instance(self, instance_id, flavor, retry=120):
def resize_instance(self, instance_id, flavor, retry=None, interval=None):
"""This method resizes given instance with specified flavor.
This method uses the Nova built-in resize()
@@ -252,12 +252,18 @@ class NovaHelper:
:param instance_id: the unique id of the instance to resize.
:param flavor: the name or ID of the flavor to resize to.
:param retry: maximum number of retries before giving up
:param interval: interval in seconds between retries
"""
LOG.debug(
"Trying a resize of instance %(instance)s to "
"flavor '%(flavor)s'",
{'instance': instance_id, 'flavor': flavor})
# Use config defaults if not provided in method parameters
retry = retry or CONF.nova.migration_max_retries
interval = interval or CONF.nova.migration_interval
# Looking for the instance to resize
instance = self.find_instance(instance_id)
@@ -291,7 +297,7 @@ class NovaHelper:
and retry:
instance = self.nova.servers.get(instance.id)
LOG.debug('Waiting the resize of %s to %s', instance, flavor_id)
time.sleep(1)
time.sleep(interval)
retry -= 1
instance_status = getattr(instance, 'status')

View File

@@ -300,6 +300,90 @@ class TestNovaHelper(base.TestCase):
self.flavor_name)
self.assertFalse(is_success)
@mock.patch.object(nova_helper.NovaHelper, 'confirm_resize')
def test_watcher_resize_instance_retry_success(
        self, mock_confirm_resize, mock_cinder, mock_nova):
    """Resize succeeds once the server is reported as VERIFY_RESIZE.

    The polling interval is expected to come from the
    ``migration_interval`` option of the ``nova`` config group.
    """
    nova_util = nova_helper.NovaHelper()

    # Server as reported while the resize is still in progress.
    in_progress = self.fake_server(self.instance_uuid)
    in_progress.status = 'RESIZING'
    setattr(in_progress, 'OS-EXT-STS:vm_state', 'resizing')

    # Same server once nova has finished and waits for confirmation.
    finished = copy.deepcopy(in_progress)
    finished.status = 'VERIFY_RESIZE'
    setattr(finished, 'OS-EXT-STS:vm_state', 'resized')

    # Two "in progress" answers, then the finished one: the instance is
    # seen as VERIFY_RESIZE on the second retry.
    nova_util.nova.servers.get.side_effect = [
        in_progress, in_progress, finished]
    mock_confirm_resize.return_value = True
    self.flags(migration_max_retries=20, migration_interval=4,
               group='nova')

    # No explicit retry/interval: the configured values must be used.
    is_success = nova_util.resize_instance(
        self.instance_uuid, self.flavor_name)

    self.assertTrue(is_success)
    # One sleep per poll until VERIFY_RESIZE shows up on the second retry,
    # each of them lasting the configured 4 seconds.
    sleep_calls = self.mock_sleep.call_args_list
    self.assertEqual(2, len(sleep_calls))
    self.assertEqual([4, 4], [c[0][0] for c in sleep_calls])
def test_watcher_resize_instance_retry_default(
        self, mock_cinder, mock_nova):
    """Resize times out using the default retries and interval.

    Nothing is overridden here, so ``resize_instance`` is expected to
    fall back to the default ``migration_max_retries`` (180) and
    ``migration_interval`` (5 seconds) and give up when the server never
    leaves the RESIZING state.
    """
    nova_util = nova_helper.NovaHelper()
    server = self.fake_server(self.instance_uuid)
    server.status = 'RESIZING'
    setattr(server, 'OS-EXT-STS:vm_state', 'resizing')
    # Every servers.get() call must hand back this same server, so use
    # return_value. side_effect is only ever interpreted as a callable, an
    # iterable or an exception, never as a plain value to return, so
    # assigning the server to it would not make the poll see the RESIZING
    # state prepared above.
    nova_util.nova.servers.get.return_value = server

    # Resize will time out because the status never changes
    is_success = nova_util.resize_instance(
        self.instance_uuid, self.flavor_name
    )

    # Should fail due to timeout
    self.assertFalse(is_success)
    # With default migration_max_retries and migration_interval, should
    # sleep 180 times for 5 seconds
    self.assertEqual(180, self.mock_sleep.call_count)
    # Verify every sleep call used the 5 second interval
    for sleep_call in self.mock_sleep.call_args_list:
        self.assertEqual(5, sleep_call[0][0])
def test_watcher_resize_instance_retry_custom(
        self, mock_cinder, mock_nova):
    """Resize honours custom values set in the ``nova`` config group.

    ``resize_instance`` is called without explicit ``retry``/``interval``
    arguments, so the overridden ``migration_max_retries`` and
    ``migration_interval`` options must drive the polling loop.
    """
    nova_util = nova_helper.NovaHelper()
    server = self.fake_server(self.instance_uuid)
    server.status = 'RESIZING'
    setattr(server, 'OS-EXT-STS:vm_state', 'resizing')
    # Make every poll return the server stuck in RESIZING; without this
    # the server prepared above is never used by the test.
    nova_util.nova.servers.get.return_value = server

    # Override the config with non-default values to make sure they are
    # the ones being used
    self.flags(migration_max_retries=10,
               migration_interval=3, group='nova')

    is_success = nova_util.resize_instance(
        self.instance_uuid, self.flavor_name
    )

    # Should fail due to timeout
    self.assertFalse(is_success)
    # It should sleep migration_max_retries times with migration_interval
    # seconds
    self.assertEqual(10, self.mock_sleep.call_count)
    # Verify all sleep calls used migration_interval
    for sleep_call in self.mock_sleep.call_args_list:
        self.assertEqual(3, sleep_call[0][0])
@mock.patch.object(time, 'sleep', mock.Mock())
def test_live_migrate_instance(self, mock_cinder, mock_nova):
nova_util = nova_helper.NovaHelper()