Fix stopping stopped instances

Nova-compute periodically syncs the instance status. As a result, in
the instance failure recovery workflow, stopping the instance could
randomly fail with a conflict, which would terminate the recovery
workflow. This patch catches the Conflict exception and continues the
recovery workflow if the instance is already stopped.

Closes-Bug: 1980736
Change-Id: I59a1f9d7078614c1ddc8f4c362e967a15b8ec5e8
(cherry picked from commit 7241101163)
This commit is contained in:
suzhengwei 2022-07-25 17:28:31 +08:00 committed by Radosław Piliszek
parent d689abff98
commit f2c4aada79
3 changed files with 79 additions and 1 deletions

View File

@ -75,7 +75,16 @@ class StopInstanceTask(base.MasakariTask):
msg = "Stopping instance: %s" % instance_uuid
self.update_details(msg)
self.novaclient.stop_server(self.context, instance.id)
try:
self.novaclient.stop_server(self.context, instance.id)
except exception.Conflict:
msg = "Conflict when stopping instance: %s" % instance_uuid
self.update_details(msg)
instance = self.novaclient.get_server(self.context,
instance_uuid)
vm_state = getattr(instance, 'OS-EXT-STS:vm_state')
if vm_state != 'stopped':
raise
def _wait_for_power_off():
new_instance = self.novaclient.get_server(self.context,

View File

@ -167,6 +167,70 @@ class InstanceFailureTestCase(test.TestCase):
"' vm_state is ACTIVE", 1.0)
])
@mock.patch('masakari.compute.nova.novaclient')
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
            'update_details')
def test_instance_failure_flow_stop_conflict_instance_in_error(
        self, _mock_notify, _mock_novaclient):
    # Scenario: stop_server raises Conflict and the instance is left in
    # the "error" vm_state, so the task is expected to re-raise Conflict.
    _mock_novaclient.return_value = self.fake_client

    # Register an active, HA-enabled instance with the fake client.
    fake_server = self.fake_client.servers.create(
        self.instance_id,
        host="fake-host",
        ha_enabled=True,
        vm_state="active")

    def _conflict_leaving_error(context, uuid):
        # Flip the instance to "error" before reporting the conflict.
        setattr(fake_server, 'OS-EXT-STS:vm_state', "error")
        raise exception.Conflict(reason="Cannot 'stop' instance 1 "
                                        "while it is in vm_state error")

    # Run StopInstanceTask with stop_server swapped for the failing stub.
    stop_task = instance_failure.StopInstanceTask(
        self.ctxt, self.novaclient)
    with mock.patch.object(self.novaclient, 'stop_server',
                           _conflict_leaving_error):
        self.assertRaises(
            exception.Conflict, stop_task.execute, self.instance_id)

    # Both progress messages must have been reported, in this order.
    expected_details = [
        mock.call('Stopping instance: ' + self.instance_id),
        mock.call('Conflict when stopping instance: ' + self.instance_id),
    ]
    _mock_notify.assert_has_calls(expected_details)
@mock.patch('masakari.compute.nova.novaclient')
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
            'update_details')
def test_instance_failure_flow_stop_conflict_instance_already_stoppped(
        self, _mock_notify, _mock_novaclient):
    # Scenario: stop_server raises Conflict but the instance is already
    # in the "stopped" vm_state, so the task is expected to complete.
    _mock_novaclient.return_value = self.fake_client

    # Register an active, HA-enabled instance with the fake client.
    fake_server = self.fake_client.servers.create(
        self.instance_id,
        host="fake-host",
        ha_enabled=True,
        vm_state="active")

    def _conflict_leaving_stopped(context, uuid):
        # Flip the instance to "stopped" before reporting the conflict.
        setattr(fake_server, 'OS-EXT-STS:vm_state', "stopped")
        raise exception.Conflict(reason="Cannot 'stop' instance 1 "
                                        "while it is in vm_state stopped")

    # Run StopInstanceTask with stop_server swapped for the failing stub;
    # no exception should escape execute().
    stop_task = instance_failure.StopInstanceTask(
        self.ctxt, self.novaclient)
    with mock.patch.object(self.novaclient, 'stop_server',
                           _conflict_leaving_stopped):
        stop_task.execute(self.instance_id)

    # The conflict is reported and followed by the final "Stopped" message.
    expected_details = [
        mock.call('Stopping instance: ' + self.instance_id),
        mock.call('Conflict when stopping instance: ' + self.instance_id),
        mock.call("Stopped instance: '" + self.instance_id + "'", 1.0),
    ]
    _mock_notify.assert_has_calls(expected_details)
@mock.patch('masakari.compute.nova.novaclient')
@mock.patch('masakari.engine.drivers.taskflow.base.MasakariTask.'
'update_details')

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Fixes "Instance stopping fails randomly due to already stopped instances".
`LP#1980736 <https://launchpad.net/bugs/1980736>`__