From 45618c13736f7f4e2def12ca55e7bca7e93b083b Mon Sep 17 00:00:00 2001 From: huangtianhua Date: Thu, 2 Jul 2015 11:34:43 +0800 Subject: [PATCH] Allow suspend and resume again if failed last time Now heat doesn't support to suspend or resume again if failed last time, if a stack in SUSPEND_FAILED or RESUME_FAILED, then user can neither suspend nor resume. Change-Id: I84a2ff32265a66e9018a62ea0ee20b9415f109e7 Closes-Bug: #1469927 --- heat/engine/resource.py | 14 ++++-- heat/engine/resources/aws/ec2/instance.py | 14 ++++-- .../engine/resources/openstack/nova/server.py | 14 ++++-- heat/tests/aws/test_instance.py | 48 ++++++++++++++----- heat/tests/nova/test_server.py | 47 ++++++++++++------ heat/tests/test_resource.py | 38 ++++++++------- 6 files changed, 120 insertions(+), 55 deletions(-) diff --git a/heat/engine/resource.py b/heat/engine/resource.py index e65402789e..2f22f55798 100644 --- a/heat/engine/resource.py +++ b/heat/engine/resource.py @@ -906,7 +906,10 @@ class Resource(object): action = self.SUSPEND # Don't try to suspend the resource unless it's in a stable state - if (self.action == self.DELETE or self.status != self.COMPLETE): + # or if the previous suspend failed + if (self.action == self.DELETE or + (self.action != self.SUSPEND and + self.status != self.COMPLETE)): exc = exception.Error(_('State %s invalid for suspend') % six.text_type(self.state)) raise exception.ResourceFailure(exc, self, action) @@ -921,12 +924,15 @@ class Resource(object): ''' action = self.RESUME - # Can't resume a resource unless it's SUSPEND_COMPLETE - if self.state != (self.SUSPEND, self.COMPLETE): + # Allow resume a resource if it's SUSPEND_COMPLETE + # or RESUME_FAILED or RESUME_COMPLETE. Recommend to check + # the real state of physical resource in handle_resume() + if self.state not in ((self.SUSPEND, self.COMPLETE), + (self.RESUME, self.FAILED), + (self.RESUME, self.COMPLETE)): exc = exception.Error(_('State %s invalid for resume') % six.text_type(self.state)) raise exception.ResourceFailure(exc, self, action) - LOG.info(_LI('resuming %s'), six.text_type(self)) return self._do_action(action) diff --git a/heat/engine/resources/aws/ec2/instance.py b/heat/engine/resources/aws/ec2/instance.py index 192e1416a5..c0a2719b1b 100644 --- a/heat/engine/resources/aws/ec2/instance.py +++ b/heat/engine/resources/aws/ec2/instance.py @@ -795,8 +795,11 @@ class Instance(resource.Resource): else: raise else: - LOG.debug("suspending instance %s" % self.resource_id) - server.suspend() + # if the instance has been suspended successful, + # no need to suspend again + if self.client_plugin().get_status(server) != 'SUSPENDED': + LOG.debug("suspending instance %s" % self.resource_id) + server.suspend() return server.id def check_suspend_complete(self, server_id): @@ -834,8 +837,11 @@ class Instance(resource.Resource): else: raise else: - LOG.debug("resuming instance %s" % self.resource_id) - server.resume() + # if the instance has been resumed successful, + # no need to resume again + if self.client_plugin().get_status(server) != 'ACTIVE': + LOG.debug("resuming instance %s" % self.resource_id) + server.resume() return server.id def check_resume_complete(self, server_id): diff --git a/heat/engine/resources/openstack/nova/server.py b/heat/engine/resources/openstack/nova/server.py index 8f00c7c1aa..6b00e0342f 100644 --- a/heat/engine/resources/openstack/nova/server.py +++ b/heat/engine/resources/openstack/nova/server.py @@ -1405,8 +1405,11 @@ class Server(stack_user.StackUser): else: raise else: - LOG.debug('suspending server %s' % self.resource_id) - server.suspend() + # if the server has been suspended successful, + # no need to suspend again + if self.client_plugin().get_status(server) != 'SUSPENDED': + LOG.debug('suspending server %s' % self.resource_id) + server.suspend() return server.id def check_suspend_complete(self, server_id): @@ -1444,8 +1447,11 @@ class Server(stack_user.StackUser): else: raise else: - LOG.debug('resuming server %s' % self.resource_id) - server.resume() + # if the server has been resumed successful, + # no need to resume again + if self.client_plugin().get_status(server) != 'ACTIVE': + LOG.debug('resuming server %s' % self.resource_id) + server.resume() return server.id def check_resume_complete(self, server_id): diff --git a/heat/tests/aws/test_instance.py b/heat/tests/aws/test_instance.py index 5bdbcb4a41..ea3b032cde 100644 --- a/heat/tests/aws/test_instance.py +++ b/heat/tests/aws/test_instance.py @@ -1013,16 +1013,14 @@ class InstancesTest(common.HeatTestCase): scheduler.TaskRunner(instance.create)() self.assertEqual((instance.CREATE, instance.COMPLETE), instance.state) - def test_instance_status_suspend(self): + def _test_instance_status_suspend(self, name, + state=('CREATE', 'COMPLETE')): return_server = self.fc.servers.list()[1] - instance = self._create_test_instance(return_server, - 'in_suspend_wait') + instance = self._create_test_instance(return_server, name) instance.resource_id = '1234' - self.m.ReplayAll() + instance.state_set(state[0], state[1]) - # Override the get_servers_1234 handler status to SUSPENDED, but - # return the ACTIVE state first (twice, so we sleep) d1 = {'server': self.fc.client.get_servers_detail()[1]['servers'][0]} d2 = copy.deepcopy(d1) d1['server']['status'] = 'ACTIVE' @@ -1039,16 +1037,28 @@ class InstancesTest(common.HeatTestCase): self.m.VerifyAll() - def test_instance_status_resume(self): + def test_instance_suspend_in_create_complete(self): + self._test_instance_status_suspend( + name='test_suspend_in_create_complete') + + def test_instance_suspend_in_suspend_failed(self): + self._test_instance_status_suspend( + name='test_suspend_in_suspend_failed', + state=('SUSPEND', 'FAILED')) + + def test_server_suspend_in_suspend_complete(self): + self._test_instance_status_suspend( + name='test_suspend_in_suspend_complete', + state=('SUSPEND', 'COMPLETE')) + + def _test_instance_status_resume(self, name, + state=('SUSPEND', 'COMPLETE')): return_server = self.fc.servers.list()[1] - instance = self._create_test_instance(return_server, - 'in_resume_wait') + instance = self._create_test_instance(return_server, name) instance.resource_id = '1234' - self.m.ReplayAll() + instance.state_set(state[0], state[1]) - # Override the get_servers_1234 handler status to ACTIVE, but - # return the SUSPENDED state first (twice, so we sleep) d1 = {'server': self.fc.client.get_servers_detail()[1]['servers'][0]} d2 = copy.deepcopy(d1) d1['server']['status'] = 'SUSPENDED' @@ -1067,6 +1077,20 @@ class InstancesTest(common.HeatTestCase): self.m.VerifyAll() + def test_instance_resume_in_suspend_complete(self): + self._test_instance_status_resume( + name='test_resume_in_suspend_complete') + + def test_instance_resume_in_resume_failed(self): + self._test_instance_status_resume( + name='test_resume_in_resume_failed', + state=('RESUME', 'FAILED')) + + def test_instance_resume_in_resume_complete(self): + self._test_instance_status_resume( + name='test_resume_in_resume_complete', + state=('RESUME', 'COMPLETE')) + def test_server_resume_other_exception(self): return_server = self.fc.servers.list()[1] instance = self._create_test_instance(return_server, diff --git a/heat/tests/nova/test_server.py b/heat/tests/nova/test_server.py index 8525c46b0f..66dfb021aa 100644 --- a/heat/tests/nova/test_server.py +++ b/heat/tests/nova/test_server.py @@ -1858,16 +1858,13 @@ class ServersTest(common.HeatTestCase): self.m.VerifyAll() - def test_server_status_suspend(self): + def _test_server_status_suspend(self, name, state=('CREATE', 'COMPLETE')): return_server = self.fc.servers.list()[1] - server = self._create_test_server(return_server, - 'srv_susp_w') + server = self._create_test_server(return_server, name) server.resource_id = '1234' - self.m.ReplayAll() + server.state_set(state[0], state[1]) - # Override the get_servers_1234 handler status to SUSPENDED, but - # return the ACTIVE state first (twice, so we sleep) d1 = {'server': self.fc.client.get_servers_detail()[1]['servers'][0]} d2 = copy.deepcopy(d1) d1['server']['status'] = 'ACTIVE' @@ -1884,6 +1881,19 @@ class ServersTest(common.HeatTestCase): self.m.VerifyAll() + def test_server_suspend_in_create_complete(self): + self._test_server_status_suspend('test_suspend_in_create_complete') + + def test_server_suspend_in_suspend_failed(self): + self._test_server_status_suspend( + name='test_suspend_in_suspend_failed', + state=('SUSPEND', 'FAILED')) + + def test_server_suspend_in_suspend_complete(self): + self._test_server_status_suspend( + name='test_suspend_in_suspend_complete', + state=('SUSPEND', 'COMPLETE')) + def test_server_status_suspend_unknown_status(self): return_server = self.fc.servers.list()[1] server = self._create_test_server(return_server, @@ -1915,16 +1925,13 @@ class ServersTest(common.HeatTestCase): self.m.VerifyAll() - def test_server_status_resume(self): + def _test_server_status_resume(self, name, state=('SUSPEND', 'COMPLETE')): return_server = self.fc.servers.list()[1] - server = self._create_test_server(return_server, - 'srv_res_w') + server = self._create_test_server(return_server, name) server.resource_id = '1234' - self.m.ReplayAll() + server.state_set(state[0], state[1]) - # Override the get_servers_1234 handler status to ACTIVE, but - # return the SUSPENDED state first (twice, so we sleep) d1 = {'server': self.fc.client.get_servers_detail()[1]['servers'][0]} d2 = copy.deepcopy(d1) d1['server']['status'] = 'SUSPENDED' @@ -1936,13 +1943,25 @@ class ServersTest(common.HeatTestCase): get().AndReturn((200, d2)) self.m.ReplayAll() - server.state_set(server.SUSPEND, server.COMPLETE) - scheduler.TaskRunner(server.resume)() self.assertEqual((server.RESUME, server.COMPLETE), server.state) self.m.VerifyAll() + def test_server_resume_in_suspend_complete(self): + self._test_server_status_resume( + name='test_resume_in_suspend_complete') + + def test_server_resume_in_resume_failed(self): + self._test_server_status_resume( + name='test_resume_in_resume_failed', + state=('RESUME', 'FAILED')) + + def test_server_resume_in_resume_complete(self): + self._test_server_status_resume( + name='test_resume_in_resume_complete', + state=('RESUME', 'COMPLETE')) + def test_server_status_resume_no_resource_id(self): return_server = self.fc.servers.list()[1] server = self._create_test_server(return_server, diff --git a/heat/tests/test_resource.py b/heat/tests/test_resource.py index 8603e19e10..8cd95db9f0 100644 --- a/heat/tests/test_resource.py +++ b/heat/tests/test_resource.py @@ -979,7 +979,7 @@ class ResourceTest(common.HeatTestCase): scheduler.TaskRunner(res.resume)() self.assertEqual((res.RESUME, res.COMPLETE), res.state) - def test_suspend_fail_inprogress(self): + def test_suspend_fail_invalid_states(self): tmpl = rsrc_defn.ResourceDefinition('test_resource', 'GenericResourceType', {'Foo': 'abc'}) @@ -987,19 +987,19 @@ class ResourceTest(common.HeatTestCase): scheduler.TaskRunner(res.create)() self.assertEqual((res.CREATE, res.COMPLETE), res.state) - res.state_set(res.CREATE, res.IN_PROGRESS) - suspend = scheduler.TaskRunner(res.suspend) - self.assertRaises(exception.ResourceFailure, suspend) + invalid_actions = (a for a in res.ACTIONS if a != res.SUSPEND) + invalid_status = (s for s in res.STATUSES if s != res.COMPLETE) + invalid_states = [s for s in + itertools.product(invalid_actions, invalid_status)] - res.state_set(res.UPDATE, res.IN_PROGRESS) - suspend = scheduler.TaskRunner(res.suspend) - self.assertRaises(exception.ResourceFailure, suspend) + for state in invalid_states: + res.state_set(*state) + suspend = scheduler.TaskRunner(res.suspend) + expected = 'State %s invalid for suspend' % six.text_type(state) + exc = self.assertRaises(exception.ResourceFailure, suspend) + self.assertIn(expected, six.text_type(exc)) - res.state_set(res.DELETE, res.IN_PROGRESS) - suspend = scheduler.TaskRunner(res.suspend) - self.assertRaises(exception.ResourceFailure, suspend) - - def test_resume_fail_not_suspend_complete(self): + def test_resume_fail_invalid_states(self): tmpl = rsrc_defn.ResourceDefinition('test_resource', 'GenericResourceType', {'Foo': 'abc'}) @@ -1007,13 +1007,17 @@ class ResourceTest(common.HeatTestCase): scheduler.TaskRunner(res.create)() self.assertEqual((res.CREATE, res.COMPLETE), res.state) - non_suspended_states = [s for s in - itertools.product(res.ACTIONS, res.STATUSES) - if s != (res.SUSPEND, res.COMPLETE)] - for state in non_suspended_states: + invalid_states = [s for s in + itertools.product(res.ACTIONS, res.STATUSES) + if s not in ((res.SUSPEND, res.COMPLETE), + (res.RESUME, res.FAILED), + (res.RESUME, res.COMPLETE))] + for state in invalid_states: res.state_set(*state) resume = scheduler.TaskRunner(res.resume) - self.assertRaises(exception.ResourceFailure, resume) + expected = 'State %s invalid for resume' % six.text_type(state) + exc = self.assertRaises(exception.ResourceFailure, resume) + self.assertIn(expected, six.text_type(exc)) def test_suspend_fail_exception(self): tmpl = rsrc_defn.ResourceDefinition('test_resource',