Reset resource metadata when a resource is marked healthy

Some resources do not work if their metadata is in
a wrong state, e.g. the 'scaling_in_progress' metadata
of a scaling group/policy might stay True forever if the
engine restarts while scaling.
This patch adds an interface, 'handle_metadata_reset', to the
resource class, which plugins can override if needed.
The metadata is reset when a resource in CHECK_FAILED state is
marked healthy (i.e. just before it is moved to CHECK_COMPLETE).

Change-Id: Ibd6c18acf6f3f24cf9bf16a524127850968062bc
Closes-Bug: #1651084
This commit is contained in:
huangtianhua 2016-12-20 20:29:59 +08:00
parent 1f7925d8e8
commit 5b04acb4e2
4 changed files with 63 additions and 62 deletions

View File

@ -443,6 +443,16 @@ class Resource(object):
db_res.update_metadata(metadata) db_res.update_metadata(metadata)
self._rsrc_metadata = metadata self._rsrc_metadata = metadata
def handle_metadata_reset(self):
    """Reset resource metadata that may be left in a wrong state.

    The base implementation does nothing; resource plugins override it
    when needed. Scale-policy and scale-group resources override it
    because their metadata can hang in a wrong state
    ('scaling_in_progress' is always True) if the engine restarts
    while scaling.
    """
    return None
@classmethod @classmethod
def set_needed_by(cls, db_rsrc, needed_by, expected_engine_id=None): def set_needed_by(cls, db_rsrc, needed_by, expected_engine_id=None):
if db_rsrc: if db_rsrc:

View File

@ -1895,6 +1895,7 @@ class EngineService(service.ServiceBase):
if rsrc.action != rsrc.DELETE: if rsrc.action != rsrc.DELETE:
rsrc.state_set(rsrc.CHECK, rsrc.FAILED, reason=reason) rsrc.state_set(rsrc.CHECK, rsrc.FAILED, reason=reason)
elif rsrc.state == (rsrc.CHECK, rsrc.FAILED): elif rsrc.state == (rsrc.CHECK, rsrc.FAILED):
rsrc.handle_metadata_reset()
rsrc.state_set(rsrc.CHECK, rsrc.COMPLETE, reason=reason) rsrc.state_set(rsrc.CHECK, rsrc.COMPLETE, reason=reason)
except exception.UpdateInProgress: except exception.UpdateInProgress:

View File

@ -69,3 +69,9 @@ class CooldownMixin(object):
self.metadata_set(metadata) self.metadata_set(metadata)
except exception.NotFound: except exception.NotFound:
pass pass
def handle_metadata_reset(self):
    """Clear the 'scaling_in_progress' flag in the resource metadata.

    The metadata is read with ``metadata_get``; if it carries a
    'scaling_in_progress' key, that key is set to False and the same
    metadata object is written back with ``metadata_set``. Metadata
    without the key is left untouched and nothing is written.
    """
    current = self.metadata_get()
    # Nothing to reset when the flag was never recorded.
    if 'scaling_in_progress' not in current:
        return
    current['scaling_in_progress'] = False
    self.metadata_set(current)

View File

@ -20,6 +20,7 @@ from heat.common import identifier
from heat.engine.clients.os import keystone from heat.engine.clients.os import keystone
from heat.engine import dependencies from heat.engine import dependencies
from heat.engine import resource as res from heat.engine import resource as res
from heat.engine.resources.aws.ec2 import instance as ins
from heat.engine import service from heat.engine import service
from heat.engine import stack from heat.engine import stack
from heat.engine import stack_lock from heat.engine import stack_lock
@ -558,82 +559,77 @@ class StackResourcesServiceTest(common.HeatTestCase):
self.eng._find_resource_in_stack, self.eng._find_resource_in_stack,
self.ctx, 'wibble', self.stack) self.ctx, 'wibble', self.stack)
def _test_mark_healthy_asserts(self, action='CHECK', status='FAILED',
                               reason='state changed', meta=None):
    """Describe the 'WebServer' resource and verify its reported state.

    :param action: expected value of 'resource_action'
    :param status: expected value of 'resource_status'
    :param reason: expected value of 'resource_status_reason'
    :param meta: expected value of 'metadata'; the metadata is only
                 checked when this is not None
    """
    described = self.eng.describe_stack_resource(
        self.ctx, self.stack.identifier(),
        'WebServer', with_attr=None)
    expected = (
        ('resource_action', action),
        ('resource_status', status),
        ('resource_status_reason', reason),
    )
    # All keys must be present before any of the values is compared.
    for key, _ in expected:
        self.assertIn(key, described)
    for key, value in expected:
        self.assertEqual(value, described[key])
    if meta is None:
        return
    self.assertIn('metadata', described)
    self.assertEqual(meta, described['metadata'])
@tools.stack_context('service_mark_healthy_create_complete_test_stk') @tools.stack_context('service_mark_healthy_create_complete_test_stk')
def test_mark_healthy_in_create_complete(self): def test_mark_healthy_in_create_complete(self):
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', False, 'WebServer', False,
resource_status_reason='noop') resource_status_reason='noop')
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), self._test_mark_healthy_asserts(action='CREATE',
'WebServer', with_attr=None) status='COMPLETE')
self.assertIn('resource_action', r)
self.assertIn('resource_status', r)
self.assertIn('resource_status_reason', r)
self.assertEqual(r['resource_action'], 'CREATE')
self.assertEqual(r['resource_status'], 'COMPLETE')
self.assertEqual(r['resource_status_reason'], 'state changed')
@tools.stack_context('service_mark_unhealthy_create_complete_test_stk') @tools.stack_context('service_mark_unhealthy_create_complete_test_stk')
def test_mark_unhealthy_in_create_complete(self): def test_mark_unhealthy_in_create_complete(self):
reason = 'Some Reason'
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True, 'WebServer', True,
resource_status_reason='Some Reason') resource_status_reason=reason)
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), self._test_mark_healthy_asserts(reason=reason)
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'FAILED')
self.assertEqual(r['resource_status_reason'], 'Some Reason')
@tools.stack_context('service_mark_healthy_check_failed_test_stk') @tools.stack_context('service_mark_healthy_check_failed_test_stk')
def test_mark_healthy_check_failed(self): def test_mark_healthy_check_failed(self):
reason = 'Some Reason'
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True, 'WebServer', True,
resource_status_reason='Some Reason') resource_status_reason=reason)
self._test_mark_healthy_asserts(reason=reason)
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), meta = {'for_test': True}
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK') def override_metadata_reset(rsrc):
self.assertEqual(r['resource_status'], 'FAILED') rsrc.metadata_set(meta)
self.assertEqual(r['resource_status_reason'], 'Some Reason')
ins.Instance.handle_metadata_reset = override_metadata_reset
reason = 'Good Reason'
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', False, 'WebServer', False,
resource_status_reason='Good Reason') resource_status_reason=reason)
self._test_mark_healthy_asserts(status='COMPLETE',
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), reason=reason,
'WebServer', with_attr=None) meta=meta)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'COMPLETE')
self.assertEqual(r['resource_status_reason'], 'Good Reason')
@tools.stack_context('service_mark_unhealthy_check_failed_test_stack') @tools.stack_context('service_mark_unhealthy_check_failed_test_stack')
def test_mark_unhealthy_check_failed(self): def test_mark_unhealthy_check_failed(self):
reason = 'Some Reason'
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True, 'WebServer', True,
resource_status_reason='Some Reason') resource_status_reason=reason)
self._test_mark_healthy_asserts(reason=reason)
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(),
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'FAILED')
self.assertEqual(r['resource_status_reason'], 'Some Reason')
new_reason = 'New Reason'
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True, 'WebServer', True,
resource_status_reason='New Reason') resource_status_reason=new_reason)
self._test_mark_healthy_asserts(reason=new_reason)
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(),
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'FAILED')
self.assertEqual(r['resource_status_reason'], 'New Reason')
@tools.stack_context('service_mark_unhealthy_invalid_value_test_stk') @tools.stack_context('service_mark_unhealthy_invalid_value_test_stk')
def test_mark_unhealthy_invalid_value(self): def test_mark_unhealthy_invalid_value(self):
@ -649,28 +645,16 @@ class StackResourcesServiceTest(common.HeatTestCase):
def test_mark_unhealthy_none_reason(self): def test_mark_unhealthy_none_reason(self):
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True) 'WebServer', True)
default_reason = 'state changed by resource_mark_unhealthy api'
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), self._test_mark_healthy_asserts(reason=default_reason)
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'FAILED')
self.assertEqual(r['resource_status_reason'],
'state changed by resource_mark_unhealthy api')
@tools.stack_context('service_mark_unhealthy_empty_reason_test_stk') @tools.stack_context('service_mark_unhealthy_empty_reason_test_stk')
def test_mark_unhealthy_empty_reason(self): def test_mark_unhealthy_empty_reason(self):
self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(), self.eng.resource_mark_unhealthy(self.ctx, self.stack.identifier(),
'WebServer', True, 'WebServer', True,
resource_status_reason="") resource_status_reason="")
default_reason = 'state changed by resource_mark_unhealthy api'
r = self.eng.describe_stack_resource(self.ctx, self.stack.identifier(), self._test_mark_healthy_asserts(reason=default_reason)
'WebServer', with_attr=None)
self.assertEqual(r['resource_action'], 'CHECK')
self.assertEqual(r['resource_status'], 'FAILED')
self.assertEqual(r['resource_status_reason'],
'state changed by resource_mark_unhealthy api')
@tools.stack_context('service_mark_unhealthy_lock_no_converge_test_stk') @tools.stack_context('service_mark_unhealthy_lock_no_converge_test_stk')
def test_mark_unhealthy_lock_no_convergence(self): def test_mark_unhealthy_lock_no_convergence(self):