Allow for retries after certain resource delete failures

In cases where a resource is "in use," a Conflict exception may be
thrown, in which case it is worth trying to delete again up to
action_retry_limit times before giving up. Also introduce the
exception PhysicalResourceExists for future use.

Change-Id: Ib9305ff55904638491c6d43599a639d3a658f54b
This commit is contained in:
Crag Wolfe 2016-07-15 19:15:31 -04:00
parent 2d31027553
commit 904bed006a
4 changed files with 123 additions and 9 deletions

View File

@ -159,6 +159,10 @@ class EntityNotFound(HeatException):
**kwargs)
class PhysicalResourceExists(HeatException):
msg_fmt = _("The physical resource for (%(name)s) exists.")
class PhysicalResourceNameAmbiguity(HeatException):
msg_fmt = _(
"Multiple physical resources were found with name (%(name)s).")

View File

@ -889,13 +889,6 @@ class Resource(object):
self.reparse()
self._update_stored_properties()
def pause():
try:
while True:
yield
except scheduler.Timeout:
return
count = {self.CREATE: 0, self.DELETE: 0}
retry_limit = max(cfg.CONF.action_retry_limit, 0)
@ -906,7 +899,7 @@ class Resource(object):
if count[action]:
delay = timeutils.retry_backoff_delay(count[action],
jitter_max=2.0)
waiter = scheduler.TaskRunner(pause)
waiter = scheduler.TaskRunner(self.pause)
yield waiter.as_task(timeout=delay)
try:
yield self._do_action(action, self.properties.validate)
@ -935,6 +928,14 @@ class Resource(object):
yield self._break_if_required(
self.CREATE, environment.HOOK_POST_CREATE)
@staticmethod
def pause():
try:
while True:
yield
except scheduler.Timeout:
return
def prepare_abandon(self):
self.abandon_in_progress = True
return {
@ -1524,6 +1525,15 @@ class Resource(object):
Subclasses should provide a handle_delete() method to customise
deletion.
"""
@excutils.exception_filter
def should_retry(exc):
if count >= retry_limit:
return False
if self.default_client_name:
return (self.client_plugin().is_conflict(exc) or
isinstance(exc, exception.PhysicalResourceExists))
return isinstance(exc, exception.PhysicalResourceExists)
action = self.DELETE
if (self.action, self.status) == (self.DELETE, self.COMPLETE):
@ -1560,7 +1570,23 @@ class Resource(object):
action_args = [[initial_state], 'snapshot']
else:
action_args = []
yield self.action_handler_task(action, *action_args)
count = -1
retry_limit = max(cfg.CONF.action_retry_limit, 0)
while True:
count += 1
LOG.info(_LI('delete %(name)s attempt %(attempt)d') %
{'name': six.text_type(self), 'attempt': count+1})
if count:
delay = timeutils.retry_backoff_delay(count,
jitter_max=2.0)
waiter = scheduler.TaskRunner(self.pause)
yield waiter.as_task(timeout=delay)
with excutils.exception_filter(should_retry):
yield self.action_handler_task(action,
*action_args)
break
if self.stack.action == self.stack.DELETE:
yield self._break_if_required(

View File

@ -11,6 +11,7 @@
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg
import six
import swiftclient.client as sc
@ -273,6 +274,7 @@ class s3Test(common.HeatTestCase):
self.m.VerifyAll()
def test_delete_conflict_empty(self):
cfg.CONF.set_override('action_retry_limit', 0, enforce_type=True)
t = template_format.parse(swift_template)
stack = utils.parse_stack(t)

View File

@ -2397,6 +2397,88 @@ class ResourceTest(common.HeatTestCase):
self.assertTrue(mock_load_data.called)
class ResourceDeleteRetryTest(common.HeatTestCase):
def setUp(self):
super(ResourceDeleteRetryTest, self).setUp()
self.env = environment.Environment()
self.env.load({u'resource_registry':
{u'OS::Test::GenericResource': u'GenericResourceType'}})
self.stack = parser.Stack(utils.dummy_context(), 'test_stack',
template.Template(empty_template,
env=self.env),
stack_id=str(uuid.uuid4()))
self.num_retries = 2
cfg.CONF.set_override('action_retry_limit', self.num_retries,
enforce_type=True)
def test_delete_retry_conflict(self):
tmpl = rsrc_defn.ResourceDefinition('test_resource',
'GenericResourceType',
{'Foo': 'xyz123'})
res = generic_rsrc.ResourceWithProps(
'test_resource', tmpl, self.stack)
res.state_set(res.CREATE, res.COMPLETE, 'wobble')
res.default_client_name = 'neutron'
self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
self.m.StubOutWithMock(generic_rsrc.GenericResource, 'handle_delete')
# could be any exception that is_conflict(), using the neutron
# client one
generic_rsrc.GenericResource.handle_delete().AndRaise(
neutron_exp.Conflict(message='foo', request_ids=[1]))
for i in range(self.num_retries):
timeutils.retry_backoff_delay(i+1, jitter_max=2.0).AndReturn(
0.01)
generic_rsrc.GenericResource.handle_delete().AndRaise(
neutron_exp.Conflict(message='foo', request_ids=[1]))
self.m.ReplayAll()
exc = self.assertRaises(exception.ResourceFailure,
scheduler.TaskRunner(res.delete))
exc_text = six.text_type(exc)
self.assertIn('Conflict', exc_text)
self.m.VerifyAll()
def test_delete_retry_phys_resource_exists(self):
tmpl = rsrc_defn.ResourceDefinition(
'test_resource', 'Foo', {'Foo': 'abc'})
res = generic_rsrc.ResourceWithPropsRefPropOnDelete(
'test_resource', tmpl, self.stack)
res.state_set(res.CREATE, res.COMPLETE, 'wobble')
cfg.CONF.set_override('action_retry_limit', self.num_retries,
enforce_type=True)
self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
self.m.StubOutWithMock(generic_rsrc.GenericResource, 'handle_delete')
self.m.StubOutWithMock(generic_rsrc.ResourceWithPropsRefPropOnDelete,
'check_delete_complete')
generic_rsrc.GenericResource.handle_delete().AndReturn(None)
generic_rsrc.ResourceWithPropsRefPropOnDelete.check_delete_complete(
None).AndRaise(
exception.PhysicalResourceExists(name="foo"))
for i in range(self.num_retries):
timeutils.retry_backoff_delay(i+1, jitter_max=2.0).AndReturn(
0.01)
generic_rsrc.GenericResource.handle_delete().AndReturn(None)
if i < self.num_retries-1:
generic_rsrc.ResourceWithPropsRefPropOnDelete.\
check_delete_complete(None).AndRaise(
exception.PhysicalResourceExists(name="foo"))
else:
generic_rsrc.ResourceWithPropsRefPropOnDelete.\
check_delete_complete(None).AndReturn(True)
self.m.ReplayAll()
scheduler.TaskRunner(res.delete)()
self.m.VerifyAll()
class ResourceAdoptTest(common.HeatTestCase):
def test_adopt_resource_success(self):