Merge "Retry resource create until success"
This commit is contained in:
@@ -47,6 +47,10 @@
|
||||
# one time. (integer value)
|
||||
#max_stacks_per_tenant=100
|
||||
|
||||
# Number of times to retry to bring a resource to a non-error
|
||||
# state. Set to 0 to disable retries. (integer value)
|
||||
#action_retry_limit=5
|
||||
|
||||
# Controls how many events will be pruned whenever a stack's
|
||||
# events exceed max_events_per_stack. Set this lower to keep
|
||||
# more events at the expense of more frequent purges. (integer
|
||||
|
||||
@@ -113,6 +113,11 @@ engine_opts = [
|
||||
default=100,
|
||||
help=_('Maximum number of stacks any one tenant may have'
|
||||
' active at one time.')),
|
||||
cfg.IntOpt('action_retry_limit',
|
||||
default=5,
|
||||
help=_('Number of times to retry to bring a '
|
||||
'resource to a non-error state. Set to 0 to disable '
|
||||
'retries.')),
|
||||
cfg.IntOpt('event_purge_batch_size',
|
||||
default=10,
|
||||
help=_('Controls how many events will be pruned whenever a '
|
||||
|
||||
+48
-1
@@ -14,12 +14,14 @@
|
||||
import base64
|
||||
import contextlib
|
||||
from datetime import datetime
|
||||
from oslo.config import cfg
|
||||
import six
|
||||
import warnings
|
||||
|
||||
from heat.common import exception
|
||||
from heat.common import identifier
|
||||
from heat.common import short_id
|
||||
from heat.common import timeutils
|
||||
from heat.db import api as db_api
|
||||
from heat.engine import attributes
|
||||
from heat.engine import environment
|
||||
@@ -34,6 +36,8 @@ from heat.openstack.common import excutils
|
||||
from heat.openstack.common.gettextutils import _
|
||||
from heat.openstack.common import log as logging
|
||||
|
||||
cfg.CONF.import_opt('action_retry_limit', 'heat.common.config')
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -458,6 +462,7 @@ class Resource(object):
|
||||
'''
|
||||
return self
|
||||
|
||||
@scheduler.wrappertask
|
||||
def create(self):
|
||||
'''
|
||||
Create the resource. Subclasses should provide a handle_create() method
|
||||
@@ -476,7 +481,49 @@ class Resource(object):
|
||||
# the parser.Stack is stored (which is after the resources
|
||||
# are __init__'d, but before they are create()'d)
|
||||
self.reparse()
|
||||
return self._do_action(action, self.properties.validate)
|
||||
|
||||
def pause():
|
||||
try:
|
||||
while True:
|
||||
yield
|
||||
except scheduler.Timeout:
|
||||
return
|
||||
|
||||
count = {self.CREATE: 0, self.DELETE: 0}
|
||||
|
||||
retry_limit = max(cfg.CONF.action_retry_limit, 0)
|
||||
first_failure = None
|
||||
|
||||
while (count[self.CREATE] <= retry_limit and
|
||||
count[self.DELETE] <= retry_limit):
|
||||
if count[action]:
|
||||
delay = timeutils.retry_backoff_delay(count[action],
|
||||
jitter_max=2.0)
|
||||
waiter = scheduler.TaskRunner(pause)
|
||||
waiter.start(timeout=delay)
|
||||
while not waiter.step():
|
||||
yield
|
||||
try:
|
||||
yield self._do_action(action, self.properties.validate)
|
||||
if action == self.CREATE:
|
||||
return
|
||||
else:
|
||||
action = self.CREATE
|
||||
except exception.ResourceFailure as failure:
|
||||
if not isinstance(failure.exc, ResourceInError):
|
||||
raise failure
|
||||
|
||||
count[action] += 1
|
||||
if action == self.CREATE:
|
||||
action = self.DELETE
|
||||
count[action] = 0
|
||||
|
||||
if first_failure is None:
|
||||
# Save the first exception
|
||||
first_failure = failure
|
||||
|
||||
if first_failure:
|
||||
raise first_failure
|
||||
|
||||
def prepare_abandon(self):
|
||||
self.abandon_in_progress = True
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
import copy
|
||||
import mox
|
||||
from oslo.config import cfg
|
||||
import six
|
||||
|
||||
from neutronclient.v2_0 import client as neutronclient
|
||||
@@ -474,7 +475,9 @@ class PoolTest(HeatTestCase):
|
||||
self.assertEqual((rsrc.CREATE, rsrc.COMPLETE), rsrc.state)
|
||||
self.m.VerifyAll()
|
||||
|
||||
def test_create_failed_unexpected_status(self):
|
||||
def test_create_failed_error_status(self):
|
||||
cfg.CONF.set_override('action_retry_limit', 0)
|
||||
|
||||
neutron_utils.neutronV20.find_resourceid_by_name_or_id(
|
||||
mox.IsA(neutronclient.Client),
|
||||
'subnet',
|
||||
@@ -532,7 +535,7 @@ class PoolTest(HeatTestCase):
|
||||
neutronclient.Client.show_pool('5678').MultipleTimes().AndReturn(
|
||||
{'pool': {'status': 'ACTIVE'}})
|
||||
neutronclient.Client.show_vip('xyz').AndReturn(
|
||||
{'vip': {'status': 'ERROR', 'name': 'xyz'}})
|
||||
{'vip': {'status': 'SOMETHING', 'name': 'xyz'}})
|
||||
|
||||
snippet = template_format.parse(pool_template)
|
||||
stack = utils.parse_stack(snippet)
|
||||
@@ -543,7 +546,7 @@ class PoolTest(HeatTestCase):
|
||||
error = self.assertRaises(exception.ResourceFailure,
|
||||
scheduler.TaskRunner(rsrc.create))
|
||||
self.assertEqual(
|
||||
'ResourceInError: Went to status ERROR due to "error in vip"',
|
||||
'ResourceUnknownStatus: Unknown status SOMETHING',
|
||||
six.text_type(error))
|
||||
self.assertEqual((rsrc.CREATE, rsrc.FAILED), rsrc.state)
|
||||
self.m.VerifyAll()
|
||||
|
||||
+142
-1
@@ -16,9 +16,11 @@ import json
|
||||
import uuid
|
||||
|
||||
import mock
|
||||
from oslo.config import cfg
|
||||
import six
|
||||
|
||||
from heat.common import exception
|
||||
from heat.common import timeutils
|
||||
from heat.db import api as db_api
|
||||
from heat.engine import attributes
|
||||
from heat.engine.cfn import functions as cfn_funcs
|
||||
@@ -473,12 +475,151 @@ class ResourceTest(HeatTestCase):
|
||||
res = generic_rsrc.ResourceWithProps(rname, tmpl, self.stack)
|
||||
res.id = 'test_res_id'
|
||||
(res.action, res.status) = (res.INIT, res.DELETE)
|
||||
self.assertRaises(exception.ResourceFailure, res.create)
|
||||
create = scheduler.TaskRunner(res.create)
|
||||
self.assertRaises(exception.ResourceFailure, create)
|
||||
scheduler.TaskRunner(res.destroy)()
|
||||
res.state_reset()
|
||||
scheduler.TaskRunner(res.create)()
|
||||
self.assertEqual((res.CREATE, res.COMPLETE), res.state)
|
||||
|
||||
def test_create_fail_retry(self):
    """Check that create() succeeds after one failed attempt.

    Recorded expectations, in order: ``handle_create`` raises
    ``ResourceInError``, ``handle_delete`` removes the failed attempt,
    one back-off delay is requested, and a second ``handle_create``
    succeeds. The resource must finish in CREATE/COMPLETE.
    """
    stubbed_cls = generic_rsrc.ResourceWithProps
    defn = rsrc_defn.ResourceDefinition('test_resource', 'Foo',
                                        {'Foo': 'abc'})
    rsrc = stubbed_cls('test_resource', defn, self.stack)

    # The back-off helper is stubbed so the test dictates the delay value.
    self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
    self.m.StubOutWithMock(stubbed_cls, 'handle_create')
    self.m.StubOutWithMock(stubbed_cls, 'handle_delete')

    error_details = dict(resource_name='test_resource',
                         resource_status='ERROR',
                         resource_type='GenericResourceType',
                         resource_action='CREATE',
                         status_reason='just because')

    # Create attempt 1 ends with the resource in error.
    stubbed_cls.handle_create().AndRaise(
        resource.ResourceInError(**error_details))
    # The errored resource from attempt 1 is deleted.
    stubbed_cls.handle_delete().AndReturn(None)

    # Create attempt 2 (after the first back-off) goes through.
    timeutils.retry_backoff_delay(1, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_create().AndReturn(None)
    self.m.ReplayAll()

    runner = scheduler.TaskRunner(rsrc.create)
    runner()
    self.assertEqual((rsrc.CREATE, rsrc.COMPLETE), rsrc.state)
    self.m.VerifyAll()
|
||||
|
||||
def test_create_fail_retry_disabled(self):
    """Check that a failed create is not retried when the limit is 0.

    With ``action_retry_limit`` overridden to 0, only a single
    ``handle_create`` call is recorded; no delete and no back-off delay
    are expected. The failure must surface as ``ResourceFailure`` and the
    resource must be left in CREATE/FAILED.
    """
    cfg.CONF.set_override('action_retry_limit', 0)
    stubbed_cls = generic_rsrc.ResourceWithProps
    defn = rsrc_defn.ResourceDefinition('test_resource', 'Foo',
                                        {'Foo': 'abc'})
    rsrc = stubbed_cls('test_resource', defn, self.stack)

    # Stubbed without recorded calls: any use of the back-off helper or
    # of handle_delete would be an unexpected call for mox.
    self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
    self.m.StubOutWithMock(stubbed_cls, 'handle_create')
    self.m.StubOutWithMock(stubbed_cls, 'handle_delete')

    error_details = dict(resource_name='test_resource',
                         resource_status='ERROR',
                         resource_type='GenericResourceType',
                         resource_action='CREATE',
                         status_reason='just because')

    # The one and only create attempt ends with the resource in error.
    stubbed_cls.handle_create().AndRaise(
        resource.ResourceInError(**error_details))
    self.m.ReplayAll()

    expected = 'ResourceInError: Went to status ERROR due to "just because"'
    runner = scheduler.TaskRunner(rsrc.create)
    failure = self.assertRaises(exception.ResourceFailure, runner)
    self.assertEqual(expected, str(failure))
    self.assertEqual((rsrc.CREATE, rsrc.FAILED), rsrc.state)

    self.m.VerifyAll()
|
||||
|
||||
def test_create_deletes_fail_retry(self):
    """Check that failing clean-up deletes are retried before re-creating.

    Recorded expectations, in order: the create fails, the delete of the
    failed attempt fails twice (each retry preceded by a back-off delay
    for counts 1 and 2), the third delete succeeds, and the second
    create (back-off count 1) succeeds. The resource must finish in
    CREATE/COMPLETE.
    """
    stubbed_cls = generic_rsrc.ResourceWithProps
    defn = rsrc_defn.ResourceDefinition('test_resource', 'Foo',
                                        {'Foo': 'abc'})
    rsrc = stubbed_cls('test_resource', defn, self.stack)

    self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
    self.m.StubOutWithMock(stubbed_cls, 'handle_create')
    self.m.StubOutWithMock(stubbed_cls, 'handle_delete')

    def in_error(action, reason):
        # Build the error raised by a stubbed handler for this resource.
        return resource.ResourceInError(
            resource_name='test_resource',
            resource_status='ERROR',
            resource_type='GenericResourceType',
            resource_action=action,
            status_reason=reason)

    # Create attempt 1 ends with the resource in error.
    stubbed_cls.handle_create().AndRaise(
        in_error('CREATE', 'just because'))
    # Delete attempt 1 fails as well.
    stubbed_cls.handle_delete().AndRaise(
        in_error('DELETE', 'delete failed'))
    # Delete attempt 2 fails again after the first back-off.
    timeutils.retry_backoff_delay(1, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_delete().AndRaise(
        in_error('DELETE', 'delete failed again'))

    # Delete attempt 3 goes through after the second back-off.
    timeutils.retry_backoff_delay(2, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_delete().AndReturn(None)

    # Create attempt 2 goes through.
    timeutils.retry_backoff_delay(1, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_create().AndReturn(None)
    self.m.ReplayAll()

    runner = scheduler.TaskRunner(rsrc.create)
    runner()
    self.assertEqual((rsrc.CREATE, rsrc.COMPLETE), rsrc.state)
    self.m.VerifyAll()
|
||||
|
||||
def test_creates_fail_retry(self):
    """Check that create() keeps retrying across repeated create failures.

    Recorded expectations, in order: two create attempts fail with
    ``ResourceInError``, each followed by a successful delete of the
    failed attempt; the third create succeeds. Back-off delays are
    requested for counts 1 and 2 before the second and third creates.
    The resource must finish in CREATE/COMPLETE.
    """
    stubbed_cls = generic_rsrc.ResourceWithProps
    defn = rsrc_defn.ResourceDefinition('test_resource', 'Foo',
                                        {'Foo': 'abc'})
    rsrc = stubbed_cls('test_resource', defn, self.stack)

    self.m.StubOutWithMock(timeutils, 'retry_backoff_delay')
    self.m.StubOutWithMock(stubbed_cls, 'handle_create')
    self.m.StubOutWithMock(stubbed_cls, 'handle_delete')

    def create_in_error():
        # Build a fresh error for each failing create attempt.
        return resource.ResourceInError(
            resource_name='test_resource',
            resource_status='ERROR',
            resource_type='GenericResourceType',
            resource_action='CREATE',
            status_reason='just because')

    # Create attempt 1 ends with the resource in error.
    stubbed_cls.handle_create().AndRaise(create_in_error())
    # The errored resource from attempt 1 is deleted.
    stubbed_cls.handle_delete().AndReturn(None)

    # Create attempt 2 fails too, after the first back-off.
    timeutils.retry_backoff_delay(1, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_create().AndRaise(create_in_error())
    # The errored resource from attempt 2 is deleted.
    stubbed_cls.handle_delete().AndReturn(None)

    # Create attempt 3 goes through after the second back-off.
    timeutils.retry_backoff_delay(2, jitter_max=2.0).AndReturn(0.01)
    stubbed_cls.handle_create().AndReturn(None)
    self.m.ReplayAll()

    runner = scheduler.TaskRunner(rsrc.create)
    runner()
    self.assertEqual((rsrc.CREATE, rsrc.COMPLETE), rsrc.state)
    self.m.VerifyAll()
|
||||
|
||||
def test_preview(self):
|
||||
tmpl = rsrc_defn.ResourceDefinition('test_resource',
|
||||
'GenericResourceType')
|
||||
|
||||
@@ -17,6 +17,7 @@ import json
|
||||
from cinderclient import exceptions as cinder_exp
|
||||
from cinderclient.v1 import client as cinderclient
|
||||
import mox
|
||||
from oslo.config import cfg
|
||||
import six
|
||||
|
||||
from heat.common import exception
|
||||
@@ -728,6 +729,8 @@ class VolumeTest(HeatTestCase):
|
||||
|
||||
def test_snapshot_no_volume(self):
|
||||
stack_name = 'test_volume_stack'
|
||||
|
||||
cfg.CONF.set_override('action_retry_limit', 0)
|
||||
fv = FakeVolume('creating', 'error')
|
||||
|
||||
self._mock_create_volume(fv, stack_name)
|
||||
|
||||
Reference in New Issue
Block a user