Ironic: Workaround to mitigate bug #1341420
The bug #1341420 can cause the driver to try to deploy onto a node which already has an instance associated with it, the way Ironic reserve a node to a specific instance is associating the UUID of that instance with the node via the instance_uuid field. This patch just makes the request to associate that field non-retriable when the api returns HTTP 409 (Conflict) (that's the HTTP error indicating that the node is already associated with another instance), so it can fail fast in case the node is already pick and the RetryFilter could then pick another node for that instance. Partial-Bug: #1341420 Change-Id: I697c5129893b18562182bbb3ceb8cb160e84c312
This commit is contained in:
parent
7bc2171869
commit
608af87541
|
@ -45,7 +45,7 @@ class IronicClientWrapperTestCase(test.NoDBTestCase):
|
|||
def test_call_good_no_args(self, mock_get_client, mock_multi_getattr):
|
||||
mock_get_client.return_value = FAKE_CLIENT
|
||||
self.ironicclient.call("node.list")
|
||||
mock_get_client.assert_called_once_with()
|
||||
mock_get_client.assert_called_once_with(retry_on_conflict=True)
|
||||
mock_multi_getattr.assert_called_once_with(FAKE_CLIENT, "node.list")
|
||||
mock_multi_getattr.return_value.assert_called_once_with()
|
||||
|
||||
|
@ -54,7 +54,7 @@ class IronicClientWrapperTestCase(test.NoDBTestCase):
|
|||
def test_call_good_with_args(self, mock_get_client, mock_multi_getattr):
|
||||
mock_get_client.return_value = FAKE_CLIENT
|
||||
self.ironicclient.call("node.list", 'test', associated=True)
|
||||
mock_get_client.assert_called_once_with()
|
||||
mock_get_client.assert_called_once_with(retry_on_conflict=True)
|
||||
mock_multi_getattr.assert_called_once_with(FAKE_CLIENT, "node.list")
|
||||
mock_multi_getattr.return_value.assert_called_once_with(
|
||||
'test', associated=True)
|
||||
|
|
|
@ -56,7 +56,7 @@ FAKE_CLIENT = ironic_utils.FakeClient()
|
|||
|
||||
|
||||
class FakeClientWrapper(cw.IronicClientWrapper):
|
||||
def _get_client(self):
|
||||
def _get_client(self, retry_on_conflict=True):
|
||||
return FAKE_CLIENT
|
||||
|
||||
|
||||
|
@ -897,8 +897,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
|||
self.driver.spawn, self.ctx, instance, None, [], None)
|
||||
self.assertEqual(0, mock_destroy.call_count)
|
||||
|
||||
@mock.patch.object(FAKE_CLIENT.node, 'update')
|
||||
def test__add_driver_fields_good(self, mock_update):
|
||||
def _test_add_driver_fields(self, mock_update=None, mock_call=None):
|
||||
node = ironic_utils.get_test_node(driver='fake')
|
||||
instance = fake_instance.fake_instance_obj(self.ctx,
|
||||
node=node.uuid)
|
||||
|
@ -921,7 +920,23 @@ class IronicDriverTestCase(test.NoDBTestCase):
|
|||
'value': str(node.properties.get('local_gb', 0))},
|
||||
{'path': '/instance_uuid', 'op': 'add',
|
||||
'value': instance.uuid}]
|
||||
mock_update.assert_called_once_with(node.uuid, expected_patch)
|
||||
|
||||
if mock_call is not None:
|
||||
# assert call() is invoked with retry_on_conflict False to
|
||||
# avoid bug #1341420
|
||||
mock_call.assert_called_once_with('node.update', node.uuid,
|
||||
expected_patch,
|
||||
retry_on_conflict=False)
|
||||
if mock_update is not None:
|
||||
mock_update.assert_called_once_with(node.uuid, expected_patch)
|
||||
|
||||
@mock.patch.object(FAKE_CLIENT.node, 'update')
|
||||
def test__add_driver_fields_mock_update(self, mock_update):
|
||||
self._test_add_driver_fields(mock_update=mock_update)
|
||||
|
||||
@mock.patch.object(cw.IronicClientWrapper, 'call')
|
||||
def test__add_driver_fields_mock_call(self, mock_call):
|
||||
self._test_add_driver_fields(mock_call=mock_call)
|
||||
|
||||
@mock.patch.object(FAKE_CLIENT.node, 'update')
|
||||
def test__add_driver_fields_fail(self, mock_update):
|
||||
|
|
|
@ -56,10 +56,14 @@ class IronicClientWrapper(object):
|
|||
"""Tell the wrapper to invalidate the cached ironic-client."""
|
||||
self._cached_client = None
|
||||
|
||||
def _get_client(self):
|
||||
def _get_client(self, retry_on_conflict=True):
|
||||
max_retries = CONF.ironic.api_max_retries if retry_on_conflict else 1
|
||||
retry_interval = (CONF.ironic.api_retry_interval
|
||||
if retry_on_conflict else 0)
|
||||
|
||||
# If we've already constructed a valid, authed client, just return
|
||||
# that.
|
||||
if self._cached_client is not None:
|
||||
if retry_on_conflict and self._cached_client is not None:
|
||||
return self._cached_client
|
||||
|
||||
auth_token = CONF.ironic.admin_auth_token
|
||||
|
@ -76,13 +80,14 @@ class IronicClientWrapper(object):
|
|||
'ironic_url': CONF.ironic.api_endpoint}
|
||||
|
||||
# Retries for Conflict exception
|
||||
kwargs['max_retries'] = CONF.ironic.api_max_retries
|
||||
kwargs['retry_interval'] = CONF.ironic.api_retry_interval
|
||||
kwargs['max_retries'] = max_retries
|
||||
kwargs['retry_interval'] = retry_interval
|
||||
try:
|
||||
cli = ironic.client.get_client(CONF.ironic.api_version, **kwargs)
|
||||
# Cache the client so we don't have to reconstruct and
|
||||
# reauthenticate it every time we need it.
|
||||
self._cached_client = cli
|
||||
if retry_on_conflict:
|
||||
self._cached_client = cli
|
||||
|
||||
except ironic.exc.Unauthorized:
|
||||
msg = _("Unable to authenticate Ironic client.")
|
||||
|
@ -110,19 +115,25 @@ class IronicClientWrapper(object):
|
|||
:param method: Name of the client method to call as a string.
|
||||
:param args: Client method arguments.
|
||||
:param kwargs: Client method keyword arguments.
|
||||
|
||||
:param retry_on_conflict: Boolean value. Whether the request should be
|
||||
retried in case of a conflict error
|
||||
(HTTP 409) or not. If retry_on_conflict is
|
||||
False the cached instance of the client
|
||||
won't be used. Defaults to True.
|
||||
:raises: NovaException if all retries failed.
|
||||
"""
|
||||
# TODO(dtantsur): drop these once ironicclient 0.8.0 is out and used in
|
||||
# global-requirements.
|
||||
retry_excs = (ironic.exc.ServiceUnavailable,
|
||||
ironic.exc.ConnectionRefused)
|
||||
retry_on_conflict = kwargs.pop('retry_on_conflict', True)
|
||||
|
||||
# num_attempts should be the times of retry + 1
|
||||
# eg. retry==0 just means run once and no retry
|
||||
num_attempts = max(0, CONF.ironic.api_max_retries) + 1
|
||||
|
||||
for attempt in range(1, num_attempts + 1):
|
||||
client = self._get_client()
|
||||
client = self._get_client(retry_on_conflict=retry_on_conflict)
|
||||
|
||||
try:
|
||||
return self._multi_getattr(client, method)(*args, **kwargs)
|
||||
|
|
|
@ -392,7 +392,12 @@ class IronicDriver(virt_driver.ComputeDriver):
|
|||
patch.append({'path': '/instance_uuid', 'op': 'add',
|
||||
'value': instance.uuid})
|
||||
try:
|
||||
self.ironicclient.call('node.update', node.uuid, patch)
|
||||
# FIXME(lucasagomes): The "retry_on_conflict" parameter was added
|
||||
# to basically causes the deployment to fail faster in case the
|
||||
# node picked by the scheduler is already associated with another
|
||||
# instance due bug #1341420.
|
||||
self.ironicclient.call('node.update', node.uuid, patch,
|
||||
retry_on_conflict=False)
|
||||
except ironic.exc.BadRequest:
|
||||
msg = (_("Failed to add deploy parameters on node %(node)s "
|
||||
"when provisioning the instance %(instance)s")
|
||||
|
|
Loading…
Reference in New Issue