Merge "Exclude current conductor from offline_conductors" into stable/yoga
This commit is contained in:
@@ -1604,7 +1604,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
|||||||
|
|
||||||
:param context: request context.
|
:param context: request context.
|
||||||
"""
|
"""
|
||||||
offline_conductors = self.dbapi.get_offline_conductors()
|
offline_conductors = utils.exclude_current_conductor(
|
||||||
|
self.host, self.dbapi.get_offline_conductors())
|
||||||
if not offline_conductors:
|
if not offline_conductors:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -3436,7 +3437,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
|||||||
|
|
||||||
:param context: request context.
|
:param context: request context.
|
||||||
"""
|
"""
|
||||||
offline_conductors = self.dbapi.get_offline_conductors(field='id')
|
offline_conductors = utils.exclude_current_conductor(
|
||||||
|
self.conductor.id, self.dbapi.get_offline_conductors(field='id'))
|
||||||
for conductor_id in offline_conductors:
|
for conductor_id in offline_conductors:
|
||||||
filters = {'state': states.ALLOCATING,
|
filters = {'state': states.ALLOCATING,
|
||||||
'conductor_affinity': conductor_id}
|
'conductor_affinity': conductor_id}
|
||||||
|
|||||||
@@ -1671,3 +1671,24 @@ def update_image_type(context, node):
|
|||||||
'image_type',
|
'image_type',
|
||||||
images.IMAGE_TYPE_WHOLE_DISK if iwdi else images.IMAGE_TYPE_PARTITION)
|
images.IMAGE_TYPE_WHOLE_DISK if iwdi else images.IMAGE_TYPE_PARTITION)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def exclude_current_conductor(current_conductor, offline_conductors):
    """Return the offline conductors with the current conductor removed.

    In some cases the current conductor may have failed to update
    the heartbeat timestamp due to failure or resource starvation.
    When this occurs the dbapi get_offline_conductors method will
    include the current conductor in its return value.

    :param current_conductor: id or hostname of the current conductor
    :param offline_conductors: Iterable of offline conductors, expressed
        in the same form (id or hostname) as ``current_conductor``. It is
        read exactly once, so one-shot iterables are handled correctly.
    :return: New list of offline conductors, excluding current conductor
    """
    # Materialise the input once: a membership test followed by a second
    # iteration would exhaust a generator/iterator and silently drop entries.
    candidates = list(offline_conductors)
    remaining = [x for x in candidates if x != current_conductor]

    # A shorter list means the current conductor was reported as offline.
    if len(remaining) != len(candidates):
        LOG.warning('Current conductor %s will be excluded from offline '
                    'conductors. Conductor heartbeat has failed to update the '
                    'database timestamp. This is sign of resource starvation.',
                    current_conductor)

    return remaining
|
||||||
|
|||||||
@@ -1921,6 +1921,16 @@ class MiscTestCase(db_base.DbTestCase):
|
|||||||
conductor_utils.restore_power_state_if_needed(task, power_state)
|
conductor_utils.restore_power_state_if_needed(task, power_state)
|
||||||
self.assertEqual(0, power_action_mock.call_count)
|
self.assertEqual(0, power_action_mock.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(conductor_utils.LOG, 'warning', autospec=True)
def test_exclude_current_conductor(self, mock_log):
    """Current conductor is dropped from the list and a warning is logged."""
    this_conductor = 'foo'
    reported_offline = ['foo', 'bar']

    remaining = conductor_utils.exclude_current_conductor(
        this_conductor, reported_offline)

    # The warning must fire because 'foo' was present in the input.
    self.assertTrue(mock_log.called)
    self.assertIn('bar', remaining)
    self.assertNotIn('foo', remaining)
|
||||||
|
|
||||||
|
|
||||||
class ValidateInstanceInfoTraitsTestCase(tests_base.TestCase):
|
class ValidateInstanceInfoTraitsTestCase(tests_base.TestCase):
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes an issue where a conductor would attempt local takeover. In case of
|
||||||
|
heartbeat failure due to resource starvation, the current conductor was
|
||||||
|
detected as offline when querying the database. In this scenario the
|
||||||
|
conductor would forcibly remove reservations of its own and initiate
|
||||||
|
takeover. Current conductor is now excluded from the list of offline
|
||||||
|
conductors, so that local takeover does not occur for this case. A warning
|
||||||
|
is logged to highlight the potential resource starvation issue.
|
||||||
|
See bug: `2010016 <https://storyboard.openstack.org/#!/story/2010016>`_.
|
||||||
|
|
||||||
Reference in New Issue
Block a user