Merge "Exclude current conductor from offline_conductors" into stable/yoga
This commit is contained in:
@@ -1604,7 +1604,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
|
||||
:param context: request context.
|
||||
"""
|
||||
offline_conductors = self.dbapi.get_offline_conductors()
|
||||
offline_conductors = utils.exclude_current_conductor(
|
||||
self.host, self.dbapi.get_offline_conductors())
|
||||
if not offline_conductors:
|
||||
return
|
||||
|
||||
@@ -3436,7 +3437,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
|
||||
:param context: request context.
|
||||
"""
|
||||
offline_conductors = self.dbapi.get_offline_conductors(field='id')
|
||||
offline_conductors = utils.exclude_current_conductor(
|
||||
self.conductor.id, self.dbapi.get_offline_conductors(field='id'))
|
||||
for conductor_id in offline_conductors:
|
||||
filters = {'state': states.ALLOCATING,
|
||||
'conductor_affinity': conductor_id}
|
||||
|
||||
@@ -1671,3 +1671,24 @@ def update_image_type(context, node):
|
||||
'image_type',
|
||||
images.IMAGE_TYPE_WHOLE_DISK if iwdi else images.IMAGE_TYPE_PARTITION)
|
||||
return True
|
||||
|
||||
|
||||
def exclude_current_conductor(current_conductor, offline_conductors):
    """Return the offline conductors with the current conductor removed.

    In some cases the current conductor may have failed to update
    the heartbeat timestamp due to failure or resource starvation.
    When this occurs the dbapi get_offline_conductors method will
    include the current conductor in its return value.

    :param current_conductor: id or hostname of the current conductor
    :param offline_conductors: List of offline conductors.
    :return: New list of offline conductors, excluding current conductor.
        The input collection is not modified.
    """
    # Warn only when the current conductor is actually reported as offline;
    # the filtering below is a no-op otherwise.
    if current_conductor in offline_conductors:
        LOG.warning('Current conductor %s will be excluded from offline '
                    'conductors. Conductor heartbeat has failed to update the '
                    'database timestamp. This is sign of resource starvation.',
                    current_conductor)

    return [x for x in offline_conductors if x != current_conductor]
|
||||
|
||||
@@ -1921,6 +1921,16 @@ class MiscTestCase(db_base.DbTestCase):
|
||||
conductor_utils.restore_power_state_if_needed(task, power_state)
|
||||
self.assertEqual(0, power_action_mock.call_count)
|
||||
|
||||
@mock.patch.object(conductor_utils.LOG, 'warning', autospec=True)
def test_exclude_current_conductor(self, mock_log):
    # The current conductor appears in the offline list, so the helper is
    # expected to log a warning and drop it from the returned list.
    current_conductor = 'foo'
    offline_conductors = ['foo', 'bar']
    result = conductor_utils.exclude_current_conductor(
        current_conductor, offline_conductors)
    self.assertTrue(mock_log.called)
    self.assertIn('bar', result)
    self.assertNotIn('foo', result)
|
||||
|
||||
|
||||
class ValidateInstanceInfoTraitsTestCase(tests_base.TestCase):
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue where a conductor would attempt local takeover. In case of
|
||||
heartbeat failure due to resource starvation, the current conductor was
|
||||
detected as offline when querying the database. In this scenario the
|
||||
conductor would forcibly remove reservations of its own and initiate
|
||||
takeover. Current conductor is now excluded from the list of offline
|
||||
conductors, so that local takeover does not occur for this case. A warning
|
||||
is logged to highlight the potential resource starvation issue.
|
||||
See bug: `2010016 <https://storyboard.openstack.org/#!/story/2010016>`_.
|
||||
|
||||
Reference in New Issue
Block a user