Merge "Support host recovery"
This commit is contained in:
commit
552d1adaf1
|
@ -662,6 +662,15 @@ class PhysicalHostMonitorPlugin(base.BaseMonitorPlugin,
|
|||
LOG.warn('%s failed.',
|
||||
failed_hosts[0]['hypervisor_hostname'])
|
||||
reservation_flags = self._handle_failures(failed_hosts)
|
||||
else:
|
||||
recovered_hosts = db_api.host_get_all_by_queries(
|
||||
['reservable == 0',
|
||||
'hypervisor_hostname == ' + data['host']])
|
||||
if recovered_hosts:
|
||||
db_api.host_update(recovered_hosts[0]['id'],
|
||||
{'reservable': True})
|
||||
LOG.warn('%s recovered.',
|
||||
recovered_hosts[0]['hypervisor_hostname'])
|
||||
|
||||
return reservation_flags
|
||||
|
||||
|
@ -683,32 +692,44 @@ class PhysicalHostMonitorPlugin(base.BaseMonitorPlugin,
|
|||
LOG.trace('Poll...')
|
||||
reservation_flags = {}
|
||||
|
||||
failed_hosts = self._poll_resource_failures()
|
||||
failed_hosts, recovered_hosts = self._poll_resource_failures()
|
||||
if failed_hosts:
|
||||
for host in failed_hosts:
|
||||
LOG.warn('%s failed.', host['hypervisor_hostname'])
|
||||
reservation_flags = self._handle_failures(failed_hosts)
|
||||
if recovered_hosts:
|
||||
for host in recovered_hosts:
|
||||
db_api.host_update(host['id'], {'reservable': True})
|
||||
LOG.warn('%s recovered.', host['hypervisor_hostname'])
|
||||
|
||||
return reservation_flags
|
||||
|
||||
def _poll_resource_failures(self):
    """Check health of hosts by calling Nova Hypervisors API.

    Hosts read from the database are split by their ``reservable`` flag
    and matched (by ``id``) against the hypervisors listed by Nova:

    * a reservable host whose hypervisor is ``down`` or ``disabled`` is
      reported as failed;
    * an unreservable host whose hypervisor is ``up`` and ``enabled`` is
      reported as recovered.

    If the health check raises, the error is logged and whatever was
    computed so far (by default two empty lists) is returned, so callers
    can always unpack the result into two values.

    :return: a list of failed hosts, a list of recovered hosts.
    """
    hosts = db_api.host_get_all_by_filters({})
    reservable_hosts = [h for h in hosts if h['reservable'] is True]
    unreservable_hosts = [h for h in hosts if h['reservable'] is False]

    # Initialise the results up front: if the Nova call below fails, the
    # return statement must still have something to return.
    failed_hosts = []
    recovered_hosts = []

    try:
        hvs = self.nova.hypervisors.list()

        # Hypervisor ids are compared as strings against host['id'].
        failed_hv_ids = [str(hv.id) for hv in hvs
                         if hv.state == 'down' or hv.status == 'disabled']
        failed_hosts = [host for host in reservable_hosts
                        if host['id'] in failed_hv_ids]

        active_hv_ids = [str(hv.id) for hv in hvs
                         if hv.state == 'up' and hv.status == 'enabled']
        recovered_hosts = [host for host in unreservable_hosts
                           if host['id'] in active_hv_ids]
    except Exception as e:
        # No single host is in scope here (the check is done in bulk), so
        # the message must not reference a per-host variable.
        LOG.exception('Skipping health check. %s', str(e))

    return failed_hosts, recovered_hosts
|
||||
|
||||
def _handle_failures(self, failed_hosts):
|
||||
"""Handle resource failures.
|
||||
|
|
|
@ -1702,6 +1702,8 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
|
||||
result = self.host_monitor_plugin.notification_callback(event_type,
|
||||
payload)
|
||||
host_get_all.assert_called_once_with(
|
||||
['hypervisor_hostname == ' + payload['nova_object.data']['host']])
|
||||
self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
|
||||
|
||||
def test_notification_callback_no_failure(self):
|
||||
|
@ -1724,14 +1726,53 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
'uuid': 'fa69c544-906b-4a6a-a9c6-c1f7a8078c73'
|
||||
}
|
||||
}
|
||||
host_get_all = self.patch(db_api,
|
||||
'reservable_host_get_all_by_queries')
|
||||
host_get_all = self.patch(db_api, 'host_get_all_by_queries')
|
||||
host_get_all.return_value = []
|
||||
handle_failures = self.patch(self.host_monitor_plugin,
|
||||
'_handle_failures')
|
||||
|
||||
result = self.host_monitor_plugin.notification_callback(event_type,
|
||||
payload)
|
||||
host_get_all.assert_not_called()
|
||||
host_get_all.assert_called_once_with(
|
||||
['reservable == 0',
|
||||
'hypervisor_hostname == ' + payload['nova_object.data']['host']])
|
||||
handle_failures.assert_not_called()
|
||||
self.assertEqual({}, result)
|
||||
|
||||
def test_notification_callback_recover(self):
    """notification_callback re-enables a host found by the recovery query.

    The DB query is mocked to return one host; the test checks that the
    host is updated with ``reservable: True``, that failure handling is
    not invoked, and that no reservation flags are returned.
    """
    host = {'hypervisor_hostname': 'compute-1', 'id': 1}
    service_data = {
        'host': 'compute-1',
        'disabled': False,
        'last_seen_up': '2012-10-29T13:42:05Z',
        'binary': 'nova-compute',
        'topic': 'compute',
        'disabled_reason': None,
        'report_count': 1,
        'forced_down': False,
        'version': 22,
        'availability_zone': None,
        'uuid': 'fa69c544-906b-4a6a-a9c6-c1f7a8078c73'
    }
    payload = {
        'nova_object.namespace': 'nova',
        'nova_object.name': 'ServiceStatusPayload',
        'nova_object.version': '1.1',
        'nova_object.data': service_data
    }
    event_type = 'service.update'

    # Patch the DB layer and the failure handler before invoking the
    # callback so every interaction can be asserted afterwards.
    query_hosts = self.patch(db_api, 'host_get_all_by_queries')
    query_hosts.return_value = [host]
    failure_handler = self.patch(self.host_monitor_plugin,
                                 '_handle_failures')
    update_host = self.patch(db_api, 'host_update')

    flags = self.host_monitor_plugin.notification_callback(event_type,
                                                           payload)

    expected_queries = [
        'reservable == 0',
        'hypervisor_hostname == ' + payload['nova_object.data']['host']]
    query_hosts.assert_called_once_with(expected_queries)
    update_host.assert_called_once_with(host['id'], {'reservable': True})
    failure_handler.assert_not_called()
    self.assertEqual({}, flags)
|
||||
|
||||
|
@ -1739,14 +1780,14 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
hosts = [
|
||||
{'id': '1',
|
||||
'hypervisor_hostname': 'compute-1',
|
||||
'trust_id': 'trust-1'},
|
||||
'reservable': True},
|
||||
{'id': '2',
|
||||
'hypervisor_hostname': 'compute-2',
|
||||
'trust_id': 'trust-2'},
|
||||
'reservable': True},
|
||||
]
|
||||
|
||||
host_get_all = self.patch(db_api,
|
||||
'reservable_host_get_all_by_queries')
|
||||
'host_get_all_by_filters')
|
||||
host_get_all.return_value = hosts
|
||||
hypervisors_list = self.patch(
|
||||
self.host_monitor_plugin.nova.hypervisors, 'list')
|
||||
|
@ -1755,20 +1796,20 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
mock.MagicMock(id=2, state='down', status='enabled')]
|
||||
|
||||
result = self.host_monitor_plugin._poll_resource_failures()
|
||||
self.assertEqual(hosts, result)
|
||||
self.assertEqual((hosts, []), result)
|
||||
|
||||
def test_poll_resource_failures_status_disabled(self):
|
||||
hosts = [
|
||||
{'id': '1',
|
||||
'hypervisor_hostname': 'compute-1',
|
||||
'trust_id': 'trust-1'},
|
||||
'reservable': True},
|
||||
{'id': '2',
|
||||
'hypervisor_hostname': 'compute-2',
|
||||
'trust_id': 'trust-2'},
|
||||
'reservable': True},
|
||||
]
|
||||
|
||||
host_get_all = self.patch(db_api,
|
||||
'reservable_host_get_all_by_queries')
|
||||
'host_get_all_by_filters')
|
||||
host_get_all.return_value = hosts
|
||||
hypervisors_list = self.patch(
|
||||
self.host_monitor_plugin.nova.hypervisors, 'list')
|
||||
|
@ -1777,20 +1818,20 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
mock.MagicMock(id=2, state='up', status='disabled')]
|
||||
|
||||
result = self.host_monitor_plugin._poll_resource_failures()
|
||||
self.assertEqual(hosts, result)
|
||||
self.assertEqual((hosts, []), result)
|
||||
|
||||
def test_poll_resource_failures_nothing(self):
|
||||
hosts = [
|
||||
{'id': '1',
|
||||
'hypervisor_hostname': 'compute-1',
|
||||
'trust_id': 'trust-1'},
|
||||
'reservable': True},
|
||||
{'id': '2',
|
||||
'hypervisor_hostname': 'compute-2',
|
||||
'trust_id': 'trust-2'},
|
||||
'reservable': True},
|
||||
]
|
||||
|
||||
host_get_all = self.patch(db_api,
|
||||
'reservable_host_get_all_by_queries')
|
||||
'host_get_all_by_filters')
|
||||
host_get_all.return_value = hosts
|
||||
hypervisors_list = self.patch(
|
||||
self.host_monitor_plugin.nova.hypervisors, 'list')
|
||||
|
@ -1799,7 +1840,29 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
|
|||
mock.MagicMock(id=2, state='up', status='enabled')]
|
||||
|
||||
result = self.host_monitor_plugin._poll_resource_failures()
|
||||
self.assertEqual([], result)
|
||||
self.assertEqual(([], []), result)
|
||||
|
||||
def test_poll_resource_failures_recover(self):
    """Unreservable hosts with up/enabled hypervisors are returned as recovered.

    Both mocked hosts are unreservable and both mocked hypervisors are
    ``up``/``enabled``, so the expected result is no failed hosts and
    every host in the recovered list.
    """
    unreservable = [
        {'id': '1',
         'hypervisor_hostname': 'compute-1',
         'reservable': False},
        {'id': '2',
         'hypervisor_hostname': 'compute-2',
         'reservable': False},
    ]
    healthy_hypervisors = [
        mock.MagicMock(id=1, state='up', status='enabled'),
        mock.MagicMock(id=2, state='up', status='enabled')]

    db_hosts = self.patch(db_api, 'host_get_all_by_filters')
    db_hosts.return_value = unreservable
    nova_list = self.patch(
        self.host_monitor_plugin.nova.hypervisors, 'list')
    nova_list.return_value = healthy_hypervisors

    outcome = self.host_monitor_plugin._poll_resource_failures()

    self.assertEqual(([], unreservable), outcome)
|
||||
|
||||
def test_handle_failures(self):
|
||||
hosts = [
|
||||
|
|
Loading…
Reference in New Issue