From bd508a5a0e2462292cb3ebfa3ba851e8a1a2a01f Mon Sep 17 00:00:00 2001
From: Giridhar Jayavelu
Date: Tue, 8 Mar 2016 23:13:14 -0800
Subject: [PATCH] VMware: set service status based on vc connection

When the vcenter (vc) session is disconnected, say, if the management
service on vcenter is down, then the nova-compute service status
does not reflect that. This patch sets disabled=True in
nova.objects.Service when the vcenter session is broken.
When vcenter service is accessible again, then oslo_vmware takes care
of recreating a new session.

Closes-Bug: #1316928
Change-Id: I73a34eb6e0ca32d03e54d12a5e066b2ed4f19a61
---
 .../unit/virt/vmwareapi/test_configdrive.py   |  3 +-
 .../unit/virt/vmwareapi/test_driver_api.py    | 43 +++++++++++++++++++
 nova/virt/vmwareapi/host.py                   | 43 ++++++++++++++++---
 3 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/nova/tests/unit/virt/vmwareapi/test_configdrive.py b/nova/tests/unit/virt/vmwareapi/test_configdrive.py
index e54ed1651c53..baa8972f3399 100644
--- a/nova/tests/unit/virt/vmwareapi/test_configdrive.py
+++ b/nova/tests/unit/virt/vmwareapi/test_configdrive.py
@@ -37,8 +37,9 @@ class ConfigDriveTestCase(test.NoDBTestCase):
 
     REQUIRES_LOCKING = True
 
+    @mock.patch.object(objects.Service, 'get_by_compute_host')
     @mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension')
-    def setUp(self, mock_register):
+    def setUp(self, mock_register, mock_service):
         super(ConfigDriveTestCase, self).setUp()
         vm_util.vm_refs_cache_reset()
         self.context = context.RequestContext('fake', 'fake', is_admin=False)
diff --git a/nova/tests/unit/virt/vmwareapi/test_driver_api.py b/nova/tests/unit/virt/vmwareapi/test_driver_api.py
index bbcd3ca9a81b..79eedf8ef5d8 100644
--- a/nova/tests/unit/virt/vmwareapi/test_driver_api.py
+++ b/nova/tests/unit/virt/vmwareapi/test_driver_api.py
@@ -150,6 +150,15 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
 
     REQUIRES_LOCKING = True
 
+    def _create_service(self, **kwargs):
+        service_ref = {'host': kwargs.get('host', 'dummy'),
+                       'disabled': kwargs.get('disabled', False),
+                       'binary': 'nova-compute',
+                       'topic': 'compute',
+                       'report_count': 0,
+                       'forced_down': kwargs.get('forced_down', False)}
+        return objects.Service(**service_ref)
+
     @mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension')
     def setUp(self, mock_register):
         super(VMwareAPIVMTestCase, self).setUp()
@@ -171,7 +180,10 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
         stubs.set_stubs(self)
         vmwareapi_fake.reset()
         nova.tests.unit.image.fake.stub_out_image_service(self)
+        service = self._create_service(host='test_url')
+
         self.conn = driver.VMwareVCDriver(None, False)
+        self.assertFalse(service.disabled)
         self._set_exception_vars()
         self.node_name = self.conn._nodename
         self.ds = 'ds1'
@@ -2288,3 +2300,34 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
                 version_arg_found = True
                 break
         self.assertTrue(version_arg_found)
+
+    @mock.patch.object(objects.Service, 'get_by_compute_host')
+    def test_host_state_service_disabled(self, mock_service):
+        service = self._create_service(disabled=False, host='fake-mini')
+        mock_service.return_value = service
+
+        fake_stats = {'vcpus': 4, 'mem': {'total': '8194', 'free': '2048'}}
+        with test.nested(
+            mock.patch.object(vm_util, 'get_stats_from_cluster',
+                              side_effect=[vexc.VimConnectionException('fake'),
+                                           fake_stats, fake_stats]),
+            mock.patch.object(service, 'save')) as (mock_stats,
+                                                    mock_save):
+            self.conn._vc_state.update_status()
+            self.assertEqual(1, mock_save.call_count)
+            self.assertTrue(service.disabled)
+            self.assertTrue(self.conn._vc_state._auto_service_disabled)
+
+            # ensure the service is enabled again when there is no connection
+            # exception
+            self.conn._vc_state.update_status()
+            self.assertEqual(2, mock_save.call_count)
+            self.assertFalse(service.disabled)
+            self.assertFalse(self.conn._vc_state._auto_service_disabled)
+
+            # ensure objects.Service.save method is not called more than once
+            # after the service is enabled
+            self.conn._vc_state.update_status()
+            self.assertEqual(2, mock_save.call_count)
+            self.assertFalse(service.disabled)
+            self.assertFalse(self.conn._vc_state._auto_service_disabled)
diff --git a/nova/virt/vmwareapi/host.py b/nova/virt/vmwareapi/host.py
index 7a401ff19a2d..8bdda008788d 100644
--- a/nova/virt/vmwareapi/host.py
+++ b/nova/virt/vmwareapi/host.py
@@ -17,17 +17,26 @@
 Management class for host-related functions (start, reboot, etc).
 """
 
+from oslo_log import log as logging
 from oslo_utils import units
 from oslo_utils import versionutils
+from oslo_vmware import exceptions as vexc
 
 from nova.compute import arch
 from nova.compute import hv_type
 from nova.compute import vm_mode
+import nova.conf
+from nova import context
 from nova import exception
+from nova.i18n import _LW
+from nova import objects
 from nova.virt.vmwareapi import ds_util
 from nova.virt.vmwareapi import vim_util
 from nova.virt.vmwareapi import vm_util
 
+CONF = nova.conf.CONF
+LOG = logging.getLogger(__name__)
+
 
 def _get_ds_capacity_and_freespace(session, cluster=None,
                                    datastore_regex=None):
@@ -48,6 +57,7 @@ class VCState(object):
         self._cluster = cluster
         self._datastore_regex = datastore_regex
         self._stats = {}
+        self._auto_service_disabled = False
         self.update_status()
 
     def get_host_stats(self, refresh=False):
@@ -60,13 +70,23 @@ class VCState(object):
 
     def update_status(self):
         """Update the current state of the cluster."""
-        capacity, freespace = _get_ds_capacity_and_freespace(self._session,
-            self._cluster, self._datastore_regex)
-
-        # Get cpu, memory stats from the cluster
-        stats = vm_util.get_stats_from_cluster(self._session, self._cluster)
-        about_info = self._session._call_method(vim_util, "get_about_info")
         data = {}
+        try:
+            capacity, freespace = _get_ds_capacity_and_freespace(self._session,
+                self._cluster, self._datastore_regex)
+
+            # Get cpu, memory stats from the cluster
+            stats = vm_util.get_stats_from_cluster(self._session,
+                                                   self._cluster)
+            about_info = self._session._call_method(vim_util, "get_about_info")
+        except (vexc.VimConnectionException, vexc.VimAttributeException) as ex:
+            # VimAttributeException is thrown when vpxd service is down
+            LOG.warning(_LW("Failed to connect with %(node)s. "
+                            "Error: %(error)s"),
+                        {'node': self._host_name, 'error': ex})
+            self._set_host_enabled(False)
+            return data
+
         data["vcpus"] = stats['vcpus']
         data["disk_total"] = capacity / units.Gi
         data["disk_available"] = freespace / units.Gi
@@ -82,4 +102,15 @@ class VCState(object):
             (arch.X86_64, hv_type.VMWARE, vm_mode.HVM)]
 
         self._stats = data
+        if self._auto_service_disabled:
+            self._set_host_enabled(True)
         return data
+
+    def _set_host_enabled(self, enabled):
+        """Sets the compute host's ability to accept new instances."""
+        ctx = context.get_admin_context()
+        service = objects.Service.get_by_compute_host(ctx, CONF.host)
+        service.disabled = not enabled
+        service.disabled_reason = 'set by vmwareapi host_state'
+        service.save()
+        self._auto_service_disabled = service.disabled