VMware: set service status based on vc connection

When the vCenter (VC) session is disconnected, for example
because the management service on vCenter is down, the
nova-compute service status does not reflect that.

This patch sets disabled=True on the nova.objects.Service record
when the vCenter session is broken.
When the vCenter service is accessible again, oslo_vmware
takes care of creating a new session, and the service is
re-enabled on the next successful status update.

Closes-Bug: #1316928
Change-Id: I73a34eb6e0ca32d03e54d12a5e066b2ed4f19a61
This commit is contained in:
Giridhar Jayavelu 2016-03-08 23:13:14 -08:00
parent 39d60220f7
commit bd508a5a0e
3 changed files with 82 additions and 7 deletions

View File

@ -37,8 +37,9 @@ class ConfigDriveTestCase(test.NoDBTestCase):
REQUIRES_LOCKING = True REQUIRES_LOCKING = True
@mock.patch.object(objects.Service, 'get_by_compute_host')
@mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension') @mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension')
def setUp(self, mock_register): def setUp(self, mock_register, mock_service):
super(ConfigDriveTestCase, self).setUp() super(ConfigDriveTestCase, self).setUp()
vm_util.vm_refs_cache_reset() vm_util.vm_refs_cache_reset()
self.context = context.RequestContext('fake', 'fake', is_admin=False) self.context = context.RequestContext('fake', 'fake', is_admin=False)

View File

@ -150,6 +150,15 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
REQUIRES_LOCKING = True REQUIRES_LOCKING = True
def _create_service(self, **kwargs):
    """Build an ``objects.Service`` describing a nova-compute service.

    Optional keyword arguments (anything else is ignored):

    * ``host`` -- service host name, defaults to ``'dummy'``
    * ``disabled`` -- initial disabled flag, defaults to ``False``
    * ``forced_down`` -- initial forced_down flag, defaults to ``False``

    The binary, topic and report count are fixed to ``'nova-compute'``,
    ``'compute'`` and ``0`` respectively.
    """
    host = kwargs.get('host', 'dummy')
    is_disabled = kwargs.get('disabled', False)
    is_forced_down = kwargs.get('forced_down', False)
    return objects.Service(host=host,
                           disabled=is_disabled,
                           binary='nova-compute',
                           topic='compute',
                           report_count=0,
                           forced_down=is_forced_down)
@mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension') @mock.patch.object(driver.VMwareVCDriver, '_register_openstack_extension')
def setUp(self, mock_register): def setUp(self, mock_register):
super(VMwareAPIVMTestCase, self).setUp() super(VMwareAPIVMTestCase, self).setUp()
@ -171,7 +180,10 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
stubs.set_stubs(self) stubs.set_stubs(self)
vmwareapi_fake.reset() vmwareapi_fake.reset()
nova.tests.unit.image.fake.stub_out_image_service(self) nova.tests.unit.image.fake.stub_out_image_service(self)
service = self._create_service(host='test_url')
self.conn = driver.VMwareVCDriver(None, False) self.conn = driver.VMwareVCDriver(None, False)
self.assertFalse(service.disabled)
self._set_exception_vars() self._set_exception_vars()
self.node_name = self.conn._nodename self.node_name = self.conn._nodename
self.ds = 'ds1' self.ds = 'ds1'
@ -2288,3 +2300,34 @@ class VMwareAPIVMTestCase(test.NoDBTestCase):
version_arg_found = True version_arg_found = True
break break
self.assertTrue(version_arg_found) self.assertTrue(version_arg_found)
@mock.patch.object(objects.Service, 'get_by_compute_host')
def test_host_state_service_disabled(self, mock_service):
    """Service follows the vCenter connection state across status updates.

    The cluster stats lookup is patched to raise a connection error on
    the first call and to succeed on the next two, and the test checks
    the service's ``disabled`` flag, the driver's
    ``_auto_service_disabled`` flag and the number of ``save`` calls
    after each ``update_status``.
    """
    compute_service = self._create_service(disabled=False,
                                           host='fake-mini')
    mock_service.return_value = compute_service
    cluster_stats = {'vcpus': 4, 'mem': {'total': '8194', 'free': '2048'}}
    # First lookup fails with a connection error, the next two succeed.
    stats_outcomes = [vexc.VimConnectionException('fake'),
                      cluster_stats, cluster_stats]
    stats_patcher = mock.patch.object(vm_util, 'get_stats_from_cluster',
                                      side_effect=stats_outcomes)
    save_patcher = mock.patch.object(compute_service, 'save')
    with test.nested(stats_patcher, save_patcher) as (_mock_stats,
                                                      mock_save):
        # connection failure: the service is disabled and saved once
        self.conn._vc_state.update_status()
        self.assertEqual(1, mock_save.call_count)
        self.assertTrue(compute_service.disabled)
        self.assertTrue(self.conn._vc_state._auto_service_disabled)

        # ensure the service is enabled again when there is no connection
        # exception
        self.conn._vc_state.update_status()
        self.assertEqual(2, mock_save.call_count)
        self.assertFalse(compute_service.disabled)
        self.assertFalse(self.conn._vc_state._auto_service_disabled)

        # ensure objects.Service.save method is not called more than once
        # after the service is enabled
        self.conn._vc_state.update_status()
        self.assertEqual(2, mock_save.call_count)
        self.assertFalse(compute_service.disabled)
        self.assertFalse(self.conn._vc_state._auto_service_disabled)

View File

@ -17,17 +17,26 @@
Management class for host-related functions (start, reboot, etc). Management class for host-related functions (start, reboot, etc).
""" """
from oslo_log import log as logging
from oslo_utils import units from oslo_utils import units
from oslo_utils import versionutils from oslo_utils import versionutils
from oslo_vmware import exceptions as vexc
from nova.compute import arch from nova.compute import arch
from nova.compute import hv_type from nova.compute import hv_type
from nova.compute import vm_mode from nova.compute import vm_mode
import nova.conf
from nova import context
from nova import exception from nova import exception
from nova.i18n import _LW
from nova import objects
from nova.virt.vmwareapi import ds_util from nova.virt.vmwareapi import ds_util
from nova.virt.vmwareapi import vim_util from nova.virt.vmwareapi import vim_util
from nova.virt.vmwareapi import vm_util from nova.virt.vmwareapi import vm_util
CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)
def _get_ds_capacity_and_freespace(session, cluster=None, def _get_ds_capacity_and_freespace(session, cluster=None,
datastore_regex=None): datastore_regex=None):
@ -48,6 +57,7 @@ class VCState(object):
self._cluster = cluster self._cluster = cluster
self._datastore_regex = datastore_regex self._datastore_regex = datastore_regex
self._stats = {} self._stats = {}
self._auto_service_disabled = False
self.update_status() self.update_status()
def get_host_stats(self, refresh=False): def get_host_stats(self, refresh=False):
@ -60,13 +70,23 @@ class VCState(object):
def update_status(self): def update_status(self):
"""Update the current state of the cluster.""" """Update the current state of the cluster."""
data = {}
try:
capacity, freespace = _get_ds_capacity_and_freespace(self._session, capacity, freespace = _get_ds_capacity_and_freespace(self._session,
self._cluster, self._datastore_regex) self._cluster, self._datastore_regex)
# Get cpu, memory stats from the cluster # Get cpu, memory stats from the cluster
stats = vm_util.get_stats_from_cluster(self._session, self._cluster) stats = vm_util.get_stats_from_cluster(self._session,
self._cluster)
about_info = self._session._call_method(vim_util, "get_about_info") about_info = self._session._call_method(vim_util, "get_about_info")
data = {} except (vexc.VimConnectionException, vexc.VimAttributeException) as ex:
# VimAttributeException is thrown when vpxd service is down
LOG.warning(_LW("Failed to connect with %(node)s. "
"Error: %(error)s"),
{'node': self._host_name, 'error': ex})
self._set_host_enabled(False)
return data
data["vcpus"] = stats['vcpus'] data["vcpus"] = stats['vcpus']
data["disk_total"] = capacity / units.Gi data["disk_total"] = capacity / units.Gi
data["disk_available"] = freespace / units.Gi data["disk_available"] = freespace / units.Gi
@ -82,4 +102,15 @@ class VCState(object):
(arch.X86_64, hv_type.VMWARE, vm_mode.HVM)] (arch.X86_64, hv_type.VMWARE, vm_mode.HVM)]
self._stats = data self._stats = data
if self._auto_service_disabled:
self._set_host_enabled(True)
return data return data
def _set_host_enabled(self, enabled):
    """Sets the compute host's ability to accept new instances.

    Fetches the service returned by
    ``objects.Service.get_by_compute_host`` for ``CONF.host`` and makes
    its ``disabled`` flag match the requested state.
    ``self._auto_service_disabled`` is kept True only while the service
    is disabled because of a change made by this method.

    :param enabled: True to enable the service, False to disable it.
    """
    ctx = context.get_admin_context()
    service = objects.Service.get_by_compute_host(ctx, CONF.host)
    disable = not enabled

    if service.disabled == disable:
        # Already in the requested state, so there is nothing to persist.
        # When asked to disable a service that is already disabled, leave
        # its disabled_reason and our ownership flag untouched: if this
        # driver did not disable it (e.g. an operator did), it must not be
        # re-enabled automatically once the connection recovers.
        if enabled:
            # The service is enabled, so it is no longer auto-disabled.
            self._auto_service_disabled = False
        return

    service.disabled = disable
    # Only a disabled service carries a reason; clear it when re-enabling
    # so an enabled service does not keep a stale explanation.
    service.disabled_reason = ('set by vmwareapi host_state'
                               if disable else None)
    service.save()
    self._auto_service_disabled = service.disabled