Disable nova-compute on libvirt connectivity exceptions

The service will be disabled and re-enabled according to the success
or failure of establishing a connection to the libvirt service.

The update_available_resources periodic task will keep probing the
connection even while the service is disabled.

This prevents migrations or new instances from being scheduled onto a
compute node that is disconnected from libvirt.

Closes-Bug: #1240670
Change-Id: Ib8d67838ceb73c5b1cdc9498c17b335e9e5bb6f3
This commit is contained in:
Vladik Romanovsky 2013-10-16 15:14:16 -04:00
parent 2efcc9dc24
commit cc70e3a28d
3 changed files with 137 additions and 7 deletions

View File

@ -380,6 +380,8 @@ class LibvirtConnTestCase(test.TestCase):
self.useFixture(fixtures.MonkeyPatch(
'nova.virt.libvirt.imagebackend.libvirt_utils',
fake_libvirt_utils))
self.stubs.Set(libvirt_driver.LibvirtDriver,
'set_host_enabled', mock.Mock())
def fake_extend(image, size, use_cow=False):
pass
@ -665,12 +667,16 @@ class LibvirtConnTestCase(test.TestCase):
self.stubs.Set(self.conn, "getLibVersion", get_lib_version_stub)
self.mox.StubOutWithMock(conn, '_connect')
self.mox.StubOutWithMock(self.conn, 'registerCloseCallback')
self.mox.StubOutWithMock(conn, 'set_host_enabled')
conn._connect(mox.IgnoreArg(), mox.IgnoreArg()).AndReturn(self.conn)
self.conn.registerCloseCallback(
mox.IgnoreArg(), mox.IgnoreArg()).WithSideEffects(
set_close_callback)
conn.set_host_enabled('fake-mini', True)
conn.set_host_enabled('fake-mini', 'Connection to libvirt lost: 1')
conn._connect(mox.IgnoreArg(), mox.IgnoreArg()).AndReturn(self.conn)
conn.set_host_enabled('fake-mini', True)
self.conn.registerCloseCallback(mox.IgnoreArg(), mox.IgnoreArg())
self.mox.ReplayAll()
@ -682,6 +688,7 @@ class LibvirtConnTestCase(test.TestCase):
self.close_callback(self.conn, 1, None)
conn._get_connection()
self.mox.UnsetStubs()
def test_cpu_features_bug_1217630(self):
conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
@ -3881,15 +3888,67 @@ class LibvirtConnTestCase(test.TestCase):
self.mox.StubOutWithMock(libvirt, "openAuth")
self.mox.StubOutWithMock(libvirt.libvirtError, "get_error_code")
self.mox.StubOutWithMock(libvirt.libvirtError, "get_error_domain")
self.mox.StubOutWithMock(conn, 'set_host_enabled')
libvirt.openAuth(mox.IgnoreArg(), mox.IgnoreArg(),
mox.IgnoreArg()).AndRaise(
libvirt.libvirtError("fake failure"))
conn.set_host_enabled('fake-mini', 'Connection to libvirt lost: ERROR')
conn.set_host_enabled('fake-mini', False)
self.mox.ReplayAll()
conn._close_callback(conn._wrapped_conn, 'ERROR', '')
self.assertRaises(exception.HypervisorUnavailable,
conn.get_num_instances)
def test_broken_connection_disable_service(self):
    # A close callback on the driver, with libvirt.openAuth set up to
    # fail afterwards, must leave the looked-up service record disabled
    # with the "connection lost" reason, and later driver calls must
    # raise HypervisorUnavailable.
    expected_reason = 'Connection to libvirt lost: ERROR!'
    self.mox.UnsetStubs()
    conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    # Every attempt to open a libvirt connection fails.
    self.mox.StubOutWithMock(libvirt, "openAuth")
    self.mox.StubOutWithMock(libvirt.libvirtError, "get_error_code")
    self.mox.StubOutWithMock(libvirt.libvirtError, "get_error_domain")
    libvirt.openAuth(
        mox.IgnoreArg(), mox.IgnoreArg(), mox.IgnoreArg()
    ).AndRaise(libvirt.libvirtError("fake failure"))

    from nova.objects import service as service_obj

    # Two service lookups are expected: the first record is the one
    # inspected below, the second is returned for the later lookup.
    first_service = mock.MagicMock()
    first_service.__getitem__.return_value = False
    second_service = mock.MagicMock()
    second_service.__getitem__.return_value = True
    self.mox.StubOutWithMock(service_obj.Service,
                             'get_by_compute_host')
    for returned_service in (first_service, second_service):
        service_obj.Service.get_by_compute_host(
            mox.IgnoreArg(), 'fake-mini').AndReturn(returned_service)
    self.mox.ReplayAll()

    conn._close_callback(conn._wrapped_conn, 'ERROR!', '')

    disabled_with_reason = (
        first_service.disabled and
        first_service.disabled_reason == expected_reason)
    self.assertTrue(disabled_with_reason)
    self.assertRaises(exception.HypervisorUnavailable,
                      conn.get_num_instances)
def test_service_resume_after_broken_connection(self):
    # With libvirt.openAuth succeeding, a driver call must leave the
    # looked-up service record with falsy ``disabled`` and
    # ``disabled_reason`` values.
    self.mox.UnsetStubs()
    conn = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    # Opening the libvirt connection succeeds with a mocked handle.
    self.mox.StubOutWithMock(libvirt, "openAuth")
    libvirt.openAuth(
        mox.IgnoreArg(), mox.IgnoreArg(), mox.IgnoreArg()
    ).AndReturn(mock.MagicMock())

    from nova.objects import service as service_obj

    service_record = mock.MagicMock()
    service_record.__getitem__.return_value = True
    self.mox.StubOutWithMock(service_obj.Service,
                             'get_by_compute_host')
    expected_lookup = service_obj.Service.get_by_compute_host(
        mox.IgnoreArg(), 'fake-mini')
    expected_lookup.AndReturn(service_record)
    self.mox.ReplayAll()

    conn.get_num_instances()

    # Equivalent to "not disabled and not disabled_reason".
    still_disabled = (service_record.disabled or
                      service_record.disabled_reason)
    self.assertTrue(not still_disabled)
def test_immediate_delete(self):
def fake_lookup_by_name(instance_name):
raise exception.InstanceNotFound(instance_id=instance_name)
@ -7169,6 +7228,7 @@ class LibvirtNonblockingTestCase(test.TestCase):
# Test bug 962840.
import nova.virt.libvirt.driver as libvirt_driver
connection = libvirt_driver.LibvirtDriver('')
connection.set_host_enabled = mock.Mock()
jsonutils.to_primitive(connection._conn, convert_instances=True)

View File

@ -19,6 +19,7 @@ import fixtures
import sys
import traceback
from mock import MagicMock
import netaddr
from nova.compute import manager
@ -706,6 +707,14 @@ class LibvirtConnTestCase(_VirtDriverTestCase, test.TestCase):
# Point _VirtDriverTestCase at the right module
self.driver_module = 'nova.virt.libvirt.LibvirtDriver'
super(LibvirtConnTestCase, self).setUp()
self.stubs.Set(self.connection,
'set_host_enabled', MagicMock())
self.useFixture(fixtures.MonkeyPatch(
'nova.context.get_admin_context',
self._fake_admin_context))
def _fake_admin_context(self, *args, **kwargs):
return self.ctxt
def test_force_hard_reboot(self):
self.flags(libvirt_wait_soft_reboot_seconds=0)
@ -715,3 +724,19 @@ class LibvirtConnTestCase(_VirtDriverTestCase, test.TestCase):
# there is lack of fake stuff to execute this method. so pass.
self.skipTest("Test nothing, but this method"
" needed to override superclass.")
def test_set_host_enabled(self):
    # Passing a non-boolean reason string must leave the service record
    # disabled with that string stored as the reason.
    from nova.objects import service as service_obj

    self.mox.UnsetStubs()

    # Previous status of the service: disabled: False
    fake_service = MagicMock()
    fake_service.__getitem__.return_value = False

    self.mox.StubOutWithMock(service_obj.Service,
                             'get_by_compute_host')
    expected_lookup = service_obj.Service.get_by_compute_host(
        self.ctxt, 'fake-mini')
    expected_lookup.AndReturn(fake_service)
    self.mox.ReplayAll()

    # The host passed here differs from the 'fake-mini' host in the
    # expected lookup above.
    self.connection.set_host_enabled('my_test_host', 'ERROR!')

    disabled_with_reason = (
        fake_service.disabled and
        fake_service.disabled_reason == 'ERROR!')
    self.assertTrue(disabled_with_reason)

View File

@ -73,6 +73,7 @@ from nova import exception
from nova.image import glance
from nova import notifier
from nova.objects import instance as instance_obj
from nova.objects import service as service_obj
from nova.openstack.common import excutils
from nova.openstack.common import fileutils
from nova.openstack.common.gettextutils import _
@ -588,12 +589,20 @@ class LibvirtDriver(driver.ComputeDriver):
if not wrapped_conn or not self._test_connection(wrapped_conn):
LOG.debug(_('Connecting to libvirt: %s'), self.uri())
try:
if not CONF.libvirt_nonblocking:
wrapped_conn = self._connect(self.uri(), self.read_only)
wrapped_conn = self._connect(self.uri(),
self.read_only)
else:
wrapped_conn = tpool.proxy_call(
(libvirt.virDomain, libvirt.virConnect),
self._connect, self.uri(), self.read_only)
finally:
# Enabling the compute service, in case it was disabled
# since the connection was successful.
is_connected = bool(wrapped_conn)
self.set_host_enabled(CONF.host, is_connected)
self._wrapped_conn = wrapped_conn
try:
@ -627,9 +636,14 @@ class LibvirtDriver(driver.ComputeDriver):
def _close_callback(self, conn, reason, opaque):
    """Handle a notification that a libvirt connection was closed.

    When ``conn`` is the connection currently cached in
    ``self._wrapped_conn``, the loss is logged once as a warning, the
    cached connection is dropped, and ``set_host_enabled`` is called
    with the loss message as the disable reason. A notification for
    any other connection is ignored.

    :param conn: the connection that was closed
    :param reason: close reason, interpolated into the logged message
    :param opaque: not used by this method
    """
    with self._wrapped_conn_lock:
        if conn == self._wrapped_conn:
            # Log the loss exactly once, at warning level.
            _error = _("Connection to libvirt lost: %s") % reason
            LOG.warn(_error)
            self._wrapped_conn = None

            # Disable the compute service so that no new instances
            # are scheduled on this host while it is disconnected.
            self.set_host_enabled(CONF.host, _error)
@staticmethod
def _test_connection(conn):
try:
@ -2589,6 +2603,37 @@ class LibvirtDriver(driver.ComputeDriver):
% {'dev': pci_devs, 'dom': dom.ID()})
raise
def set_host_enabled(self, host, enabled):
    """Sets the specified host's ability to accept new instances.

    Looks up the compute service record for ``CONF.host`` and, when its
    ``disabled`` flag differs from the requested state, updates
    ``disabled`` and ``disabled_reason`` and saves the record. Lookup
    and save failures are logged as warnings and are not propagated.

    :param host: not consulted; the service is always looked up by
                 ``CONF.host``
    :param enabled: ``True`` enables the service and ``False`` disables
                    it, both with an empty reason. Any non-boolean
                    value is stored as the disable reason and disables
                    the service when it is truthy.
    """
    # Keyed by the *disabled* flag, so True must map to 'Disabled'.
    status_name = {True: 'Disabled',
                   False: 'Enabled'}

    if isinstance(enabled, bool):
        disable_service = not enabled
        disable_reason = ''
    else:
        # A non-boolean value doubles as the reason for disabling.
        disable_service = bool(enabled)
        disable_reason = enabled

    ctx = nova_context.get_admin_context()
    try:
        service = service_obj.Service.get_by_compute_host(ctx, CONF.host)

        # Only touch the record when the state actually changes.
        # NOTE(review): a service that is already disabled keeps its
        # existing reason -- confirm this is intended.
        if service.disabled != disable_service:
            service.disabled = disable_service
            service.disabled_reason = disable_reason
            service.save()
            LOG.debug(_('Updating compute service status to: %s'),
                      status_name[disable_service])
    except exception.ComputeHostNotFound:
        LOG.warn(_('Cannot update service status on host: %s, '
                   'since it is not registered.') % CONF.host)
    except Exception:
        # Best effort: a failed status update is only logged.
        LOG.warn(_('Cannot update service status on host: %s, '
                   'due to an unexpected exception.') % CONF.host,
                 exc_info=True)
def get_host_capabilities(self):
"""Returns an instance of config.LibvirtConfigCaps representing
the capabilities of the host.