Merge "Workaround for race condition in libvirt"

This commit is contained in:
Jenkins
2015-03-30 13:12:44 +00:00
committed by Gerrit Code Review
5 changed files with 128 additions and 16 deletions

View File

@@ -140,6 +140,7 @@ VIR_FROM_NODEDEV = 666
VIR_ERR_NO_SUPPORT = 3 VIR_ERR_NO_SUPPORT = 3
VIR_ERR_XML_DETAIL = 350 VIR_ERR_XML_DETAIL = 350
VIR_ERR_NO_DOMAIN = 420 VIR_ERR_NO_DOMAIN = 420
VIR_ERR_OPERATION_FAILED = 510
VIR_ERR_OPERATION_INVALID = 55 VIR_ERR_OPERATION_INVALID = 55
VIR_ERR_OPERATION_TIMEOUT = 68 VIR_ERR_OPERATION_TIMEOUT = 68
VIR_ERR_NO_NWFILTER = 620 VIR_ERR_NO_NWFILTER = 620

View File

@@ -0,0 +1,66 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
from nova.compute import power_state
from nova import test
from nova.tests.unit.virt.libvirt import fakelibvirt
from nova.virt.libvirt import compat
from nova.virt.libvirt import host
class CompatTestCase(test.NoDBTestCase):
    """Tests for the helpers in nova.virt.libvirt.compat."""

    def setUp(self):
        super(CompatTestCase, self).setUp()
        self.useFixture(fakelibvirt.FakeLibvirtFixture())

    @mock.patch.object(host.Host, 'has_min_version')
    def test_get_domain_info(self, mock_has_min_version):
        """Exercise get_domain_info with and without the minimum version.

        Four scenarios are run back to back against the same mocked
        domain; ``domain.info`` is reset between them so each call count
        only reflects the scenario that just ran.
        """
        fake_host = host.Host("qemu:///system")
        fake_domain = mock.MagicMock()
        info_mock = fake_domain.info
        domain_info = [power_state.RUNNING, 512, 512, None, None]
        race_error = fakelibvirt.make_libvirtError(
            fakelibvirt.libvirtError,
            'ERR',
            error_code=fakelibvirt.VIR_ERR_OPERATION_FAILED,
            error_message='cannot read cputime for domain')

        # Scenario 1: minimum version satisfied, info() succeeds at once.
        mock_has_min_version.return_value = True
        info_mock.return_value = domain_info
        result = compat.get_domain_info(fakelibvirt, fake_host, fake_domain)
        self.assertEqual(result, domain_info)
        self.assertEqual(info_mock.call_count, 1)

        # Scenario 2: minimum version satisfied, info() raises the race
        # error; it must propagate without a second attempt.
        info_mock.reset_mock()
        info_mock.side_effect = race_error
        self.assertRaises(fakelibvirt.libvirtError,
                          compat.get_domain_info,
                          fakelibvirt, fake_host, fake_domain)
        self.assertEqual(info_mock.call_count, 1)

        # Scenario 3: minimum version NOT satisfied, first info() raises
        # the race error and the retry succeeds.
        info_mock.reset_mock()
        mock_has_min_version.return_value = False
        info_mock.side_effect = [race_error, domain_info]
        result = compat.get_domain_info(fakelibvirt, fake_host, fake_domain)
        self.assertEqual(result, domain_info)
        self.assertEqual(info_mock.call_count, 2)

        # Scenario 4: minimum version NOT satisfied and info() keeps
        # raising; the error surfaces after exactly one retry.
        info_mock.reset_mock()
        info_mock.side_effect = race_error
        self.assertRaises(fakelibvirt.libvirtError,
                          compat.get_domain_info,
                          fakelibvirt, fake_host, fake_domain)
        self.assertEqual(info_mock.call_count, 2)

View File

@@ -0,0 +1,38 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from nova.i18n import _LW
LOG = logging.getLogger(__name__)
def get_domain_info(libvirt, host, virt_dom):
    """Return ``virt_dom.info()``, retrying once on a known libvirt race.

    Method virDomain.info (libvirt version < 1.2.11) is affected by a
    race condition. See bug #1372670 for more details. This method
    detects that failure and performs a single retry.

    :param libvirt: the libvirt module (or a stand-in) providing
                    ``libvirtError`` and ``VIR_ERR_OPERATION_FAILED``
    :param host: object whose ``has_min_version((1, 2, 11))`` reports
                 whether the minimum version is already satisfied
    :param virt_dom: domain object whose ``info()`` method is called
    :returns: whatever ``virt_dom.info()`` returns
    :raises: libvirt.libvirtError when the first call fails and either
             the minimum version is satisfied or the error is not the
             known race, and also when the retry itself fails
    """
    def is_race(e):
        code = e.get_error_code()
        # Normalize a missing message to '' so the substring test cannot
        # raise TypeError and mask the original libvirt error.
        message = e.get_error_message() or ''
        return (code == libvirt.VIR_ERR_OPERATION_FAILED and
                'cannot read cputime for domain' in message)

    try:
        return virt_dom.info()
    except libvirt.libvirtError as e:
        if not host.has_min_version((1, 2, 11)) and is_race(e):
            # LOG.warning is used because the warn() alias is deprecated.
            LOG.warning(_LW('Race detected in libvirt.virDomain.info, '
                            'trying one more time'))
            # Only one retry: a second failure propagates to the caller.
            return virt_dom.info()
        raise

View File

@@ -639,7 +639,7 @@ class LibvirtDriver(driver.ComputeDriver):
# If the instance is already shut off, we get this: # If the instance is already shut off, we get this:
# Code=55 Error=Requested operation is not valid: # Code=55 Error=Requested operation is not valid:
# domain is not running # domain is not running
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
if state == power_state.SHUTDOWN: if state == power_state.SHUTDOWN:
is_okay = True is_okay = True
elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR: elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR:
@@ -1041,7 +1041,7 @@ class LibvirtDriver(driver.ComputeDriver):
# domains are persistent, but we should only # domains are persistent, but we should only
# affect live if the domain is running. # affect live if the domain is running.
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
if state in (power_state.RUNNING, power_state.PAUSED): if state in (power_state.RUNNING, power_state.PAUSED):
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
@@ -1179,7 +1179,7 @@ class LibvirtDriver(driver.ComputeDriver):
# domains are persistent, but we should only # domains are persistent, but we should only
# affect live if the domain is running. # affect live if the domain is running.
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
if state in (power_state.RUNNING, power_state.PAUSED): if state in (power_state.RUNNING, power_state.PAUSED):
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
virt_dom.detachDeviceFlags(xml, flags) virt_dom.detachDeviceFlags(xml, flags)
@@ -1218,7 +1218,7 @@ class LibvirtDriver(driver.ComputeDriver):
CONF.libvirt.virt_type) CONF.libvirt.virt_type)
try: try:
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
if state == power_state.RUNNING or state == power_state.PAUSED: if state == power_state.RUNNING or state == power_state.PAUSED:
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
virt_dom.attachDeviceFlags(cfg.to_xml(), flags) virt_dom.attachDeviceFlags(cfg.to_xml(), flags)
@@ -1236,7 +1236,7 @@ class LibvirtDriver(driver.ComputeDriver):
try: try:
self.vif_driver.unplug(instance, vif) self.vif_driver.unplug(instance, vif)
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
if state == power_state.RUNNING or state == power_state.PAUSED: if state == power_state.RUNNING or state == power_state.PAUSED:
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
virt_dom.detachDeviceFlags(cfg.to_xml(), flags) virt_dom.detachDeviceFlags(cfg.to_xml(), flags)
@@ -1310,7 +1310,7 @@ class LibvirtDriver(driver.ComputeDriver):
snapshot_name = uuid.uuid4().hex snapshot_name = uuid.uuid4().hex
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]] state = self._get_power_state(virt_dom)
# NOTE(rmk): Live snapshots require QEMU 1.3 and Libvirt 1.0.0. # NOTE(rmk): Live snapshots require QEMU 1.3 and Libvirt 1.0.0.
# These restrictions can be relaxed as other configurations # These restrictions can be relaxed as other configurations
@@ -1996,7 +1996,7 @@ class LibvirtDriver(driver.ComputeDriver):
:returns: True if the reboot succeeded :returns: True if the reboot succeeded
""" """
dom = self._host.get_domain(instance) dom = self._host.get_domain(instance)
state = LIBVIRT_POWER_STATE[dom.info()[0]] state = self._get_power_state(dom)
old_domid = dom.ID() old_domid = dom.ID()
# NOTE(vish): This check allows us to reboot an instance that # NOTE(vish): This check allows us to reboot an instance that
# is already shutdown. # is already shutdown.
@@ -2009,7 +2009,7 @@ class LibvirtDriver(driver.ComputeDriver):
pci_manager.get_instance_pci_devs(instance, 'all')) pci_manager.get_instance_pci_devs(instance, 'all'))
for x in xrange(CONF.libvirt.wait_soft_reboot_seconds): for x in xrange(CONF.libvirt.wait_soft_reboot_seconds):
dom = self._host.get_domain(instance) dom = self._host.get_domain(instance)
state = LIBVIRT_POWER_STATE[dom.info()[0]] state = self._get_power_state(dom)
new_domid = dom.ID() new_domid = dom.ID()
# NOTE(ivoks): By checking domain IDs, we make sure we are # NOTE(ivoks): By checking domain IDs, we make sure we are
@@ -2143,8 +2143,7 @@ class LibvirtDriver(driver.ComputeDriver):
# wait for it to shutdown # wait for it to shutdown
return True return True
(state, _max_mem, _mem, _cpus, _t) = dom.info() state = self._get_power_state(dom)
state = LIBVIRT_POWER_STATE[state]
if state in SHUTDOWN_STATES: if state in SHUTDOWN_STATES:
LOG.info(_LI("Instance already shutdown."), LOG.info(_LI("Instance already shutdown."),
instance=instance) instance=instance)
@@ -2158,8 +2157,7 @@ class LibvirtDriver(driver.ComputeDriver):
for sec in six.moves.range(timeout): for sec in six.moves.range(timeout):
dom = self._host.get_domain(instance) dom = self._host.get_domain(instance)
(state, _max_mem, _mem, _cpus, _t) = dom.info() state = self._get_power_state(dom)
state = LIBVIRT_POWER_STATE[state]
if state in SHUTDOWN_STATES: if state in SHUTDOWN_STATES:
LOG.info(_LI("Instance shutdown successfully after %d " LOG.info(_LI("Instance shutdown successfully after %d "
@@ -2243,7 +2241,7 @@ class LibvirtDriver(driver.ComputeDriver):
# anything if it is. # anything if it is.
try: try:
domain = self._host.get_domain(instance) domain = self._host.get_domain(instance)
state = LIBVIRT_POWER_STATE[domain.info()[0]] state = self._get_power_state(domain)
ignored_states = (power_state.RUNNING, ignored_states = (power_state.RUNNING,
power_state.SUSPENDED, power_state.SUSPENDED,
@@ -4172,7 +4170,7 @@ class LibvirtDriver(driver.ComputeDriver):
""" """
virt_dom = self._host.get_domain(instance) virt_dom = self._host.get_domain(instance)
try: try:
dom_info = virt_dom.info() dom_info = self._host.get_domain_info(virt_dom)
except libvirt.libvirtError as ex: except libvirt.libvirtError as ex:
error_code = ex.get_error_code() error_code = ex.get_error_code()
if error_code == libvirt.VIR_ERR_NO_DOMAIN: if error_code == libvirt.VIR_ERR_NO_DOMAIN:
@@ -4538,7 +4536,7 @@ class LibvirtDriver(driver.ComputeDriver):
used = 0 used = 0
for dom in self._host.list_instance_domains(only_guests=False): for dom in self._host.list_instance_domains(only_guests=False):
try: try:
dom_mem = int(dom.info()[2]) dom_mem = int(self._host.get_domain_info(dom)[2])
except libvirt.libvirtError as e: except libvirt.libvirtError as e:
LOG.warn(_LW("couldn't obtain the memory from domain:" LOG.warn(_LW("couldn't obtain the memory from domain:"
" %(uuid)s, exception: %(ex)s") % " %(uuid)s, exception: %(ex)s") %
@@ -6508,7 +6506,8 @@ class LibvirtDriver(driver.ComputeDriver):
xml = domain.XMLDesc(0) xml = domain.XMLDesc(0)
xml_doc = etree.fromstring(xml) xml_doc = etree.fromstring(xml)
(state, max_mem, mem, num_cpu, cpu_time) = domain.info() (state, max_mem, mem, num_cpu, cpu_time) = \
self._host.get_domain_info(domain)
config_drive = configdrive.required_by(instance) config_drive = configdrive.required_by(instance)
launched_at = timeutils.normalize_time(instance.launched_at) launched_at = timeutils.normalize_time(instance.launched_at)
uptime = timeutils.delta_seconds(launched_at, uptime = timeutils.delta_seconds(launched_at,
@@ -6673,3 +6672,7 @@ class LibvirtDriver(driver.ComputeDriver):
def is_supported_fs_format(self, fs_type): def is_supported_fs_format(self, fs_type):
return fs_type in [disk.FS_FORMAT_EXT2, disk.FS_FORMAT_EXT3, return fs_type in [disk.FS_FORMAT_EXT2, disk.FS_FORMAT_EXT3,
disk.FS_FORMAT_EXT4, disk.FS_FORMAT_XFS] disk.FS_FORMAT_EXT4, disk.FS_FORMAT_XFS]
def _get_power_state(self, virt_dom):
    """Return the LIBVIRT_POWER_STATE entry for the given domain.

    The domain info is fetched through ``self._host.get_domain_info``
    and its first element is used as the lookup key.
    """
    state_code = self._host.get_domain_info(virt_dom)[0]
    return LIBVIRT_POWER_STATE[state_code]

View File

@@ -49,6 +49,7 @@ from nova.i18n import _LW
from nova import rpc from nova import rpc
from nova import utils from nova import utils
from nova.virt import event as virtevent from nova.virt import event as virtevent
from nova.virt.libvirt import compat
from nova.virt.libvirt import config as vconfig from nova.virt.libvirt import config as vconfig
libvirt = None libvirt = None
@@ -853,3 +854,6 @@ class Host(object):
secret = self.find_secret(usage_type, usage_id) secret = self.find_secret(usage_type, usage_id)
if secret is not None: if secret is not None:
secret.undefine() secret.undefine()
def get_domain_info(self, virt_dom):
    """Return the info for ``virt_dom`` via compat.get_domain_info.

    Passes the module-level ``libvirt`` object and this host along with
    the domain, and returns the helper's result unchanged.
    """
    domain_info = compat.get_domain_info(libvirt, self, virt_dom)
    return domain_info