Workaround for race condition in libvirt
Method "info" is affected by a race condition in libvirt. Methods "vcpus" and "memoryStats" are also affected, but their errors are handled gracefully by present code. This patch detects the race and then performs a retry on error. New code should avoid calling libvirt.virDomain.info directly. Instead, method Host.get_domain_info() should be called. A full fix for this bug would involve upgrading libvirt to version >=1.2.11. Change-Id: I4e3baceabdbc843635d07652abf80d39d58d9e06 Partial-Bug: #1372670
This commit is contained in:
parent
1cdedccf9e
commit
32c10850ae
|
@ -140,6 +140,7 @@ VIR_FROM_NODEDEV = 666
|
|||
VIR_ERR_NO_SUPPORT = 3
|
||||
VIR_ERR_XML_DETAIL = 350
|
||||
VIR_ERR_NO_DOMAIN = 420
|
||||
VIR_ERR_OPERATION_FAILED = 510
|
||||
VIR_ERR_OPERATION_INVALID = 55
|
||||
VIR_ERR_OPERATION_TIMEOUT = 68
|
||||
VIR_ERR_NO_NWFILTER = 620
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import mock
|
||||
|
||||
from nova.compute import power_state
|
||||
from nova import test
|
||||
from nova.tests.unit.virt.libvirt import fakelibvirt
|
||||
from nova.virt.libvirt import compat
|
||||
from nova.virt.libvirt import host
|
||||
|
||||
|
||||
class CompatTestCase(test.NoDBTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(CompatTestCase, self).setUp()
|
||||
self.useFixture(fakelibvirt.FakeLibvirtFixture())
|
||||
|
||||
@mock.patch.object(host.Host, 'has_min_version')
|
||||
def test_get_domain_info(self, mock_has_min_version):
|
||||
test_host = host.Host("qemu:///system")
|
||||
domain = mock.MagicMock()
|
||||
expected = [power_state.RUNNING, 512, 512, None, None]
|
||||
race = fakelibvirt.make_libvirtError(
|
||||
fakelibvirt.libvirtError,
|
||||
'ERR',
|
||||
error_code=fakelibvirt.VIR_ERR_OPERATION_FAILED,
|
||||
error_message='cannot read cputime for domain')
|
||||
|
||||
mock_has_min_version.return_value = True
|
||||
|
||||
domain.info.return_value = expected
|
||||
actual = compat.get_domain_info(fakelibvirt, test_host, domain)
|
||||
self.assertEqual(actual, expected)
|
||||
self.assertEqual(domain.info.call_count, 1)
|
||||
domain.info.reset_mock()
|
||||
|
||||
domain.info.side_effect = race
|
||||
self.assertRaises(fakelibvirt.libvirtError,
|
||||
compat.get_domain_info,
|
||||
fakelibvirt, test_host, domain)
|
||||
self.assertEqual(domain.info.call_count, 1)
|
||||
domain.info.reset_mock()
|
||||
|
||||
mock_has_min_version.return_value = False
|
||||
|
||||
domain.info.side_effect = [race, expected]
|
||||
actual = compat.get_domain_info(fakelibvirt, test_host, domain)
|
||||
self.assertEqual(actual, expected)
|
||||
self.assertEqual(domain.info.call_count, 2)
|
||||
domain.info.reset_mock()
|
||||
|
||||
domain.info.side_effect = race
|
||||
self.assertRaises(fakelibvirt.libvirtError,
|
||||
compat.get_domain_info,
|
||||
fakelibvirt, test_host, domain)
|
||||
self.assertEqual(domain.info.call_count, 2)
|
|
@ -0,0 +1,38 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from oslo_log import log as logging
|
||||
|
||||
from nova.i18n import _LW
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_domain_info(libvirt, host, virt_dom):
|
||||
"""Method virDomain.info (libvirt version < 1.2.11) is
|
||||
affected by a race condition. See bug #1372670 for more details.
|
||||
This method detects it to perform a retry.
|
||||
"""
|
||||
def is_race(e):
|
||||
code = e.get_error_code()
|
||||
message = e.get_error_message()
|
||||
return (code == libvirt.VIR_ERR_OPERATION_FAILED and
|
||||
'cannot read cputime for domain' in message)
|
||||
|
||||
try:
|
||||
return virt_dom.info()
|
||||
except libvirt.libvirtError as e:
|
||||
if not host.has_min_version((1, 2, 11)) and is_race(e):
|
||||
LOG.warn(_LW('Race detected in libvirt.virDomain.info, '
|
||||
'trying one more time'))
|
||||
return virt_dom.info()
|
||||
raise
|
|
@ -640,7 +640,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# If the instance is already shut off, we get this:
|
||||
# Code=55 Error=Requested operation is not valid:
|
||||
# domain is not running
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
if state == power_state.SHUTDOWN:
|
||||
is_okay = True
|
||||
elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR:
|
||||
|
@ -1042,7 +1042,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# domains are persistent, but we should only
|
||||
# affect live if the domain is running.
|
||||
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
if state in (power_state.RUNNING, power_state.PAUSED):
|
||||
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
|
||||
|
||||
|
@ -1180,7 +1180,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# domains are persistent, but we should only
|
||||
# affect live if the domain is running.
|
||||
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
if state in (power_state.RUNNING, power_state.PAUSED):
|
||||
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
|
||||
virt_dom.detachDeviceFlags(xml, flags)
|
||||
|
@ -1219,7 +1219,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
CONF.libvirt.virt_type)
|
||||
try:
|
||||
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
if state == power_state.RUNNING or state == power_state.PAUSED:
|
||||
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
|
||||
virt_dom.attachDeviceFlags(cfg.to_xml(), flags)
|
||||
|
@ -1237,7 +1237,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
try:
|
||||
self.vif_driver.unplug(instance, vif)
|
||||
flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
if state == power_state.RUNNING or state == power_state.PAUSED:
|
||||
flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE
|
||||
virt_dom.detachDeviceFlags(cfg.to_xml(), flags)
|
||||
|
@ -1311,7 +1311,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
|
||||
snapshot_name = uuid.uuid4().hex
|
||||
|
||||
state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]
|
||||
state = self._get_power_state(virt_dom)
|
||||
|
||||
# NOTE(rmk): Live snapshots require QEMU 1.3 and Libvirt 1.0.0.
|
||||
# These restrictions can be relaxed as other configurations
|
||||
|
@ -1997,7 +1997,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
:returns: True if the reboot succeeded
|
||||
"""
|
||||
dom = self._host.get_domain(instance)
|
||||
state = LIBVIRT_POWER_STATE[dom.info()[0]]
|
||||
state = self._get_power_state(dom)
|
||||
old_domid = dom.ID()
|
||||
# NOTE(vish): This check allows us to reboot an instance that
|
||||
# is already shutdown.
|
||||
|
@ -2010,7 +2010,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
pci_manager.get_instance_pci_devs(instance, 'all'))
|
||||
for x in xrange(CONF.libvirt.wait_soft_reboot_seconds):
|
||||
dom = self._host.get_domain(instance)
|
||||
state = LIBVIRT_POWER_STATE[dom.info()[0]]
|
||||
state = self._get_power_state(dom)
|
||||
new_domid = dom.ID()
|
||||
|
||||
# NOTE(ivoks): By checking domain IDs, we make sure we are
|
||||
|
@ -2144,8 +2144,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# wait for it to shutdown
|
||||
return True
|
||||
|
||||
(state, _max_mem, _mem, _cpus, _t) = dom.info()
|
||||
state = LIBVIRT_POWER_STATE[state]
|
||||
state = self._get_power_state(dom)
|
||||
if state in SHUTDOWN_STATES:
|
||||
LOG.info(_LI("Instance already shutdown."),
|
||||
instance=instance)
|
||||
|
@ -2159,8 +2158,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
for sec in six.moves.range(timeout):
|
||||
|
||||
dom = self._host.get_domain(instance)
|
||||
(state, _max_mem, _mem, _cpus, _t) = dom.info()
|
||||
state = LIBVIRT_POWER_STATE[state]
|
||||
state = self._get_power_state(dom)
|
||||
|
||||
if state in SHUTDOWN_STATES:
|
||||
LOG.info(_LI("Instance shutdown successfully after %d "
|
||||
|
@ -2244,7 +2242,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
# anything if it is.
|
||||
try:
|
||||
domain = self._host.get_domain(instance)
|
||||
state = LIBVIRT_POWER_STATE[domain.info()[0]]
|
||||
state = self._get_power_state(domain)
|
||||
|
||||
ignored_states = (power_state.RUNNING,
|
||||
power_state.SUSPENDED,
|
||||
|
@ -4176,7 +4174,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
"""
|
||||
virt_dom = self._host.get_domain(instance)
|
||||
try:
|
||||
dom_info = virt_dom.info()
|
||||
dom_info = self._host.get_domain_info(virt_dom)
|
||||
except libvirt.libvirtError as ex:
|
||||
error_code = ex.get_error_code()
|
||||
if error_code == libvirt.VIR_ERR_NO_DOMAIN:
|
||||
|
@ -4542,7 +4540,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
used = 0
|
||||
for dom in self._host.list_instance_domains(only_guests=False):
|
||||
try:
|
||||
dom_mem = int(dom.info()[2])
|
||||
dom_mem = int(self._host.get_domain_info(dom)[2])
|
||||
except libvirt.libvirtError as e:
|
||||
LOG.warn(_LW("couldn't obtain the memory from domain:"
|
||||
" %(uuid)s, exception: %(ex)s") %
|
||||
|
@ -6501,7 +6499,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
xml = domain.XMLDesc(0)
|
||||
xml_doc = etree.fromstring(xml)
|
||||
|
||||
(state, max_mem, mem, num_cpu, cpu_time) = domain.info()
|
||||
(state, max_mem, mem, num_cpu, cpu_time) = \
|
||||
self._host.get_domain_info(domain)
|
||||
config_drive = configdrive.required_by(instance)
|
||||
launched_at = timeutils.normalize_time(instance.launched_at)
|
||||
uptime = timeutils.delta_seconds(launched_at,
|
||||
|
@ -6666,3 +6665,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
def is_supported_fs_format(self, fs_type):
|
||||
return fs_type in [disk.FS_FORMAT_EXT2, disk.FS_FORMAT_EXT3,
|
||||
disk.FS_FORMAT_EXT4, disk.FS_FORMAT_XFS]
|
||||
|
||||
def _get_power_state(self, virt_dom):
|
||||
dom_info = self._host.get_domain_info(virt_dom)
|
||||
return LIBVIRT_POWER_STATE[dom_info[0]]
|
||||
|
|
|
@ -49,6 +49,7 @@ from nova.i18n import _LW
|
|||
from nova import rpc
|
||||
from nova import utils
|
||||
from nova.virt import event as virtevent
|
||||
from nova.virt.libvirt import compat
|
||||
from nova.virt.libvirt import config as vconfig
|
||||
|
||||
libvirt = None
|
||||
|
@ -853,3 +854,6 @@ class Host(object):
|
|||
secret = self.find_secret(usage_type, usage_id)
|
||||
if secret is not None:
|
||||
secret.undefine()
|
||||
|
||||
def get_domain_info(self, virt_dom):
|
||||
return compat.get_domain_info(libvirt, self, virt_dom)
|
||||
|
|
Loading…
Reference in New Issue