libvirt: retry to undefine network filters during _post_live_migration
Sometimes post live migration fails because libvirt raises an error saying the network filter is still in use. Use the live_migration_retry_count config option (like in pre-live-migrate) to retry the operation until it's successful or we time out. Also adds some debug logging to _post_live_migration in the compute manager before calling driver.unfilter_instance and driver.cleanup, which calls unfilter_instance, so that when we hit this we can see which path we're coming from. Closes-Bug: #1438803 Change-Id: Idffbe2857fbb23fafab1591dea82f5d64edac4bc
This commit is contained in:
parent
326ebe1aad
commit
20a95915c9
|
@ -5357,6 +5357,8 @@ class ComputeManager(manager.Manager):
|
|||
"live_migration._post.start",
|
||||
network_info=network_info)
|
||||
# Releasing security group ingress rule.
|
||||
LOG.debug('Calling driver.unfilter_instance from _post_live_migration',
|
||||
instance=instance)
|
||||
self.driver.unfilter_instance(instance,
|
||||
network_info)
|
||||
|
||||
|
@ -5386,6 +5388,8 @@ class ComputeManager(manager.Manager):
|
|||
block_migration, migrate_data)
|
||||
|
||||
if do_cleanup:
|
||||
LOG.debug('Calling driver.cleanup from _post_live_migration',
|
||||
instance=instance)
|
||||
self.driver.cleanup(ctxt, instance, network_info,
|
||||
destroy_disks=destroy_disks,
|
||||
migrate_data=migrate_data,
|
||||
|
|
|
@ -17,6 +17,7 @@ import re
|
|||
import uuid
|
||||
from xml.dom import minidom
|
||||
|
||||
from eventlet import greenthread
|
||||
from lxml import etree
|
||||
import mock
|
||||
from mox3 import mox
|
||||
|
@ -515,6 +516,7 @@ class IptablesFirewallTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(1, len(rules))
|
||||
|
||||
|
||||
@mock.patch.object(firewall, 'libvirt', fakelibvirt)
|
||||
class NWFilterTestCase(test.NoDBTestCase):
|
||||
def setUp(self):
|
||||
super(NWFilterTestCase, self).setUp()
|
||||
|
@ -636,6 +638,66 @@ class NWFilterTestCase(test.NoDBTestCase):
|
|||
self.fw.unfilter_instance(instance_ref, network_info)
|
||||
self.assertEqual(original_filter_count - len(fakefilter.filters), 1)
|
||||
|
||||
@mock.patch.object(fakelibvirt.virConnect, "nwfilterLookupByName")
@mock.patch.object(greenthread, 'sleep')
def test_unfilter_instance_retry_and_error(self, mock_sleep, mock_lookup):
    """Undefine keeps failing with "in use" until the retries run out.

    With a retry count of two, the filter is looked up and undefined
    twice, with a single sleep in between, and the libvirt error is
    then raised out of unfilter_instance.
    """
    self.flags(live_migration_retry_count=2)

    # Every undefine() attempt reports that the filter is still in use.
    busy_error = fakelibvirt.libvirtError('nwfilter is in use')
    busy_error.err = (fakelibvirt.VIR_ERR_OPERATION_INVALID,)
    undefine_mock = mock.Mock(side_effect=busy_error)
    mock_lookup.return_value = mock.MagicMock(undefine=undefine_mock)

    instance = self._create_instance()
    nw_info = _fake_network_info(self.stubs, 1)

    self.assertRaises(fakelibvirt.libvirtError,
                      self.fw.unfilter_instance,
                      instance, nw_info)

    # Two attempts were made, separated by exactly one sleep.
    self.assertEqual(2, mock_lookup.call_count)
    self.assertEqual(2, undefine_mock.call_count)
    mock_sleep.assert_called_once_with(1)
|
||||
|
||||
@mock.patch.object(fakelibvirt.virConnect, "nwfilterLookupByName")
@mock.patch.object(greenthread, 'sleep')
def test_unfilter_instance_retry_not_found(self, mock_sleep, mock_lookup):
    """The retry loop exits quietly once the filter is reported missing.

    The first undefine() fails with "in use", the second with
    "no nwfilter"; unfilter_instance must return without raising.
    """
    busy_error = fakelibvirt.libvirtError('nwfilter is in use')
    busy_error.err = (fakelibvirt.VIR_ERR_OPERATION_INVALID,)
    missing_error = fakelibvirt.libvirtError(
        'no nwfilter with matching name')
    missing_error.err = (fakelibvirt.VIR_ERR_NO_NWFILTER,)

    # First call: still in use; second call: the filter is gone.
    undefine_mock = mock.Mock(side_effect=[busy_error, missing_error])
    mock_lookup.return_value = mock.MagicMock(undefine=undefine_mock)

    instance = self._create_instance()
    nw_info = _fake_network_info(self.stubs, 1)

    self.fw.unfilter_instance(instance, nw_info)

    # Two attempts were made, separated by exactly one sleep.
    self.assertEqual(2, mock_lookup.call_count)
    self.assertEqual(2, undefine_mock.call_count)
    mock_sleep.assert_called_once_with(1)
|
||||
|
||||
@mock.patch.object(fakelibvirt.virConnect, "nwfilterLookupByName")
@mock.patch.object(greenthread, 'sleep')
def test_unfilter_instance_retry_and_pass(self, mock_sleep, mock_lookup):
    """An "in use" failure is retried and a later undefine() succeeds.

    The first undefine() raises the in-use error and the second returns
    normally, so unfilter_instance completes without raising.
    """
    busy_error = fakelibvirt.libvirtError('nwfilter is in use')
    busy_error.err = (fakelibvirt.VIR_ERR_OPERATION_INVALID,)

    # First call fails with "in use"; second call succeeds.
    undefine_mock = mock.Mock(side_effect=[busy_error, None])
    mock_lookup.return_value = mock.MagicMock(undefine=undefine_mock)

    instance = self._create_instance()
    nw_info = _fake_network_info(self.stubs, 1)

    self.fw.unfilter_instance(instance, nw_info)

    # Two attempts were made, separated by exactly one sleep.
    self.assertEqual(2, mock_lookup.call_count)
    self.assertEqual(2, undefine_mock.call_count)
    mock_sleep.assert_called_once_with(1)
|
||||
|
||||
def test_redefining_nwfilters(self):
|
||||
fakefilter = NWFilterFakes()
|
||||
self.fw._conn.nwfilterDefineXML = fakefilter.filterDefineXMLMock
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import uuid
|
||||
|
||||
from eventlet import greenthread
|
||||
from lxml import etree
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
@ -31,6 +32,7 @@ from nova.virt import netutils
|
|||
LOG = logging.getLogger(__name__)
|
||||
CONF = cfg.CONF
|
||||
CONF.import_opt('use_ipv6', 'nova.netconf')
|
||||
CONF.import_opt('live_migration_retry_count', 'nova.compute.manager')
|
||||
|
||||
libvirt = None
|
||||
|
||||
|
@ -269,17 +271,33 @@ class NWFilterFirewall(base_firewall.FirewallDriver):
|
|||
nic_id = vif['address'].replace(':', '')
|
||||
instance_filter_name = self._instance_filter_name(instance, nic_id)
|
||||
|
||||
try:
|
||||
_nw = self._conn.nwfilterLookupByName(instance_filter_name)
|
||||
_nw.undefine()
|
||||
except libvirt.libvirtError as e:
|
||||
errcode = e.get_error_code()
|
||||
if errcode == libvirt.VIR_ERR_OPERATION_INVALID:
|
||||
# This happens when the instance filter is still in
|
||||
# use (ie. when the instance has not terminated properly)
|
||||
raise
|
||||
LOG.debug('The nwfilter(%s) is not found.',
|
||||
instance_filter_name, instance=instance)
|
||||
# nwfilters may be defined in a separate thread in the case
# of libvirt non-blocking mode, so we wait for completion.
# Try to undefine the filter up to live_migration_retry_count times,
# sleeping one second between attempts while libvirt reports that the
# filter is still in use.
max_retry = CONF.live_migration_retry_count
for cnt in range(max_retry):
    try:
        _nw = self._conn.nwfilterLookupByName(instance_filter_name)
        _nw.undefine()
        break
    except libvirt.libvirtError as e:
        errcode = e.get_error_code()
        if errcode != libvirt.VIR_ERR_OPERATION_INVALID:
            # Classify the error before checking the retry budget so
            # that a missing filter is tolerated on every attempt,
            # including the last one; there is nothing left to undefine.
            LOG.debug('The nwfilter(%s) is not found.',
                      instance_filter_name, instance=instance)
            break
        # This happens when the instance filter is still in use
        # (ie. when the instance has not terminated properly)
        if cnt == max_retry - 1:
            # Out of retries and the filter is still in use: let the
            # libvirt error propagate to the caller.
            raise
        LOG.info(_LI('Failed to undefine network filter '
                     '%(name)s. Try %(cnt)d of '
                     '%(max_retry)d.'),
                 {'name': instance_filter_name,
                  'cnt': cnt + 1,
                  'max_retry': max_retry},
                 instance=instance)
        greenthread.sleep(1)
|
||||
|
||||
@staticmethod
|
||||
def _instance_filter_name(instance, nic_id=None):
|
||||
|
|
Loading…
Reference in New Issue