Browse Source

Unplug VIFs as part of cleanup of networks

If an instance fails to build, which is possible for a variety of
reasons, we may end up in a situation where we have remnants of a
plugged VIF (typically files) left on the host. This is because we
cleanup from the neutron perspective but don't attempt to unplug the
VIF, a call which may have many side-effects depending on the VIF
driver. Resolve this by always attempting to unplug VIFs as part of the
network cleanup.

A now invalid note is also removed and a unit test corrected.

Conflicts:
	nova/tests/unit/compute/test_compute_mgr.py

NOTE(stephenfin): Conflict is because we're missing change
Ic5cab99944df9e501ba2032eb96911c36304494d ("Port binding based on events
during live migration") which we don't want to backport.

Closes-Bug: #1831771
Related-Bug: #1830081
Signed-off-by: Stephen Finucane <sfinucan@redhat.com>
Change-Id: Ibdbde4ed460a99b0cbe0d6b76e0e5b3c0650f9d9
(cherry picked from commit b3e14931d6)
(cherry picked from commit 3e935325a8)
(cherry picked from commit 265fd4f6bd)
(cherry picked from commit 85521691a8)
changes/06/715406/2
Stephen Finucane 1 year ago
committed by Stephen Finucane
parent
commit
f50bd6e657
5 changed files with 122 additions and 28 deletions
  1. +44
    -0
      nova/compute/manager.py
  2. +1
    -3
      nova/tests/functional/regressions/test_bug_1831771.py
  3. +10
    -2
      nova/tests/functional/test_compute_mgr.py
  4. +67
    -20
      nova/tests/unit/compute/test_compute_mgr.py
  5. +0
    -3
      nova/virt/driver.py

+ 44
- 0
nova/compute/manager.py View File

@@ -2417,6 +2417,50 @@ class ComputeManager(manager.Manager):

def _cleanup_allocated_networks(self, context, instance,
requested_networks):
"""Cleanup networks allocated for instance.

:param context: nova request context
:param instance: nova.objects.instance.Instance object
:param requested_networks: nova.objects.NetworkRequestList
"""
LOG.debug('Unplugging VIFs for instance', instance=instance)

network_info = instance.get_network_info()

# NOTE(stephenfin) to avoid nova destroying the instance without
# unplugging the interface, refresh network_info if it is empty.
if not network_info:
try:
network_info = self.network_api.get_instance_nw_info(
context, instance,
)
except Exception as exc:
LOG.warning(
'Failed to update network info cache when cleaning up '
'allocated networks. Stale VIFs may be left on this host.'
'Error: %s', six.text_type(exc)
)
return

try:
self.driver.unplug_vifs(instance, network_info)
except NotImplementedError:
# This is an optional method so ignore things if it doesn't exist
LOG.debug(
'Virt driver does not provide unplug_vifs method, so it '
'is not possible determine if VIFs should be unplugged.'
)
except exception.NovaException as exc:
# It's possible that the instance never got as far as plugging
# VIFs, in which case we would see an exception which can be
# mostly ignored
LOG.warning(
'Cleaning up VIFs failed for instance. Error: %s',
six.text_type(exc), instance=instance,
)
else:
LOG.debug('Unplugged VIFs for instance', instance=instance)

try:
self._deallocate_network(context, instance, requested_networks)
except Exception:


+ 1
- 3
nova/tests/functional/regressions/test_bug_1831771.py View File

@@ -97,8 +97,6 @@ class TestDelete(test_servers.ProviderUsageBaseTestCase):
# assert that we spawned the instance, and unplugged vifs on
# cleanup
mock_spawn.assert_called()
# FIXME(mdbooth): We should have called unplug_vifs in cleanup, but
# we didn't due to bug 1831771
mock_unplug_vifs.assert_not_called()
mock_unplug_vifs.assert_called()
# FIXME(mdbooth): Bug 1848666
self.assertTrue(active_after_deleting_error[0])

+ 10
- 2
nova/tests/functional/test_compute_mgr.py View File

@@ -10,13 +10,17 @@
# License for the specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import

import fixtures
import mock
from oslo_config import cfg

from nova import context
from nova.network import model as network_model
from nova import objects
from nova import test
from nova.tests import fixtures
from nova.tests import fixtures as nova_fixtures
from nova.tests.unit import cast_as_call
from nova.tests.unit import fake_network
from nova.tests.unit import fake_server_actions
@@ -28,7 +32,7 @@ CONF = cfg.CONF
class ComputeManagerTestCase(test.TestCase):
def setUp(self):
super(ComputeManagerTestCase, self).setUp()
self.useFixture(fixtures.SpawnIsSynchronousFixture())
self.useFixture(nova_fixtures.SpawnIsSynchronousFixture())
self.useFixture(cast_as_call.CastAsCall(self))
self.conductor = self.start_service('conductor')
self.start_service('scheduler')
@@ -36,6 +40,10 @@ class ComputeManagerTestCase(test.TestCase):
self.context = context.RequestContext('fake', 'fake')
fake_server_actions.stub_out_action_events(self)
fake_network.set_stub_network_methods(self)
self.useFixture(fixtures.MockPatch(
'nova.network.base_api.NetworkAPI.get_instance_nw_info',
return_value=network_model.NetworkInfo(),
))

def test_instance_fault_message_no_traceback_with_retry(self):
"""This test simulates a spawn failure on the last retry attempt.


+ 67
- 20
nova/tests/unit/compute/test_compute_mgr.py View File

@@ -91,7 +91,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
@mock.patch.object(manager.ComputeManager, '_get_power_state')
@mock.patch.object(manager.ComputeManager, '_sync_instance_power_state')
@mock.patch.object(objects.Instance, 'get_by_uuid')
def _test_handle_lifecycle_event(self, mock_get, mock_sync,
@mock.patch('nova.network.neutronv2.api.API.migrate_instance_start')
def _test_handle_lifecycle_event(self, migrate_instance_start,
mock_get, mock_sync,
mock_get_power_state, transition,
event_pwr_state, current_pwr_state):
event = mock.Mock()
@@ -5226,9 +5228,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_build_run.side_effect = exception.RescheduledException(reason='',
instance_uuid=self.instance.uuid)

with mock.patch.object(
self.compute.network_api,
'cleanup_instance_network_on_host') as mock_clean:
with test.nested(
mock.patch.object(
self.compute.network_api, 'cleanup_instance_network_on_host',
),
mock.patch.object(
self.compute.network_api, 'get_instance_nw_info',
),
) as (mock_clean, _):
self.compute.build_and_run_instance(self.context, self.instance,
self.image, request_spec={},
filter_properties=self.filter_properties,
@@ -5317,9 +5324,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_build_and_run.side_effect = exception.RescheduledException(
reason='', instance_uuid=self.instance.uuid)

with mock.patch.object(
self.compute.network_api,
'cleanup_instance_network_on_host') as mock_cleanup_network:
with test.nested(
mock.patch.object(
self.compute.network_api, 'cleanup_instance_network_on_host',
),
mock.patch.object(
self.compute.network_api, 'get_instance_nw_info',
),
) as (mock_cleanup_network, _):
self.compute._do_build_and_run_instance(self.context, instance,
self.image, request_spec={},
filter_properties=self.filter_properties,
@@ -5962,9 +5974,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_claim.side_effect = exc
self._do_build_instance_update(mock_save, reschedule_update=True)

with mock.patch.object(
self.compute.network_api,
'cleanup_instance_network_on_host') as mock_clean:
with test.nested(
mock.patch.object(
self.compute.network_api, 'cleanup_instance_network_on_host',
),
mock.patch.object(
self.compute.network_api, 'get_instance_nw_info',
),
) as (mock_clean, _):
self.compute.build_and_run_instance(self.context, self.instance,
self.image, request_spec={},
filter_properties=self.filter_properties,
@@ -6374,18 +6391,48 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_setup.assert_called_once_with(self.context, instance,
instance.host)

def test_cleanup_allocated_networks_instance_not_found(self):
def test__cleanup_allocated_networks__instance_not_found(self):
with test.nested(
mock.patch.object(self.compute, '_deallocate_network'),
mock.patch.object(self.instance, 'save',
side_effect=exception.InstanceNotFound(instance_id=''))
) as (_deallocate_network, save):
mock.patch.object(self.compute.network_api,
'get_instance_nw_info'),
mock.patch.object(self.compute.driver, 'unplug_vifs'),
mock.patch.object(self.compute, '_deallocate_network'),
mock.patch.object(self.instance, 'save',
side_effect=exception.InstanceNotFound(instance_id=''))
) as (mock_nwinfo, mock_unplug, mock_deallocate_network, mock_save):
# Testing that this doesn't raise an exception
self.compute._cleanup_allocated_networks(self.context,
self.instance, self.requested_networks)
save.assert_called_once_with()
self.assertEqual('False',
self.instance.system_metadata['network_allocated'])
self.compute._cleanup_allocated_networks(
self.context, self.instance, self.requested_networks)

mock_nwinfo.assert_called_once_with(
self.context, self.instance)
mock_unplug.assert_called_once_with(
self.instance, mock_nwinfo.return_value)
mock_deallocate_network.assert_called_once_with(
self.context, self.instance, self.requested_networks)
mock_save.assert_called_once_with()
self.assertEqual(
'False', self.instance.system_metadata['network_allocated'])

@mock.patch('nova.compute.manager.LOG')
def test__cleanup_allocated_networks__error(self, mock_log):
with test.nested(
mock.patch.object(
self.compute.network_api, 'get_instance_nw_info',
side_effect=Exception('some neutron error')
),
mock.patch.object(self.compute.driver, 'unplug_vifs'),
) as (mock_nwinfo, mock_unplug):
self.compute._cleanup_allocated_networks(
self.context, self.instance, self.requested_networks)

mock_nwinfo.assert_called_once_with(self.context, self.instance)
self.assertEqual(1, mock_log.warning.call_count)
self.assertIn(
'Failed to update network info cache',
mock_log.warning.call_args[0][0],
)
mock_unplug.assert_not_called()

def test_deallocate_network_none_requested(self):
# Tests that we don't deallocate networks if 'none' were


+ 0
- 3
nova/virt/driver.py View File

@@ -1283,9 +1283,6 @@ class ComputeDriver(object):
raise NotImplementedError()

def unplug_vifs(self, instance, network_info):
# NOTE(markus_z): 2015-08-18
# The compute manager doesn't use this interface, which seems odd
# since the manager should be the controlling thing here.
"""Unplug virtual interfaces (VIFs) from networks.

The counter action is :func:`plug_vifs`.


Loading…
Cancel
Save