nova/nova/tests/functional/regressions/test_bug_1784353.py
Lee Yarwood 41452a5c6a conductor: Recreate volume attachments during a reschedule
When an instance with attached volumes fails to spawn, cleanup code
within the compute manager (_shutdown_instance called from
_build_resources) will delete the volume attachments referenced by
the bdms in Cinder. As a result, we should check for and, if necessary,
recreate these volume attachments when rescheduling an instance.
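
To make the mechanism concrete, here is a minimal sketch of that
cleanup path; the helper name is illustrative, not the actual compute
manager code:

    def _cleanup_volume_attachments(context, volume_api, bdms):
        # Sketch only: on a failed spawn the compute deletes each bdm's
        # Cinder attachment, leaving any later reschedule holding a
        # stale attachment_id.
        for bdm in bdms:
            if bdm.attachment_id:
                volume_api.attachment_delete(context, bdm.attachment_id)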

Note that there are a couple of other ways to fix this bug by changing
the compute manager code: either by not deleting the volume attachment
on failure before rescheduling [1], or by performing the get/create
check during each build after the reschedule [2].

The problem with *not* cleaning up the attachments is that if we don't
reschedule, we've left orphaned "reserved" volumes in Cinder (or we
have to add special logic to tell the compute when to clean up
attachments).

The problem with checking for the attachment on every new host we build
on is that we'd needlessly perform that check for initial creates even
if we never need to reschedule, unless again we add special logic to
avoid it (like checking whether we've rescheduled at all).

Also, since either approach involves changes to the compute service,
older computes might not have the fix.

So ultimately it seems that the best way to handle this is:

1. Only deal with this on reschedules.
2. Let the cell conductor orchestrate it since it's already dealing
   with the reschedule. Then the compute logic doesn't need to change.

[1] https://review.openstack.org/#/c/587071/3/nova/compute/manager.py@1631
[2] https://review.openstack.org/#/c/587071/4/nova/compute/manager.py@1667
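
A hedged sketch of that conductor-side step, assuming the cinder volume
API already available to the conductor; the helper name and exact call
sites are illustrative, not verbatim from this change:

    from nova import exception

    def _ensure_volume_attachments(context, volume_api, instance, bdms):
        # On a reschedule, verify each bdm's attachment still exists in
        # Cinder and recreate it if cleanup on the previous host
        # deleted it.
        for bdm in bdms:
            if not bdm.attachment_id:
                continue
            try:
                volume_api.attachment_get(context, bdm.attachment_id)
            except exception.VolumeAttachmentNotFound:
                attachment = volume_api.attachment_create(
                    context, bdm.volume_id, instance.uuid)
                # Persist the new attachment id for the next build.
                bdm.attachment_id = attachment['id']
                bdm.save()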

Change-Id: I739c06bd02336bf720cddacb21f48e7857378487
Closes-bug: #1784353
2018-10-22 15:29:15 -04:00


# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit import fake_network
import nova.tests.unit.image.fake
from nova.tests.unit import policy_fixture
from nova.virt import fake

class TestRescheduleWithVolumesAttached(
        test.TestCase, integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1784353 introduced in Queens.

    This regression test asserts that volume backed instances fail to start
    when rescheduled due to their volume attachments being deleted by cleanup
    code within the compute layer after an initial failure to spawn.
    """

    def setUp(self):
        super(TestRescheduleWithVolumesAttached, self).setUp()

        # Use the new attach flow fixture for cinder
        cinder_fixture = nova_fixtures.CinderFixtureNewAttachFlow(self)
        self.cinder = self.useFixture(cinder_fixture)
        self.useFixture(policy_fixture.RealPolicyFixture())
        self.useFixture(nova_fixtures.NeutronFixture(self))

        fake_network.set_stub_network_methods(self)

        self.useFixture(nova_fixtures.PlacementFixture())

        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1'))
        self.api = api_fixture.admin_api

        nova.tests.unit.image.fake.stub_out_image_service(self)
        self.addCleanup(nova.tests.unit.image.fake.FakeImageService_reset)

        # FakeRescheduleDriver fails the first spawn, forcing a reschedule
        self.flags(compute_driver='fake.FakeRescheduleDriver')

        self.start_service('conductor')
        self.start_service('scheduler')

        # Start two computes to allow the instance to be rescheduled
        fake.set_nodes(['host1'])
        self.addCleanup(fake.restore_nodes)
        self.host1 = self.start_service('compute', host='host1')

        fake.set_nodes(['host2'])
        self.addCleanup(fake.restore_nodes)
        self.host2 = self.start_service('compute', host='host2')

        self.image_id = self.api.get_images()[0]['id']
        self.flavor_id = self.api.get_flavors()[0]['id']

    def test_reschedule_with_volume_attached(self):
        # Boot a volume backed instance
        volume_id = nova_fixtures.CinderFixture.IMAGE_BACKED_VOL
        server_request = {
            'name': 'server',
            'flavorRef': self.flavor_id,
            'block_device_mapping_v2': [{
                'boot_index': 0,
                'uuid': volume_id,
                'source_type': 'volume',
                'destination_type': 'volume'}],
        }

        server_response = self.api.post_server({'server': server_request})
        server_id = server_response['id']

        # The instance should reach ACTIVE on the second host despite the
        # initial spawn failure, with its volume attachment recreated.
        self._wait_for_state_change(self.api, server_response, 'ACTIVE')
        attached_volume_ids = self.cinder.volume_ids_for_instance(server_id)
        self.assertIn(volume_id, attached_volume_ids)
        self.assertEqual(1, len(self.cinder.volume_to_attachment))
        # There should only be one attachment record for the volume and
        # instance because the original would have been deleted before
        # rescheduling off the first host.
        self.assertEqual(1, len(self.cinder.volume_to_attachment[volume_id]))