41452a5c6a
When an instance with attached volumes fails to spawn, cleanup code within the compute manager (_shutdown_instance called from _build_resources) will delete the volume attachments referenced by the bdms in Cinder. As a result we should check and if necessary recreate these volume attachments when rescheduling an instance. Note that there are a few different ways to fix this bug by making changes to the compute manager code, either by not deleting the volume attachment on failure before rescheduling [1] or by performing the get/create check during each build after the reschedule [2]. The problem with *not* cleaning up the attachments is if we don't reschedule, then we've left orphaned "reserved" volumes in Cinder (or we have to add special logic to tell compute when to cleanup attachments). The problem with checking the existence of the attachment on every new host we build on is that we'd be needlessly checking that for initial creates even if we don't ever need to reschedule, unless again we have special logic against that (like checking to see if we've rescheduled at all). Also, in either case that involves changes to the compute means that older computes might not have the fix. So ultimately it seems that the best way to handle this is: 1. Only deal with this on reschedules. 2. Let the cell conductor orchestrate it since it's already dealing with the reschedule. Then the compute logic doesn't need to change. [1] https://review.openstack.org/#/c/587071/3/nova/compute/manager.py@1631 [2] https://review.openstack.org/#/c/587071/4/nova/compute/manager.py@1667 Change-Id: I739c06bd02336bf720cddacb21f48e7857378487 Closes-bug: #1784353
91 lines
3.6 KiB
Python
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from nova import test
|
|
from nova.tests import fixtures as nova_fixtures
|
|
from nova.tests.functional import integrated_helpers
|
|
from nova.tests.unit import fake_network
|
|
import nova.tests.unit.image.fake
|
|
from nova.tests.unit import policy_fixture
|
|
from nova.virt import fake
|
|
|
|
|
|
class TestRescheduleWithVolumesAttached(
        test.TestCase, integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1784353 introduced in Queens.

    When an instance with attached volumes fails to spawn, compute-layer
    cleanup deletes its volume attachments in Cinder. This test boots a
    volume-backed server with a driver that fails the first spawn attempt
    and asserts that the server is rescheduled to the second host, reaches
    ACTIVE, and ends up with exactly one (recreated) attachment record for
    its volume.
    """

    def setUp(self):
        super(TestRescheduleWithVolumesAttached, self).setUp()

        # Cinder stub using the new (attachment-based) attach flow.
        self.cinder = self.useFixture(
            nova_fixtures.CinderFixtureNewAttachFlow(self))
        self.useFixture(policy_fixture.RealPolicyFixture())
        self.useFixture(nova_fixtures.NeutronFixture(self))

        fake_network.set_stub_network_methods(self)

        self.useFixture(nova_fixtures.PlacementFixture())

        # The admin API is used both to boot the server and to poll it.
        self.api = self.useFixture(
            nova_fixtures.OSAPIFixture(api_version='v2.1')).admin_api

        nova.tests.unit.image.fake.stub_out_image_service(self)
        self.addCleanup(nova.tests.unit.image.fake.FakeImageService_reset)

        # This fake driver fails the first spawn, forcing a reschedule.
        self.flags(compute_driver='fake.FakeRescheduleDriver')

        self.start_service('conductor')
        self.start_service('scheduler')

        # Start two computes so the failed build can be rescheduled;
        # the services are exposed as self.host1 and self.host2.
        for host in ('host1', 'host2'):
            fake.set_nodes([host])
            self.addCleanup(fake.restore_nodes)
            setattr(self, host, self.start_service('compute', host=host))

        self.image_id = self.api.get_images()[0]['id']
        self.flavor_id = self.api.get_flavors()[0]['id']

    def test_reschedule_with_volume_attached(self):
        # Boot a server backed by the fixture's image-backed volume.
        volume_id = nova_fixtures.CinderFixture.IMAGE_BACKED_VOL
        bdm = {
            'boot_index': 0,
            'uuid': volume_id,
            'source_type': 'volume',
            'destination_type': 'volume'}
        server = self.api.post_server({'server': {
            'name': 'server',
            'flavorRef': self.flavor_id,
            'block_device_mapping_v2': [bdm],
        }})
        server_id = server['id']

        # Despite the first spawn failing, the reschedule should succeed
        # and the server should become ACTIVE on the other host.
        self._wait_for_state_change(self.api, server, 'ACTIVE')

        # The volume is still attached to the instance...
        self.assertIn(volume_id,
                      self.cinder.volume_ids_for_instance(server_id))
        # ...and it is the only volume with any attachments.
        self.assertEqual(1, len(self.cinder.volume_to_attachment))
        # Exactly one attachment record remains for the volume: the
        # original was deleted before rescheduling off the first host,
        # so this must be the one recreated for the second build.
        self.assertEqual(1, len(self.cinder.volume_to_attachment[volume_id]))