9009d56d21
Tests were previously using arbitrary keys that weren't published anywhere in the codebase. Switch to using meaningful ones. Change-Id: I8da84b48e4d630eeb91d92346aa2323e25e28e3b Signed-off-by: Stephen Finucane <sfinucan@redhat.com>
1199 lines
59 KiB
Python
1199 lines
59 KiB
Python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import datetime
|
|
import mock
|
|
|
|
from oslo_db import exception as oslo_db_exc
|
|
from oslo_utils import fixture as osloutils_fixture
|
|
from oslo_utils.fixture import uuidsentinel as uuids
|
|
from oslo_utils import timeutils
|
|
|
|
from nova.compute import instance_actions
|
|
from nova import conf
|
|
from nova import context as nova_context
|
|
from nova.db import api as db_api
|
|
from nova import exception
|
|
from nova import objects
|
|
from nova.policies import base as base_policies
|
|
from nova.policies import servers as servers_policies
|
|
from nova.scheduler import utils as scheduler_utils
|
|
from nova.scheduler import weights
|
|
from nova import test
|
|
from nova.tests import fixtures as nova_fixtures
|
|
from nova.tests.functional import integrated_helpers
|
|
from nova.tests.unit import cast_as_call
|
|
from nova.tests.unit import fake_notifier
|
|
from nova import utils
|
|
|
|
# Module-level alias for the CONF object exposed by the nova.conf package.
CONF = conf.CONF
|
|
|
|
|
|
class HostNameWeigher(weights.BaseHostWeigher):
    """Scheduler weigher that ranks hosts by a fixed per-hostname weight.

    TestMultiCellMigrate creates host1 in cell1 and host2 in cell2.
    Something about migrating from host1 to host2 teases out failures,
    which probably has to do with cell1 being the default cell DB in our
    base test class setup, so host1 is preferred to keep the tests
    deterministic.
    """

    # Static weight per host name; the larger value (host1) is preferred.
    _weights = {'host1': 100, 'host2': 50}

    def _weigh_object(self, host_state, weight_properties):
        """Return the weight configured for ``host_state.host``.

        :param host_state: object exposing the host name as ``.host``
        :param weight_properties: unused by this weigher
        :returns: the configured weight, or 0 for any undefined host
        """
        host_name = host_state.host
        if host_name in self._weights:
            return self._weights[host_name]
        # Any undefined host gets no weight.
        return 0
|
|
|
|
|
|
class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
|
|
"""Tests for cross-cell cold migration (resize)"""
|
|
|
|
NUMBER_OF_CELLS = 2
|
|
compute_driver = 'fake.MediumFakeDriver'
|
|
|
|
def setUp(self):
    """Prepare two computes in two cells with cross-cell resize enabled.

    Configures the scheduler weighers, installs the cinder fixture,
    enables the cross-cell resize policy, tunes the RPC timeouts and then,
    for each host, starts its compute service and puts it in an aggregate
    whose availability zone is named after the host's cell.
    """
    # The custom weigher defined in this module gives us a predictable
    # scheduling sort order during server create.
    weigher_paths = [
        '%s.HostNameWeigher' % __name__,
        'nova.scheduler.weights.cross_cell.CrossCellWeigher',
    ]
    self.flags(group='filter_scheduler', weight_classes=weigher_paths)
    super(TestMultiCellMigrate, self).setUp()
    self.cinder = self.useFixture(nova_fixtures.CinderFixture(self))

    self._enable_cross_cell_resize()
    # IDs of the images created during resize.
    self.created_images = []

    # Adjust the polling interval and timeout for long RPC calls.
    self.flags(rpc_response_timeout=1)
    self.flags(long_rpc_timeout=60)

    # One compute service per cell.
    self.host_to_cell_mappings = {'host1': 'cell1', 'host2': 'cell2'}
    self.cell_to_aggregate = {}

    for host, cell_name in sorted(self.host_to_cell_mappings.items()):
        # Bring up the compute service for this host in its cell.
        self._start_compute(host, cell_name=cell_name)
        # The aggregate's AZ name is the cell name.
        aggregate_id = self._create_aggregate(
            cell_name, availability_zone=cell_name)
        # Put the host into that aggregate.
        self.admin_api.post_aggregate_action(
            aggregate_id, {'add_host': {'host': host}})
        self.cell_to_aggregate[cell_name] = aggregate_id
|
|
|
|
def _enable_cross_cell_resize(self):
    """Allow admins to perform cross-cell resize.

    The cross-cell resize policy defaults to not allowing anyone to
    perform that type of operation, so for these tests the rule is set to
    the admin API rule without overwriting the other policy rules.
    """
    cross_cell_rules = {
        servers_policies.CROSS_CELL_RESIZE: base_policies.RULE_ADMIN_API,
    }
    self.policy.set_rules(cross_cell_rules, overwrite=False)
|
|
|
|
def assertFlavorMatchesAllocation(self, flavor, allocation,
                                  volume_backed=False):
    """Assert that an allocation's resources match the given flavor.

    :param flavor: dict with 'vcpus', 'ram' and 'disk' keys
    :param allocation: dict of resource class name to amount
    :param volume_backed: True if the server is volume-backed, in which
        case the allocation must not contain DISK_GB
    """
    self.assertEqual(flavor['vcpus'], allocation['VCPU'])
    self.assertEqual(flavor['ram'], allocation['MEMORY_MB'])
    if not volume_backed:
        self.assertEqual(flavor['disk'], allocation['DISK_GB'])
        return
    # Volume-backed instances won't have DISK_GB allocations.
    self.assertNotIn('DISK_GB', allocation)
|
|
|
|
def assert_instance_fields_match_flavor(self, instance, flavor):
    """Assert the instance's sizing fields equal the flavor's values.

    :param instance: object exposing memory_mb, vcpus, root_gb and
        ephemeral_gb attributes
    :param flavor: dict form of the flavor to compare against
    """
    # (instance attribute, flavor key) pairs, checked in this order.
    field_to_flavor_key = (
        ('memory_mb', 'ram'),
        ('vcpus', 'vcpus'),
        ('root_gb', 'disk'),
        ('ephemeral_gb', 'OS-FLV-EXT-DATA:ephemeral'),
    )
    for field_name, flavor_key in field_to_flavor_key:
        self.assertEqual(getattr(instance, field_name), flavor[flavor_key])
|
|
|
|
def _count_volume_attachments(self, server_id):
    """Return how many attachment IDs the cinder fixture reports.

    :param server_id: ID of the server whose attachments are counted
    :returns: number of attachment IDs for the instance
    """
    return len(self.cinder.attachment_ids_for_instance(server_id))
|
|
|
|
def assert_quota_usage(self, expected_num_instances):
    """Assert the 'totalInstancesUsed' absolute limit from the API.

    :param expected_num_instances: expected value of the
        'totalInstancesUsed' absolute limit
    """
    absolute_limits = self.api.get_limits()['absolute']
    instances_used = absolute_limits['totalInstancesUsed']
    self.assertEqual(expected_num_instances, instances_used)
|
|
|
|
def _create_server(self, flavor, volume_backed=False, group_id=None,
                   no_networking=False):
    """Create a server and wait for it to become ACTIVE.

    :param flavor: dict form of the flavor to use
    :param volume_backed: True if the server should be volume-backed
    :param group_id: UUID of a server group in which to create the server
    :param no_networking: True if the server should be created without
        networking, otherwise it will be created with a specific port and
        VIF tag
    :returns: server dict response from the GET /servers/{server_id} API
    """
    networks = 'none'
    if not no_networking:
        # Provide a VIF tag for the pre-existing port. Since VIF tags are
        # stored in the virtual_interfaces table in the cell DB, we want
        # to make sure those survive the resize to another cell.
        networks = [
            {'port': self.neutron.port_1['id'], 'tag': 'private'},
        ]

    server_req = self._build_server(
        flavor_id=flavor['id'], networks=networks)
    # Tag the server so we can check the tag survives the resize.
    server_req['tags'] = ['test']

    if volume_backed:
        root_bdm = {
            'boot_index': 0,
            'uuid': nova_fixtures.CinderFixture.IMAGE_BACKED_VOL,
            'source_type': 'volume',
            'destination_type': 'volume',
            'tag': 'root',
        }
        server_req['block_device_mapping_v2'] = [root_bdm]
        # We don't need the imageRef for volume-backed servers.
        server_req.pop('imageRef', None)

    request_body = {'server': server_req}
    if group_id:
        request_body['os:scheduler_hints'] = {'group': group_id}

    created = self.api.post_server(request_body)
    created = self._wait_for_state_change(created, 'ACTIVE')

    # For volume-backed make sure there is one attachment to start.
    if volume_backed:
        attachment_count = self._count_volume_attachments(created['id'])
        self.assertEqual(
            1, attachment_count, self.cinder.volume_to_attachment)
    return created
|
|
|
|
def stub_image_create(self):
    """Wrap the image service's create method to record created images.

    The real create method is still called; the 'id' of every image it
    returns is appended to ``self.created_images``. The patch is stopped
    on test cleanup.
    """
    real_create = self.image_service.create

    def _create_and_record(*args, **kwargs):
        # Delegate to the real method, then remember the new image ID.
        created = real_create(*args, **kwargs)
        self.created_images.append(created['id'])
        return created

    patcher = mock.patch.object(
        self.image_service, 'create', side_effect=_create_and_record)
    patcher.start()
    self.addCleanup(patcher.stop)
|
|
|
|
def _resize_and_validate(self, volume_backed=False, stopped=False,
                         target_host=None, server=None):
    """Creates (if a server is not provided) and resizes the server to
    another cell. Validates various aspects of the server and its related
    records (allocations, migrations, actions, VIF tags, etc).

    :param volume_backed: True if the server should be volume-backed, False
        if image-backed.
    :param stopped: True if the server should be stopped prior to resize,
        False if the server should be ACTIVE
    :param target_host: If not None, triggers a cold migration to the
        specified host.
    :param server: A pre-existing server to resize. If None this method
        creates the server.
    :returns: tuple of:
        - server response object
        - source compute node resource provider uuid
        - target compute node resource provider uuid
        - old flavor
        - new flavor
    """
    flavors = self.api.get_flavors()
    if server is None:
        # Create the server.
        old_flavor = flavors[0]
        server = self._create_server(
            old_flavor, volume_backed=volume_backed)
    else:
        # Look up the dict form of the flavor the provided server uses.
        flavor_name = server['flavor']['original_name']
        for flavor in flavors:
            if flavor['name'] == flavor_name:
                old_flavor = flavor
                break
        else:
            # TestCase.fail() only accepts a single message argument, so
            # the message has to be formatted before it is passed in.
            self.fail('Unable to find old flavor with name %s. Flavors: '
                      '%s' % (flavor_name, flavors))
    original_host = server['OS-EXT-SRV-ATTR:host']
    image_uuid = None if volume_backed else server['image']['id']

    # Our HostNameWeigher ensures the server starts in cell1, so we expect
    # the server AZ to be cell1 as well.
    self.assertEqual('cell1', server['OS-EXT-AZ:availability_zone'])

    if stopped:
        # Stop the server before resizing it.
        self.api.post_server_action(server['id'], {'os-stop': None})
        self._wait_for_state_change(server, 'SHUTOFF')

    # Before resizing make sure quota usage is only 1 for total instances.
    self.assert_quota_usage(expected_num_instances=1)

    if target_host:
        # Cold migrate the server to the target host.
        new_flavor = old_flavor  # flavor does not change for cold migrate
        body = {'migrate': {'host': target_host}}
        expected_host = target_host
    else:
        # Resize it which should migrate the server to the host in the
        # other cell.
        new_flavor = flavors[1]
        body = {'resize': {'flavorRef': new_flavor['id']}}
        expected_host = 'host1' if original_host == 'host2' else 'host2'

    self.stub_image_create()

    self.api.post_server_action(server['id'], body)
    # Wait for the server to be resized and then verify the host has
    # changed to be the host in the other cell.
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    self.assertEqual(expected_host, server['OS-EXT-SRV-ATTR:host'])
    # Assert that the instance is only listed one time from the API (to
    # make sure it's not listed out of both cells).
    # Note that we only get one because the DB API excludes hidden
    # instances by default (see instance_get_all_by_filters_sort).
    servers = self.api.get_servers()
    self.assertEqual(1, len(servers),
                     'Unexpected number of servers: %s' % servers)
    self.assertEqual(expected_host, servers[0]['OS-EXT-SRV-ATTR:host'])

    # And that there is only one migration record.
    migrations = self.api.api_get(
        '/os-migrations?instance_uuid=%s' % server['id']
    ).body['migrations']
    self.assertEqual(1, len(migrations),
                     'Unexpected number of migrations records: %s' %
                     migrations)
    migration = migrations[0]
    self.assertEqual('finished', migration['status'])

    # There should be at least two actions, one for create and one for the
    # resize. There will be a third action if the server was stopped. Use
    # assertGreaterEqual in case a test performed some actions on a
    # pre-created server before resizing it, like attaching a volume.
    actions = self.api.get_instance_actions(server['id'])
    expected_num_of_actions = 3 if stopped else 2
    self.assertGreaterEqual(len(actions), expected_num_of_actions, actions)
    # Each action should have events (make sure these were copied from
    # the source cell to the target cell).
    for action in actions:
        detail = self.api.api_get(
            '/servers/%s/os-instance-actions/%s' % (
                server['id'], action['request_id'])).body['instanceAction']
        self.assertNotEqual(0, len(detail['events']), detail)

    # The tag should still be present on the server.
    self.assertEqual(1, len(server['tags']),
                     'Server tags not found in target cell.')
    self.assertEqual('test', server['tags'][0])

    # Confirm the source node has allocations for the old flavor and the
    # target node has allocations for the new flavor.
    source_rp_uuid = self._get_provider_uuid_by_host(original_host)
    # The source node allocations should be on the migration record.
    source_allocations = self._get_allocations_by_provider_uuid(
        source_rp_uuid)[migration['uuid']]['resources']
    self.assertFlavorMatchesAllocation(
        old_flavor, source_allocations, volume_backed=volume_backed)

    target_rp_uuid = self._get_provider_uuid_by_host(expected_host)
    # The target node allocations should be on the instance record.
    target_allocations = self._get_allocations_by_provider_uuid(
        target_rp_uuid)[server['id']]['resources']
    self.assertFlavorMatchesAllocation(
        new_flavor, target_allocations, volume_backed=volume_backed)

    # The instance, in the target cell DB, should have the old and new
    # flavor stored with it with the values we expect at this point.
    target_cell_name = self.host_to_cell_mappings[expected_host]
    self.assertEqual(
        target_cell_name, server['OS-EXT-AZ:availability_zone'])
    target_cell = self.cell_mappings[target_cell_name]
    admin_context = nova_context.get_admin_context()
    with nova_context.target_cell(admin_context, target_cell) as cctxt:
        inst = objects.Instance.get_by_uuid(
            cctxt, server['id'], expected_attrs=['flavor'])
        self.assertIsNotNone(
            inst.old_flavor,
            'instance.old_flavor not saved in target cell')
        self.assertIsNotNone(
            inst.new_flavor,
            'instance.new_flavor not saved in target cell')
        self.assertEqual(inst.flavor.flavorid, inst.new_flavor.flavorid)
        if target_host:  # cold migrate so flavor does not change
            self.assertEqual(
                inst.flavor.flavorid, inst.old_flavor.flavorid)
        else:
            self.assertNotEqual(
                inst.flavor.flavorid, inst.old_flavor.flavorid)
        self.assertEqual(old_flavor['id'], inst.old_flavor.flavorid)
        self.assertEqual(new_flavor['id'], inst.new_flavor.flavorid)
        # Assert the ComputeManager._set_instance_info fields
        # are correct after the resize.
        self.assert_instance_fields_match_flavor(inst, new_flavor)
        # The availability_zone field in the DB should also be updated.
        self.assertEqual(target_cell_name, inst.availability_zone)

    # A pre-created server might not have any ports attached.
    if server['addresses']:
        # Assert the VIF tag was carried through to the target cell DB.
        interface_attachments = self.api.get_port_interfaces(server['id'])
        self.assertEqual(1, len(interface_attachments))
        self.assertEqual('private', interface_attachments[0]['tag'])

    if volume_backed:
        # Assert the BDM tag was carried through to the target cell DB.
        volume_attachments = self.api.get_server_volumes(server['id'])
        self.assertEqual(1, len(volume_attachments))
        self.assertEqual('root', volume_attachments[0]['tag'])

    # Make sure the guest is no longer tracked on the source node.
    source_guest_uuids = (
        self.computes[original_host].manager.driver.list_instance_uuids())
    self.assertNotIn(server['id'], source_guest_uuids)
    # And the guest is on the target node hypervisor.
    target_guest_uuids = (
        self.computes[expected_host].manager.driver.list_instance_uuids())
    self.assertIn(server['id'], target_guest_uuids)

    # The source hypervisor continues to report usage in the hypervisors
    # API because even though the guest was destroyed there, the instance
    # resources are still claimed on that node in case the user reverts.
    self.assert_hypervisor_usage(source_rp_uuid, old_flavor, volume_backed)
    # The new flavor should show up with resource usage on the target host.
    self.assert_hypervisor_usage(target_rp_uuid, new_flavor, volume_backed)

    # While we have a copy of the instance in each cell database make sure
    # that quota usage is only reporting 1 (because one is hidden).
    self.assert_quota_usage(expected_num_instances=1)

    # For a volume-backed server, at this point there should be two volume
    # attachments for the instance: one tracked in the source cell and
    # one in the target cell.
    if volume_backed:
        self.assertEqual(2, self._count_volume_attachments(server['id']),
                         self.cinder.volume_to_attachment)

    # Assert the expected power state.
    # NOTE(review): 4 and 1 presumably correspond to the SHUTDOWN and
    # RUNNING power states; confirm against nova.compute.power_state.
    expected_power_state = 4 if stopped else 1
    self.assertEqual(
        expected_power_state, server['OS-EXT-STS:power_state'],
        "Unexpected power state after resize.")

    # For an image-backed server, a snapshot image should have been created
    # and then deleted during the resize.
    if volume_backed:
        self.assertEqual('', server['image'])
        self.assertEqual(
            0, len(self.created_images),
            "Unexpected image create during volume-backed resize")
    else:
        # The original image for the server shown in the API should not
        # have changed even if a snapshot was used to create the guest
        # on the dest host.
        self.assertEqual(image_uuid, server['image']['id'])
        self.assertEqual(
            1, len(self.created_images),
            "Unexpected number of images created for image-backed resize")
        # Make sure the temporary snapshot image was deleted; we use the
        # compute images proxy API here which is deprecated so we force the
        # microversion to 2.1.
        with utils.temporary_mutation(self.api, microversion='2.1'):
            self.api.api_get('/images/%s' % self.created_images[0],
                             check_response_status=[404])

    return server, source_rp_uuid, target_rp_uuid, old_flavor, new_flavor
|
|
|
|
def _attach_volume_to_server(self, server_id, volume_id):
    """Attach a volume through the API and wait for the attach to finish.

    Returns only once the "instance.volume_attach.end" versioned
    notification has been received.

    :param server_id: ID of the server to attach the volume to
    :param volume_id: ID of the volume to attach
    """
    attach_url = '/servers/%s/os-volume_attachments' % server_id
    attach_body = {'volumeAttachment': {'volumeId': volume_id}}
    self.api.api_post(attach_url, attach_body)
    fake_notifier.wait_for_versioned_notifications(
        'instance.volume_attach.end')
|
|
|
|
def _detach_volume_from_server(self, server_id, volume_id):
    """Detach a volume through the API and wait for the detach to finish.

    Returns only once the "instance.volume_detach.end" versioned
    notification has been received.

    :param server_id: ID of the server to detach the volume from
    :param volume_id: ID of the volume to detach
    """
    detach_url = '/servers/%s/os-volume_attachments/%s' % (
        server_id, volume_id)
    self.api.api_delete(detach_url)
    fake_notifier.wait_for_versioned_notifications(
        'instance.volume_detach.end')
|
|
|
|
def assert_volume_is_attached(self, server_id, volume_id):
    """Assert the server's API representation lists the volume.

    :param server_id: ID of the server to fetch
    :param volume_id: ID of the volume expected to be attached
    """
    attachments = self.api.get_server(server_id)[
        'os-extended-volumes:volumes_attached']
    attached_ids = []
    for attachment in attachments:
        attached_ids.append(attachment['id'])
    self.assertIn(
        volume_id, attached_ids, 'Attached volumes: %s' % attachments)
|
|
|
|
def assert_volume_is_detached(self, server_id, volume_id):
    """Assert the server's API representation does not list the volume.

    :param server_id: ID of the server to fetch
    :param volume_id: ID of the volume expected to be detached
    """
    attachments = self.api.get_server(server_id)[
        'os-extended-volumes:volumes_attached']
    attached_ids = []
    for attachment in attachments:
        attached_ids.append(attachment['id'])
    self.assertNotIn(
        volume_id, attached_ids, 'Attached volumes: %s' % attachments)
|
|
|
|
def assert_resize_confirm_notifications(self):
    """Assert the versioned notifications emitted by a resize confirm.

    Exactly two notifications are expected, in this order:

    1. instance.resize_confirm.start
    2. instance.resize_confirm.end
    """
    notifications = fake_notifier.VERSIONED_NOTIFICATIONS
    self.assertEqual(
        2, len(notifications),
        'Unexpected number of versioned notifications for '
        'cross-cell resize confirm: %s' % notifications)
    expected_event_types = (
        'instance.resize_confirm.start',
        'instance.resize_confirm.end',
    )
    for expected_type, notification in zip(
            expected_event_types, notifications):
        self.assertEqual(expected_type, notification['event_type'])
|
|
|
|
def delete_server_and_assert_cleanup(self, server,
                                     assert_confirmed_migration=False):
    """Delete the server and verify nothing was left behind.

    - makes sure allocations from placement are gone
    - makes sure the instance record is gone from both cells
    - makes sure there are no leaked volume attachments

    :param server: dict of the server resource to delete
    :param assert_confirmed_migration: If True, asserts that the Migration
        record for the server has status "confirmed". This is useful when
        testing that deleting a resized server automatically confirms the
        resize.
    """
    # Remember which cell the instance was in when it was deleted via the
    # API so we can check hard vs soft delete in the DB.
    server_host = server['OS-EXT-SRV-ATTR:host']
    current_cell = self.host_to_cell_mappings[server_host]
    # Delete the server and check that the allocations are gone from the
    # placement service.
    mig_uuid = self._delete_and_check_allocations(server)
    admin_ctxt = nova_context.get_admin_context()

    if assert_confirmed_migration:
        # The Migration object from the last cell the instance was in
        # should have status "confirmed".
        last_cell = self.cell_mappings[current_cell]
        with nova_context.target_cell(admin_ctxt, last_cell) as cctxt:
            migration = objects.Migration.get_by_uuid(cctxt, mig_uuid)
            self.assertEqual('confirmed', migration.status)

    # Make sure the instance record is gone from both cell databases.
    for cell_name in self.host_to_cell_mappings.values():
        cell_mapping = self.cell_mappings[cell_name]
        with nova_context.target_cell(admin_ctxt, cell_mapping) as cctxt:
            # If this is the current cell the instance was in when it was
            # deleted it should be soft-deleted (instance.deleted!=0),
            # otherwise it should be hard-deleted and getting it with a
            # read_deleted='yes' context should still raise.
            if current_cell == cell_name:
                read_deleted = 'no'
            else:
                read_deleted = 'yes'
            with utils.temporary_mutation(
                    cctxt, read_deleted=read_deleted):
                with self.assertRaises(exception.InstanceNotFound):
                    objects.Instance.get_by_uuid(cctxt, server['id'])

    # Make sure there are no leaked volume attachments.
    leaked_count = self._count_volume_attachments(server['id'])
    self.assertEqual(
        0, leaked_count,
        'Leaked volume attachments: %s' % self.cinder.volume_to_attachment)
|
|
|
|
def assert_resize_confirm_actions(self, server):
    """Assert the confirmResize instance action and its events.

    The server must have a confirm-resize action whose detail holds
    exactly two events, one from conductor and one from the source
    compute, each with a 'Success' result.

    :param server: dict of the server resource to check
    """
    actions_by_action = {}
    for action in self.api.get_instance_actions(server['id']):
        actions_by_action[action['action']] = action
    self.assertIn(instance_actions.CONFIRM_RESIZE, actions_by_action)

    request_id = actions_by_action[
        instance_actions.CONFIRM_RESIZE]['request_id']
    detail_url = '/servers/%s/os-instance-actions/%s' % (
        server['id'], request_id)
    detail = self.api.api_get(detail_url).body['instanceAction']

    events = detail['events']
    events_by_name = {event['event']: event for event in events}
    self.assertEqual(2, len(events), detail)
    expected_event_names = (
        'conductor_confirm_snapshot_based_resize',
        'compute_confirm_snapshot_based_resize_at_source',
    )
    for event_name in expected_event_names:
        self.assertIn(event_name, events_by_name)
        self.assertEqual('Success', events_by_name[event_name]['result'])
    self.assertEqual('Success', events[0]['result'])
|
|
|
|
def test_resize_confirm_image_backed(self):
    """Resize an image-backed server across cells and confirm the resize.

    The server is created in one cell and resized to the host in the
    other cell; the resize is then confirmed, the server deleted and the
    cell databases archived.
    """
    (server, source_rp_uuid, target_rp_uuid,
     _old_flavor, new_flavor) = self._resize_and_validate()
    server_id = server['id']

    # A fake volume attached now must survive the confirm.
    self._attach_volume_to_server(server_id, uuids.fake_volume_id)

    # Reset the fake notifier so we only check confirmation notifications.
    fake_notifier.reset()

    # Confirm the resize and check all the things. The instance and its
    # related records should be gone from the source cell database; the
    # migration should be confirmed; the allocations, held by the
    # migration record on the source compute node resource provider,
    # should now be gone; there should be a confirmResize instance action
    # record with a successful event.
    self.api.post_server_action(server_id, {'confirmResize': None})
    self._wait_for_state_change(server, 'ACTIVE')

    self._assert_confirm(
        server, source_rp_uuid, target_rp_uuid, new_flavor)

    # The fake volume should still be attached.
    self.assert_volume_is_attached(server_id, uuids.fake_volume_id)

    # Explicitly delete the server and make sure it's gone from all cells.
    self.delete_server_and_assert_cleanup(server)

    # Run the DB archive code in all cells to make sure we did not mess
    # up some referential constraint.
    self._archive_cell_dbs()
|
|
|
|
def _assert_confirm(self, server, source_rp_uuid, target_rp_uuid,
                    new_flavor):
    """Assert the state of the deployment after a confirmed resize.

    Checks the migration status, placement allocations, instance actions,
    hypervisor guests, hypervisor usage (before and after running
    periodics) and the emitted notifications.

    :param server: dict of the resized server, now on the target host
    :param source_rp_uuid: source compute node resource provider uuid
    :param target_rp_uuid: target compute node resource provider uuid
    :param new_flavor: dict form of the flavor the server was resized to
    """
    server_id = server['id']
    target_host = server['OS-EXT-SRV-ATTR:host']
    if target_host == 'host2':
        source_host = 'host1'
    else:
        source_host = 'host2'

    # The migration should be confirmed.
    migrations_url = '/os-migrations?instance_uuid=%s' % server_id
    migrations = self.api.api_get(migrations_url).body['migrations']
    self.assertEqual(1, len(migrations), migrations)
    migration = migrations[0]
    self.assertEqual('confirmed', migration['status'], migration)

    # The resource allocations held against the source node by the
    # migration record should be gone and the target node provider should
    # have allocations held by the instance.
    self.assertEqual(
        {}, self._get_allocations_by_provider_uuid(source_rp_uuid))
    target_allocations = self._get_allocations_by_provider_uuid(
        target_rp_uuid)
    self.assertIn(server_id, target_allocations)
    self.assertFlavorMatchesAllocation(
        new_flavor, target_allocations[server_id]['resources'])

    self.assert_resize_confirm_actions(server)

    # Make sure the guest is on the target node hypervisor and not on the
    # source node hypervisor.
    source_driver = self.computes[source_host].manager.driver
    self.assertNotIn(
        server_id, source_driver.list_instance_uuids(),
        'Guest is still running on the source hypervisor.')
    target_driver = self.computes[target_host].manager.driver
    self.assertIn(
        server_id, target_driver.list_instance_uuids(),
        'Guest is not running on the target hypervisor.')

    # Assert the source host hypervisor usage is back to 0 and the target
    # is using the new flavor.
    no_usage = {'vcpus': 0, 'disk': 0, 'ram': 0}
    self.assert_hypervisor_usage(
        target_rp_uuid, new_flavor, volume_backed=False)
    self.assert_hypervisor_usage(
        source_rp_uuid, no_usage, volume_backed=False)

    # Run periodics and make sure the usage is still as expected.
    self._run_periodics()
    self.assert_hypervisor_usage(
        target_rp_uuid, new_flavor, volume_backed=False)
    self.assert_hypervisor_usage(
        source_rp_uuid, no_usage, volume_backed=False)

    # Make sure we got the expected notifications for the confirm action.
    self.assert_resize_confirm_notifications()
|
|
|
|
def _archive_cell_dbs(self):
    """Archive deleted rows in every cell and check instances were hit.

    Runs archive_deleted_rows (up to 1000 rows) against each cell in
    ``self.cell_mappings`` and asserts that, summed over all cells, at
    least one row was archived from the 'instances' table.
    """
    admin_ctxt = nova_context.get_admin_context()
    total_archived = 0
    for cell_mapping in self.cell_mappings.values():
        with nova_context.target_cell(admin_ctxt, cell_mapping) as cctxt:
            archive_result = db_api.archive_deleted_rows(
                context=cctxt, max_rows=1000)
            # The first element is looked up by table name.
            rows_by_table = archive_result[0]
            total_archived += rows_by_table.get('instances', 0)
    # We expect to have archived at least one instance.
    self.assertGreaterEqual(
        total_archived, 1, 'No instances were archived from any cell.')
|
|
|
|
def assert_resize_revert_notifications(self):
    """Assert the versioned notifications emitted by a resize revert.

    Exactly three notifications are expected, in this order:

    1. instance.resize_revert.start (from target compute host)
    2. instance.exists (from target compute host)
    3. instance.resize_revert.end (from source compute host)
    """
    notifications = fake_notifier.VERSIONED_NOTIFICATIONS
    self.assertEqual(
        3, len(notifications),
        'Unexpected number of versioned notifications for '
        'cross-cell resize revert: %s' % notifications)
    expected_event_types = (
        'instance.resize_revert.start',
        'instance.exists',
        'instance.resize_revert.end',
    )
    for expected_type, notification in zip(
            expected_event_types, notifications):
        self.assertEqual(expected_type, notification['event_type'])
|
|
|
|
def assert_resize_revert_actions(self, server, source_host, dest_host):
    """Assert the revertResize instance action and its events.

    :param server: dict of the server resource to check
    :param source_host: host expected on the finish-revert-at-source event
    :param dest_host: host expected on the revert-at-dest event
    """
    # There should not be any InstanceActionNotFound errors in the logs
    # since ComputeTaskManager.revert_snapshot_based_resize passes
    # graceful_exit=True to wrap_instance_event.
    self.assertNotIn('InstanceActionNotFound', self.stdlog.logger.output)

    # The revert instance action should have been copied from the target
    # cell to the source cell and "completed" there, i.e. an event
    # should show up under that revert action.
    actions_by_action = {}
    for action in self.api.get_instance_actions(server['id']):
        actions_by_action[action['action']] = action
    self.assertIn(instance_actions.REVERT_RESIZE, actions_by_action)
    revert_action = actions_by_action[instance_actions.REVERT_RESIZE]

    detail_url = '/servers/%s/os-instance-actions/%s' % (
        server['id'], revert_action['request_id'])
    detail = self.api.api_get(detail_url).body['instanceAction']
    events_by_name = {event['event']: event for event in detail['events']}

    # There are three events:
    # - conductor_revert_snapshot_based_resize which is copied from the
    #   target cell database record in conductor
    # - compute_revert_snapshot_based_resize_at_dest
    # - compute_finish_revert_snapshot_based_resize_at_source which is
    #   from the source compute service method
    self.assertEqual(3, len(events_by_name), detail)

    # The RevertResizeTask explicitly finishes the conductor event in the
    # source cell DB.
    conductor_name = 'conductor_revert_snapshot_based_resize'
    self.assertIn(conductor_name, events_by_name)
    self.assertEqual('Success', events_by_name[conductor_name]['result'])

    # The two compute events must also carry the expected host.
    expected_compute_events = (
        ('compute_revert_snapshot_based_resize_at_dest', dest_host),
        ('compute_finish_revert_snapshot_based_resize_at_source',
         source_host),
    )
    for event_name, expected_host in expected_compute_events:
        self.assertIn(event_name, events_by_name)
        compute_event = events_by_name[event_name]
        self.assertEqual(expected_host, compute_event['host'])
        self.assertEqual('Success', compute_event['result'])
|
|
|
|
def test_resize_revert_volume_backed(self):
    """Tests reverting a cross-cell resize of a volume-backed server.

    The server is resized to another cell and the resize is then reverted,
    which should bring the server back to the original source cell.
    """
    resize_result = self._resize_and_validate(volume_backed=True)
    (server, source_rp_uuid, target_rp_uuid,
     old_flavor, _new_flavor) = resize_result
    # Remember where the server is before the revert moves it back.
    target_host = server['OS-EXT-SRV-ATTR:host']

    # Give the server an extra fake volume; it has to survive the revert.
    self._attach_volume_to_server(server['id'], uuids.fake_volume_id)

    # Drop the notifications collected so far so that only the ones sent
    # for the revert are checked below.
    fake_notifier.reset()

    # Revert the resize. The server should be re-spawned in the source
    # cell and removed from the target cell, the target compute node
    # resource provider should have no allocations left, the migration
    # record should be reverted and a revert action should exist.
    self.api.post_server_action(server['id'], {'revertResize': None})
    server = self._wait_for_state_change(server, 'ACTIVE')
    source_host = server['OS-EXT-SRV-ATTR:host']

    # migration.status only becomes "reverted" *after* the instance status
    # turns ACTIVE, so wait for the instance.resize_revert.end
    # notification before looking at the migration.
    fake_notifier.wait_for_versioned_notifications(
        'instance.resize_revert.end')
    migrations_url = '/os-migrations?instance_uuid=%s' % server['id']
    migrations = self.api.api_get(migrations_url).body['migrations']
    self.assertEqual(1, len(migrations), migrations)
    migration = migrations[0]
    self.assertEqual('reverted', migration['status'], migration)

    # Nothing should be allocated against the target provider anymore.
    allocs_on_target = self._get_allocations_by_provider_uuid(
        target_rp_uuid)
    self.assertEqual({}, allocs_on_target)
    # On the source provider only the server itself should hold
    # allocations, and they should match the old flavor.
    allocs_on_source = self._get_allocations_by_provider_uuid(
        source_rp_uuid)
    self.assertNotIn(migration['uuid'], allocs_on_source)
    self.assertIn(server['id'], allocs_on_source)
    server_resources = allocs_on_source[server['id']]['resources']
    self.assertFlavorMatchesAllocation(
        old_flavor, server_resources, volume_backed=True)

    self.assert_resize_revert_actions(server, source_host, target_host)

    # The guest has to be known to the source hypervisor and unknown to
    # the target hypervisor.
    source_driver = self.computes[source_host].manager.driver
    self.assertIn(server['id'], source_driver.list_instance_uuids(),
                  'Guest is not running on the source hypervisor.')
    target_driver = self.computes[target_host].manager.driver
    self.assertNotIn(server['id'], target_driver.list_instance_uuids(),
                     'Guest is still running on the target hypervisor.')

    no_usage = {'vcpus': 0, 'disk': 0, 'ram': 0}

    def assert_usage_after_revert():
        # Source usage reflects the old flavor, target usage is zero.
        self.assert_hypervisor_usage(
            source_rp_uuid, old_flavor, volume_backed=True)
        self.assert_hypervisor_usage(
            target_rp_uuid, no_usage, volume_backed=True)

    assert_usage_after_revert()
    # The periodic tasks must not change the reported usage.
    self._run_periodics()
    assert_usage_after_revert()

    # The extra fake volume should still be attached.
    self.assert_volume_is_attached(server['id'], uuids.fake_volume_id)

    # Check the notifications sent for the revert action.
    self.assert_resize_revert_notifications()

    # Finally delete the server and verify it is gone from all cells.
    self.delete_server_and_assert_cleanup(server)
|
|
|
|
def test_resize_revert_detach_volume_while_resized(self):
    """Tests that a volume detached while resized stays detached.

    A volume is attached before the resize, detached while the server is
    resized, and must still be detached once the resize is reverted.
    """
    volume_id = uuids.fake_volume_id
    # The server is created up-front so a volume can be attached to it
    # before the resize starts.
    first_flavor = self.api.get_flavors()[0]
    server = self._create_server(first_flavor)
    self._attach_volume_to_server(server['id'], volume_id)

    # Resize; the attachment should carry over to the target cell.
    self._resize_and_validate(server=server)
    self.assert_volume_is_attached(server['id'], volume_id)

    # Detach in the target cell while the server is in VERIFY_RESIZE
    # status.
    self._detach_volume_from_server(server['id'], volume_id)

    # Revert and wait until the server is back in the source cell; the
    # volume must not come back with it.
    self.api.post_server_action(server['id'], {'revertResize': None})
    server = self._wait_for_state_change(server, 'ACTIVE')
    self._wait_for_migration_status(server, ['reverted'])
    self.assert_volume_is_detached(server['id'], volume_id)

    # Clean up and make sure nothing was leaked.
    self.delete_server_and_assert_cleanup(server)
|
|
|
|
def test_delete_while_in_verify_resize_status(self):
    """Tests deleting a server that is in VERIFY_RESIZE status.

    The data should be cleaned from both the source and target cell and
    the resize should be automatically confirmed.
    """
    resize_result = self._resize_and_validate()
    # Only the server (first element) is needed here.
    resized_server = resize_result[0]
    self.delete_server_and_assert_cleanup(
        resized_server, assert_confirmed_migration=True)
|
|
|
|
def test_cold_migrate_target_host_in_other_cell(self):
    """Tests cold migrating to a requested host in another cell.

    Mostly a check that the API does not restrict the target host to the
    source cell when cross-cell resize is allowed by policy.
    """
    # _resize_and_validate creates the server on host1, which is in cell1.
    # Bring up a third host in that same cell so there is an alternative
    # to the requested host in cell2; the request must still be honored.
    source_cell_name = self.host_to_cell_mappings['host1']
    self._start_compute('host3', cell_name=source_cell_name)
    self._resize_and_validate(target_host='host2')
|
|
|
|
def test_cold_migrate_cross_cell_weigher_stays_in_source_cell(self):
    """Tests a cold migrate that the CrossCellWeigher keeps in one cell.

    The source cell has two hosts, so the CrossCellWeigher picks the other
    host in the source cell and a traditional resize is performed. The
    HostNameWeigher actually weighs host2 (in cell2) higher than host3 (in
    cell1), but the CrossCellWeigher weighs host2 much lower than host3
    because host3 shares a cell with the source host (host1).
    """
    # The server is created first and should land on host1.
    first_flavor = self.api.get_flavors()[0]
    server = self._create_server(first_flavor)
    # Add a second compute host service to cell1.
    source_cell_name = self.host_to_cell_mappings['host1']
    self._start_compute('host3', cell_name=source_cell_name)
    # Cold migrate without a requested host; host3 should be selected.
    self.admin_api.post_server_action(server['id'], {'migrate': None})
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
|
|
|
|
def test_resize_cross_cell_weigher_filtered_to_target_cell_by_spec(self):
    """Variant of test_cold_migrate_cross_cell_weigher_stays_in_source_cell
    but in this case the flavor used for the resize is restricted via
    aggregate metadata to host2 in cell2 so even though normally host3 in
    cell1 would be weighed higher the CrossCellWeigher is a no-op since
    host3 is filtered out.
    """
    # Create the server first (should go in host1).
    old_flavor = self.api.get_flavors()[0]
    server = self._create_server(old_flavor)
    # Start another compute host service in cell1.
    self._start_compute(
        'host3', cell_name=self.host_to_cell_mappings['host1'])
    # Set foo=bar metadata on the cell2 aggregate.
    self.admin_api.post_aggregate_action(
        self.cell_to_aggregate['cell2'],
        {'set_metadata': {'metadata': {'foo': 'bar'}}})
    # Create a flavor to use for the resize which has the foo=bar spec.
    new_flavor = {
        'id': uuids.new_flavor,
        'name': 'cell2-foo-bar-flavor',
        'vcpus': old_flavor['vcpus'],
        'ram': old_flavor['ram'],
        'disk': old_flavor['disk']
    }
    self.admin_api.post_flavor({'flavor': new_flavor})
    # TODO(stephenfin): What do I do with this???
    self.admin_api.post_extra_spec(
        new_flavor['id'],
        {'extra_specs': {'aggregate_instance_extra_specs:foo': 'bar'}}
    )
    # Enable AggregateInstanceExtraSpecsFilter and restart the scheduler.
    # Work on a copy of the configured list: appending to the object
    # returned by CONF would change configuration state in place, outside
    # of the override that self.flags() registers.
    enabled_filters = list(CONF.filter_scheduler.enabled_filters)
    if 'AggregateInstanceExtraSpecsFilter' not in enabled_filters:
        enabled_filters.append('AggregateInstanceExtraSpecsFilter')
        self.flags(enabled_filters=enabled_filters,
                   group='filter_scheduler')
    self.scheduler_service.stop()
    self.scheduler_service = self.start_service('scheduler')
    # Now resize to the new flavor and it should go to host2 in cell2.
    self.admin_api.post_server_action(
        server['id'], {'resize': {'flavorRef': new_flavor['id']}})
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
|
|
|
|
# TODO(mriedem): Test a bunch of rollback scenarios.
|
|
|
|
# TODO(mriedem): Test re-scheduling when the first host fails the
|
|
# resize_claim and a subsequent alternative host works, and also the
|
|
# case that all hosts fail the resize_claim.
|
|
|
|
def test_anti_affinity_group(self):
    """Tests anti-affinity enforcement across cells.

    One server of an anti-affinity group is moved across cells; trying to
    then move the other group member to the same host in the target cell
    should be rejected by the scheduler.
    """
    # Set up the anti-affinity server group the servers will belong to.
    group_request = {
        'server_group': {
            'name': 'test_anti_affinity_group',
            'policy': 'anti-affinity'
        }
    }
    group_response = self.api.api_post('/os-server-groups', group_request)
    group_id = group_response.body['server_group']['id']

    # The first group member goes to cell1 (it should land on host1 due
    # to HostNameWeigher).
    flavor = self.api.get_flavors()[0]
    server1 = self._create_server(
        flavor, group_id=group_id, no_networking=True)

    # Bring up a second compute host service in cell1.
    source_cell_name = self.host_to_cell_mappings['host1']
    self._start_compute('host3', cell_name=source_cell_name)
    # The second group member is wanted on host3 in cell1. The az forced
    # host parameter cannot be used because the server could then not be
    # moved across cells later. Since the HostNameWeigher prefers host2 in
    # cell2, host2 is temporarily forced down instead.
    host2_service_uuid = self.computes['host2'].service_ref.uuid
    self.admin_api.put_service_force_down(
        host2_service_uuid, forced_down=True)
    server2 = self._create_server(
        flavor, group_id=group_id, no_networking=True)
    self.assertEqual('host3', server2['OS-EXT-SRV-ATTR:host'])
    # Bring host2 back so it can be used as a migration target.
    self.admin_api.put_service_force_down(
        host2_service_uuid, forced_down=False)

    # Migrating server1 should move it to host2 in cell2; any other
    # choice would violate the anti-affinity policy because server2 is on
    # host3 in cell1.
    self.admin_api.post_server_action(server1['id'], {'migrate': None})
    server1 = self._wait_for_state_change(server1, 'VERIFY_RESIZE')
    self.assertEqual('host2', server1['OS-EXT-SRV-ATTR:host'])
    self.admin_api.post_server_action(
        server1['id'], {'confirmResize': None})
    self._wait_for_state_change(server1, 'ACTIVE')

    # Current placement:
    #   server1: host2 in cell2
    #   server2: host3 in cell1
    # and the hosts reported for the server group should agree.
    admin_ctxt = nova_context.get_admin_context()
    group = objects.InstanceGroup.get_by_uuid(admin_ctxt, group_id)
    hosts_in_group = scheduler_utils._get_instance_group_hosts_all_cells(
        admin_ctxt, group)
    self.assertEqual(['host2', 'host3'], sorted(hosts_in_group))

    # Requesting host2 in cell2 for server2 should fail scheduling since
    # it violates the anti-affinity policy. Note that without change
    # I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb in
    # ServerGroupAntiAffinityFilter this would fail because the scheduler
    # utils setup_instance_group only looks at the group hosts in the
    # source cell.
    migrate_to_host2 = {'migrate': {'host': 'host2'}}
    self.admin_api.post_server_action(server2['id'], migrate_to_host2)
    self._wait_for_migration_status(server2, ['error'])
|
|
|
|
def test_poll_unconfirmed_resizes_with_upcall(self):
    """Tests _poll_unconfirmed_resizes when an up-call is possible.

    The periodic task is run for a cross-cell resize once the instance is
    in VERIFY_RESIZE status on the dest host. It works in this case
    because an up-call to the API DB is possible.
    """
    resize_result = self._resize_and_validate()
    server, source_rp_uuid, target_rp_uuid, _, new_flavor = resize_result
    # The server is in VERIFY_RESIZE status now, so the
    # _poll_unconfirmed_resizes periodic task can be enabled and run on
    # the target compute service.
    # Drop earlier notifications; only the confirmation ones matter.
    fake_notifier.reset()
    self.flags(resize_confirm_window=1)
    # Time is moved forward so that the DB API query finds the
    # unconfirmed migration.
    one_hour_ahead = timeutils.utcnow() + datetime.timedelta(hours=1)
    admin_ctxt = nova_context.get_admin_context()
    # This works because the test environment is configured with the API
    # DB connection globally. A compute service running with a conf that
    # had no access to the API DB would fail here.
    target_host = server['OS-EXT-SRV-ATTR:host']
    target_cell_name = self.host_to_cell_mappings[target_host]
    target_cell = self.cell_mappings[target_cell_name]
    target_manager = self.computes[target_host].manager
    with nova_context.target_cell(admin_ctxt, target_cell) as cctxt:
        with osloutils_fixture.TimeFixture(one_hour_ahead):
            target_manager._poll_unconfirmed_resizes(cctxt)
    self._wait_for_state_change(server, 'ACTIVE')
    self._assert_confirm(
        server, source_rp_uuid, target_rp_uuid, new_flavor)
|
|
|
|
def test_poll_unconfirmed_resizes_with_no_upcall(self):
    """Tests the _poll_unconfirmed_resizes periodic task with a cross-cell
    resize once the instance is in VERIFY_RESIZE status on the dest host.
    In this case _poll_unconfirmed_resizes fails because an up-call is
    not possible to the API DB.
    """
    # Only the server is needed from the resize result; the provider
    # UUIDs and flavors are not used by this test.
    server = self._resize_and_validate()[0]
    # At this point the server is in VERIFY_RESIZE status so enable the
    # _poll_unconfirmed_resizes periodic task and run it on the target
    # compute service.
    self.flags(resize_confirm_window=1)
    # Stub timeutils so the DB API query finds the unconfirmed migration.
    future = timeutils.utcnow() + datetime.timedelta(hours=1)
    ctxt = nova_context.get_admin_context()
    target_host = server['OS-EXT-SRV-ATTR:host']
    cell = self.cell_mappings[self.host_to_cell_mappings[target_host]]
    nova_context.set_target_cell(ctxt, cell)
    # Simulate not being able to query the API DB by poisoning calls to
    # the instance_mappings table. Use the CastAsCall fixture so we can
    # trap and log errors for assertions in the test.
    with test.nested(
        osloutils_fixture.TimeFixture(future),
        cast_as_call.CastAsCall(self),
        mock.patch('nova.objects.InstanceMapping.get_by_instance_uuid',
                   side_effect=oslo_db_exc.CantStartEngineError)
    ) as (
        _, _, get_im
    ):
        self.computes[target_host].manager._poll_unconfirmed_resizes(ctxt)
        get_im.assert_called()
    # The failure is expected to show up in the captured log output.
    log_output = self.stdlog.logger.output
    self.assertIn('Error auto-confirming resize', log_output)
    self.assertIn('CantStartEngineError', log_output)
|
|
|
|
# TODO(mriedem): Perform a resize with at-capacity computes, meaning that
|
|
# when we revert we can only fit the instance with the old flavor back
|
|
# onto the source host in the source cell.
|
|
|
|
def test_resize_confirm_from_stopped(self):
    """Tests resize + confirm of a stopped volume-backed server.

    The server was stopped before the resize, so it should remain stopped
    all the way through.
    """
    resize_result = self._resize_and_validate(
        volume_backed=True, stopped=True)
    server = resize_result[0]
    server_id = server['id']
    # Confirming the resize must leave the guest powered off.
    self.api.post_server_action(server_id, {'confirmResize': None})
    server = self._wait_for_state_change(server, 'SHUTOFF')
    power_state = server['OS-EXT-STS:power_state']
    self.assertEqual(4, power_state,
                     "Unexpected power state after confirmResize.")
    self._wait_for_migration_status(server, ['confirmed'])

    # Cold migrate back to cell1 to verify that there is no duplicate
    # entry error in the DB.
    self.api.post_server_action(server['id'], {'migrate': None})
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    # The server should have returned to host1 in cell1.
    self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
|
|
|
|
def test_resize_revert_from_stopped(self):
    """Tests resize + revert of a stopped image-backed server.

    The server was stopped before the resize, so it should remain stopped
    through the revert.
    """
    resize_result = self._resize_and_validate(stopped=True)
    server = resize_result[0]
    server_id = server['id']
    # Reverting the resize must leave the guest powered off.
    self.api.post_server_action(server_id, {'revertResize': None})
    server = self._wait_for_state_change(server, 'SHUTOFF')
    power_state = server['OS-EXT-STS:power_state']
    self.assertEqual(4, power_state,
                     "Unexpected power state after revertResize.")
    self._wait_for_migration_status(server, ['reverted'])

    # Cold migrate to cell2 to verify that there is no duplicate entry
    # error in the DB.
    self.api.post_server_action(server['id'], {'migrate': None})
    server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
    # The server should now be on host2 in cell2.
    self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
|
|
|
|
def test_finish_snapshot_based_resize_at_dest_spawn_fails(self):
    """Negative test where the driver spawn fails on the dest host.

    The failure happens during finish_snapshot_based_resize_at_dest and
    triggers a rollback of the instance data in the target cell. The
    server is then hard rebooted in the source cell to recover it from
    ERROR status.
    """
    # A volume-backed server makes the rollback more interesting since
    # the volume attachments in the target cell have to be cleaned up on
    # failure.
    flavors = self.api.get_flavors()
    server = self._create_server(flavors[0], volume_backed=True)

    # The same request body is used for the failing resize and for the
    # retry at the end of the test.
    body = {'resize': {'flavorRef': flavors[1]['id']}}

    # Make spawn on the destination host fail while the resize runs.
    spawn_failure = exception.HypervisorUnavailable(host='host2')
    dest_driver = self.computes['host2'].driver
    with mock.patch.object(dest_driver, 'spawn',
                           side_effect=spawn_failure):
        self.api.post_server_action(server['id'], body)
        # The server should end up in ERROR state with a fault record.
        # The API should keep showing the server from the source cell
        # because the instance mapping was not updated.
        expected_state = {'status': 'ERROR', 'OS-EXT-STS:task_state': None}
        server = self._wait_for_server_parameter(server, expected_state)

    # The migration should have gone to 'error' status.
    self._wait_for_migration_status(server, ['error'])
    # A fault should have been recorded on the server.
    self.assertIn('fault', server)
    fault_message = server['fault']['message']
    self.assertIn('Connection to the hypervisor is broken', fault_message)
    # The instance should have been hard-deleted from the target cell DB.
    self._assert_instance_not_in_cell('cell2', server['id'])

    # Exactly one volume attachment should remain for the server, i.e.
    # the one created in the target cell was deleted.
    self.assertEqual(1, self._count_volume_attachments(server['id']),
                     self.cinder.volume_to_attachment)

    # The migration-based allocations should have been reverted.
    self._assert_allocation_revert_on_fail(server)

    # A hard reboot in the source cell should bring the server back to
    # ACTIVE.
    hard_reboot = {'reboot': {'type': 'HARD'}}
    self.api.post_server_action(server['id'], hard_reboot)
    self._wait_for_state_change(server, 'ACTIVE')

    # Retry the resize now that spawn is no longer patched to make sure
    # things are OK (no duplicate entry errors in the target DB).
    self.api.post_server_action(server['id'], body)
    self._wait_for_state_change(server, 'VERIFY_RESIZE')
|
|
|
|
def _assert_instance_not_in_cell(self, cell_name, server_id):
    """Asserts that looking up the instance in the given cell fails.

    :param cell_name: key into self.cell_mappings for the cell to check
    :param server_id: UUID of the server to look up
    """
    cell_mapping = self.cell_mappings[cell_name]
    # read_deleted='yes' is requested on the context used for the lookup.
    admin_ctxt = nova_context.get_admin_context(read_deleted='yes')
    with nova_context.target_cell(admin_ctxt, cell_mapping) as cctxt:
        with self.assertRaises(exception.InstanceNotFound):
            objects.Instance.get_by_uuid(cctxt, server_id)
|
|
|
|
def _assert_allocation_revert_on_fail(self, server):
    """Asserts allocations were put back after a failed resize.

    The migration record must hold no allocations and the server's
    allocations on the provider of its current host must match its flavor.

    :param server: server dict as returned by the API
    """
    # The revert happens in MigrationTask.rollback in conductor, so wait
    # for something that happens after it: the
    # ComputeTaskManager._cold_migrate method sending the
    # compute_task.migrate_server.error event.
    fake_notifier.wait_for_versioned_notifications(
        'compute_task.migrate_server.error')
    migration_uuid = self.get_migration_uuid_for_instance(server['id'])
    migration_allocs = self._get_allocations_by_server_uuid(migration_uuid)
    self.assertEqual({}, migration_allocs)
    host = server['OS-EXT-SRV-ATTR:host']
    source_rp_uuid = self._get_provider_uuid_by_host(host)
    server_allocs = self._get_allocations_by_server_uuid(server['id'])
    # A server without an image value is treated as volume-backed.
    is_volume_backed = not server['image']
    self.assertFlavorMatchesAllocation(
        server['flavor'], server_allocs[source_rp_uuid]['resources'],
        volume_backed=is_volume_backed)
|
|
|
|
def test_prep_snapshot_based_resize_at_source_destroy_fails(self):
    """Negative test where destroying the guest on the source fails.

    prep_snapshot_based_resize_at_source fails destroying the guest for
    the non-volume backed server and the test asserts that resources are
    rolled back.
    """
    # A non-volume backed server is used for the snapshot flow.
    flavors = self.api.get_flavors()
    server = self._create_server(flavors[0])

    # The same request body is used for the failing resize and for the
    # retry at the end of the test.
    body = {'resize': {'flavorRef': flavors[1]['id']}}

    # Make the driver destroy method on the source host fail while the
    # resize runs.
    source_host = server['OS-EXT-SRV-ATTR:host']
    destroy_failure = exception.HypervisorUnavailable(host=source_host)
    source_driver = self.computes[source_host].driver
    with mock.patch.object(source_driver, 'destroy',
                           side_effect=destroy_failure):
        self.api.post_server_action(server['id'], body)
        # The server should end up in ERROR state with a fault record.
        # The API should keep showing the server from the source cell
        # because the instance mapping was not updated.
        expected_state = {'status': 'ERROR', 'OS-EXT-STS:task_state': None}
        server = self._wait_for_server_parameter(server, expected_state)

    # The migration should have gone to 'error' status.
    self._wait_for_migration_status(server, ['error'])
    # A fault should have been recorded on the server.
    self.assertIn('fault', server)
    fault_message = server['fault']['message']
    self.assertIn('Connection to the hypervisor is broken', fault_message)
    # The instance should have been hard-deleted from the target cell DB.
    self._assert_instance_not_in_cell('cell2', server['id'])
    # The migration-based allocations should have been reverted.
    self._assert_allocation_revert_on_fail(server)

    # A hard reboot in the source cell should bring the server back to
    # ACTIVE.
    hard_reboot = {'reboot': {'type': 'HARD'}}
    self.api.post_server_action(server['id'], hard_reboot)
    self._wait_for_state_change(server, 'ACTIVE')

    # Retry the resize now that destroy is no longer patched to make sure
    # things are OK (no duplicate entry errors in the target DB).
    self.api.post_server_action(server['id'], body)
    self._wait_for_state_change(server, 'VERIFY_RESIZE')
|