Use COMPUTE_SAME_HOST_COLD_MIGRATE trait during migrate
This uses the COMPUTE_SAME_HOST_COLD_MIGRATE trait in the API during a cold migration to filter out hosts that cannot support same-host cold migration, which is all of them except for the hosts using the vCenter driver. For any nodes that do not report the trait, we won't know if they don't because they don't support it or if they are not new enough to report it, so the API has a service version check and will fallback to old behavior using the config if the node is old. That compat code can be removed in the next release. As a result of this the FakeDriver capabilities are updated so the FakeDriver no longer supports same-host cold migration and a new fake driver is added to support that scenario for any tests that need it. Change-Id: I7a4b951f3ab324c666ab924e6003d24cc8e539f5 Closes-Bug: #1748697 Related-Bug: #1811235
This commit is contained in:
parent
9fa3600fca
commit
4921e822e7
@ -66,7 +66,7 @@ os-brick==2.6.2
|
||||
os-client-config==1.29.0
|
||||
os-resource-classes==0.4.0
|
||||
os-service-types==1.7.0
|
||||
os-traits==2.0.0
|
||||
os-traits==2.1.0
|
||||
os-vif==1.14.0
|
||||
os-win==3.0.0
|
||||
os-xenapi==0.3.3
|
||||
|
@ -25,6 +25,7 @@ import re
|
||||
import string
|
||||
|
||||
from castellan import key_manager
|
||||
import os_traits
|
||||
from oslo_log import log as logging
|
||||
from oslo_messaging import exceptions as oslo_exceptions
|
||||
from oslo_serialization import base64 as base64utils
|
||||
@ -103,6 +104,7 @@ AGGREGATE_ACTION_ADD = 'Add'
|
||||
|
||||
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
|
||||
MIN_COMPUTE_CROSS_CELL_RESIZE = 47
|
||||
MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
|
||||
|
||||
# FIXME(danms): Keep a global cache of the cells we find the
|
||||
# first time we look. This needs to be refreshed on a timer or
|
||||
@ -3881,8 +3883,7 @@ class API(base.Base):
|
||||
validate_pci=True)
|
||||
|
||||
filter_properties = {'ignore_hosts': []}
|
||||
|
||||
if not CONF.allow_resize_to_same_host:
|
||||
if not self._allow_resize_to_same_host(same_instance_type, instance):
|
||||
filter_properties['ignore_hosts'].append(instance.host)
|
||||
|
||||
request_spec = objects.RequestSpec.get_by_instance_uuid(
|
||||
@ -3931,6 +3932,57 @@ class API(base.Base):
|
||||
request_spec=request_spec,
|
||||
do_cast=True)
|
||||
|
||||
def _allow_resize_to_same_host(self, cold_migrate, instance):
|
||||
"""Contains logic for excluding the instance.host on resize/migrate.
|
||||
|
||||
If performing a cold migration and the compute node resource provider
|
||||
reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait then same-host cold
|
||||
migration is allowed otherwise it is not and the current instance.host
|
||||
should be excluded as a scheduling candidate.
|
||||
|
||||
:param cold_migrate: true if performing a cold migration, false
|
||||
for resize
|
||||
:param instance: Instance object being resized or cold migrated
|
||||
:returns: True if same-host resize/cold migrate is allowed, False
|
||||
otherwise
|
||||
"""
|
||||
if cold_migrate:
|
||||
# Check to see if the compute node resource provider on which the
|
||||
# instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
|
||||
# trait.
|
||||
# Note that we check this here in the API since we cannot
|
||||
# pre-filter allocation candidates in the scheduler using this
|
||||
# trait as it would not work. For example, libvirt nodes will not
|
||||
# report the trait but using it as a forbidden trait filter when
|
||||
# getting allocation candidates would still return libvirt nodes
|
||||
# which means we could attempt to cold migrate to the same libvirt
|
||||
# node, which would fail.
|
||||
ctxt = instance._context
|
||||
cn = objects.ComputeNode.get_by_host_and_nodename(
|
||||
ctxt, instance.host, instance.node)
|
||||
traits = self.placementclient.get_provider_traits(
|
||||
ctxt, cn.uuid).traits
|
||||
# If the provider has the trait it is (1) new enough to report that
|
||||
# trait and (2) supports cold migration on the same host.
|
||||
if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
|
||||
allow_same_host = True
|
||||
else:
|
||||
# TODO(mriedem): Remove this compatibility code after one
|
||||
# release. If the compute is old we will not know if it
|
||||
# supports same-host cold migration so we fallback to config.
|
||||
service = objects.Service.get_by_compute_host(ctxt, cn.host)
|
||||
if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
|
||||
# The compute is new enough to report the trait but does
|
||||
# not so same-host cold migration is not allowed.
|
||||
allow_same_host = False
|
||||
else:
|
||||
# The compute is not new enough to report the trait so we
|
||||
# fallback to config.
|
||||
allow_same_host = CONF.allow_resize_to_same_host
|
||||
else:
|
||||
allow_same_host = CONF.allow_resize_to_same_host
|
||||
return allow_same_host
|
||||
|
||||
@check_instance_lock
|
||||
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
|
||||
vm_states.PAUSED, vm_states.SUSPENDED])
|
||||
|
@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# NOTE(danms): This is the global service version counter
|
||||
SERVICE_VERSION = 47
|
||||
SERVICE_VERSION = 48
|
||||
|
||||
|
||||
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
|
||||
@ -176,6 +176,8 @@ SERVICE_VERSION_HISTORY = (
|
||||
# Version 47: Compute RPC v5.10:
|
||||
# finish_revert_snapshot_based_resize_at_source
|
||||
{'compute_rpc': '5.10'},
|
||||
# Version 48: Drivers report COMPUTE_SAME_HOST_COLD_MIGRATE trait.
|
||||
{'compute_rpc': '5.10'},
|
||||
)
|
||||
|
||||
|
||||
|
122
nova/tests/functional/test_cold_migrate.py
Normal file
122
nova/tests/functional/test_cold_migrate.py
Normal file
@ -0,0 +1,122 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import mock
|
||||
|
||||
from nova.compute import api as compute_api
|
||||
from nova import context as nova_context
|
||||
from nova import objects
|
||||
from nova import test
|
||||
from nova.tests.functional import integrated_helpers
|
||||
from nova.tests.unit import fake_notifier
|
||||
|
||||
|
||||
class ColdMigrationDisallowSameHost(
|
||||
integrated_helpers.ProviderUsageBaseTestCase):
|
||||
"""Tests cold migrate where the source host does not have the
|
||||
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
|
||||
"""
|
||||
compute_driver = 'fake.MediumFakeDriver'
|
||||
|
||||
def setUp(self):
|
||||
super(ColdMigrationDisallowSameHost, self).setUp()
|
||||
# Start one compute service which will use the fake virt driver
|
||||
# which disallows cold migration to the same host.
|
||||
self._start_compute('host1')
|
||||
|
||||
def _wait_for_migrate_no_valid_host(self, error='NoValidHost'):
|
||||
event = fake_notifier.wait_for_versioned_notifications(
|
||||
'compute_task.migrate_server.error')[0]
|
||||
self.assertEqual(error,
|
||||
event['payload']['nova_object.data']['reason'][
|
||||
'nova_object.data']['exception'])
|
||||
|
||||
def test_cold_migrate_same_host_not_supported(self):
|
||||
"""Simple test to show that you cannot cold-migrate to the same host
|
||||
when the resource provider does not expose the
|
||||
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
|
||||
"""
|
||||
server = self._create_server(networks='none')
|
||||
# The fake driver does not report COMPUTE_SAME_HOST_COLD_MIGRATE
|
||||
# so cold migration should fail since we only have one host.
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
self._wait_for_migrate_no_valid_host()
|
||||
|
||||
def test_cold_migrate_same_host_old_compute_disallow(self):
|
||||
"""Upgrade compat test where the resource provider does not report
|
||||
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
|
||||
old so the API falls back to the allow_resize_to_same_host config which
|
||||
defaults to False.
|
||||
"""
|
||||
server = self._create_server(networks='none')
|
||||
# Stub the compute service version check to make the compute service
|
||||
# appear old.
|
||||
fake_service = objects.Service()
|
||||
fake_service.version = (
|
||||
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
|
||||
with mock.patch('nova.objects.Service.get_by_compute_host',
|
||||
return_value=fake_service) as mock_get_service:
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
mock_get_service.assert_called_once_with(
|
||||
test.MatchType(nova_context.RequestContext), 'host1')
|
||||
# Since allow_resize_to_same_host defaults to False scheduling failed
|
||||
# since there are no other hosts.
|
||||
self._wait_for_migrate_no_valid_host()
|
||||
|
||||
def test_cold_migrate_same_host_old_compute_allow(self):
|
||||
"""Upgrade compat test where the resource provider does not report
|
||||
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
|
||||
old so the API falls back to the allow_resize_to_same_host config which
|
||||
in this test is set to True.
|
||||
"""
|
||||
self.flags(allow_resize_to_same_host=True)
|
||||
server = self._create_server(networks='none')
|
||||
# Stub the compute service version check to make the compute service
|
||||
# appear old.
|
||||
fake_service = objects.Service()
|
||||
fake_service.version = (
|
||||
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
|
||||
with mock.patch('nova.objects.Service.get_by_compute_host',
|
||||
return_value=fake_service) as mock_get_service:
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
mock_get_service.assert_called_once_with(
|
||||
test.MatchType(nova_context.RequestContext), 'host1')
|
||||
# In this case the compute is old so the API falls back to checking
|
||||
# allow_resize_to_same_host which says same-host cold migrate is
|
||||
# allowed so the scheduler sends the request to the only compute
|
||||
# available but the virt driver says same-host cold migrate is not
|
||||
# supported and raises UnableToMigrateToSelf. A reschedule is sent
|
||||
# to conductor which results in MaxRetriesExceeded since there are no
|
||||
# alternative hosts.
|
||||
self._wait_for_migrate_no_valid_host(error='MaxRetriesExceeded')
|
||||
|
||||
|
||||
class ColdMigrationAllowSameHost(
|
||||
integrated_helpers.ProviderUsageBaseTestCase):
|
||||
"""Tests cold migrate where the source host has the
|
||||
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
|
||||
"""
|
||||
compute_driver = 'fake.SameHostColdMigrateDriver'
|
||||
|
||||
def setUp(self):
|
||||
super(ColdMigrationAllowSameHost, self).setUp()
|
||||
self._start_compute('host1')
|
||||
|
||||
def test_cold_migrate_same_host_supported(self):
|
||||
"""Simple test to show that you can cold-migrate to the same host
|
||||
when the resource provider exposes the COMPUTE_SAME_HOST_COLD_MIGRATE
|
||||
trait.
|
||||
"""
|
||||
server = self._create_server(networks='none')
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
|
||||
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
|
@ -1741,6 +1741,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
|
||||
@mock.patch('nova.compute.api.API.get_instance_host_status',
|
||||
new=mock.Mock(return_value=fields_obj.HostStatus.UP))
|
||||
@mock.patch('nova.compute.api.API._allow_resize_to_same_host')
|
||||
@mock.patch('nova.compute.utils.is_volume_backed_instance',
|
||||
return_value=False)
|
||||
@mock.patch('nova.compute.api.API._validate_flavor_image_nostatus')
|
||||
@ -1757,6 +1758,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
mock_get_by_instance_uuid, mock_get_flavor, mock_upsize,
|
||||
mock_inst_save, mock_count, mock_limit, mock_record,
|
||||
mock_migration, mock_validate, mock_is_vol_backed,
|
||||
mock_allow_resize_to_same_host,
|
||||
flavor_id_passed=True,
|
||||
same_host=False, allow_same_host=False,
|
||||
project_id=None,
|
||||
@ -1768,6 +1770,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
allow_cross_cell_resize=False):
|
||||
|
||||
self.flags(allow_resize_to_same_host=allow_same_host)
|
||||
mock_allow_resize_to_same_host.return_value = allow_same_host
|
||||
|
||||
params = {}
|
||||
if project_id is not None:
|
||||
@ -1871,6 +1874,7 @@ class _ComputeAPIUnitTestMixIn(object):
|
||||
self.assertEqual(
|
||||
allow_cross_cell_resize,
|
||||
fake_spec.requested_destination.allow_cross_cell_move)
|
||||
mock_allow_resize_to_same_host.assert_called_once()
|
||||
|
||||
if host_name:
|
||||
mock_get_all_by_host.assert_called_once_with(
|
||||
|
@ -7705,6 +7705,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
|
||||
super(ComputeManagerMigrationTestCase, self).setUp()
|
||||
fake_notifier.stub_notifier(self)
|
||||
self.addCleanup(fake_notifier.reset)
|
||||
self.flags(compute_driver='fake.SameHostColdMigrateDriver')
|
||||
self.compute = manager.ComputeManager()
|
||||
self.context = context.RequestContext(fakes.FAKE_USER_ID,
|
||||
fakes.FAKE_PROJECT_ID)
|
||||
@ -9658,7 +9659,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
|
||||
|
||||
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
|
||||
@mock.patch.object(objects.Migration, 'get_by_id')
|
||||
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
|
||||
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
|
||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||
def test_live_migration_abort(self, mock_notify_action, mock_driver,
|
||||
mock_get_migration, mock_notify):
|
||||
@ -9711,7 +9712,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
|
||||
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
|
||||
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
|
||||
@mock.patch.object(objects.Migration, 'get_by_id')
|
||||
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
|
||||
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
|
||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||
def test_live_migration_abort_not_supported(self, mock_notify_action,
|
||||
mock_driver,
|
||||
|
@ -124,6 +124,9 @@ CAPABILITY_TRAITS_MAP = {
|
||||
"supports_image_type_vmdk": os_traits.COMPUTE_IMAGE_TYPE_VMDK,
|
||||
# Added in os-traits 2.0.0
|
||||
"supports_image_type_ploop": os_traits.COMPUTE_IMAGE_TYPE_PLOOP,
|
||||
|
||||
# Added in os-traits 2.1.0.
|
||||
"supports_migrate_to_same_host": os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE,
|
||||
}
|
||||
|
||||
|
||||
|
@ -104,7 +104,7 @@ class FakeDriver(driver.ComputeDriver):
|
||||
capabilities = {
|
||||
"has_imagecache": True,
|
||||
"supports_evacuate": True,
|
||||
"supports_migrate_to_same_host": True,
|
||||
"supports_migrate_to_same_host": False,
|
||||
"supports_attach_interface": True,
|
||||
"supports_device_tagging": True,
|
||||
"supports_tagged_attach_interface": True,
|
||||
@ -702,6 +702,12 @@ class MediumFakeDriver(FakeDriver):
|
||||
local_gb = 1028
|
||||
|
||||
|
||||
class SameHostColdMigrateDriver(MediumFakeDriver):
|
||||
"""MediumFakeDriver variant that supports same-host cold migrate."""
|
||||
capabilities = dict(FakeDriver.capabilities,
|
||||
supports_migrate_to_same_host=True)
|
||||
|
||||
|
||||
class PowerUpdateFakeDriver(SmallFakeDriver):
|
||||
# A specific fake driver for the power-update external event testing.
|
||||
|
||||
|
@ -0,0 +1,18 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
This release contains a fix for `bug 1748697`_ which distinguishes between
|
||||
resize and cold migrate such that the ``allow_resize_to_same_host`` config
|
||||
option is no longer used to determine if a server can be cold migrated to
|
||||
the same compute service host. Now when requesting a cold migration the
|
||||
API will check if the source compute node resource provider has the
|
||||
``COMPUTE_SAME_HOST_COLD_MIGRATE`` trait and if so the scheduler can select
|
||||
the source host. Note that the only in-tree compute driver that supports
|
||||
cold migration to the same host is ``VMwareVCDriver``.
|
||||
|
||||
To support rolling upgrades with N-1 computes if a node does not report the
|
||||
trait and is old the API will fallback to the ``allow_resize_to_same_host``
|
||||
config option value. That compatibility will be removed in a subsequent
|
||||
release.
|
||||
|
||||
.. _bug 1748697: https://launchpad.net/bugs/1748697
|
@ -55,7 +55,7 @@ psutil>=3.2.2 # BSD
|
||||
oslo.versionedobjects>=1.35.0 # Apache-2.0
|
||||
os-brick>=2.6.2 # Apache-2.0
|
||||
os-resource-classes>=0.4.0 # Apache-2.0
|
||||
os-traits>=2.0.0 # Apache-2.0
|
||||
os-traits>=2.1.0 # Apache-2.0
|
||||
os-vif>=1.14.0 # Apache-2.0
|
||||
os-win>=3.0.0 # Apache-2.0
|
||||
castellan>=0.16.0 # Apache-2.0
|
||||
|
Loading…
Reference in New Issue
Block a user