Block rebuild when NUMA topology changed

If the image changes during a rebuild it's possible for the requested
NUMA topology to change. As a rebuild uses a noop claim in the
resource tracker, the NUMA topology will not be updated as part of
a rebuild.

If the NUMA constraints do not change, a rebuild will continue as normal.
If the new constraints conflict with the existing NUMA constraints of the
instance the rebuild will be rejected without altering the status of the
instance.

This change introduces an API check to block rebuild when the NUMA
requirements for the new image do not match the existing NUMA constraints.
This is in line with the previous check introduced to prevent the rebuild of
volume-backed instances which similarly are not supported.

This change adds functional tests to assert the expected behaviour of
rebuilding NUMA instances with new images. This change also asserts that
in-place rebuilds of NUMA instances are currently not supported.

Closes-Bug: #1763766
Partial-implements: blueprint inplace-rebuild-of-numa-instances
Change-Id: I0322d872bdff68936033a6f5a54e8296a6fb3434
This commit is contained in:
Sean Mooney 2019-10-10 18:00:07 +01:00
parent 15c59f1897
commit 6f5358ac19
7 changed files with 320 additions and 5 deletions

View File

@ -67,6 +67,7 @@ INVALID_FLAVOR_IMAGE_EXCEPTIONS = (
exception.ImageNUMATopologyForbidden,
exception.ImageNUMATopologyIncomplete,
exception.ImageNUMATopologyMemoryOutOfRange,
exception.ImageNUMATopologyRebuildConflict,
exception.ImagePMUConflict,
exception.ImageSerialPortNumberExceedFlavorValue,
exception.ImageSerialPortNumberInvalid,

View File

@ -3418,6 +3418,14 @@ class API(base.Base):
self._checks_for_create_and_rebuild(context, image_id, image,
flavor, metadata, files_to_inject, root_bdm)
# NOTE(sean-k-mooney): When we rebuild with a new image we need to
# validate that the NUMA topology does not change as we do a NOOP claim
# in resource tracker. As such we cannot allow the resource usage or
# assignment to change as a result of a new image altering the
# numa constraints.
if orig_image_ref != image_href:
self._validate_numa_rebuild(instance, image, flavor)
kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
context, None, None, image)
@ -3509,6 +3517,48 @@ class API(base.Base):
preserve_ephemeral=preserve_ephemeral, host=host,
request_spec=request_spec)
@staticmethod
def _validate_numa_rebuild(instance, image, flavor):
    """Validate that a rebuild will not change the NUMA constraints.

    :param instance: nova.objects.instance.Instance object
    :param image: the new image the instance will be rebuilt with.
    :param flavor: the flavor of the current instance.
    :returns: True or raises on failure.
    :raises: nova.exception.ImageNUMATopologyRebuildConflict
    """
    # NOTE(sean-k-mooney): currently it is not possible to express
    # a PCI NUMA affinity policy via flavor or image but that will
    # change in the future. The image metadata is pulled into
    # separate variables to make future testing of this easier.
    current_meta = instance.image_meta
    requested_meta = objects.ImageMeta.from_dict(image)
    current_topology = hardware.numa_get_constraints(flavor, current_meta)
    requested_topology = hardware.numa_get_constraints(
        flavor, requested_meta)

    # Neither the current nor the requested image yields a NUMA
    # topology for this flavor, so there is nothing to conflict with.
    if current_topology is None and requested_topology is None:
        # return true for easy unit testing
        return True

    # A topology that appears or disappears across the rebuild is a
    # change in constraints, which the noop claim cannot account for.
    if current_topology is None or requested_topology is None:
        raise exception.ImageNUMATopologyRebuildConflict()

    # Both topologies exist; compare their primitive (dict) forms to
    # detect any change in the constraints.
    before = current_topology.obj_to_primitive()
    after = requested_topology.obj_to_primitive()
    if before != after:
        raise exception.ImageNUMATopologyRebuildConflict()

    # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
    # return true for easy unit testing
    return True
@staticmethod
def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
project_id, user_id = quotas_obj.ids_from_instance(context,

View File

@ -1855,6 +1855,12 @@ class ImageNUMATopologyForbidden(Forbidden):
"NUMA configuration set against the flavor")
# Raised when the image supplied for a rebuild would change the
# instance's NUMA constraints; rebuild performs a noop resource claim
# so the topology must not change.
# NOTE(review): "typology" looks like a typo for "topology", but the
# functional tests assert on this exact message text -- if fixing the
# typo, update the message and those assertions together.
class ImageNUMATopologyRebuildConflict(Invalid):
    msg_fmt = _(
        "An instance's NUMA typology cannot be changed as part of a rebuild. "
        "The image provided is invalid for this instance.")
class ImageNUMATopologyAsymmetric(Invalid):
msg_fmt = _("Instance CPUs and/or memory cannot be evenly distributed "
"across instance NUMA nodes. Explicit assignment of CPUs "

View File

@ -248,7 +248,8 @@ class _IntegratedTestBase(test.TestCase):
return None
self.stub_out('nova.privsep.linux_net.bind_ip', fake_noop)
nova.tests.unit.image.fake.stub_out_image_service(self)
self.fake_image_service =\
nova.tests.unit.image.fake.stub_out_image_service(self)
self.useFixture(cast_as_call.CastAsCall(self))
placement = self.useFixture(func_fixtures.PlacementFixture())

View File

@ -13,26 +13,26 @@
# License for the specific language governing permissions and limitations
# under the License.
import mock
import six
import mock
from oslo_config import cfg
from oslo_log import log as logging
from nova.conf import neutron as neutron_conf
from nova import context as nova_context
from nova import objects
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional.libvirt import base
from nova.tests.unit import fake_notifier
from nova.tests.unit.virt.libvirt import fakelibvirt
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
class NUMAServersTestBase(base.ServersTestBase):
ADDITIONAL_FILTERS = ['NUMATopologyFilter']
def setUp(self):
@ -925,3 +925,127 @@ class NUMAServersWithNetworksTest(NUMAServersTestBase):
network_metadata = args[1].network_metadata
self.assertIsNotNone(network_metadata)
self.assertEqual(set(['foo']), network_metadata.physnets)
class NUMAServersRebuildTests(NUMAServersTestBase):
    """Functional tests for rebuilding servers that have a NUMA topology."""

    def setUp(self):
        """Capture two image refs and stub the versioned notifier."""
        super().setUp()
        images = self.api.get_images()
        # save references to first two image for server create and rebuild
        self.image_ref_0 = images[0]['id']
        self.image_ref_1 = images[1]['id']
        # stub notifications so _rebuild_server can wait for
        # instance.rebuild.end before polling the server state.
        fake_notifier.stub_notifier(self)
        self.addCleanup(fake_notifier.reset)

    def _create_active_server(self, server_args=None):
        """Boot a server and wait for it to reach ACTIVE.

        :param server_args: optional dict merged over the default boot
            request, e.g. to override 'flavorRef'.
        :returns: the server dict once the server is ACTIVE.
        """
        basic_server = {
            'flavorRef': 1,
            'name': 'numa_server',
            'networks': [{
                'uuid': nova_fixtures.NeutronFixture.network_1['id']
            }],
            'imageRef': self.image_ref_0
        }
        if server_args:
            basic_server.update(server_args)
        server = self.api.post_server({'server': basic_server})
        return self._wait_for_state_change(server, 'ACTIVE')

    def _rebuild_server(self, active_server, image_ref):
        """Rebuild an ACTIVE server with the given image and wait for
        the rebuild to complete.

        :param active_server: the server dict to rebuild.
        :param image_ref: id of the image to rebuild with.
        :returns: the server dict once the server is ACTIVE again.
        """
        args = {"rebuild": {"imageRef": image_ref}}
        self.api.api_post(
            'servers/%s/action' % active_server['id'], args)
        fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
        return self._wait_for_state_change(active_server, 'ACTIVE')

    def test_rebuild_server_with_numa(self):
        """Create a NUMA instance and ensure it can be rebuilt."""

        # Create a flavor consuming 2 pinned cpus with an implicit
        # numa topology of 1 virtual numa node.
        extra_spec = {'hw:cpu_policy': 'dedicated'}
        flavor_id = self._create_flavor(extra_spec=extra_spec)

        # Create a host with 4 physical cpus to allow rebuild leveraging
        # the free space to ensure the numa topology filter does not
        # eliminate the host.
        host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
                                         cpu_cores=4, kB_mem=15740000)
        fake_connection = self._get_connection(host_info=host_info)
        self.mock_conn.return_value = fake_connection

        self.compute = self.start_service('compute', host='compute1')

        server = self._create_active_server(
            server_args={"flavorRef": flavor_id})

        # this should succeed as the NUMA topology has not changed
        # and we have enough resources on the host. We rebuild with
        # a different image to force the rebuild to query the scheduler
        # to validate the host.
        self._rebuild_server(server, self.image_ref_1)

    def test_rebuild_server_with_numa_inplace_fails(self):
        """Create a NUMA instance and ensure in place rebuild fails."""

        # Create a flavor consuming 2 pinned cpus with an implicit
        # numa topology of 1 virtual numa node.
        extra_spec = {'hw:cpu_policy': 'dedicated'}
        flavor_id = self._create_flavor(extra_spec=extra_spec)

        # cpu_cores is set to 2 to ensure that
        # we have enough space to boot the vm but not enough space to rebuild
        # by doubling the resource use during scheduling.
        host_info = fakelibvirt.HostInfo(
            cpu_nodes=1, cpu_sockets=1, cpu_cores=2, kB_mem=15740000)
        fake_connection = self._get_connection(host_info=host_info)
        self.mock_conn.return_value = fake_connection
        self.compute = self.start_service('compute', host='compute1')

        server = self._create_active_server(
            server_args={"flavorRef": flavor_id})

        # TODO(sean-k-mooney): this should pass but i currently expect it to
        # fail because the NUMA topology filter does not support in place
        # rebuild and we have used all the resources on the compute node.
        self.assertRaises(
            client.OpenStackApiException, self._rebuild_server,
            server, self.image_ref_1)

    def test_rebuild_server_with_different_numa_topology_fails(self):
        """Create a NUMA instance and ensure inplace rebuild fails."""

        # Create a flavor consuming 2 pinned cpus with an implicit
        # numa topology of 1 virtual numa node.
        extra_spec = {'hw:cpu_policy': 'dedicated'}
        flavor_id = self._create_flavor(extra_spec=extra_spec)

        host_info = fakelibvirt.HostInfo(
            cpu_nodes=2, cpu_sockets=1, cpu_cores=4, kB_mem=15740000)
        fake_connection = self._get_connection(host_info=host_info)
        self.mock_conn.return_value = fake_connection
        self.compute = self.start_service('compute', host='compute1')

        server = self._create_active_server(
            server_args={"flavorRef": flavor_id})

        # the original vm had an implicit numa topology of 1 virtual numa nodes
        # so we alter the requested numa topology in image_ref_1 to request
        # 2 virtual numa nodes.
        ctx = nova_context.get_admin_context()
        image_meta = {'properties': {'hw_numa_nodes': 2}}
        self.fake_image_service.update(ctx, self.image_ref_1, image_meta)

        # NOTE(sean-k-mooney): this should fail because rebuild uses noop
        # claims therefor it is not allowed for the numa topology or resource
        # usage to change during a rebuild.
        ex = self.assertRaises(
            client.OpenStackApiException, self._rebuild_server,
            server, self.image_ref_1)
        self.assertEqual(400, ex.response.status_code)
        self.assertIn("An instance's NUMA typology cannot be changed",
                      six.text_type(ex))

View File

@ -6251,6 +6251,123 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
self.assertEqual(['context-for-%s' % c for c in compute_api.CELLS],
cells)
def test__validate_numa_rebuild_non_numa(self):
    """Assert that rebuilding an instance which has no NUMA topology
    passes the rebuild NUMA validation.
    """
    non_numa_flavor = objects.Flavor(
        id=42, vcpus=1, memory_mb=512, root_gb=1, extra_specs={})
    server = self._create_instance_obj(flavor=non_numa_flavor)
    # A plain dict stands in for image metadata here since
    # _validate_numa_rebuild constructs the ImageMeta object itself.
    rebuild_image = {
        'id': uuids.image_id, 'status': 'foo',
        'properties': {}}
    # No NUMA constraints exist before or after the rebuild, so
    # validation must not raise.
    self.compute_api._validate_numa_rebuild(
        server, rebuild_image, non_numa_flavor)
def test__validate_numa_rebuild_no_conflict(self):
    """Assert that a rebuild which leaves the NUMA constraints
    untouched passes validation.
    """
    numa_flavor = objects.Flavor(
        id=42, vcpus=1, memory_mb=512, root_gb=1,
        extra_specs={"hw:numa_nodes": 1})
    server = self._create_instance_obj(flavor=numa_flavor)
    # A plain dict stands in for image metadata here since
    # _validate_numa_rebuild constructs the ImageMeta object itself.
    rebuild_image = {
        'id': uuids.image_id, 'status': 'foo',
        'properties': {}}
    # The NUMA topology comes entirely from the flavor; neither the
    # default image nor the rebuild image contributes any properties,
    # so the constraints are unchanged and nothing should raise.
    self.compute_api._validate_numa_rebuild(
        server, rebuild_image, numa_flavor)
def test__validate_numa_rebuild_add_numa_toplogy(self):
    """Assert that rebuilding with an image that introduces a NUMA
    topology where the original instance had none is rejected.
    """
    plain_flavor = objects.Flavor(
        id=42, vcpus=1, memory_mb=512, root_gb=1, extra_specs={})
    server = self._create_instance_obj(flavor=plain_flavor)
    # A plain dict stands in for image metadata here since
    # _validate_numa_rebuild constructs the ImageMeta object itself.
    rebuild_image = {
        'id': uuids.image_id, 'status': 'foo',
        'properties': {"hw_numa_nodes": 1}}
    # The flavor and the default image define no NUMA topology, but
    # the rebuild image requests one; that would alter the NUMA
    # constraints and therefore must be rejected.
    self.assertRaises(
        exception.ImageNUMATopologyRebuildConflict,
        self.compute_api._validate_numa_rebuild,
        server, rebuild_image, plain_flavor)
def test__validate_numa_rebuild_remove_numa_toplogy(self):
    """Assert that rebuilding with an image that drops the NUMA
    topology requested by the original image is rejected.
    """
    plain_flavor = objects.Flavor(
        id=42, vcpus=1, memory_mb=512, root_gb=1, extra_specs={})
    server = self._create_instance_obj(flavor=plain_flavor)
    numa_image_meta = objects.ImageMeta.from_dict({
        'id': uuidutils.generate_uuid(), 'status': 'foo',
        'properties': {"hw_numa_nodes": 1}})
    # A plain dict stands in for image metadata here since
    # _validate_numa_rebuild constructs the ImageMeta object itself.
    rebuild_image = {
        'id': uuidutils.generate_uuid(), 'status': 'foo',
        'properties': {}}
    patcher = mock.patch(
        'nova.objects.instance.Instance.image_meta',
        new_callable=mock.PropertyMock(return_value=numa_image_meta))
    with patcher:
        # The original image defined a NUMA topology but the rebuild
        # image does not; dropping it would alter the NUMA constraints
        # and therefore must be rejected.
        self.assertRaises(
            exception.ImageNUMATopologyRebuildConflict,
            self.compute_api._validate_numa_rebuild,
            server, rebuild_image, plain_flavor)
def test__validate_numa_rebuild_alter_numa_toplogy(self):
    """Assert that rebuilding with an image that requests a different
    NUMA topology than the original image is rejected.
    """
    # NOTE(sean-k-mooney): 2 vcpus are required here; with a single
    # vcpu the two-node rebuild image would instead raise
    # ImageNUMATopologyAsymmetric while the NUMA constraints are
    # being constructed, as the resulting topology would be invalid.
    two_cpu_flavor = objects.Flavor(
        id=42, vcpus=2, memory_mb=512, root_gb=1, extra_specs={})
    server = self._create_instance_obj(flavor=two_cpu_flavor)
    one_node_meta = objects.ImageMeta.from_dict({
        'id': uuidutils.generate_uuid(), 'status': 'foo',
        'properties': {"hw_numa_nodes": 1}})
    # A plain dict stands in for image metadata here since
    # _validate_numa_rebuild constructs the ImageMeta object itself.
    two_node_image = {
        'id': uuidutils.generate_uuid(), 'status': 'foo',
        'properties': {"hw_numa_nodes": 2}}
    with mock.patch(
            'nova.objects.instance.Instance.image_meta',
            new_callable=mock.PropertyMock(return_value=one_node_meta)):
        # The original image requested 1 NUMA node and the rebuild
        # image requests 2, so validation must raise.
        self.assertRaises(
            exception.ImageNUMATopologyRebuildConflict,
            self.compute_api._validate_numa_rebuild,
            server, two_node_image, two_cpu_flavor)
@mock.patch('nova.pci.request.get_pci_requests_from_flavor')
def test_pmu_image_and_flavor_conflict(self, mock_request):
"""Tests that calling _validate_flavor_image_nostatus()

View File

@ -0,0 +1,16 @@
---
fixes:
- |
An instance can be rebuilt in-place with the original image or a new
image. Instance resource usage cannot be altered during a rebuild.
Previously Nova would have ignored the NUMA topology of the new image
continuing to use the NUMA topology of the existing instance until a move
operation was performed. As Nova did not explicitly guard against
inadvertent changes in the resource request contained in a new image,
it was possible to rebuild with an image that violated this requirement
(`bug #1763766`_). This resulted in an inconsistent state as the instance that
was running did not match the instance that was requested. Nova now explicitly
checks if a rebuild would alter the requested NUMA topology of an instance
and rejects the rebuild.
.. _`bug #1763766`: https://bugs.launchpad.net/nova/+bug/1763766