diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py
index b7dae423665d..4ae1e160e6f4 100644
--- a/nova/api/openstack/compute/servers.py
+++ b/nova/api/openstack/compute/servers.py
@@ -67,6 +67,7 @@ INVALID_FLAVOR_IMAGE_EXCEPTIONS = (
     exception.ImageNUMATopologyForbidden,
     exception.ImageNUMATopologyIncomplete,
     exception.ImageNUMATopologyMemoryOutOfRange,
+    exception.ImageNUMATopologyRebuildConflict,
     exception.ImagePMUConflict,
     exception.ImageSerialPortNumberExceedFlavorValue,
     exception.ImageSerialPortNumberInvalid,
diff --git a/nova/compute/api.py b/nova/compute/api.py
index 0a50898ae9b2..c3033a050eb5 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -3299,6 +3299,14 @@ class API(base.Base):
         self._checks_for_create_and_rebuild(context, image_id, image,
                                             flavor, metadata,
                                             files_to_inject, root_bdm)
 
+        # NOTE(sean-k-mooney): When we rebuild with a new image we need to
+        # validate that the NUMA topology does not change as we do a NOOP
+        # claim in the resource tracker. As such we cannot allow the resource
+        # usage or assignment to change as a result of a new image altering
+        # the NUMA constraints.
+        if orig_image_ref != image_href:
+            self._validate_numa_rebuild(instance, image, flavor)
+
         kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
             context, None, None, image)
@@ -3390,6 +3398,48 @@ class API(base.Base):
             preserve_ephemeral=preserve_ephemeral, host=host,
             request_spec=request_spec)
 
+    @staticmethod
+    def _validate_numa_rebuild(instance, image, flavor):
+        """Validates that the NUMA constraints do not change on rebuild.
+
+        :param instance: nova.objects.instance.Instance object
+        :param image: the new image the instance will be rebuilt with.
+        :param flavor: the flavor of the current instance.
+
+        :returns: True or raises on failure.
+        :raises: nova.exception.ImageNUMATopologyRebuildConflict
+        """
+
+        # NOTE(sean-k-mooney): currently it is not possible to express
+        # a PCI NUMA affinity policy via flavor or image but that will
+        # change in the future. We pull out the image metadata into
+        # a separate variable to make future testing of this easier.
+        old_image_meta = instance.image_meta
+        new_image_meta = objects.ImageMeta.from_dict(image)
+        old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
+        new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)
+
+        # Early out for non-NUMA instances.
+        if old_constraints is None and new_constraints is None:
+            # return True for easy unit testing
+            return True
+
+        # If only one of the constraints is non-None (or 'set') then the
+        # constraints changed, so raise an exception.
+        if old_constraints is None or new_constraints is None:
+            raise exception.ImageNUMATopologyRebuildConflict()
+
+        # Otherwise, since both the old and new constraints are non-None,
+        # compare them as dictionaries.
+        old = old_constraints.obj_to_primitive()
+        new = new_constraints.obj_to_primitive()
+        if old != new:
+            raise exception.ImageNUMATopologyRebuildConflict()
+        # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
+
+        # return True for easy unit testing
+        return True
+
     @staticmethod
     def _check_quota_for_upsize(context, instance, current_flavor,
                                 new_flavor):
         project_id, user_id = quotas_obj.ids_from_instance(context,
diff --git a/nova/exception.py b/nova/exception.py
index df8c68190808..149bbda7f97e 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -1915,6 +1915,12 @@ class ImageNUMATopologyForbidden(Forbidden):
                 "NUMA configuration set against the flavor")
 
 
+class ImageNUMATopologyRebuildConflict(Invalid):
+    msg_fmt = _(
+        "An instance's NUMA topology cannot be changed as part of a rebuild. "
+        "The image provided is invalid for this instance.")
+
+
 class ImageNUMATopologyAsymmetric(Invalid):
     msg_fmt = _("Instance CPUs and/or memory cannot be evenly distributed "
                 "across instance NUMA nodes. Explicit assignment of CPUs "
diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index 7cccc12402bb..2884ce0c1530 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -93,7 +93,8 @@ class _IntegratedTestBase(test.TestCase):
             return None
 
         self.stub_out('nova.privsep.linux_net.bind_ip', fake_noop)
-        nova.tests.unit.image.fake.stub_out_image_service(self)
+        self.fake_image_service = \
+            nova.tests.unit.image.fake.stub_out_image_service(self)
         self.useFixture(cast_as_call.CastAsCall(self))
 
         placement = self.useFixture(func_fixtures.PlacementFixture())
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
index 440680a83c97..f6c3e9a48fb7 100644
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -13,26 +13,26 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+import mock
 import six
-import mock
 
 from oslo_config import cfg
 from oslo_log import log as logging
 
 from nova.conf import neutron as neutron_conf
 from nova import context as nova_context
 from nova import objects
+
+from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional.api import client
 from nova.tests.functional.libvirt import base
+from nova.tests.unit import fake_notifier
 from nova.tests.unit.virt.libvirt import fakelibvirt
-
-
 
 CONF = cfg.CONF
 LOG = logging.getLogger(__name__)
 
 
 class NUMAServersTestBase(base.ServersTestBase):
-
     ADDITIONAL_FILTERS = ['NUMATopologyFilter']
 
     def setUp(self):
@@ -1006,3 +1006,127 @@ class NUMAServersWithNetworksTest(NUMAServersTestBase):
         network_metadata = args[1].network_metadata
         self.assertIsNotNone(network_metadata)
         self.assertEqual(set(['foo']), network_metadata.physnets)
+
+
+class NUMAServersRebuildTests(NUMAServersTestBase):
+
+    def setUp(self):
+        super(NUMAServersRebuildTests, self).setUp()
+        images = self.api.get_images()
+        # Save the first two image references for server create and rebuild.
+        self.image_ref_0 = images[0]['id']
+        self.image_ref_1 = images[1]['id']
+
+        fake_notifier.stub_notifier(self)
+        self.addCleanup(fake_notifier.reset)
+
+    def _create_active_server(self, server_args=None):
+        basic_server = {
+            'flavorRef': 1,
+            'name': 'numa_server',
+            'networks': [{
+                'uuid': nova_fixtures.NeutronFixture.network_1['id']
+            }],
+            'imageRef': self.image_ref_0
+        }
+        if server_args:
+            basic_server.update(server_args)
+        server = self.api.post_server({'server': basic_server})
+        return self._wait_for_state_change(server, 'BUILD')
+
+    def _rebuild_server(self, active_server, image_ref):
+        args = {"rebuild": {"imageRef": image_ref}}
+        self.api.api_post(
+            'servers/%s/action' % active_server['id'], args)
+        fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
+        return self._wait_for_state_change(active_server, 'REBUILD')
+
+    def test_rebuild_server_with_numa(self):
+        """Create a NUMA instance and ensure it can be rebuilt.
+        """
+
+        # Create a flavor consuming 2 pinned cpus with an implicit
+        # numa topology of 1 virtual numa node.
+        extra_spec = {'hw:cpu_policy': 'dedicated'}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        # Create a host with 4 physical cpus to allow rebuild, leveraging
+        # the free space to ensure the numa topology filter does not
+        # eliminate the host.
+        host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
+                                         cpu_cores=4, kB_mem=15740000)
+        fake_connection = self._get_connection(host_info=host_info)
+        self.mock_conn.return_value = fake_connection
+        self.compute = self.start_service('compute', host='compute1')
+
+        server = self._create_active_server(
+            server_args={"flavorRef": flavor_id})
+
+        # This should succeed as the NUMA topology has not changed
+        # and we have enough resources on the host. We rebuild with
+        # a different image to force the rebuild to query the scheduler
+        # to validate the host.
+        self._rebuild_server(server, self.image_ref_1)
+
+    def test_rebuild_server_with_numa_inplace_fails(self):
+        """Create a NUMA instance and ensure an in-place rebuild fails.
+        """
+
+        # Create a flavor consuming 2 pinned cpus with an implicit
+        # numa topology of 1 virtual numa node.
+        extra_spec = {'hw:cpu_policy': 'dedicated'}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        # cpu_cores is set to 2 so that there is enough capacity to boot the
+        # vm but not enough to rebuild it, as rebuild doubles the resource
+        # usage during scheduling.
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=1, cpu_sockets=1, cpu_cores=2, kB_mem=15740000)
+        fake_connection = self._get_connection(host_info=host_info)
+        self.mock_conn.return_value = fake_connection
+        self.compute = self.start_service('compute', host='compute1')
+
+        server = self._create_active_server(
+            server_args={"flavorRef": flavor_id})
+
+        # TODO(sean-k-mooney): this should pass but I currently expect it to
+        # fail because the NUMA topology filter does not support in-place
+        # rebuild and we have used all the resources on the compute node.
+        self.assertRaises(
+            client.OpenStackApiException, self._rebuild_server,
+            server, self.image_ref_1)
+
+    def test_rebuild_server_with_different_numa_topology_fails(self):
+        """Create a NUMA instance and ensure a rebuild that would change
+        its NUMA topology fails.
+        """
+
+        # Create a flavor consuming 2 pinned cpus with an implicit
+        # numa topology of 1 virtual numa node.
+        extra_spec = {'hw:cpu_policy': 'dedicated'}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        host_info = fakelibvirt.HostInfo(
+            cpu_nodes=2, cpu_sockets=1, cpu_cores=4, kB_mem=15740000)
+        fake_connection = self._get_connection(host_info=host_info)
+        self.mock_conn.return_value = fake_connection
+        self.compute = self.start_service('compute', host='compute1')
+
+        server = self._create_active_server(
+            server_args={"flavorRef": flavor_id})
+
+        # The original vm had an implicit numa topology of 1 virtual numa
+        # node, so we alter the requested numa topology in image_ref_1 to
+        # request 2 virtual numa nodes.
+        ctx = nova_context.get_admin_context()
+        image_meta = {'properties': {'hw_numa_nodes': 2}}
+        self.fake_image_service.update(ctx, self.image_ref_1, image_meta)
+
+        # NOTE(sean-k-mooney): this should fail because rebuild uses noop
+        # claims, therefore the numa topology and resource usage are not
+        # allowed to change during a rebuild.
+        ex = self.assertRaises(
+            client.OpenStackApiException, self._rebuild_server,
+            server, self.image_ref_1)
+        self.assertEqual(400, ex.response.status_code)
+        self.assertIn("An instance's NUMA topology cannot be changed",
+                      six.text_type(ex))
diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py
index 488539152abe..f2a95e08eec5 100644
--- a/nova/tests/unit/compute/test_compute_api.py
+++ b/nova/tests/unit/compute/test_compute_api.py
@@ -6224,6 +6224,123 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
         self.assertEqual(['context-for-%s' % c for c in compute_api.CELLS],
                          cells)
 
+    def test__validate_numa_rebuild_non_numa(self):
+        """This test asserts that a rebuild of an instance without a NUMA
+        topology passes validation.
+        """
+        flavor = objects.Flavor(
+            id=42, vcpus=1, memory_mb=512, root_gb=1, extra_specs={})
+        instance = self._create_instance_obj(flavor=flavor)
+        # We use a dict instead of an image metadata object as
+        # _validate_numa_rebuild constructs the object internally.
+        image = {
+            'id': uuids.image_id, 'status': 'foo',
+            'properties': {}}
+        self.compute_api._validate_numa_rebuild(instance, image, flavor)
+
+    def test__validate_numa_rebuild_no_conflict(self):
+        """This test asserts that a rebuild of an instance without a change
+        in NUMA topology passes validation.
+        """
+        flavor = objects.Flavor(
+            id=42, vcpus=1, memory_mb=512, root_gb=1,
+            extra_specs={"hw:numa_nodes": 1})
+        instance = self._create_instance_obj(flavor=flavor)
+        # We use a dict instead of an image metadata object as
+        # _validate_numa_rebuild constructs the object internally.
+        image = {
+            'id': uuids.image_id, 'status': 'foo',
+            'properties': {}}
+        # The flavor creates a NUMA topology but the default image and the
+        # rebuild image do not have any image properties so there will
+        # be no conflict.
+        self.compute_api._validate_numa_rebuild(instance, image, flavor)
+
+    def test__validate_numa_rebuild_add_numa_topology(self):
+        """This test asserts that a rebuild of an instance with a new image
+        that requests a NUMA topology when the original instance did not
+        have a NUMA topology is invalid.
+        """
+
+        flavor = objects.Flavor(
+            id=42, vcpus=1, memory_mb=512, root_gb=1,
+            extra_specs={})
+        instance = self._create_instance_obj(flavor=flavor)
+        # We use a dict instead of an image metadata object as
+        # _validate_numa_rebuild constructs the object internally.
+        image = {
+            'id': uuids.image_id, 'status': 'foo',
+            'properties': {"hw_numa_nodes": 1}}
+        # The flavor and default image have no NUMA topology defined. The
+        # image used to rebuild requests a NUMA topology, which is not
+        # allowed as it would alter the NUMA constraints.
+        self.assertRaises(
+            exception.ImageNUMATopologyRebuildConflict,
+            self.compute_api._validate_numa_rebuild, instance, image, flavor)
+
+    def test__validate_numa_rebuild_remove_numa_topology(self):
+        """This test asserts that a rebuild of an instance with a new image
+        that does not request a NUMA topology when the original image did
+        is invalid if it would alter the instance's topology as a result.
+        """
+
+        flavor = objects.Flavor(
+            id=42, vcpus=1, memory_mb=512, root_gb=1,
+            extra_specs={})
+        instance = self._create_instance_obj(flavor=flavor)
+        # We use a dict instead of an image metadata object as
+        # _validate_numa_rebuild constructs the object internally.
+        old_image = {
+            'id': uuidutils.generate_uuid(), 'status': 'foo',
+            'properties': {"hw_numa_nodes": 1}}
+        old_image_meta = objects.ImageMeta.from_dict(old_image)
+        image = {
+            'id': uuidutils.generate_uuid(), 'status': 'foo',
+            'properties': {}}
+        with mock.patch(
+                'nova.objects.instance.Instance.image_meta',
+                new_callable=mock.PropertyMock(return_value=old_image_meta)):
+            # The old image has a NUMA topology defined but the new image
+            # used to rebuild does not. This would alter the NUMA constraints
+            # and therefore should raise.
+            self.assertRaises(
+                exception.ImageNUMATopologyRebuildConflict,
+                self.compute_api._validate_numa_rebuild, instance,
+                image, flavor)
+
+    def test__validate_numa_rebuild_alter_numa_topology(self):
+        """This test asserts that a rebuild of an instance with a new image
+        that requests a different NUMA topology than the original image
+        is invalid.
+        """
+
+        # NOTE(sean-k-mooney): we need to use 2 vcpus here or we will fail
+        # with a different exception, ImageNUMATopologyAsymmetric, when we
+        # construct the NUMA constraints, as the rebuild image would result
+        # in an invalid topology.
+        flavor = objects.Flavor(
+            id=42, vcpus=2, memory_mb=512, root_gb=1,
+            extra_specs={})
+        instance = self._create_instance_obj(flavor=flavor)
+        # We use a dict instead of an image metadata object as
+        # _validate_numa_rebuild constructs the object internally.
+        old_image = {
+            'id': uuidutils.generate_uuid(), 'status': 'foo',
+            'properties': {"hw_numa_nodes": 1}}
+        old_image_meta = objects.ImageMeta.from_dict(old_image)
+        image = {
+            'id': uuidutils.generate_uuid(), 'status': 'foo',
+            'properties': {"hw_numa_nodes": 2}}
+        with mock.patch(
+                'nova.objects.instance.Instance.image_meta',
+                new_callable=mock.PropertyMock(return_value=old_image_meta)):
+            # The original image requested 1 NUMA node and the image used
+            # for rebuild requests 2, so assert an error is raised.
+            self.assertRaises(
+                exception.ImageNUMATopologyRebuildConflict,
+                self.compute_api._validate_numa_rebuild, instance,
+                image, flavor)
+
     @mock.patch('nova.pci.request.get_pci_requests_from_flavor')
     def test_pmu_image_and_flavor_conflict(self, mock_request):
         """Tests that calling _validate_flavor_image_nostatus()
diff --git a/releasenotes/notes/numa-rebuild-b75f9a1966f576ea.yaml b/releasenotes/notes/numa-rebuild-b75f9a1966f576ea.yaml
new file mode 100644
index 000000000000..99777e4df6bf
--- /dev/null
+++ b/releasenotes/notes/numa-rebuild-b75f9a1966f576ea.yaml
@@ -0,0 +1,16 @@
+---
+fixes:
+  - |
+    An instance can be rebuilt in-place with the original image or a new
+    image. Instance resource usage cannot be altered during a rebuild.
+    Previously Nova ignored the NUMA topology of the new image, continuing
+    to use the NUMA topology of the existing instance until a move operation
+    was performed. As Nova did not explicitly guard against inadvertent
+    changes to the resource request contained in a new image, it was
+    possible to rebuild with an image that would violate this requirement
+    (`bug #1763766`_). This resulted in an inconsistent state, as the
+    running instance did not match the instance that was requested. Nova
+    now explicitly checks whether a rebuild would alter the requested NUMA
+    topology of an instance and rejects such a rebuild.
+
+    .. _`bug #1763766`: https://bugs.launchpad.net/nova/+bug/1763766
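
Illustration (not part of the patch): the sketch below mirrors the comparison that _validate_numa_rebuild performs, to show why a rebuild image that changes hw_numa_nodes is now rejected with a 400. The flavor and image values are invented for the example; Flavor, ImageMeta and hardware.numa_get_constraints are the nova APIs used by the change above.

# Illustrative sketch only, assuming an existing nova environment on the
# import path. It reproduces the old/new constraint comparison made by the
# new _validate_numa_rebuild helper; the concrete flavor and image values
# are made up for the example.
from nova import objects
from nova.virt import hardware

objects.register_all()

flavor = objects.Flavor(
    id=1, vcpus=2, memory_mb=2048, root_gb=10,
    extra_specs={'hw:cpu_policy': 'dedicated'})

# Image metadata the instance was originally booted with.
old_image_meta = objects.ImageMeta.from_dict({'properties': {}})
# Image metadata of the image supplied in the rebuild request.
new_image_meta = objects.ImageMeta.from_dict(
    {'properties': {'hw_numa_nodes': 2}})

old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)

# Both constraints are non-None because hw:cpu_policy=dedicated implies an
# implicit single NUMA node topology, but their primitives differ since the
# rebuild image asks for 2 NUMA nodes, so _validate_numa_rebuild would raise
# ImageNUMATopologyRebuildConflict and the API would return a 400.
changed = (old_constraints.obj_to_primitive() !=
           new_constraints.obj_to_primitive())
print('rebuild would be rejected: %s' % changed)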