Browse Source

Merge "Block rebuild when NUMA topology changed" into stable/stein

tags/19.1.0
Zuul Gerrit Code Review 3 weeks ago
parent
commit
1041388afd
7 changed files with 320 additions and 4 deletions
  1. +1
    -0
      nova/api/openstack/compute/servers.py
  2. +50
    -0
      nova/compute/api.py
  3. +6
    -0
      nova/exception.py
  4. +2
    -1
      nova/tests/functional/integrated_helpers.py
  5. +128
    -3
      nova/tests/functional/libvirt/test_numa_servers.py
  6. +117
    -0
      nova/tests/unit/compute/test_compute_api.py
  7. +16
    -0
      releasenotes/notes/numa-rebuild-b75f9a1966f576ea.yaml

+ 1
- 0
nova/api/openstack/compute/servers.py View File

@@ -68,6 +68,7 @@ INVALID_FLAVOR_IMAGE_EXCEPTIONS = (
exception.ImageNUMATopologyForbidden,
exception.ImageNUMATopologyIncomplete,
exception.ImageNUMATopologyMemoryOutOfRange,
exception.ImageNUMATopologyRebuildConflict,
exception.ImageSerialPortNumberExceedFlavorValue,
exception.ImageSerialPortNumberInvalid,
exception.ImageVCPULimitsRangeExceeded,


+ 50
- 0
nova/compute/api.py View File

@@ -3398,6 +3398,14 @@ class API(base.Base):
self._checks_for_create_and_rebuild(context, image_id, image,
flavor, metadata, files_to_inject, root_bdm)

# NOTE(sean-k-mooney): When we rebuild with a new image we need to
# validate that the NUMA topology does not change as we do a NOOP claim
# in resource tracker. As such we cannot allow the resource usage or
# assignment to change as a result of a new image altering the
# numa constraints.
if orig_image_ref != image_href:
self._validate_numa_rebuild(instance, image, flavor)

kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
context, None, None, image)

@@ -3490,6 +3498,48 @@ class API(base.Base):
request_spec=request_spec,
kwargs=kwargs)

@staticmethod
def _validate_numa_rebuild(instance, image, flavor):
"""validates that the NUMA constraints do not change on rebuild.

:param instance: nova.objects.instance.Instance object
:param image: the new image the instance will be rebuilt with.
:param flavor: the flavor of the current instance.

:returns: True or raises on failure.
:raises: nova.exception.ImageNUMATopologyRebuildConflict
"""

# NOTE(sean-k-mooney): currently it is not possible to express
# a PCI NUMA affinity policy via flavor or image but that will
# change in the future. we pull out the image metadata into
# separate variable to make future testing of this easier.
old_image_meta = instance.image_meta
new_image_meta = objects.ImageMeta.from_dict(image)
old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)

# early out for non NUMA instances
if old_constraints is None and new_constraints is None:
# return true for easy unit testing
return True

# if only one of the constrains are non-None (or 'set') then the
# constraints changed so raise an exception.
if old_constraints is None or new_constraints is None:
raise exception.ImageNUMATopologyRebuildConflict()

# otherwise since both the old a new constrains are non none compare
# them as dictionaries.
old = old_constraints.obj_to_primitive()
new = new_constraints.obj_to_primitive()
if old != new:
raise exception.ImageNUMATopologyRebuildConflict()
# TODO(sean-k-mooney): add PCI NUMA affinity policy check.

# return true for easy unit testing
return True

@staticmethod
def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
project_id, user_id = quotas_obj.ids_from_instance(context,


+ 6
- 0
nova/exception.py View File

@@ -1936,6 +1936,12 @@ class ImageNUMATopologyForbidden(Forbidden):
"NUMA configuration set against the flavor")


class ImageNUMATopologyRebuildConflict(Invalid):
msg_fmt = _(
"An instance's NUMA typology cannot be changed as part of a rebuild. "
"The image provided is invalid for this instance.")


class ImageNUMATopologyAsymmetric(Invalid):
msg_fmt = _("Instance CPUs and/or memory cannot be evenly distributed "
"across instance NUMA nodes. Explicit assignment of CPUs "


+ 2
- 1
nova/tests/functional/integrated_helpers.py View File

@@ -91,7 +91,8 @@ class _IntegratedTestBase(test.TestCase):
return None
self.stub_out('nova.privsep.linux_net.bind_ip', fake_noop)

nova.tests.unit.image.fake.stub_out_image_service(self)
self.fake_image_service =\
nova.tests.unit.image.fake.stub_out_image_service(self)

self.useFixture(cast_as_call.CastAsCall(self))
placement = self.useFixture(func_fixtures.PlacementFixture())


+ 128
- 3
nova/tests/functional/libvirt/test_numa_servers.py View File

@@ -13,20 +13,21 @@
# License for the specific language governing permissions and limitations
# under the License.

import mock
import six

import mock
from oslo_config import cfg
from oslo_log import log as logging

from nova.conf import neutron as neutron_conf
from nova import context as nova_context
from nova import objects

from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional.libvirt import base
from nova.tests.unit import fake_notifier
from nova.tests.unit.virt.libvirt import fakelibvirt


CONF = cfg.CONF
LOG = logging.getLogger(__name__)

@@ -362,3 +363,127 @@ class NUMAServersWithNetworksTest(NUMAServersTestBase):
network_metadata = args[1].network_metadata
self.assertIsNotNone(network_metadata)
self.assertEqual(set(['foo']), network_metadata.physnets)


class NUMAServersRebuildTests(NUMAServersTestBase):

def setUp(self):
super(NUMAServersRebuildTests, self).setUp()
images = self.api.get_images()
# save references to first two image for server create and rebuild
self.image_ref_0 = images[0]['id']
self.image_ref_1 = images[1]['id']

fake_notifier.stub_notifier(self)
self.addCleanup(fake_notifier.reset)

def _create_active_server(self, server_args=None):
basic_server = {
'flavorRef': 1,
'name': 'numa_server',
'networks': [{
'uuid': nova_fixtures.NeutronFixture.network_1['id']
}],
'imageRef': self.image_ref_0
}
if server_args:
basic_server.update(server_args)
server = self.api.post_server({'server': basic_server})
return self._wait_for_state_change(server, 'BUILD')

def _rebuild_server(self, active_server, image_ref):
args = {"rebuild": {"imageRef": image_ref}}
self.api.api_post(
'servers/%s/action' % active_server['id'], args)
fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
return self._wait_for_state_change(active_server, 'REBUILD')

def test_rebuild_server_with_numa(self):
"""Create a NUMA instance and ensure it can be rebuilt.
"""

# Create a flavor consuming 2 pinned cpus with an implicit
# numa topology of 1 virtual numa node.
extra_spec = {'hw:cpu_policy': 'dedicated'}
flavor_id = self._create_flavor(extra_spec=extra_spec)

# Create a host with 4 physical cpus to allow rebuild leveraging
# the free space to ensure the numa topology filter does not
# eliminate the host.
host_info = fakelibvirt.NUMAHostInfo(cpu_nodes=1, cpu_sockets=1,
cpu_cores=4, kB_mem=15740000)
fake_connection = self._get_connection(host_info=host_info)
self.mock_conn.return_value = fake_connection
self.compute = self.start_service('compute', host='compute1')

server = self._create_active_server(
server_args={"flavorRef": flavor_id})

# this should succeed as the NUMA topology has not changed
# and we have enough resources on the host. We rebuild with
# a different image to force the rebuild to query the scheduler
# to validate the host.
self._rebuild_server(server, self.image_ref_1)

def test_rebuild_server_with_numa_inplace_fails(self):
"""Create a NUMA instance and ensure in place rebuild fails.
"""

# Create a flavor consuming 2 pinned cpus with an implicit
# numa topology of 1 virtual numa node.
extra_spec = {'hw:cpu_policy': 'dedicated'}
flavor_id = self._create_flavor(extra_spec=extra_spec)

# cpu_cores is set to 2 to ensure that
# we have enough space to boot the vm but not enough space to rebuild
# by doubling the resource use during scheduling.
host_info = fakelibvirt.NUMAHostInfo(
cpu_nodes=1, cpu_sockets=1, cpu_cores=2, kB_mem=15740000)
fake_connection = self._get_connection(host_info=host_info)
self.mock_conn.return_value = fake_connection
self.compute = self.start_service('compute', host='compute1')

server = self._create_active_server(
server_args={"flavorRef": flavor_id})

# TODO(sean-k-mooney): this should pass but i currently expect it to
# fail because the NUMA topology filter does not support in place
# rebuild and we have used all the resources on the compute node.
self.assertRaises(
client.OpenStackApiException, self._rebuild_server,
server, self.image_ref_1)

def test_rebuild_server_with_different_numa_topology_fails(self):
"""Create a NUMA instance and ensure inplace rebuild fails.
"""

# Create a flavor consuming 2 pinned cpus with an implicit
# numa topology of 1 virtual numa node.
extra_spec = {'hw:cpu_policy': 'dedicated'}
flavor_id = self._create_flavor(extra_spec=extra_spec)

host_info = fakelibvirt.NUMAHostInfo(
cpu_nodes=2, cpu_sockets=1, cpu_cores=4, kB_mem=15740000)
fake_connection = self._get_connection(host_info=host_info)
self.mock_conn.return_value = fake_connection
self.compute = self.start_service('compute', host='compute1')

server = self._create_active_server(
server_args={"flavorRef": flavor_id})

# the original vm had an implicit numa topology of 1 virtual numa nodes
# so we alter the requested numa topology in image_ref_1 to request
# 2 virtual numa nodes.
ctx = nova_context.get_admin_context()
image_meta = {'properties': {'hw_numa_nodes': 2}}
self.fake_image_service.update(ctx, self.image_ref_1, image_meta)

# NOTE(sean-k-mooney): this should fail because rebuild uses noop
# claims therefor it is not allowed for the numa topology or resource
# usage to change during a rebuild.
ex = self.assertRaises(
client.OpenStackApiException, self._rebuild_server,
server, self.image_ref_1)
self.assertEqual(400, ex.response.status_code)
self.assertIn("An instance's NUMA typology cannot be changed",
six.text_type(ex))

+ 117
- 0
nova/tests/unit/compute/test_compute_api.py View File

@@ -6539,6 +6539,123 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
self.assertEqual(['context-for-%s' % c for c in compute_api.CELLS],
cells)

def test__validate_numa_rebuild_non_numa(self):
"""This test asserts that a rebuild of an instance without a NUMA
topology passes validation.
"""
flavor = objects.Flavor(
id=42, vcpus=1, memory_mb=512, root_gb=1, extra_specs={})
instance = self._create_instance_obj(flavor=flavor)
# we use a dict instead of image metadata object as
# _validate_numa_rebuild constructs the object internally
image = {
'id': uuids.image_id, 'status': 'foo',
'properties': {}}
self.compute_api._validate_numa_rebuild(instance, image, flavor)

def test__validate_numa_rebuild_no_conflict(self):
"""This test asserts that a rebuild of an instance without a change
in NUMA topology passes validation.
"""
flavor = objects.Flavor(
id=42, vcpus=1, memory_mb=512, root_gb=1,
extra_specs={"hw:numa_nodes": 1})
instance = self._create_instance_obj(flavor=flavor)
# we use a dict instead of image metadata object as
# _validate_numa_rebuild constructs the object internally
image = {
'id': uuids.image_id, 'status': 'foo',
'properties': {}}
# The flavor creates a NUMA topology but the default image and the
# rebuild image do not have any image properties so there will
# be no conflict.
self.compute_api._validate_numa_rebuild(instance, image, flavor)

def test__validate_numa_rebuild_add_numa_toplogy(self):
"""This test asserts that a rebuild of an instance with a new image
that requests a NUMA topology when the original instance did not
have a NUMA topology is invalid.
"""

flavor = objects.Flavor(
id=42, vcpus=1, memory_mb=512, root_gb=1,
extra_specs={})
instance = self._create_instance_obj(flavor=flavor)
# we use a dict instead of image metadata object as
# _validate_numa_rebuild constructs the object internally
image = {
'id': uuids.image_id, 'status': 'foo',
'properties': {"hw_numa_nodes": 1}}
# The flavor and default image have no NUMA topology defined. The image
# used to rebuild requests a NUMA topology which is not allowed as it
# would alter the NUMA constrains.
self.assertRaises(
exception.ImageNUMATopologyRebuildConflict,
self.compute_api._validate_numa_rebuild, instance, image, flavor)

def test__validate_numa_rebuild_remove_numa_toplogy(self):
"""This test asserts that a rebuild of an instance with a new image
that does not request a NUMA topology when the original image did
is invalid if it would alter the instances topology as a result.
"""

flavor = objects.Flavor(
id=42, vcpus=1, memory_mb=512, root_gb=1,
extra_specs={})
instance = self._create_instance_obj(flavor=flavor)
# we use a dict instead of image metadata object as
# _validate_numa_rebuild constructs the object internally
old_image = {
'id': uuidutils.generate_uuid(), 'status': 'foo',
'properties': {"hw_numa_nodes": 1}}
old_image_meta = objects.ImageMeta.from_dict(old_image)
image = {
'id': uuidutils.generate_uuid(), 'status': 'foo',
'properties': {}}
with mock.patch(
'nova.objects.instance.Instance.image_meta',
new_callable=mock.PropertyMock(return_value=old_image_meta)):
# The old image has a NUMA topology defined but the new image
# used to rebuild does not. This would alter the NUMA constrains
# and therefor should raise.
self.assertRaises(
exception.ImageNUMATopologyRebuildConflict,
self.compute_api._validate_numa_rebuild, instance,
image, flavor)

def test__validate_numa_rebuild_alter_numa_toplogy(self):
"""This test asserts that a rebuild of an instance with a new image
that requests a different NUMA topology than the original image
is invalid.
"""

# NOTE(sean-k-mooney): we need to use 2 vcpus here or we will fail
# with a different exception ImageNUMATopologyAsymmetric when we
# construct the NUMA constrains as the rebuild image would result
# in an invalid topology.
flavor = objects.Flavor(
id=42, vcpus=2, memory_mb=512, root_gb=1,
extra_specs={})
instance = self._create_instance_obj(flavor=flavor)
# we use a dict instead of image metadata object as
# _validate_numa_rebuild constructs the object internally
old_image = {
'id': uuidutils.generate_uuid(), 'status': 'foo',
'properties': {"hw_numa_nodes": 1}}
old_image_meta = objects.ImageMeta.from_dict(old_image)
image = {
'id': uuidutils.generate_uuid(), 'status': 'foo',
'properties': {"hw_numa_nodes": 2}}
with mock.patch(
'nova.objects.instance.Instance.image_meta',
new_callable=mock.PropertyMock(return_value=old_image_meta)):
# the original image requested 1 NUMA node and the image used
# for rebuild requests 2 so assert an error is raised.
self.assertRaises(
exception.ImageNUMATopologyRebuildConflict,
self.compute_api._validate_numa_rebuild, instance,
image, flavor)

@mock.patch('nova.pci.request.get_pci_requests_from_flavor')
def test_pci_validated(self, mock_request):
"""Tests that calling _validate_flavor_image_nostatus() with


+ 16
- 0
releasenotes/notes/numa-rebuild-b75f9a1966f576ea.yaml View File

@@ -0,0 +1,16 @@
---
fixes:
- |
An instance can be rebuilt in-place with the original image or a new
image. Instance resource usage cannot be altered during a rebuild.
Previously Nova would have ignored the NUMA topology of the new image
continuing to use the NUMA topology of the existing instance until a move
operation was performed. As Nova did not explicitly guard against
inadvertent changes in resource request contained in a new image,
it was possible to rebuild with an image that would violate this requirement
`bug #1763766`_. This resulted in an inconsistent state as the instance that
was running did not match the instance that was requested. Nova now explicitly
checks if a rebuild would alter the requested NUMA topology of an instance
and rejects the rebuild.

.. _`bug #1763766`: https://bugs.launchpad.net/nova/+bug/1763766

Loading…
Cancel
Save