Merge "Fail to live migration if instance has a NUMA topology" into stable/queens
commit 5d349060f5

@@ -20,6 +20,8 @@ vital. In these cases, instances can be expected to deliver near-native
 performance. The Compute service provides features to improve individual
 instance for these kind of workloads.

+.. include:: /common/numa-live-migration-warning.txt
+
 .. toctree::
    :maxdepth: 2


@@ -19,7 +19,8 @@ This document covers live migrations using the
 .. note::

    Not all Compute service hypervisor drivers support live-migration, or
-   support all live-migration features.
+   support all live-migration features. Similarly not all compute service
+   features are supported.

    Consult :doc:`/user/support-matrix` to determine which hypervisors
    support live-migration.

@@ -7,6 +7,8 @@ control over how instances run on hypervisor CPUs and the topology of virtual
 CPUs available to instances. These features help minimize latency and maximize
 performance.

+.. include:: /common/numa-live-migration-warning.txt
+
 SMP, NUMA, and SMT
 ~~~~~~~~~~~~~~~~~~


@@ -0,0 +1,10 @@
+.. important::
+
+   Unless :oslo.config:option:`specifically enabled
+   <workarounds.enable_numa_live_migration>`, live migration is not currently
+   possible for instances with a NUMA topology when using the libvirt driver.
+   A NUMA topology may be specified explicitly or can be added implicitly due
+   to the use of CPU pinning or huge pages. Refer to `bug #1289064`__ for more
+   information.
+
+__ https://bugs.launchpad.net/nova/+bug/1289064

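For context, an instance picks up a NUMA topology implicitly when its flavor or image requests CPU pinning or huge pages. A minimal sketch of flavor extra specs that would do this (the keys are standard Nova extra specs; the values are illustrative only):

    hw:cpu_policy=dedicated
    hw:mem_page_size=large

Either property on its own is enough to give the instance a NUMA topology, and therefore to make it subject to the conductor check added by this change.
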
@@ -65,6 +65,8 @@ create a particular service. It is common for this workloads needing
 bare metal like performance, i.e. low latency and close to line speed
 performance.

+.. include:: /common/numa-live-migration-warning.txt
+
 .. feature_matrix:: feature-matrix-nfv.ini

 .. _matrix-hpc:

@@ -100,7 +100,11 @@ class MigrateServerController(wsgi.Controller):
             disk_over_commit = strutils.bool_from_string(disk_over_commit,
                                                          strict=True)

-        instance = common.get_instance(self.compute_api, context, id)
+        # NOTE(stephenfin): we need 'numa_topology' because of the
+        # 'LiveMigrationTask._check_instance_has_no_numa' check in the
+        # conductor
+        instance = common.get_instance(self.compute_api, context, id,
+                                       expected_attrs=['numa_topology'])
         try:
             self.compute_api.live_migrate(context, instance, block_migration,
                                           disk_over_commit, host, force, async)

@@ -22,6 +22,7 @@ import nova.conf
 from nova import exception
 from nova.i18n import _
 from nova import objects
+from nova.objects import fields as obj_fields
 from nova.scheduler import utils as scheduler_utils
 from nova import utils


@@ -56,6 +57,7 @@ class LiveMigrationTask(base.TaskBase):

     def _execute(self):
         self._check_instance_is_active()
+        self._check_instance_has_no_numa()
         self._check_host_is_up(self.source)

         if should_do_migration_allocation(self.context):

@@ -141,6 +143,30 @@ class LiveMigrationTask(base.TaskBase):
                 state=self.instance.power_state,
                 method='live migrate')

+    def _check_instance_has_no_numa(self):
+        """Prevent live migrations of instances with NUMA topologies."""
+        if not self.instance.numa_topology:
+            return
+
+        # Only KVM (libvirt) supports NUMA topologies with CPU pinning;
+        # HyperV's vNUMA feature doesn't allow specific pinning
+        hypervisor_type = objects.ComputeNode.get_by_host_and_nodename(
+            self.context, self.source, self.instance.node).hypervisor_type
+        if hypervisor_type != obj_fields.HVType.KVM:
+            return
+
+        msg = ('Instance has an associated NUMA topology. '
+               'Instance NUMA topologies, including related attributes '
+               'such as CPU pinning, huge page and emulator thread '
+               'pinning information, are not currently recalculated on '
+               'live migration. See bug #1289064 for more information.'
+               )
+
+        if CONF.workarounds.enable_numa_live_migration:
+            LOG.warning(msg, instance=self.instance)
+        else:
+            raise exception.MigrationPreCheckError(reason=msg)
+
     def _check_host_is_up(self, host):
         service = objects.Service.get_by_compute_host(self.context, host)


@@ -176,6 +176,32 @@ Related options:
 * ``compute_driver`` (libvirt)
 * ``[libvirt]/images_type`` (rbd)
 * ``instances_path``
 """),
+
+    cfg.BoolOpt(
+        'enable_numa_live_migration',
+        default=False,
+        help="""
+Enable live migration of instances with NUMA topologies.
+
+Live migration of instances with NUMA topologies is disabled by default
+when using the libvirt driver. This includes live migration of instances with
+CPU pinning or hugepages. CPU pinning and huge page information for such
+instances is not currently re-calculated, as noted in bug #1289064. This
+means that if instances were already present on the destination host, the
+migrated instance could be placed on the same dedicated cores as these
+instances or use hugepages allocated for another instance. Alternately, if the
+host platforms were not homogeneous, the instance could be assigned to
+non-existent cores or be inadvertently split across host NUMA nodes.
+
+Despite these known issues, there may be cases where live migration is
+necessary. By enabling this option, operators that are aware of the issues and
+are willing to manually work around them can enable live migration support for
+these instances.
+
+Related options:
+
+* ``compute_driver``: Only the libvirt driver is affected.
+"""),
 ]


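For operators who accept the risks described in the option's help text, a minimal sketch of how the workaround would be enabled in ``nova.conf`` (the file location and any surrounding options are assumptions):

    [workarounds]
    # Allow live migration of instances with a NUMA topology despite the
    # limitations tracked in bug #1289064; only the libvirt driver is affected.
    enable_numa_live_migration = True
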
@@ -30,27 +30,36 @@ class CommonMixin(object):
         self.req = fakes.HTTPRequest.blank('')
         self.context = self.req.environ['nova.context']

-    def _stub_instance_get(self, uuid=None):
-        if uuid is None:
+    def _stub_instance_get(self, action=None, uuid=None):
+        if not uuid:
             uuid = uuidutils.generate_uuid()
         instance = fake_instance.fake_instance_obj(self.context,
                 id=1, uuid=uuid, vm_state=vm_states.ACTIVE,
                 task_state=None, launched_at=timeutils.utcnow())
+
+        expected_attrs = None
+        if action == '_migrate_live':
+            expected_attrs = ['numa_topology']
+
         self.compute_api.get(
-            self.context, uuid, expected_attrs=None).AndReturn(instance)
+            self.context, uuid, expected_attrs=expected_attrs).AndReturn(
+                instance)
+
         return instance

-    def _stub_instance_get_failure(self, exc_info, uuid=None):
-        if uuid is None:
-            uuid = uuidutils.generate_uuid()
+    def _stub_instance_get_failure(self, action, exc_info, uuid):
+        expected_attrs = None
+        if action == '_migrate_live':
+            expected_attrs = ['numa_topology']
+
         self.compute_api.get(
-            self.context, uuid, expected_attrs=None).AndRaise(exc_info)
-        return uuid
+            self.context, uuid, expected_attrs=expected_attrs).AndRaise(
+                exc_info)

     def _test_non_existing_instance(self, action, body_map=None):
         uuid = uuidutils.generate_uuid()
         self._stub_instance_get_failure(
-            exception.InstanceNotFound(instance_id=uuid), uuid=uuid)
+            action, exception.InstanceNotFound(instance_id=uuid), uuid=uuid)

         self.mox.ReplayAll()
         controller_function = getattr(self.controller, action)

@@ -68,7 +77,7 @@ class CommonMixin(object):
         method = action.replace('_', '')
         compute_api_args_map = compute_api_args_map or {}

-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get(action)
         args, kwargs = compute_api_args_map.get(action, ((), {}))
         getattr(self.compute_api, method)(self.context, instance, *args,
                                           **kwargs)

@@ -92,7 +101,7 @@ class CommonMixin(object):
         if method is None:
             method = action.replace('_', '')

-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get(action)
         body = {}
         compute_api_args_map = {}
         args, kwargs = compute_api_args_map.get(action, ((), {}))

@@ -120,7 +129,7 @@ class CommonMixin(object):
         if compute_api_args_map is None:
             compute_api_args_map = {}

-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get(action)

         args, kwargs = compute_api_args_map.get(action, ((), {}))


@@ -150,7 +159,7 @@ class CommonMixin(object):
         method = action.replace('_', '')

         compute_api_args_map = compute_api_args_map or {}
-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get(action)

         args, kwargs = compute_api_args_map.get(action, ((), {}))
         getattr(self.compute_api, method)(self.context, instance, *args,

@@ -174,7 +183,7 @@ class CommonMixin(object):
         method = action.replace('_', '')
         compute_api_args_map = compute_api_args_map or {}

-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get(action)

         args, kwargs = compute_api_args_map.get(action, ((), {}))
         getattr(self.compute_api, method)(self.context, instance, *args,

@@ -138,7 +138,7 @@ class MigrateServerTestsV21(admin_only_action_common.CommonTests):

     def _test_migrate_live_succeeded(self, param):
         self.mox.StubOutWithMock(self.compute_api, 'live_migrate')
-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get('_migrate_live')
         self.compute_api.live_migrate(self.context, instance, False,
                                       self.disk_over_commit, 'hostname',
                                       self.force, self.async)

@@ -211,7 +211,7 @@ class MigrateServerTestsV21(admin_only_action_common.CommonTests):
                                   check_response=True):
         self.mox.StubOutWithMock(self.compute_api, 'live_migrate')

-        instance = self._stub_instance_get(uuid=uuid)
+        instance = self._stub_instance_get('_migrate_live', uuid=uuid)
         self.compute_api.live_migrate(self.context, instance, False,
                                       self.disk_over_commit,
                                       'hostname', self.force, self.async

@@ -438,7 +438,7 @@ class MigrateServerTestsV234(MigrateServerTestsV230):
             reason="Compute host %(host)s could not be found.",
             host='hostname')
         self.mox.StubOutWithMock(self.compute_api, 'live_migrate')
-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get('_migrate_live')
         self.compute_api.live_migrate(self.context, instance, None,
                                       self.disk_over_commit, 'hostname',
                                       self.force, self.async).AndRaise(exc)

@@ -455,7 +455,7 @@ class MigrateServerTestsV234(MigrateServerTestsV230):
         exc = exception.InvalidHypervisorType(
             reason="The supplied hypervisor type of is invalid.")
         self.mox.StubOutWithMock(self.compute_api, 'live_migrate')
-        instance = self._stub_instance_get()
+        instance = self._stub_instance_get('_migrate_live')
         self.compute_api.live_migrate(self.context, instance, None,
                                       self.disk_over_commit, 'hostname',
                                       self.force, self.async).AndRaise(exc)

@@ -18,6 +18,7 @@ from nova.compute import power_state
 from nova.compute import rpcapi as compute_rpcapi
 from nova.compute import vm_states
 from nova.conductor.tasks import live_migrate
+from nova import context as nova_context
 from nova import exception
 from nova import objects
 from nova.scheduler import client as scheduler_client

@@ -38,7 +39,7 @@ fake_selection2 = objects.Selection(service_host="host2", nodename="node2",
 class LiveMigrationTaskTestCase(test.NoDBTestCase):
     def setUp(self):
         super(LiveMigrationTaskTestCase, self).setUp()
-        self.context = "context"
+        self.context = nova_context.get_admin_context()
         self.instance_host = "host"
         self.instance_uuid = uuids.instance
         self.instance_image = "image_ref"

@@ -52,6 +53,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
         self.instance = objects.Instance._from_db_object(
             self.context, objects.Instance(), db_instance)
         self.instance.system_metadata = {'image_hw_disk_bus': 'scsi'}
+        self.instance.numa_topology = None
         self.destination = "destination"
         self.block_migration = "bm"
         self.disk_over_commit = "doc"

@@ -176,6 +178,45 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
         self.assertRaises(exception.InstanceInvalidState,
                           self.task._check_instance_is_active)

+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    def test_check_instance_has_no_numa_passes_no_numa(self, mock_get):
+        self.flags(enable_numa_live_migration=False, group='workarounds')
+        self.task.instance.numa_topology = None
+        mock_get.return_value = objects.ComputeNode(
+            uuid=uuids.cn1, hypervisor_type='kvm')
+        self.task._check_instance_has_no_numa()
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    def test_check_instance_has_no_numa_passes_non_kvm(self, mock_get):
+        self.flags(enable_numa_live_migration=False, group='workarounds')
+        self.task.instance.numa_topology = objects.InstanceNUMATopology(
+            cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]),
+                                            memory=1024)])
+        mock_get.return_value = objects.ComputeNode(
+            uuid=uuids.cn1, hypervisor_type='xen')
+        self.task._check_instance_has_no_numa()
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    def test_check_instance_has_no_numa_passes_workaround(self, mock_get):
+        self.flags(enable_numa_live_migration=True, group='workarounds')
+        self.task.instance.numa_topology = objects.InstanceNUMATopology(
+            cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]),
+                                            memory=1024)])
+        mock_get.return_value = objects.ComputeNode(
+            uuid=uuids.cn1, hypervisor_type='kvm')
+        self.task._check_instance_has_no_numa()
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    def test_check_instance_has_no_numa_fails(self, mock_get):
+        self.flags(enable_numa_live_migration=False, group='workarounds')
+        mock_get.return_value = objects.ComputeNode(
+            uuid=uuids.cn1, hypervisor_type='kvm')
+        self.task.instance.numa_topology = objects.InstanceNUMATopology(
+            cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]),
+                                            memory=1024)])
+        self.assertRaises(exception.MigrationPreCheckError,
+                          self.task._check_instance_has_no_numa)
+
     @mock.patch.object(objects.Service, 'get_by_compute_host')
     @mock.patch.object(servicegroup.API, 'service_is_up')
     def test_check_instance_host_is_up(self, mock_is_up, mock_get):

@@ -0,0 +1,25 @@
+---
+upgrade:
+  - |
+    Live migration of instances with NUMA topologies is now disabled by default
+    when using the libvirt driver. This includes live migration of instances
+    with CPU pinning or hugepages. CPU pinning and huge page information for
+    such instances is not currently re-calculated, as noted in `bug #1289064`_.
+    This means that if instances were already present on the destination host,
+    the migrated instance could be placed on the same dedicated cores as these
+    instances or use hugepages allocated for another instance. Alternately, if
+    the host platforms were not homogeneous, the instance could be assigned to
+    non-existent cores or be inadvertently split across host NUMA nodes.
+
+    The `long term solution`_ to these issues is to recalculate the XML on the
+    destination node. When this work is completed, the restriction on live
+    migration with NUMA topologies will be lifted.
+
+    For operators that are aware of the issues and are able to manually work
+    around them, the ``[workarounds] enable_numa_live_migration`` option can
+    be used to allow the broken behavior.
+
+    For more information, refer to `bug #1289064`_.
+
+    .. _bug #1289064: https://bugs.launchpad.net/nova/+bug/1289064
+    .. _long term solution: https://blueprints.launchpad.net/nova/+spec/numa-aware-live-migration