Migrate RequestSpec.numa_topology to use pcpuset

When the InstanceNUMATopology OVO has changed in
I901fbd7df00e45196395ff4c69e7b8aa3359edf6 to separately track
pcpus from vcpus a data migration was added. This data migration is
triggered when the InstanceNUMATopology object is loaded from the
instance_extra table. However that patch is missed the fact that the
InstanceNUMATopology object can be loaded from the request_spec table as
well. So InstanceNUMATopology object in RequestSpec are not migrated.
This could lead to errors in the scheduler when such RequestSpec object
is used for scheduling (e.g. during a migration of a pre Victoria
instance with cpu pinning)

This patch adds the missing data migration.

Change-Id: I812d720555bdf008c83cae3d81541a37bd99e594
Closes-Bug: #1952941
This commit is contained in:
Balazs Gibizer 2021-12-02 12:52:01 +01:00
parent 05e8977cb2
commit e853bb5718
4 changed files with 47 additions and 42 deletions

View File

@ -166,8 +166,10 @@ class InstanceNUMATopology(base.NovaObject,
if 'nova_object.name' in primitive:
obj = cls.obj_from_primitive(primitive)
cls._migrate_legacy_dedicated_instance_cpuset(
context, instance_uuid, obj)
updated = cls._migrate_legacy_dedicated_instance_cpuset(obj)
if updated:
cls._save_migrated_cpuset_to_instance_extra(
context, obj, instance_uuid)
else:
obj = cls._migrate_legacy_object(context, instance_uuid, primitive)
@ -176,13 +178,14 @@ class InstanceNUMATopology(base.NovaObject,
# TODO(huaqiang): Remove after Wallaby once we are sure these objects have
# been loaded at least once.
@classmethod
def _migrate_legacy_dedicated_instance_cpuset(cls, context, instance_uuid,
obj):
def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
# NOTE(huaqiang): We may meet some topology object with the old version
# 'InstanceNUMACell' cells, in that case, the 'dedicated' CPU is kept
# in 'InstanceNUMACell.cpuset' field, but it should be kept in
# 'InstanceNUMACell.pcpuset' field since Victoria. Making an upgrade
# and persisting to database.
# here but letting the caller persist the result if needed as we
# don't know which table the InstanceNUMACell is coming from. It can
# come from instance_extra or request_spec too.
update_db = False
for cell in obj.cells:
if len(cell.cpuset) == 0:
@ -194,14 +197,20 @@ class InstanceNUMATopology(base.NovaObject,
cell.pcpuset = cell.cpuset
cell.cpuset = set()
update_db = True
return update_db
if update_db:
db_obj = jsonutils.dumps(obj.obj_to_primitive())
values = {
'numa_topology': db_obj,
}
db.instance_extra_update_by_uuid(context, instance_uuid,
values)
# TODO(huaqiang): Remove after Yoga once we are sure these objects have
# been loaded at least once.
@classmethod
def _save_migrated_cpuset_to_instance_extra(
cls, context, obj, instance_uuid
):
db_obj = jsonutils.dumps(obj.obj_to_primitive())
values = {
'numa_topology': db_obj,
}
db.instance_extra_update_by_uuid(
context, instance_uuid, values)
# TODO(stephenfin): Remove in X or later, once this has bedded in
@classmethod

View File

@ -596,6 +596,8 @@ class RequestSpec(base.NovaObject):
@staticmethod
def _from_db_object(context, spec, db_spec):
spec_obj = spec.obj_from_primitive(jsonutils.loads(db_spec['spec']))
data_migrated = False
for key in spec.fields:
# Load these from the db model not the serialized object within,
# though they should match.
@ -616,6 +618,13 @@ class RequestSpec(base.NovaObject):
# fields. If they are not set, set None.
if not spec.obj_attr_is_set(key):
setattr(spec, key, None)
elif key == "numa_topology":
if key in spec_obj:
spec.numa_topology = spec_obj.numa_topology
if spec.numa_topology:
data_migrated = objects.InstanceNUMATopology.\
_migrate_legacy_dedicated_instance_cpuset(
spec.numa_topology)
elif key in spec_obj:
setattr(spec, key, getattr(spec_obj, key))
spec._context = context
@ -637,6 +646,9 @@ class RequestSpec(base.NovaObject):
# NOTE(danms): Instance group may have been deleted
spec.instance_group = None
if data_migrated:
spec.save()
spec.obj_reset_changes()
return spec

View File

@ -615,29 +615,12 @@ class _TestRequestSpecObject(object):
self.assertIsInstance(req_obj.instance_group, objects.InstanceGroup)
self.assertEqual('fresh', req_obj.instance_group.name)
# FIXME(gibi): This is bug 1952941. When the cpuset -> pcpuset data
# migration was added to InstanceNUMATopology it was missed that such
# object is not only hydrated via
# InstanceNUMATopology.get_by_instance_uuid() but also hydrated by
# RequestSpec.get_by_instance_uuid() indirectly. However the
# latter code patch does not call InstanceNUMATopology.obj_from_db_obj()
# that triggers the data migration via
# InstanceNUMATopology._migrate_legacy_dedicated_instance_cpuset.
# This causes that when the new nova code loads an old RequestSpec object
# from the DB (e.g. during migration of an instance) the
# InstanceNUMATopology in the RequestSpec will not be migrated to the new
# object version and it will lead to errors when the pcpuset field is read
# during scheduling.
@mock.patch(
'nova.objects.instance_numa.InstanceNUMATopology.'
'_migrate_legacy_dedicated_instance_cpuset',
new=mock.NonCallableMock()
)
@mock.patch('nova.objects.request_spec.RequestSpec.save')
@mock.patch.object(
request_spec.RequestSpec, '_get_by_instance_uuid_from_db')
@mock.patch('nova.objects.InstanceGroup.get_by_uuid')
def test_get_by_instance_uuid_numa_topology_migration(
self, mock_get_ig, get_by_uuid
self, mock_get_ig, get_by_uuid, mock_save
):
# Simulate a pre-Victoria RequestSpec where the pcpuset field is not
# defined for the embedded InstanceNUMACell objects but the cpu_policy
@ -665,18 +648,10 @@ class _TestRequestSpecObject(object):
self.context, fake_spec['instance_uuid'])
self.assertEqual(2, len(req_obj.numa_topology.cells))
self.assertEqual({1, 2}, req_obj.numa_topology.cells[0].pcpuset)
self.assertEqual({3, 4}, req_obj.numa_topology.cells[1].pcpuset)
# This is bug 1952941 as the pcpuset is not defined in object as the
# object is not migrated
ex = self.assertRaises(
NotImplementedError,
lambda: req_obj.numa_topology.cells[0].pcpuset
)
self.assertIn("Cannot load 'pcpuset' in the base class", str(ex))
# This is the expected behavior
# self.assertEqual({1, 2}, req_obj.numa_topology.cells[0].pcpuset)
# self.assertEqual({3, 4}, req_obj.numa_topology.cells[1].pcpuset)
mock_save.assert_called_once()
def _check_update_primitive(self, req_obj, changes):
self.assertEqual(req_obj.instance_uuid, changes['instance_uuid'])

View File

@ -0,0 +1,9 @@
---
fixes:
- |
The `bug 1952941`_ is fixed where a pre-Victoria server with pinned
CPUs cannot be migrated or evacuated after the cloud is upgraded to
Victoria or newer as the scheduling fails with
``NotImplementedError: Cannot load 'pcpuset'`` error.
.. _bug 1952941: https://bugs.launchpad.net/nova/+bug/1952941