From e90e89219410a771f9b6b0c4200edb0480360afe Mon Sep 17 00:00:00 2001
From: Tetsuro Nakamura
Date: Fri, 19 Oct 2018 23:12:20 +0900
Subject: [PATCH] Consider root id is None in the database case

There are cases where ``root_provider_id`` of a resource provider is
left NULL just after the deployment is upgraded to the Rocky release.
In such cases getting allocation candidates raises a KeyError. This
patch fixes that bug for cases where there are no sharing or nested
providers in play.
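
The fix falls back to the provider's own id/uuid whenever its root is
NULL. Roughly, each affected helper now does the equivalent of the
following simplified sketch (variable names differ per call site):

    # Providers created before the Rocky online data migration can
    # still have root_provider_id = NULL; treat such a provider as its
    # own root so lookups keyed by root id do not raise KeyError.
    if d['root_id'] is None:
        d['root_id'] = d['id']
        d['root_uuid'] = d['uuid']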

Change-Id: I9639d852078c95de506110f24d3f35e7cf5e361e
Closes-Bug: #1799892
(cherry picked from commit cdbedac920f407e102a6916f2d943e50a1b0943d)
---
 .../placement/objects/resource_provider.py    | 50 +++++++++---
 .../db/test_allocation_candidates.py          | 76 +++++++++++++------
 2 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/nova/api/openstack/placement/objects/resource_provider.py b/nova/api/openstack/placement/objects/resource_provider.py
index e6a778a4ead8..2f611d6b0ca7 100644
--- a/nova/api/openstack/placement/objects/resource_provider.py
+++ b/nova/api/openstack/placement/objects/resource_provider.py
@@ -697,9 +697,18 @@ def _provider_ids_from_rp_ids(context, rp_ids):
             me.c.parent_provider_id == parent.c.id)
     sel = sa.select(cols).select_from(me_to_parent)
     sel = sel.where(me.c.id.in_(rp_ids))
-    return {
-        r[0]: ProviderIds(**dict(r)) for r in context.session.execute(sel)
-    }
+
+    ret = {}
+    for r in context.session.execute(sel):
+        # Use its id/uuid for the root id/uuid if the root id/uuid is None
+        # TODO(tetsuro): Remove this when we are sure all root_provider_id
+        # values are NOT NULL
+        d = dict(r)
+        if d['root_id'] is None:
+            d['root_id'] = d['id']
+            d['root_uuid'] = d['uuid']
+        ret[d['id']] = ProviderIds(**d)
+    return ret
 
 
 def _provider_ids_from_uuid(context, uuid):
@@ -2695,13 +2704,15 @@ def _get_usages_by_provider_tree(ctx, root_ids):
     #           FROM allocations
     #           JOIN resource_providers
     #             ON allocations.resource_provider_id = resource_providers.id
-    #             AND resource_providers.root_provider_id IN($root_ids)
+    #             AND (resource_providers.root_provider_id IN($root_ids)
+    #                  OR resource_providers.id IN($root_ids))
     #           GROUP BY resource_provider_id, resource_class_id
     #        )
-    #        AS usages
+    #        AS usage
     #          ON inv.resource_provider_id = usage.resource_provider_id
     #          AND inv.resource_class_id = usage.resource_class_id
-    # WHERE rp.root_provider_id IN ($root_ids)
+    # WHERE (rp.root_provider_id IN ($root_ids)
+    #        OR resource_providers.id IN($root_ids))
     rpt = sa.alias(_RP_TBL, name="rp")
     inv = sa.alias(_INV_TBL, name="inv")
     # Build our derived table (subquery in the FROM clause) that sums used
@@ -2709,7 +2720,12 @@ def _get_usages_by_provider_tree(ctx, root_ids):
     derived_alloc_to_rp = sa.join(
         _ALLOC_TBL, _RP_TBL,
         sa.and_(_ALLOC_TBL.c.resource_provider_id == _RP_TBL.c.id,
-                _RP_TBL.c.root_provider_id.in_(root_ids)))
+                # TODO(tetsuro): Remove this OR condition when all
+                # root_provider_id values are NOT NULL
+                sa.or_(_RP_TBL.c.root_provider_id.in_(root_ids),
+                       _RP_TBL.c.id.in_(root_ids))
+                )
+        )
     usage = sa.alias(
         sa.select([
             _ALLOC_TBL.c.resource_provider_id,
@@ -2741,7 +2757,14 @@ def _get_usages_by_provider_tree(ctx, root_ids):
         inv.c.allocation_ratio,
         inv.c.max_unit,
         usage.c.used,
-    ]).select_from(usage_join).where(rpt.c.root_provider_id.in_(root_ids))
+    ]).select_from(usage_join).where(
+        # TODO(tetsuro): Remove this OR condition when all
+        # root_provider_id values are NOT NULL
+        sa.or_(
+            rpt.c.root_provider_id.in_(root_ids),
+            rpt.c.id.in_(root_ids)
+        )
+    )
     return ctx.session.execute(query).fetchall()
 
 
@@ -3015,7 +3038,16 @@ def _get_providers_with_resource(ctx, rc_id, amount):
     sel = sel.where(sa.and_(*where_conds))
     res = ctx.session.execute(sel).fetchall()
     res = set((r[0], r[1]) for r in res)
-    return res
+    # TODO(tetsuro): Bug#1799892: We could have old providers whose root
+    # provider id is not set because they have not undergone the data
+    # migration yet, so we need to set the root_id explicitly here.
+    # Remove this when all root_provider_id values are NOT NULL.
+    ret = []
+    for rp_tuple in res:
+        rp_id = rp_tuple[0]
+        root_id = rp_id if rp_tuple[1] is None else rp_tuple[1]
+        ret.append((rp_id, root_id))
+    return ret
 
 
 @db_api.placement_context_manager.reader
diff --git a/nova/tests/functional/api/openstack/placement/db/test_allocation_candidates.py b/nova/tests/functional/api/openstack/placement/db/test_allocation_candidates.py
index 54d3af26a4c3..b56cb36aecd8 100644
--- a/nova/tests/functional/api/openstack/placement/db/test_allocation_candidates.py
+++ b/nova/tests/functional/api/openstack/placement/db/test_allocation_candidates.py
@@ -433,6 +433,9 @@ class AllocationCandidatesTestCase(tb.PlacementDbBaseTestCase):
         )
         conn.execute(ins_rptbl)
 
+        # This is needed for _validate_allocation_requests() at the end
+        self.rp_uuid_to_name[uuids.rp1] = 'cn1'
+
         # Add VCPU(resource_class_id=0) inventory to the provider.
         ins_invtbl = inv_tbl.insert().values(
             id=1,
@@ -457,31 +460,7 @@ class AllocationCandidatesTestCase(tb.PlacementDbBaseTestCase):
         )
         conn.execute(ins_alloctbl)
 
-        # TODO(tetsuro): Bug#1799892: Fix this not to raise the KeyError
-        # alloc_cands = self._get_allocation_candidates(
-        #     {'': placement_lib.RequestGroup(
-        #         use_same_provider=False,
-        #         resources={
-        #             fields.ResourceClass.VCPU: 1
-        #         }
-        #     )}
-        # )
-        #
-        # expected = [
-        #     [('cn1', fields.ResourceClass.VCPU, 1)]
-        # ]
-        # self._validate_allocation_requests(expected, alloc_cands)
-        #
-        # expected = {
-        #     'cn1': set([
-        #         (fields.ResourceClass.VCPU, 8, 4)
-        #     ]),
-        # }
-        # self._validate_provider_summary_resources(expected, alloc_cands)
-
-        self.assertRaises(
-            KeyError,
-            self._get_allocation_candidates,
+        alloc_cands = self._get_allocation_candidates(
             {'': placement_lib.RequestGroup(
                 use_same_provider=False,
                 resources={
@@ -490,6 +469,53 @@ class AllocationCandidatesTestCase(tb.PlacementDbBaseTestCase):
             )}
         )
 
+        expected = [
+            [('cn1', fields.ResourceClass.VCPU, 1)]
+        ]
+        self._validate_allocation_requests(expected, alloc_cands)
+
+        expected = {
+            'cn1': set([
+                (fields.ResourceClass.VCPU, 8, 4)
+            ]),
+        }
+        self._validate_provider_summary_resources(expected, alloc_cands)
+
+        # NOTE(tetsuro): Getting allocation candidates goes through a
+        # different path when sharing/nested providers exist, so let's
+        # also test that path by creating a new sharing provider. We
+        # don't use direct database insertion for 'ss1' here since
+        # 'cn1', which has no root id in the database, is the actual
+        # target of the following test.
+        ss1 = self._create_provider('ss1', uuids.agg1)
+        tb.set_traits(ss1, "MISC_SHARES_VIA_AGGREGATE")
+        tb.add_inventory(ss1, fields.ResourceClass.VCPU, 8)
+
+        alloc_cands = self._get_allocation_candidates(
+            {'': placement_lib.RequestGroup(
+                use_same_provider=False,
+                resources={
+                    fields.ResourceClass.VCPU: 1
+                }
+            )}
+        )
+
+        expected = [
+            [('cn1', fields.ResourceClass.VCPU, 1)],
+            [('ss1', fields.ResourceClass.VCPU, 1)]
+        ]
+        self._validate_allocation_requests(expected, alloc_cands)
+
+        expected = {
+            'cn1': set([
+                (fields.ResourceClass.VCPU, 8, 4)
+            ]),
+            'ss1': set([
+                (fields.ResourceClass.VCPU, 8, 0)
+            ]),
+        }
+        self._validate_provider_summary_resources(expected, alloc_cands)
+
     def test_all_local(self):
         """Create some resource providers that can satisfy the request for
         resources with local (non-shared) resources and verify that the