From 03bc8b6a6b47d5372b30453283f85d2264e5b1f9 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 18 Dec 2018 15:24:45 +0100 Subject: [PATCH] Transfer port.resource_request to the scheduler This patch collects the resource requests from each neutron port involved in a server create request. It converts each request to a RequestGroup object and includes them in the RequestSpec. This way the requests reach the scheduler and there they are included in the generation of the allocation_candidates query. This patch only handles the happy path of a server create request. But it adds a couple of TODOs to places where the code paths related to server move operations need to be implemented. That implementation will be part of subsequent patches. Note that this patch technically makes it possible to boot a server with one neutron port that has a resource request. But it does not handle multiple such ports or SRIOV ports where two PFs are supporting the same physnet, nor many server lifecycle operations like resize, migrate, live-migrate, unshelve. To avoid possible resource allocation inconsistencies due to the partial support, nova rejects any request that involves such ports. See the previous patches in this patch series for details. Also note that the simple boot cases are verified with functional tests and in those tests we need to mock out the above described logic that rejects such requests. See more background about this approach on the ML [1]. 
[1] http://lists.openstack.org/pipermail/openstack-discuss/2018-December/001129.html blueprint bandwidth-resource-provider Change-Id: Ica6152ccb97dce805969d964d6ed032bfe22a33f --- nova/compute/api.py | 7 +- nova/conductor/manager.py | 13 ++ nova/conductor/tasks/live_migrate.py | 3 + nova/conductor/tasks/migrate.py | 3 + nova/tests/functional/integrated_helpers.py | 6 +- nova/tests/functional/test_servers.py | 189 ++++++++++++++++++++ nova/tests/unit/compute/test_compute_api.py | 17 +- 7 files changed, 228 insertions(+), 10 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index 70c7bbceb7d4..0fec77fb55b9 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -846,7 +846,8 @@ class API(base.Base): 'progress': 0, 'pci_requests': pci_request_info, 'numa_topology': numa_topology, - 'system_metadata': system_metadata} + 'system_metadata': system_metadata, + 'port_resource_requests': port_resource_requests} options_from_image = self._inherit_properties_from_image( boot_meta, auto_disk_config) @@ -870,6 +871,7 @@ class API(base.Base): security_groups = self.security_group_api.populate_security_groups( security_groups) self.security_group_api.ensure_default(context) + port_resource_requests = base_options.pop('port_resource_requests') LOG.debug("Going to run %s instances...", num_instances) instances_to_build = [] try: @@ -883,7 +885,8 @@ class API(base.Base): base_options['numa_topology'], base_options['pci_requests'], filter_properties, instance_group, base_options['availability_zone'], - security_groups=security_groups) + security_groups=security_groups, + port_resource_requests=port_resource_requests) if block_device_mapping: # Record whether or not we are a BFV instance diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index ecc8634d6ce4..6a54bb1c7563 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -608,6 +608,11 @@ class ComputeTaskManager(base.Base): else: # This is not a reschedule, so we need to 
call the scheduler to # get appropriate hosts for the request. + # NOTE(gibi): We only call the scheduler if using cells v1 or + # we are rescheduling from a really old compute. In + # either case we do not support externally-defined resource + # requests, like port QoS. So no requested_resources are set + # on the RequestSpec here. host_lists = self._schedule_instances(context, spec_obj, instance_uuids, return_alternates=True) except Exception as exc: @@ -809,6 +814,11 @@ class ComputeTaskManager(base.Base): # when populate_filter_properties accepts it filter_properties = request_spec.\ to_legacy_filter_properties_dict() + + # TODO(gibi): We need to make sure that the + # requested_resources field is re calculated based on + # neutron ports. + # NOTE(cfriesen): Ensure that we restrict the scheduler to # the cell specified by the instance mapping. instance_mapping = \ @@ -984,6 +994,9 @@ class ComputeTaskManager(base.Base): # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() + # TODO(gibi): We need to make sure that the + # requested_resources field is re calculated based on + # neutron ports. try: # if this is a rebuild of instance on the same host with # new image. diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index b543a91ffb3f..d06a4c427195 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -346,6 +346,9 @@ class LiveMigrationTask(base.TaskBase): # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() + + # TODO(gibi): We need to make sure that the requested_resources field + # is re calculated based on neutron ports. 
scheduler_utils.setup_instance_group(self.context, request_spec) # We currently only support live migrating to hosts in the same diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index 7601b9e11c39..4998c96fa975 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -202,6 +202,9 @@ class MigrationTask(base.TaskBase): # is not forced to be the original host self.request_spec.reset_forced_destinations() + # TODO(gibi): We need to make sure that the requested_resources field + # is re calculated based on neutron ports. + self._restrict_request_spec_to_cell(legacy_props) # Once _preallocate_migration() is done, the source node allocation is diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 083a85598e29..76de3b8b8733 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -488,16 +488,18 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin): url= ('/resource_providers/%s/inventories' % rp_uuid), version='1.15', body=inv_body).body - def _update_inventory(self, rp_uuid, inv_body): + def _update_inventory(self, rp_uuid, inv_body, version='1.20'): """This will update the inventory for a given resource provider. 
:param rp_uuid: UUID of the resource provider to update :param inv_body: inventory to set on the provider + :param version: the placement microversion used in the request :returns: APIResponse object with the results """ return self.placement_api.put( url= ('/resource_providers/%s/inventories' % rp_uuid), - body=inv_body).body + body=inv_body, + version=version).body def _get_resource_provider_by_uuid(self, rp_uuid): return self.placement_api.get( diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 2324d3c9337b..67c0fe9bcf38 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -12,10 +12,12 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +from __future__ import absolute_import import collections import copy import datetime +import fixtures import time import zlib @@ -36,6 +38,7 @@ from nova import context from nova import exception from nova import objects from nova.objects import block_device as block_device_obj +from nova import rc_fields from nova.scheduler import weights from nova import test from nova.tests import fixtures as nova_fixtures @@ -5380,12 +5383,18 @@ class PortResourceRequestBasedSchedulingTestBase( compute_driver = 'fake.SmallFakeDriver' + CUSTOM_VNIC_TYPE_NORMAL = 'CUSTOM_VNIC_TYPE_NORMAL' + CUSTOM_PHYSNET_2 = 'CUSTOM_PHYSNET_2' + def setUp(self): super(PortResourceRequestBasedSchedulingTestBase, self).setUp() self.compute1 = self._start_compute('host1') self.compute1_rp_uuid = self._get_provider_uuid_by_host('host1') + self.ovs_bridge_rp_per_host = {} self.flavor = self.api.get_flavors()[0] + self._create_networking_rp_tree(self.compute1_rp_uuid) + def _create_server(self, flavor, networks): server_req = self._build_minimal_create_server_request( self.api, 'bandwidth-aware-server', @@ -5393,6 +5402,62 @@ class 
PortResourceRequestBasedSchedulingTestBase( flavor_id=flavor['id'], networks=networks) return self.api.post_server({'server': server_req}) + def _set_provider_inventories(self, rp_uuid, inventories): + rp = self.placement_api.get( + '/resource_providers/%s' % rp_uuid).body + inventories['resource_provider_generation'] = rp['generation'] + return self._update_inventory(rp_uuid, inventories) + + def _create_networking_rp_tree(self, compute_rp_uuid): + # let's simulate what the neutron ovs agent would do + + # we need uuid sentinel for the test to make pep8 happy but we need a + # unique one per compute so here is some ugliness + ovs_agent_rp_uuid = getattr(uuids, compute_rp_uuid + 'ovs agent') + agent_rp_req = { + "name": ovs_agent_rp_uuid, + "uuid": ovs_agent_rp_uuid, + "parent_provider_uuid": compute_rp_uuid + } + self.placement_api.post('/resource_providers', + body=agent_rp_req, + version='1.20') + ovs_bridge_rp_uuid = getattr(uuids, ovs_agent_rp_uuid + 'ovs br') + ovs_bridge_req = { + "name": ovs_bridge_rp_uuid, + "uuid": ovs_bridge_rp_uuid, + "parent_provider_uuid": ovs_agent_rp_uuid + } + self.placement_api.post('/resource_providers', + body=ovs_bridge_req, + version='1.20') + self.ovs_bridge_rp_per_host[compute_rp_uuid] = ovs_bridge_rp_uuid + + self._set_provider_inventories( + ovs_bridge_rp_uuid, + {"inventories": { + rc_fields.ResourceClass.NET_BW_IGR_KILOBIT_PER_SEC: + {"total": 10000}, + rc_fields.ResourceClass.NET_BW_EGR_KILOBIT_PER_SEC: + {"total": 10000}, + }}) + + self._create_trait(self.CUSTOM_VNIC_TYPE_NORMAL) + self._create_trait(self.CUSTOM_PHYSNET_2) + + self._set_provider_traits( + ovs_bridge_rp_uuid, + [self.CUSTOM_VNIC_TYPE_NORMAL, self.CUSTOM_PHYSNET_2]) + + def assertPortMatchesAllocation(self, port, allocations): + port_request = port['resource_request']['resources'] + for rc, amount in allocations.items(): + self.assertEqual(port_request[rc], amount, + 'port %s requested %d %s ' + 'resources but got allocation %d' % + (port['id'], 
port_request[rc], rc, + amount)) + class PortResourceRequestBasedSchedulingTest( PortResourceRequestBasedSchedulingTestBase): @@ -5648,3 +5713,127 @@ class PortResourceRequestBasedSchedulingTest( self.api.post_server_action(server['id'], {'unshelve': {}}) self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + +class PortResourceRequestBasedSchedulingTestIgnoreMicroversionCheck( + PortResourceRequestBasedSchedulingTestBase): + + def setUp(self): + super( + PortResourceRequestBasedSchedulingTestIgnoreMicroversionCheck, + self).setUp() + + # NOTE(gibi): This mock turns off the api microversion that prevents + # handling of instances operations if the request involves port's with + # resource request with old microversion. The new microversion does not + # exists yet as the whole feature is not read for end user consumption. + # This functional tests however would like to prove that some use cases + # already work. + self.useFixture( + fixtures.MockPatch( + 'nova.api.openstack.common.' 
+ 'supports_port_resource_request', + return_value=True)) + + def test_boot_server_with_two_ports_one_having_resource_request(self): + non_qos_port = self.neutron.port_1 + qos_port = self.neutron.port_with_resource_request + + server = self._create_server( + flavor=self.flavor, + networks=[{'port': non_qos_port['id']}, + {'port': qos_port['id']}]) + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + allocations = self.placement_api.get( + '/allocations/%s' % server['id']).body['allocations'] + + # We expect one set of allocations for the compute resources on the + # compute rp and one set for the networking resources on the ovs bridge + # rp due to the qos_port resource request + self.assertEqual(2, len(allocations)) + compute_allocations = allocations[self.compute1_rp_uuid]['resources'] + network_allocations = allocations[ + self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]]['resources'] + + self.assertEqual(self._resources_from_flavor(self.flavor), + compute_allocations) + self.assertPortMatchesAllocation(qos_port, network_allocations) + + self.api.delete_server(server['id']) + self._wait_until_deleted(server) + fake_notifier.wait_for_versioned_notifications('instance.delete.end') + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertEqual(0, len(allocations)) + + +class PortResourceRequestReSchedulingTestIgnoreMicroversionCheck( + PortResourceRequestBasedSchedulingTestBase): + + compute_driver = 'fake.FakeRescheduleDriver' + + def setUp(self): + super( + PortResourceRequestReSchedulingTestIgnoreMicroversionCheck, + self).setUp() + self.compute2 = self._start_compute('host2') + self.compute2_rp_uuid = self._get_provider_uuid_by_host('host2') + self._create_networking_rp_tree(self.compute2_rp_uuid) + + # NOTE(gibi): This mock turns off the api microversion that prevents + # handling of instances operations if the request involves port's with + # resource request with old microversion. 
The new microversion does not + # exists yet as the whole feature is not read for end user consumption. + # This functional tests however would like to prove that some use cases + # already work. + self.useFixture( + fixtures.MockPatch( + 'nova.api.openstack.common.' + 'supports_port_resource_request', + return_value=True)) + + def test_boot_reschedule_success(self): + port = self.neutron.port_with_resource_request + + server = self._create_server( + flavor=self.flavor, + networks=[{'port': port['id']}]) + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + dest_hostname = server['OS-EXT-SRV-ATTR:host'] + dest_compute_rp_uuid = self._get_provider_uuid_by_host(dest_hostname) + + failed_compute_rp = (self.compute1_rp_uuid + if dest_compute_rp_uuid == self.compute2_rp_uuid + else self.compute2_rp_uuid) + + allocations = self.placement_api.get( + '/allocations/%s' % server['id']).body['allocations'] + + # We expect one set of allocations for the compute resources on the + # compute rp and one set for the networking resources on the ovs bridge + # rp + self.assertEqual(2, len(allocations)) + compute_allocations = allocations[dest_compute_rp_uuid]['resources'] + network_allocations = allocations[ + self.ovs_bridge_rp_per_host[dest_compute_rp_uuid]]['resources'] + + self.assertEqual(self._resources_from_flavor(self.flavor), + compute_allocations) + self.assertPortMatchesAllocation(port, network_allocations) + + # assert that the allocations against the host where the spawn + # failed are cleaned up properly + self.assertEqual( + {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, + self._get_provider_usages(failed_compute_rp)) + self.assertEqual( + {'NET_BW_EGR_KILOBIT_PER_SEC': 0, 'NET_BW_IGR_KILOBIT_PER_SEC': 0}, + self._get_provider_usages( + self.ovs_bridge_rp_per_host[failed_compute_rp])) + + self.api.delete_server(server['id']) + self._wait_until_deleted(server) + fake_notifier.wait_for_versioned_notifications('instance.delete.end') + allocations = 
self._get_allocations_by_server_uuid(server['id']) + self.assertEqual(0, len(allocations)) diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index d0d4396796ea..055c6321327f 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -4595,7 +4595,8 @@ class _ComputeAPIUnitTestMixIn(object): 'root_device_name': None, 'user_data': None, 'numa_topology': None, - 'pci_requests': None} + 'pci_requests': None, + 'port_resource_requests': None} security_groups = {} block_device_mapping = objects.BlockDeviceMappingList( objects=[objects.BlockDeviceMapping( @@ -4737,7 +4738,8 @@ class _ComputeAPIUnitTestMixIn(object): 'root_device_name': None, 'user_data': None, 'numa_topology': None, - 'pci_requests': None} + 'pci_requests': None, + 'port_resource_requests': None} security_groups = {} block_device_mappings = objects.BlockDeviceMappingList( objects=[objects.BlockDeviceMapping( @@ -4827,7 +4829,8 @@ class _ComputeAPIUnitTestMixIn(object): 'root_device_name': None, 'user_data': None, 'numa_topology': None, - 'pci_requests': None} + 'pci_requests': None, + 'port_resource_requests': None} security_groups = {} block_device_mapping = objects.BlockDeviceMappingList( objects=[objects.BlockDeviceMapping( @@ -4924,7 +4927,8 @@ class _ComputeAPIUnitTestMixIn(object): 'root_device_name': None, 'user_data': None, 'numa_topology': None, - 'pci_requests': None} + 'pci_requests': None, + 'port_resource_requests': None} security_groups = {} block_device_mapping = objects.BlockDeviceMappingList( objects=[objects.BlockDeviceMapping( @@ -5027,7 +5031,8 @@ class _ComputeAPIUnitTestMixIn(object): 'root_device_name': None, 'user_data': None, 'numa_topology': None, - 'pci_requests': None} + 'pci_requests': None, + 'port_resource_requests': None} security_groups = {} block_device_mapping = objects.BlockDeviceMappingList( objects=[objects.BlockDeviceMapping( @@ -5091,7 +5096,7 @@ class 
_ComputeAPIUnitTestMixIn(object): mock_objects.RequestSpec.from_components.assert_called_once_with( mock.ANY, mock.ANY, mock.ANY, mock.ANY, mock.ANY, mock.ANY, mock.ANY, mock.ANY, mock.ANY, - security_groups=secgroups) + security_groups=secgroups, port_resource_requests=mock.ANY) test() def _test_rescue(self, vm_state=vm_states.ACTIVE, rescue_password=None,