diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst
index 8a6353bd8a57..0948beb9bba2 100644
--- a/doc/source/admin/index.rst
+++ b/doc/source/admin/index.rst
@@ -113,6 +113,7 @@ instance for these kind of workloads.
     ports-with-resource-requests
     virtual-persistent-memory
     emulated-tpm
+    managing-resource-providers
 
 Additional guides
 
diff --git a/doc/source/admin/managing-resource-providers.rst b/doc/source/admin/managing-resource-providers.rst
new file mode 100644
index 000000000000..27bfe20140a0
--- /dev/null
+++ b/doc/source/admin/managing-resource-providers.rst
@@ -0,0 +1,216 @@
+==============================================
+Managing Resource Providers Using Config Files
+==============================================
+
+In order to facilitate management of resource provider information in the
+Placement API, Nova provides `a method`__ for admins to add custom inventory
+and traits to resource providers using YAML files.
+
+__ https://specs.openstack.org/openstack/nova-specs/specs/ussuri/approved/provider-config-file.html
+
+.. note::
+
+   Only ``CUSTOM_*`` resource classes and traits may be managed this way.
+
+Placing Files
+-------------
+
+Nova-compute will search for ``*.yaml`` files in the path specified in
+:oslo.config:option:`compute.provider_config_location`. These files will be
+loaded and validated for errors on nova-compute startup. If there are any
+errors in the files, nova-compute will fail to start up.
+
+Administrators should ensure that provider config files have appropriate
+permissions and ownership. See the `specification`__ for more details.
+
+__ https://specs.openstack.org/openstack/nova-specs/specs/ussuri/approved/provider-config-file.html
+
+.. note::
+
+   The files are loaded once at nova-compute startup and any changes or new
+   files will not be recognized until the next nova-compute startup.
+
+Examples
+--------
+
+Resource providers to target can be identified by either UUID or name. In
+addition, the value ``$COMPUTE_NODE`` can be used in the UUID field to
+identify all nodes managed by the service.
+
+If an entry does not include any additional inventory or traits, it will be
+logged at load time but otherwise ignored. In the case of a resource provider
+being identified by both ``$COMPUTE_NODE`` and individual UUID/name, the
+values in the ``$COMPUTE_NODE`` entry will be ignored for *that provider*
+only if the explicit entry includes inventory or traits.
+
+.. note::
+
+   In the case that a resource provider is identified more than once by
+   explicit UUID/name, the nova-compute service will fail to start. This
+   is a global requirement across all supplied ``provider.yaml`` files.
+
+.. code-block:: yaml
+
+  meta:
+    schema_version: '1.0'
+  providers:
+    - identification:
+        name: 'EXAMPLE_RESOURCE_PROVIDER'
+        # Additional valid identification examples:
+        # uuid: '$COMPUTE_NODE'
+        # uuid: '5213b75d-9260-42a6-b236-f39b0fd10561'
+      inventories:
+        additional:
+          - CUSTOM_EXAMPLE_RESOURCE_CLASS:
+              total: 100
+              reserved: 0
+              min_unit: 1
+              max_unit: 10
+              step_size: 1
+              allocation_ratio: 1.0
+      traits:
+        additional:
+          - 'CUSTOM_EXAMPLE_TRAIT'
+
+Schema Example
+--------------
+.. code-block:: yaml
+
+  type: object
+  properties:
+    # This property is used to track where the provider.yaml file originated.
+    # It is reserved for internal use and should never be set in a provider.yaml
+    # file supplied by an end user.
+    __source_file:
+      not: {}
+    meta:
+      type: object
+      properties:
+        # Version ($Major, $minor) of the schema must successfully parse
+        # documents conforming to ($Major, 0..N). Any breaking schema change
+        # (e.g. removing fields, adding new required fields, imposing a stricter
+        # pattern on a value, etc.) must bump $Major.
+        schema_version:
+          type: string
+          pattern: '^1\.([0-9]|[1-9][0-9]+)$'
+      required:
+        - schema_version
+      additionalProperties: true
+    providers:
+      type: array
+      items:
+        type: object
+        properties:
+          identification:
+            $ref: '#/provider_definitions/provider_identification'
+          inventories:
+            $ref: '#/provider_definitions/provider_inventories'
+          traits:
+            $ref: '#/provider_definitions/provider_traits'
+        required:
+          - identification
+        additionalProperties: true
+  required:
+    - meta
+  additionalProperties: true
+
+  provider_definitions:
+    provider_identification:
+      # Identify a single provider to configure. Exactly one identification
+      # method should be used. Currently `uuid` or `name` are supported, but
+      # future versions may support others.
+      # The uuid can be set to the sentinel value `$COMPUTE_NODE` which will
+      # cause the consuming compute service to apply the configuration to
+      # all compute node root providers it manages that are not otherwise
+      # specified using a uuid or name.
+      type: object
+      properties:
+        uuid:
+          oneOf:
+            # TODO(sean-k-mooney): replace this with type uuid when we can depend
+            # on a version of the jsonschema lib that implements draft 8 or later
+            # of the jsonschema spec.
+            - type: string
+              pattern: '^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}$'
+            - type: string
+              const: '$COMPUTE_NODE'
+        name:
+          type: string
+          minLength: 1
+      # This introduces the possibility of an unsupported key name being used to
+      # get by schema validation, but is necessary to support forward
+      # compatibility with new identification methods. This should be checked
+      # after schema validation.
+      minProperties: 1
+      maxProperties: 1
+      additionalProperties: false
+    provider_inventories:
+      # Allows the admin to specify various adjectives to create and manage
+      # providers' inventories. This list of adjectives can be extended in the
+      # future as the schema evolves to meet new use cases. As of v1.0, only one
+      # adjective, `additional`, is supported.
+      type: object
+      properties:
+        additional:
+          type: array
+          items:
+            patternProperties:
+              # Allows any key name matching the resource class pattern; a
+              # check to prevent conflicts with virt driver owned resource classes
+              # will be done after schema validation.
+              ^[A-Z0-9_]{1,255}$:
+                type: object
+                properties:
+                  # Any optional properties not populated will be given a default value by
+                  # placement. If overriding a pre-existing provider, values will not be
+                  # preserved from the existing inventory.
+                  total:
+                    type: integer
+                  reserved:
+                    type: integer
+                  min_unit:
+                    type: integer
+                  max_unit:
+                    type: integer
+                  step_size:
+                    type: integer
+                  allocation_ratio:
+                    type: number
+                required:
+                  - total
+                # The defined properties reflect the current placement data
+                # model. While defining those in the schema and not allowing
+                # additional properties means we will need to bump the schema
+                # version if they change, that is likely to be part of a large
+                # change that may have other impacts anyway. The benefit of
+                # stricter validation of property names outweighs the (small)
+                # chance of having to bump the schema version as described above.
+                additionalProperties: false
+            # This ensures only keys matching the pattern above are allowed
+            additionalProperties: false
+      additionalProperties: true
+    provider_traits:
+      # Allows the admin to specify various adjectives to create and manage
+      # providers' traits. This list of adjectives can be extended in the
+      # future as the schema evolves to meet new use cases. As of v1.0, only one
+      # adjective, `additional`, is supported.
+      type: object
+      properties:
+        additional:
+          type: array
+          items:
+            # Allows any value matching the trait pattern here, additional
+            # validation will be done after schema validation.
+            type: string
+            pattern: '^[A-Z0-9_]{1,255}$'
+      additionalProperties: true
+
+.. note::
+
+   When creating a ``provider.yaml`` config file, it is recommended to
+   validate the config against the schema provided by Nova using a simple
+   jsonschema validator rather than by starting the nova-compute agent;
+   this enables faster iteration.
+
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 7f3532406f50..1074879738c1 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -30,6 +30,7 @@ import retrying
 
 from nova.compute import claims
 from nova.compute import monitors
+from nova.compute import provider_config
 from nova.compute import stats as compute_stats
 from nova.compute import task_states
 from nova.compute import utils as compute_utils
@@ -112,6 +113,11 @@ class ResourceTracker(object):
         # and value of this sub-dict is a set of Resource obj
         self.assigned_resources = collections.defaultdict(
             lambda: collections.defaultdict(set))
+        # Retrieves dict of provider config data. This can fail with
+        # nova.exception.ProviderConfigException if invalid or conflicting
+        # data exists in the provider config files.
+        self.provider_configs = provider_config.get_provider_configs(
+            CONF.compute.provider_config_location)
         # Set of ids for providers identified in provider config files that
         # are not found on the provider tree. These are tracked to facilitate
         # smarter logging.
@@ -1155,6 +1161,9 @@ class ResourceTracker(object):
 
         self.provider_tree = prov_tree
 
+        # This merges in changes from the provider config files loaded in init
+        self._merge_provider_configs(self.provider_configs, prov_tree)
+
         # Flush any changes. If we processed ReshapeNeeded above, allocs is not
         # None, and this will hit placement's POST /reshaper route.
         self.reportclient.update_from_provider_tree(context, prov_tree,
@@ -1714,6 +1723,7 @@ class ResourceTracker(object):
         :param provider_tree: The provider tree to be updated in place
         """
         processed_providers = {}
+        provider_custom_traits = {}
         for uuid_or_name, provider_data in provider_configs.items():
             additional_traits = provider_data.get(
                 "traits", {}).get("additional", [])
@@ -1758,10 +1768,23 @@ class ResourceTracker(object):
                         'current_uuid': current_uuid
                     }
                 )
-                processed_providers[current_uuid] = source_file_name
+
+                # NOTE(sean-k-mooney): since each provider should be processed
+                # at most once, any custom traits the provider already has were
+                # set either in a previous iteration, by the virt driver or via
+                # the placement api. As a result we must ignore them when
+                # checking for duplicate traits, so we construct a set of the
+                # existing custom traits.
+                if current_uuid not in provider_custom_traits:
+                    provider_custom_traits[current_uuid] = {
+                        trait for trait in provider.traits
+                        if trait.startswith('CUSTOM')
+                    }
+                existing_custom_traits = provider_custom_traits[current_uuid]
 
                 if additional_traits:
                     intersect = set(provider.traits) & set(additional_traits)
+                    intersect -= existing_custom_traits
                     if intersect:
                         invalid = ','.join(intersect)
                         raise ValueError(_(
@@ -1797,6 +1820,8 @@ class ResourceTracker(object):
                     provider_tree.update_inventory(
                         provider.uuid,
                         merged_inventory)
+
+                processed_providers[current_uuid] = source_file_name
 
     def _get_providers_to_update(self, uuid_or_name, provider_tree,
                                  source_file):
         """Identifies the providers to be updated.
 
diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index 98ce74ec0740..e949dd2e9540 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -967,6 +967,19 @@ Possible values:
 
 * -1 means unlimited
 * Any integer >= 0 represents the maximum allowed
+"""),
+    cfg.StrOpt('provider_config_location',
+               default='/etc/nova/provider_config/',
+               help="""
+Location of YAML files containing resource provider configuration data.
+
+These files allow the operator to specify additional custom inventory and
+traits to assign to one or more resource providers.
+
+Additional documentation is available here:
+
+  https://docs.openstack.org/nova/latest/admin/managing-resource-providers.html
+
 """),
 ]
 
diff --git a/nova/tests/functional/compute/test_resource_tracker.py b/nova/tests/functional/compute/test_resource_tracker.py
index 0a4c4ee46211..65aac1927350 100644
--- a/nova/tests/functional/compute/test_resource_tracker.py
+++ b/nova/tests/functional/compute/test_resource_tracker.py
@@ -10,9 +10,15 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import copy
+import fixtures
 import mock
+import os
+
 import os_resource_classes as orc
+import os_traits
 from oslo_utils.fixture import uuidsentinel as uuids
+import yaml
 
 from nova.compute import power_state
 from nova.compute import resource_tracker
@@ -23,8 +29,11 @@ from nova import conf
 from nova import context
 from nova import objects
 from nova import test
+from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional import fixtures as func_fixtures
 from nova.tests.functional import integrated_helpers
+from nova.tests.unit import fake_notifier
+from nova.tests.unit.image import fake as fake_image
 from nova.virt import driver as virt_driver
@@ -469,3 +478,258 @@ class TestUpdateComputeNodeReservedAndAllocationRatio(
             self.assertIn('allocation_ratio', inv[rc])
             self.assertEqual(ratio, inv[rc]['allocation_ratio'],
                              'Unexpected allocation ratio for %s' % rc)
+
+
+class TestProviderConfig(integrated_helpers.ProviderUsageBaseTestCase):
+    """Tests for adding inventories and traits to resource providers using
+    provider config files described in spec provider-config-file.
+ """ + + compute_driver = 'fake.FakeDriver' + + BASE_CONFIG = { + "meta": { + "schema_version": "1.0" + }, + "providers": [] + } + EMPTY_PROVIDER = { + "identification": { + }, + "inventories": { + "additional": [] + }, + "traits": { + "additional": [] + } + } + + def setUp(self): + super().setUp() + + # make a new temp dir and configure nova-compute to look for provider + # config files there + self.pconf_loc = self.useFixture(fixtures.TempDir()).path + self.flags(provider_config_location=self.pconf_loc, group='compute') + + def _create_config_entry(self, id_value, id_method="uuid", cfg_file=None): + """Adds an entry in the config file for the provider using the + requested identification method [uuid, name] with additional traits + and inventories. + """ + # if an existing config file was not passed, create a new one + if not cfg_file: + cfg_file = copy.deepcopy(self.BASE_CONFIG) + provider = copy.deepcopy(self.EMPTY_PROVIDER) + + # create identification method + provider['identification'] = {id_method: id_value} + + # create entries for additional traits and inventories using values + # unique to this provider entry + provider['inventories']['additional'].append({ + orc.normalize_name(id_value): { + "total": 100, + "reserved": 0, + "min_unit": 1, + "max_unit": 10, + "step_size": 1, + "allocation_ratio": 1 + } + }) + provider['traits']['additional'].append( + os_traits.normalize_name(id_value)) + + # edit cfg_file in place, but return it in case this is the first call + cfg_file['providers'].append(provider) + return cfg_file + + def _assert_inventory_and_traits(self, provider, config): + """Asserts that the inventory and traits on the provider include those + defined in the provided config file. If the provider was identified + explicitly, also asserts that the $COMPUTE_NODE values are not included + on the provider. + + Testing for specific inventory values is done in depth in unit tests + so here we are just checking for keys. 
+ """ + # retrieve actual inventory and traits for the provider + actual_inventory = list( + self._get_provider_inventory(provider['uuid']).keys()) + actual_traits = self._get_provider_traits(provider['uuid']) + + # search config file data for expected inventory and traits + # since we also want to check for unexpected inventory, + # we also need to track compute node entries + expected_inventory, expected_traits = [], [] + cn_expected_inventory, cn_expected_traits = [], [] + for p_config in config['providers']: + _pid = p_config['identification'] + # check for explicit uuid/name match + if _pid.get("uuid") == provider['uuid'] \ + or _pid.get("name") == provider['name']: + expected_inventory = list(p_config.get( + "inventories", {}).get("additional", [])[0].keys()) + expected_traits = p_config.get( + "traits", {}).get("additional", []) + # check for uuid==$COMPUTE_NODE match + elif _pid.get("uuid") == "$COMPUTE_NODE": + cn_expected_inventory = list(p_config.get( + "inventories", {}).get("additional", [])[0].keys()) + cn_expected_traits = p_config.get( + "traits", {}).get("additional", []) + + # if expected inventory or traits are found, + # test that they all exist in the actual inventory/traits + missing_inventory, missing_traits = None, None + unexpected_inventory, unexpected_traits = None, None + if expected_inventory or expected_traits: + missing_inventory = [key for key in expected_inventory + if key not in actual_inventory] + missing_traits = [key for key in expected_traits + if key not in actual_traits] + # if $COMPUTE_NODE values are also found, + # test that they do not exist + if cn_expected_inventory or cn_expected_traits: + unexpected_inventory = [ + key for key in actual_inventory + if key in cn_expected_inventory and key + not in expected_inventory] + missing_traits = [ + trait for trait in cn_expected_traits + if trait in actual_traits and trait + not in expected_traits] + # if no explicit values were found, test for $COMPUTE_NODE values + elif cn_expected_inventory or cn_expected_traits: + missing_inventory = [key for key in cn_expected_inventory + if key not in actual_inventory] + missing_traits = [trait for trait in cn_expected_traits + if trait not in actual_traits] + # if no values were found, the test is broken + else: + self.fail("No expected values were found, the test is broken.") + + self.assertFalse(missing_inventory, + msg="Missing inventory: %s" % missing_inventory) + self.assertFalse(unexpected_inventory, + msg="Unexpected inventory: %s" % unexpected_inventory) + self.assertFalse(missing_traits, + msg="Missing traits: %s" % missing_traits) + self.assertFalse(unexpected_traits, + msg="Unexpected traits: %s" % unexpected_traits) + + def _place_config_file(self, file_name, file_data): + """Creates a file in the provider config directory using file_name and + dumps file_data to it in yaml format. + + NOTE: The file name should end in ".yaml" for Nova to recognize and + load it. + """ + with open(os.path.join(self.pconf_loc, file_name), "w") as open_file: + yaml.dump(file_data, open_file) + + def test_single_config_file(self): + """Tests that additional inventories and traits defined for a provider + are applied to the correct provider. 
+ """ + # create a config file with both explicit name and uuid=$COMPUTE_NODE + config = self._create_config_entry("fake-host", id_method="name") + self._place_config_file("provider_config1.yaml", config) + + # start nova-compute + self._start_compute("fake-host") + + # test that only inventory from the explicit entry exists + provider = self._get_resource_provider_by_uuid( + self._get_provider_uuid_by_host("fake-host")) + self._assert_inventory_and_traits(provider, config) + + def test_multiple_config_files(self): + """This performs the same test as test_single_config_file but splits + the configurations into separate files. + """ + # create a config file with uuid=$COMPUTE_NODE + config1 = self._create_config_entry("$COMPUTE_NODE", id_method="uuid") + self._place_config_file("provider_config1.yaml", config1) + # create a second config file with explicit name + config2 = self._create_config_entry("fake-host", id_method="name") + self._place_config_file("provider_config2.yaml", config2) + + # start nova-compute + self._start_compute("fake-host") + + # test that only inventory from the explicit entry exists + provider1 = self._get_resource_provider_by_uuid( + self._get_provider_uuid_by_host("fake-host")) + self._assert_inventory_and_traits(provider1, config2) + + def test_multiple_compute_nodes(self): + """This test mimics an ironic-like environment with multiple compute + nodes. Some nodes will be updated with the uuid=$COMPUTE_NODE provider + config entries and others will use explicit name matching. + """ + # get some uuids to use as compute host names + provider_names = [uuids.cn2, uuids.cn3, uuids.cn4, + uuids.cn5, uuids.cn6, uuids.cn7] + + # create config file with $COMPUTE_NODE entry + config = self._create_config_entry("$COMPUTE_NODE", id_method="uuid") + # add three explicit name entries + for provider_name in provider_names[-3:]: + self._create_config_entry(provider_name, id_method="name", + cfg_file=config) + self._place_config_file("provider.yaml", config) + + # start the compute services + for provider_name in provider_names: + self._start_compute(provider_name) + + # test for expected inventory and traits on each provider + for provider_name in provider_names: + self._assert_inventory_and_traits( + self._get_resource_provider_by_uuid( + self._get_provider_uuid_by_host(provider_name)), + config) + + def test_end_to_end(self): + """This test emulates a full end to end test showing that without this + feature a vm cannot be spawning using a custom trait and then start a + compute service that provides that trait. + """ + + self.neutron = nova_fixtures.NeutronFixture(self) + self.useFixture(self.neutron) + fake_image.stub_out_image_service(self) + self.addCleanup(fake_image.FakeImageService_reset) + # Start nova services. + self.api = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')).admin_api + self.api.microversion = 'latest' + fake_notifier.stub_notifier(self) + self.addCleanup(fake_notifier.reset) + self.start_service('conductor') + # start nova-compute that will not have the additional trait. 
+        self._start_compute("fake-host-1")
+
+        node_name = "fake-host-2"
+
+        # create a config file with explicit name
+        provider_config = self._create_config_entry(
+            node_name, id_method="name")
+        self._place_config_file("provider_config.yaml", provider_config)
+
+        self._create_flavor(
+            name='CUSTOM_Flavor', id=42, vcpu=4, memory_mb=4096,
+            disk=1024, swap=0, extra_spec={
+                f"trait:{os_traits.normalize_name(node_name)}": "required"
+            })
+
+        self._create_server(
+            flavor_id=42, expected_state='ERROR',
+            networks=[{'port': self.neutron.port_1['id']}])
+
+        # start compute node that will report the custom trait.
+        self._start_compute("fake-host-2")
+        self._create_server(
+            flavor_id=42, expected_state='ACTIVE',
+            networks=[{'port': self.neutron.port_1['id']}])
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index be196e27aed3..62bf0b8ead9a 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -4002,14 +4002,24 @@ class ProviderConfigTestCases(BaseTestCase):
         """If traits from provider config are duplicated with traits from
         virt driver or placement api, make sure exception will be raised.
         """
-        ex_trait = "EXCEPTION_TRAIT"
         provider = self._get_provider_config(uuid=uuids.cn1)
         # add the same trait in p_tree and provider config
         # for raising exception
+        ex_trait = "EXCEPTION_TRAIT"
         self.p_tree.add_traits(uuids.cn1, ex_trait)
         provider["traits"]["additional"].append(ex_trait)
 
+        # add the same trait in p_tree and provider config
+        # for testing the CUSTOM trait ignoring logic.
+        # If a programmer accidentally forgets to ignore (subtract)
+        # existing custom traits, this test case will fail as we only expect
+        # "EXCEPTION_TRAIT" to be shown in the ValueError exception rather
+        # than "EXCEPTION_TRAIT,CUSTOM_IGNORE_TRAIT".
+        ignore_trait = "CUSTOM_IGNORE_TRAIT"
+        self.p_tree.add_traits(uuids.cn1, ignore_trait)
+        provider["traits"]["additional"].append(ignore_trait)
+
         expected = ("Provider config 'test_provider_config.yaml' attempts to "
                     "define a trait that is owned by the virt driver or "
                     "specified via the placment api. Invalid traits '" +
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index dac487395503..4265591b484a 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -510,7 +510,7 @@ class FakeDriver(driver.ComputeDriver):
                      'sockets': 4,
                  }),
             ])
-        if nodename not in self._nodes:
+        if nodename not in self.get_available_nodes():
             return {}
 
         host_status = self.host_status_base.copy()
diff --git a/releasenotes/notes/provider-config-file-bf026380cb5a7898.yaml b/releasenotes/notes/provider-config-file-bf026380cb5a7898.yaml
new file mode 100644
index 000000000000..35088c6af254
--- /dev/null
+++ b/releasenotes/notes/provider-config-file-bf026380cb5a7898.yaml
@@ -0,0 +1,13 @@
+---
+features:
+  - |
+    Nova now supports defining additional resource provider traits and
+    inventories by way of YAML configuration files. The location of these
+    files is defined by the new config option
+    ``[compute]provider_config_location``. Nova will look in this directory
+    for ``*.yaml`` files. See the `specification`__ and `admin guide`__ for
+    more details.
+
+    __ https://specs.openstack.org/openstack/nova-specs/specs/ussuri/approved/provider-config-file.html
+    __ https://docs.openstack.org/nova/latest/admin/managing-resource-providers.html
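
The note in the new admin guide recommends validating ``provider.yaml`` files
with a simple jsonschema validator instead of restarting the nova-compute
agent. A minimal sketch of that workflow, not part of the change above, is
shown below; it assumes the schema from the guide has been saved locally as
``provider_config_schema.yaml``, and both file paths are examples only.

.. code-block:: python

    import jsonschema
    import yaml

    # Example paths only; point these at your provider config and at a local
    # copy of the schema shown in the admin guide above.
    with open('/etc/nova/provider_config/provider.yaml') as f:
        provider_config = yaml.safe_load(f)
    with open('provider_config_schema.yaml') as f:
        schema = yaml.safe_load(f)

    # Raises jsonschema.exceptions.ValidationError if the file does not
    # conform to the schema, giving much faster feedback than restarting
    # the nova-compute service.
    jsonschema.validate(provider_config, schema)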