From d5cb7ce3938d60bcf81f6d6c3b0e2971b7c0434d Mon Sep 17 00:00:00 2001 From: John Fulton Date: Tue, 23 Mar 2021 21:21:55 +0000 Subject: [PATCH] Add CephHciOsdCount/CephHciOsdType to tripleo_derive_hci_parameters The tripleo_derive_hci_parameters module uses the devices and lvm_volumes list in CephAnsibleDisksConfig from Heat to count the expected number of OSDs. However, with the move to cephadm, the OSD service spec language is more flexible and the user can pass a description like "all available block devices". As a result, the module can no longer count the expected number of OSDs. As an alternative expose a paramter so the user may directly pass CephOsdCount. Similarly expose a CephOsdType parameter (HDD, SSD) since there is no path to lookup the device in Ironic. Related-Bug: #1920954 Change-Id: Ia6bbdf023e2a0961cd91d3e9f40a8a5a26253ba3 (cherry picked from commit b015a79dc4628b9d2b5907739c22ff3b4e6379a3) --- .../modules/tripleo_derive_hci_parameters.py | 117 +++++++++++++++--- .../modules/test_derive_hci_parameters.py | 70 +++++++++-- 2 files changed, 165 insertions(+), 22 deletions(-) diff --git a/tripleo_ansible/ansible_plugins/modules/tripleo_derive_hci_parameters.py b/tripleo_ansible/ansible_plugins/modules/tripleo_derive_hci_parameters.py index 3a4bc3593..35536957a 100644 --- a/tripleo_ansible/ansible_plugins/modules/tripleo_derive_hci_parameters.py +++ b/tripleo_ansible/ansible_plugins/modules/tripleo_derive_hci_parameters.py @@ -116,7 +116,7 @@ MB_PER_GB = 1024 def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb=0, average_guest_cpu_utilization_percentage=0, - mem_gb_per_osd=5, vcpus_per_osd=1.0, total_memory_threshold=0.8): + mem_gb_per_osd=5, vcpus_per_osd=1, total_memory_threshold=0.8): """ Determines the recommended Nova scheduler values based on Ceph needs and described average Nova guest workload in CPU and Memory utilization. @@ -249,12 +249,11 @@ def count_osds(tripleo_environment_parameters): Returns an integer representing the count. """ total = 0 - try: + if 'CephAnsibleDisksConfig' in tripleo_environment_parameters: disks_config = tripleo_environment_parameters['CephAnsibleDisksConfig'] for key in ['devices', 'lvm_volumes']: - total = total + len(disks_config[key]) - except KeyError: - pass + if key in disks_config: + total = total + len(disks_config[key]) return total @@ -348,7 +347,7 @@ def count_vcpus(module): return vcpus -def get_vcpus_per_osd(ironic, tripleo_environment_parameters, num_osds): +def get_vcpus_per_osd_from_ironic(ironic, tripleo_environment_parameters, num_osds): """ Dynamically sets the vCPU to OSD ratio based the OSD type to: HDD | OSDs per device: 1 | vCPUs per device: 1 @@ -358,7 +357,7 @@ def get_vcpus_per_osd(ironic, tripleo_environment_parameters, num_osds): and looks up the device type in ironic input. Returns the vCPUs per OSD, an explanation message. """ - cpus = 1.0 + cpus = 1 nvme_re = re.compile('.*nvme.*') type_map = {} hdd_count = ssd_count = nvme_count = 0 @@ -448,6 +447,86 @@ def get_vcpus_per_osd(ironic, tripleo_environment_parameters, num_osds): return cpus, msg +def get_vcpus_per_osd(tripleo_environment_parameters, osd_count, osd_type, osd_spec): + """ + Dynamically sets the vCPU to OSD ratio based the OSD type to: + HDD | OSDs per device: 1 | vCPUs per device: 1 + SSD | OSDs per device: 1 | vCPUs per device: 4 + NVMe | OSDs per device: 4 | vCPUs per device: 3 + Relies on parameters from tripleo_environment_parameters input. + Returns the vCPUs per OSD and an explanation message. + """ + cpus = 1 + messages = [] + warning = False + + # This module can analyze a THT file even when it is not called from + # within Heat. Thus, we cannot assume THT validations are enforced. + if osd_type not in ['hdd', 'ssd', 'nvme']: + warning = True + messages.append(("'%s' is not a valid osd_type so " + "defaulting to 'hdd'. ") % osd_type) + osd_type = 'hdd' + messages.append(("CephHciOsdType: %s\n") % osd_type) + + if osd_type == 'hdd': + cpus = 1 + elif osd_type == 'ssd': + cpus = 4 + elif osd_type == 'nvme': + # If they set it to NVMe and used a manual spec, then 3 is also valid + cpus = 3 + if type(osd_spec) is not dict: + messages.append("\nNo valid CephOsdSpec was not found. Unable " + "to determine if osds_per_device is being used. " + "osds_per_device: 4 is recommended for 'nvme'. ") + warning = True + if 'osds_per_device' in osd_spec: + if osd_spec['osds_per_device'] == 4: + cpus = 3 + else: + cpus = 4 + messages.append("\nosds_per_device not set to 4 " + "but all OSDs are of type NVMe. \n" + "Recommendation to improve IO: " + "set osds_per_device to 4 and re-run \n" + "so that vCPU to OSD ratio is 3 " + "for 12 vCPUs per OSD device.") + warning = True + + messages.append(("vCPU to OSD ratio: %i\n" % cpus)) + if osd_spec != 0 and 'osds_per_device' in osd_spec: + messages.append(" (found osds_per_device set to: %i)" % + osd_spec['osds_per_device']) + msg = "".join(messages) + if warning: + msg = "WARNING: " + msg + + return cpus, msg + + +def find_parameter(env, param, role=""): + """ + Find a parameter in an environment map and return it. + If paramter is not found return 0. + + Supports role parameters too. E.g. given the following + inside of env, with param=CephHciOsdCount and role="", + this function returns 3. But if role=ComputeHCI, then + it would return 4. + + CephHciOsdCount: 3 + ComputeHCIParameters: + CephHciOsdCount: 4 + """ + role_parameters = role + 'Parameters' + if role_parameters in env and param in env[role_parameters]: + return env[role_parameters][param] + elif param in env: + return env[param] + return 0 + + def main(): """Main method of Ansible module """ @@ -474,16 +553,26 @@ def main(): module.params['derived_parameters'] = {} vcpus = count_vcpus(module) - num_osds = count_osds(module.params['tripleo_environment_parameters']) mem_gb = count_memory(module.params['introspection_data']) - - mem_gb_per_osd = 5 - vcpu_ratio, vcpu_ratio_msg = get_vcpus_per_osd( - module.params['introspection_data'], - module.params['tripleo_environment_parameters'], - num_osds) + num_osds = find_parameter(module.params['tripleo_environment_parameters'], + 'CephHciOsdCount', module.params['tripleo_role_name']) + if num_osds > 0: + osd_type = find_parameter(module.params['tripleo_environment_parameters'], + 'CephHciOsdType', module.params['tripleo_role_name']) + osd_spec = find_parameter(module.params['tripleo_environment_parameters'], + 'CephOsdSpec', module.params['tripleo_role_name']) + vcpu_ratio, vcpu_ratio_msg = get_vcpus_per_osd( + module.params['tripleo_environment_parameters'], + num_osds, osd_type, osd_spec) + else: + num_osds = count_osds(module.params['tripleo_environment_parameters']) + vcpu_ratio, vcpu_ratio_msg = get_vcpus_per_osd_from_ironic( + module.params['introspection_data'], + module.params['tripleo_environment_parameters'], + num_osds) # Derive HCI parameters + mem_gb_per_osd = 5 derivation = derive(mem_gb, vcpus, num_osds, module.params['average_guest_memory_size_in_mb'], module.params['average_guest_cpu_utilization_percentage'], diff --git a/tripleo_ansible/tests/modules/test_derive_hci_parameters.py b/tripleo_ansible/tests/modules/test_derive_hci_parameters.py index 02423edfb..a8e5d92ce 100644 --- a/tripleo_ansible/tests/modules/test_derive_hci_parameters.py +++ b/tripleo_ansible/tests/modules/test_derive_hci_parameters.py @@ -96,7 +96,7 @@ class TestTripleoDeriveHciParameters(tests_base.TestCase): i += 1 return ironic - def get_env(flavor='hdd', osds_per_device=1): + def get_env_ceph_ansible(flavor='hdd', osds_per_device=1): """Returns a dictionary which mocks the content of the tripleo_environment_parameters CephAnsibleDisksConfig where the deployer requests four OSDs using device @@ -133,6 +133,28 @@ class TestTripleoDeriveHciParameters(tests_base.TestCase): } return env + def get_env_cephadm(flavor='hdd', osds_per_device=1): + """Returns a dictionary which mocks the content of the + tripleo_environment_parameters CephOsd{Count,Type,Spec} + where the deployer requests a number of OSDs of differing + flavor types. The flavor may be set to one of hdd, ssd, + or nvme and it is also possible to set the osds_per_device + (usually used with NVMe). + """ + if osds_per_device == 0: + osds_per_device = 1 + env_cephadm = { + "CephHciOsdCount": 5, + "CephHciOsdType": flavor, + "CephOsdSpec": { + "data_devices": { + "all": True, + }, + "osds_per_device": osds_per_device + } + } + return env_cephadm + ratio_map = { 'hdd': 1, 'ssd': 4, @@ -140,18 +162,31 @@ class TestTripleoDeriveHciParameters(tests_base.TestCase): 'nvme': 3 } for flavor in ratio_map: + envs = [] if flavor == 'nvme': osds_per_device = 4 else: osds_per_device = 0 - env = get_env(flavor, osds_per_device) + envs.append(get_env_ceph_ansible(flavor, osds_per_device)) + if flavor != 'by_path': + envs.append(get_env_cephadm(flavor, osds_per_device)) ironic = get_ironic(flavor) - num_osds = len(env['CephAnsibleDisksConfig']['devices']) - vcpu_ratio, vcpu_msg = derive_params.get_vcpus_per_osd(ironic, - env, - num_osds) - self.assertEqual(vcpu_ratio, ratio_map[flavor]) - self.assertIsNotNone(vcpu_msg) + for env in envs: + if "CephHciOsdCount" in env and "CephHciOsdType" in env: + vcpu_ratio, vcpu_msg = derive_params\ + .get_vcpus_per_osd(env, + env['CephHciOsdCount'], + env['CephHciOsdType'], + env['CephOsdSpec']) + + else: + num_osds = derive_params.count_osds(env) + vcpu_ratio, vcpu_msg = \ + derive_params.get_vcpus_per_osd_from_ironic(ironic, + env, + num_osds) + self.assertEqual(vcpu_ratio, ratio_map[flavor]) + self.assertIsNotNone(vcpu_msg) def test_derive_without_workload(self): """Test the derive method without passing the expected average @@ -175,3 +210,22 @@ class TestTripleoDeriveHciParameters(tests_base.TestCase): gb_from_mb = derive_params.count_memory(mock_ironic_memory_mb) gb_from_bytes = derive_params.count_memory(mock_ironic_memory_bytes) self.assertEqual(gb_from_mb, gb_from_bytes) + + def test_find_parameter(self): + """Tests that the find_parameter method returns the + expected output for particular inputs. + """ + env = {'CephHciOsdCount': 3, + 'CephHciOsdType': 'ssd', + 'ComputeHCIParameters': { + 'CephHciOsdCount': 4 + }, + } + value = derive_params.find_parameter(env, 'CephHciOsdCount', 'ComputeHCI') + self.assertEqual(value, 4) + value = derive_params.find_parameter(env, 'CephHciOsdCount') + self.assertEqual(value, 3) + value = derive_params.find_parameter(env, 'CephOsdSpec') + self.assertEqual(value, 0) + value = derive_params.find_parameter(env, 'CephHciOsdType') + self.assertEqual(value, 'ssd')