Derive HCI reserved host memory without average guest workload

Add new option to tripleo_derive_hci_parameters module so that
if neither the average_guest_cpu_utilization_percentage nor the
average_guest_memory_size_in_mb is passed, then a simpler
calculation is performed which computes only the memory required
for the Ceph OSDs.

It is preferable for the user to pass the expected workload, but
if these values are unknown it is still better to at least reserve
the bare minimum of memory for the OSDs so that they do not get
shut down by an OOM error.

Change-Id: I68ad3783d3e0bf47fd63ab79f6ea0b0655c751c1
This commit is contained in:
John Fulton 2020-09-08 18:50:53 +00:00
parent 75afb950a5
commit cf1744373f
2 changed files with 71 additions and 36 deletions

View File

@ -48,12 +48,14 @@ options:
type: map
average_guest_cpu_utilization_percentage:
description: Percentage of CPU utilization expected for average guest, e.g. 99 means 99% and 10 means 10%
required: True
required: False
type: int
default: 0
average_guest_memory_size_in_mb:
description: Amount of memory in MB required by the average guest
required: True
required: False
type: int
default: 0
derived_parameters:
description: any previously derived parameters which should be included in the final result
required: False
@ -113,11 +115,13 @@ derived_parameters:
MB_PER_GB = 1024
def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
average_guest_cpu_utilization_percentage,
def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb=0,
average_guest_cpu_utilization_percentage=0,
mem_gb_per_osd=5, vcpus_per_osd=1.0, total_memory_threshold=0.8):
"""
Determines the recommended Nova scheduler values based on Ceph needs.
Determines the recommended Nova scheduler values based on Ceph needs
and described average Nova guest workload in CPU and Memory utilization.
If expected guest utilization is not provided result is less accurate.
Returns dictionary containing the keys: cpu_allocation_ratio (float),
nova_reserved_mem_mb (int), message (string), failed (boolean).
"""
@ -128,6 +132,12 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
derived['message'] = ""
derived['failed'] = False
if average_guest_memory_size_in_mb == 0 and \
average_guest_cpu_utilization_percentage == 0:
workload = False
else:
workload = True
# catch possible errors in parameters
if mem_gb < 1:
msg = "Unable to determine the amount of physical memory "
@ -147,13 +157,13 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
derived['message'] += msg + "\n"
derived['failed'] = True
if average_guest_memory_size_in_mb < 0:
msg = "The average_guest_memory_size_in_mb must be a positive integer."
if average_guest_memory_size_in_mb < 0 and workload:
msg = "If average_guest_memory_size_in_mb is used it must be greater than 0"
derived['message'] += msg + "\n"
derived['failed'] = True
if average_guest_cpu_utilization_percentage < 0:
msg = "The average_guest_cpu_utilization_percentage must be a positive integer."
if average_guest_cpu_utilization_percentage < 0 and workload:
msg = "If average_guest_cpu_utilization_percentage is used it must be greater than 0"
derived['message'] += msg + "\n"
derived['failed'] = True
@ -169,6 +179,7 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
return derived
# perform the calculation
if workload:
average_guest_size = average_guest_memory_size_in_mb / float(MB_PER_GB)
average_guest_util = average_guest_cpu_utilization_percentage * 0.01
number_of_guests = int(left_over_mem
@ -178,10 +189,13 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
nonceph_vcpus = vcpus - (vcpus_per_osd * osds)
guest_vcpus = nonceph_vcpus / average_guest_util
cpu_allocation_ratio = guest_vcpus / vcpus
else:
nova_reserved_mem_mb = MB_PER_GB * (mem_gb_per_osd * osds)
# save calculation results
derived['cpu_allocation_ratio'] = cpu_allocation_ratio
derived['nova_reserved_mem_mb'] = int(nova_reserved_mem_mb)
if workload:
derived['cpu_allocation_ratio'] = cpu_allocation_ratio
# capture derivation details in message
msg = "Derived Parameters results"
@ -189,16 +203,19 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
msg += "\n - Total host RAM in GB: %d" % mem_gb
msg += "\n - Total host vCPUs: %d" % vcpus
msg += "\n - Ceph OSDs per host: %d" % osds
if workload:
msg += "\n - Average guest memory size in GB: %d" % average_guest_size
msg += "\n - Average guest CPU utilization: %.0f%%" % \
average_guest_cpu_utilization_percentage
msg += "\n "
msg += "\n Outputs:"
if workload:
msg += "\n - number of guests allowed based on memory = %d" % number_of_guests
msg += "\n - number of guest vCPUs allowed = %d" % int(guest_vcpus)
msg += "\n - nova.conf reserved_host_memory = %d MB" % nova_reserved_mem_mb
msg += "\n - nova.conf cpu_allocation_ratio = %2.2f" % cpu_allocation_ratio
msg += "\n - nova.conf reserved_host_memory = %d MB" % nova_reserved_mem_mb
msg += "\n "
if workload:
msg += "\nCompare \"guest vCPUs allowed\" to \"guests allowed based on memory\""
msg += "\nfor actual guest count."
msg += "\n "
@ -207,11 +224,20 @@ def derive(mem_gb, vcpus, osds, average_guest_memory_size_in_mb,
if nova_reserved_mem_mb > (MB_PER_GB * mem_gb * total_memory_threshold):
warning_msg += "ERROR: %d GB is not enough memory to run hyperconverged\n" % mem_gb
derived['failed'] = True
if workload:
if cpu_allocation_ratio < 0.5:
warning_msg += "ERROR: %d is not enough vCPU to run hyperconverged\n" % vcpus
derived['failed'] = True
if cpu_allocation_ratio > 16.0:
warning_msg += "WARNING: do not increase vCPU overcommit ratio beyond 16:1\n"
else:
warning_msg += "WARNING: the average guest workload was not provided. \n"
warning_msg += "Both average_guest_cpu_utilization_percentage and \n"
warning_msg += "average_guest_memory_size_in_mb are defaulted to 0. \n"
warning_msg += "The HCI derived parameter calculation cannot set the \n"
warning_msg += "Nova cpu_allocation_ratio. The Nova reserved_host_memory_mb \n"
warning_msg += "will be set based on the number of OSDs but the Nova \n"
warning_msg += "guest memory overhead will not be taken into account. \n"
derived['message'] = warning_msg + msg
return derived
@ -422,8 +448,8 @@ def main():
tripleo_environment_parameters=dict(type=dict, required=True),
tripleo_role_name=dict(type=str, required=True),
introspection_data=dict(type=dict, required=True),
average_guest_cpu_utilization_percentage=dict(type=int, required=True),
average_guest_memory_size_in_mb=dict(type=int, required=True),
average_guest_cpu_utilization_percentage=dict(type=int, required=False, default=0),
average_guest_memory_size_in_mb=dict(type=int, required=False, default=0),
derived_parameters=dict(type=dict, required=False),
new_heat_environment_path=dict(type=str, required=False),
report_path=dict(type=str, required=False),
@ -465,6 +491,7 @@ def main():
if not derivation['failed']:
role_derivation = {}
role_derivation['NovaReservedHostMemory'] = derivation['nova_reserved_mem_mb']
if 'cpu_allocation_ratio' in derivation:
role_derivation['NovaCPUAllocationRatio'] = derivation['cpu_allocation_ratio']
role_name_parameters = module.params['tripleo_role_name'] + 'Parameters'
existing_params[role_name_parameters] = role_derivation

View File

@ -153,3 +153,11 @@ class TestTripleoDeriveHciParameters(tests_base.TestCase):
self.assertEqual(vcpu_ratio, ratio_map[flavor])
self.assertIsNotNone(vcpu_msg)
self.assertFalse(vcpu_warn)
def test_derive_without_workload(self):
    """Test the derive method without the expected average guest workload.

    Calls derive() with only the host memory, vCPU and OSD counts and
    confirms that the derivation succeeds, that the reserved host memory
    covers only the OSDs (16 OSDs * 5 GB default per OSD * 1024 MB per GB
    = 81920 MB), and that no CPU allocation ratio is derived.
    """
    der = derive_params.derive(mem_gb=256, vcpus=56, osds=16)
    self.assertFalse(der['failed'])
    self.assertEqual(der['nova_reserved_mem_mb'], 81920)
    # Without a workload the ratio is not computed; the caller checks for
    # the key's presence before setting NovaCPUAllocationRatio.
    self.assertNotIn('cpu_allocation_ratio', der)