Merge "Derive deployment parameters for HCI"
This commit is contained in:
commit
c14554659c
@ -0,0 +1,13 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Add a Mistral workflow that uses hardware introspection data to derive
|
||||
deployment parameters for features such as DPDK and HCI (hyperconverged
|
||||
Nova compute and Ceph OSD nodes). The derived parameters workflow is
|
||||
automatically invoked during deployment when the workflow is listed in
|
||||
the plan environment file.
|
||||
|
||||
For each role in the deployment, the workflow analyzes the Heat resource
|
||||
tree to determine which features are relevant to that role. The main
|
||||
workflow invokes secondary workflows responsible for deriving parameters
|
||||
associated with each feature.
|
@ -199,9 +199,12 @@ workflows:
|
||||
action: baremetal_introspection.get_data uuid=<% $.profile_node_uuid %>
|
||||
publish:
|
||||
hw_data: <% task().result %>
|
||||
# Establish an empty dictionary of derived_parameters prior to
|
||||
# invoking the individual "feature" algorithms
|
||||
derived_parameters: <% dict() %>
|
||||
on-success:
|
||||
- get_dpdk_derive_params: <% $.role_features.contains("DPDK") %>
|
||||
# TODO: Needs to include condition to call other service derive params if DPDK is not available.
|
||||
- get_dpdk_derive_params: <% $.role_features.contains('DPDK') %>
|
||||
- get_hci_derive_params: <% not $.role_features.contains('DPDK') and $.role_features.contains('HCI') %>
|
||||
on-error: set_status_failed_get_introspection_data
|
||||
|
||||
get_dpdk_derive_params:
|
||||
@ -225,8 +228,23 @@ workflows:
|
||||
derived_parameters: <% $.derived_parameters %>
|
||||
publish:
|
||||
derived_parameters: <% task().result.get('derived_parameters', {}) %>
|
||||
on-success:
|
||||
- get_hci_derive_params: <% $.role_features.contains('HCI') %>
|
||||
on-error: set_status_failed_get_host_derive_params
|
||||
# Workflow ends here because there are no more algorithms.
|
||||
|
||||
get_hci_derive_params:
|
||||
workflow: tripleo.derive_params_formulas.v1.hci_derive_params
|
||||
input:
|
||||
role_name: <% $.role_name %>
|
||||
environment_parameters: <% $.environment_parameters %>
|
||||
heat_resource_tree: <% $.heat_resource_tree %>
|
||||
introspection_data: <% $.hw_data %>
|
||||
user_inputs: <% $.user_inputs %>
|
||||
derived_parameters: <% $.derived_parameters %>
|
||||
publish:
|
||||
derived_parameters: <% task().result.get('derived_parameters', {}) %>
|
||||
on-error: set_status_failed_get_hci_derive_params
|
||||
# Done (no more derived parameter features)
|
||||
|
||||
set_status_failed_get_role_info:
|
||||
publish:
|
||||
@ -284,6 +302,13 @@ workflows:
|
||||
message: <% task(get_host_derive_params).result %>
|
||||
on-success: fail
|
||||
|
||||
set_status_failed_get_hci_derive_params:
|
||||
publish:
|
||||
role_name: <% $.role_name %>
|
||||
status: FAILED
|
||||
message: <% task(get_hci_derive_params).result %>
|
||||
on-success: fail
|
||||
|
||||
|
||||
_get_role_info:
|
||||
description: >
|
||||
|
@ -39,7 +39,7 @@ workflows:
|
||||
dpdk_nics_numa_info: <% task().result %>
|
||||
on-success:
|
||||
# TODO: Need to remove conditions here
|
||||
# adding condition and trhow error in action for empty check
|
||||
# adding condition and throw error in action for empty check
|
||||
- get_dpdk_nics_numa_nodes: <% $.dpdk_nics_numa_info %>
|
||||
- set_status_failed_get_dpdk_nics_numa_info: <% not $.dpdk_nics_numa_info %>
|
||||
on-error: set_status_failed_on_error_get_dpdk_nics_numa_info
|
||||
@ -340,3 +340,215 @@ workflows:
|
||||
status: FAILED
|
||||
message: 'Unable to determine huge pages'
|
||||
on-success: fail
|
||||
|
||||
|
||||
hci_derive_params:
|
||||
description: Derive the deployment parameters for HCI
|
||||
input:
|
||||
- role_name
|
||||
- environment_parameters
|
||||
- heat_resource_tree
|
||||
- introspection_data
|
||||
- user_inputs
|
||||
- derived_parameters: {}
|
||||
|
||||
output:
|
||||
derived_parameters: <% $.derived_parameters.mergeWith($.get('hci_parameters', {})) %>
|
||||
|
||||
tasks:
|
||||
get_hci_inputs:
|
||||
publish:
|
||||
hci_profile: <% $.user_inputs.get('hci_profile', '') %>
|
||||
hci_profile_config: <% $.user_inputs.get('hci_profile_config', {}) %>
|
||||
MB_PER_GB: 1024
|
||||
on-success:
|
||||
- get_average_guest_memory_size_in_mb: <% $.hci_profile and $.hci_profile_config.get($.hci_profile, {}) %>
|
||||
- set_failed_invalid_hci_profile: <% $.hci_profile and not $.hci_profile_config.get($.hci_profile, {}) %>
|
||||
# When no hci_profile is specified, the workflow terminates without deriving any HCI parameters.
|
||||
|
||||
get_average_guest_memory_size_in_mb:
|
||||
publish:
|
||||
average_guest_memory_size_in_mb: <% $.hci_profile_config.get($.hci_profile, {}).get('average_guest_memory_size_in_mb', 0) %>
|
||||
on-success:
|
||||
- get_average_guest_cpu_utilization_percentage: <% isInteger($.average_guest_memory_size_in_mb) %>
|
||||
- set_failed_invalid_average_guest_memory_size_in_mb: <% not isInteger($.average_guest_memory_size_in_mb) %>
|
||||
|
||||
get_average_guest_cpu_utilization_percentage:
|
||||
publish:
|
||||
average_guest_cpu_utilization_percentage: <% $.hci_profile_config.get($.hci_profile, {}).get('average_guest_cpu_utilization_percentage', 0) %>
|
||||
on-success:
|
||||
- get_gb_overhead_per_guest: <% isInteger($.average_guest_cpu_utilization_percentage) %>
|
||||
- set_failed_invalid_average_guest_cpu_utilization_percentage: <% not isInteger($.average_guest_cpu_utilization_percentage) %>
|
||||
|
||||
get_gb_overhead_per_guest:
|
||||
publish:
|
||||
gb_overhead_per_guest: <% $.user_inputs.get('gb_overhead_per_guest', 0.5) %>
|
||||
on-success:
|
||||
- get_gb_per_osd: <% isNumber($.gb_overhead_per_guest) %>
|
||||
- set_failed_invalid_gb_overhead_per_guest: <% not isNumber($.gb_overhead_per_guest) %>
|
||||
|
||||
get_gb_per_osd:
|
||||
publish:
|
||||
gb_per_osd: <% $.user_inputs.get('gb_per_osd', 3) %>
|
||||
on-success:
|
||||
- get_cores_per_osd: <% isNumber($.gb_per_osd) %>
|
||||
- set_failed_invalid_gb_per_osd: <% not isNumber($.gb_per_osd) %>
|
||||
|
||||
get_cores_per_osd:
|
||||
publish:
|
||||
cores_per_osd: <% $.user_inputs.get('cores_per_osd', 1.0) %>
|
||||
on-success:
|
||||
- get_extra_configs: <% isNumber($.cores_per_osd) %>
|
||||
- set_failed_invalid_cores_per_osd: <% not isNumber($.cores_per_osd) %>
|
||||
|
||||
get_extra_configs:
|
||||
publish:
|
||||
extra_config: <% $.environment_parameters.get('ExtraConfig', {}) %>
|
||||
role_extra_config: <% $.environment_parameters.get(concat($.role_name, 'ExtraConfig'), {}) %>
|
||||
on-success: get_num_osds
|
||||
|
||||
get_num_osds:
|
||||
publish:
|
||||
num_osds: <% $.role_extra_config.get('ceph::profile::params::osds', $.extra_config.get('ceph::profile::params::osds', {})).keys().count() %>
|
||||
on-success:
|
||||
- get_memory_mb: <% $.num_osds %>
|
||||
- set_failed_no_osds: <% not $.num_osds %>
|
||||
|
||||
get_memory_mb:
|
||||
publish:
|
||||
memory_mb: <% $.introspection_data.get('memory_mb', 0) %>
|
||||
on-success:
|
||||
- get_num_cores: <% $.memory_mb %>
|
||||
- set_failed_get_memory_mb: <% not $.memory_mb %>
|
||||
|
||||
get_num_cores:
|
||||
publish:
|
||||
# TODO(abishop): If NovaVcpuPinSet is defined then use it to determine num_cores
|
||||
num_cores: <% $.introspection_data.get('cpus', 0) %>
|
||||
on-success:
|
||||
- calculate_nova_parameters: <% $.num_cores %>
|
||||
- set_failed_get_num_cores: <% not $.num_cores %>
|
||||
|
||||
# HCI calculations are broken into multiple steps. This is necessary
|
||||
# because variables published by a Mistral task are not available
|
||||
# for use by that same task. Variables computed and published in a task
|
||||
# are only available in subsequent tasks.
|
||||
#
|
||||
# The HCI calculations compute two Nova parameters:
|
||||
# - reserved_host_memory
|
||||
# - cpu_allocation_ratio
|
||||
#
|
||||
# The reserved_host_memory calculation computes the amount of memory
|
||||
# that needs to be reserved for Ceph and the total amount of "guest
|
||||
# overhead" memory that is based on the anticipated number of guests.
|
||||
# Pseudo-code for the calculation (disregarding MB and GB units) is
|
||||
# as follows:
|
||||
#
|
||||
# ceph_memory = mem_per_osd * num_osds
|
||||
# nova_memory = total_memory - ceph_memory
|
||||
# num_guests = nova_memory /
|
||||
# (average_guest_memory_size + overhead_per_guest)
|
||||
# reserved_memory = ceph_memory + (num_guests * overhead_per_guest)
|
||||
#
|
||||
# The cpu_allocation_ratio calculation is similar in that it takes into
|
||||
# account the number of cores that must be reserved for Ceph.
|
||||
#
|
||||
# ceph_cores = cores_per_osd * num_osds
|
||||
# guest_cores = num_cores - ceph_cores
|
||||
# guest_vcpus = guest_cores / average_guest_utilization
|
||||
# cpu_allocation_ratio = guest_vcpus / num_cores
|
||||
|
||||
calculate_nova_parameters:
|
||||
publish:
|
||||
avg_guest_util: <% $.average_guest_cpu_utilization_percentage / 100.0 %>
|
||||
avg_guest_size_gb: <% $.average_guest_memory_size_in_mb / float($.MB_PER_GB) %>
|
||||
memory_gb: <% $.memory_mb / float($.MB_PER_GB) %>
|
||||
ceph_mem_gb: <% $.gb_per_osd * $.num_osds %>
|
||||
nonceph_cores: <% $.num_cores - int($.cores_per_osd * $.num_osds) %>
|
||||
on-success: calc_step_2
|
||||
|
||||
calc_step_2:
|
||||
publish:
|
||||
num_guests: <% int(($.memory_gb - $.ceph_mem_gb) / ($.avg_guest_size_gb + $.gb_overhead_per_guest)) %>
|
||||
guest_vcpus: <% $.nonceph_cores / $.avg_guest_util %>
|
||||
on-success: calc_step_3
|
||||
|
||||
calc_step_3:
|
||||
publish:
|
||||
reserved_host_memory: <% $.MB_PER_GB * int($.ceph_mem_gb + ($.num_guests * $.gb_overhead_per_guest)) %>
|
||||
cpu_allocation_ratio: <% $.guest_vcpus / $.num_cores %>
|
||||
on-success: validate_results
|
||||
|
||||
validate_results:
|
||||
publish:
|
||||
# Verify whether HCI is viable:
|
||||
# - At most 80% of the memory may be reserved for Ceph and guest overhead
|
||||
# - The cpu_allocation_ratio must be at least 0.5 (at 100% guest CPU utilization this means at least half of the CPU cores are available to Nova)
|
||||
mem_ok: <% $.reserved_host_memory <= ($.memory_mb * 0.8) %>
|
||||
cpu_ok: <% $.cpu_allocation_ratio >= 0.5 %>
|
||||
on-success:
|
||||
- set_failed_insufficient_mem: <% not $.mem_ok %>
|
||||
- set_failed_insufficient_cpu: <% not $.cpu_ok %>
|
||||
- publish_hci_parameters: <% $.mem_ok and $.cpu_ok %>
|
||||
|
||||
publish_hci_parameters:
|
||||
publish:
|
||||
# TODO(abishop): Update this when the cpu_allocation_ratio can be set
|
||||
# via a THT parameter (no such parameter currently exists). Until a
|
||||
# THT parameter exists, use hiera data to set the cpu_allocation_ratio.
|
||||
hci_parameters: <% dict(concat($.role_name, 'Parameters') => dict('NovaReservedHostMemory' => $.reserved_host_memory)) + dict(concat($.role_name, 'ExtraConfig') => dict('nova::cpu_allocation_ratio' => $.cpu_allocation_ratio)) %>
|
||||
|
||||
set_failed_invalid_hci_profile:
|
||||
publish:
|
||||
message: "'<% $.hci_profile %>' is not a valid HCI profile."
|
||||
on-success: fail
|
||||
|
||||
set_failed_invalid_average_guest_memory_size_in_mb:
|
||||
publish:
|
||||
message: "'<% $.average_guest_memory_size_in_mb %>' is not a valid average_guest_memory_size_in_mb value."
|
||||
on-success: fail
|
||||
|
||||
set_failed_invalid_gb_overhead_per_guest:
|
||||
publish:
|
||||
message: "'<% $.gb_overhead_per_guest %>' is not a valid gb_overhead_per_guest value."
|
||||
on-success: fail
|
||||
|
||||
set_failed_invalid_gb_per_osd:
|
||||
publish:
|
||||
message: "'<% $.gb_per_osd %>' is not a valid gb_per_osd value."
|
||||
on-success: fail
|
||||
|
||||
set_failed_invalid_cores_per_osd:
|
||||
publish:
|
||||
message: "'<% $.cores_per_osd %>' is not a valid cores_per_osd value."
|
||||
on-success: fail
|
||||
|
||||
set_failed_invalid_average_guest_cpu_utilization_percentage:
|
||||
publish:
|
||||
message: "'<% $.average_guest_cpu_utilization_percentage %>' is not a valid average_guest_cpu_utilization_percentage value."
|
||||
on-success: fail
|
||||
|
||||
set_failed_no_osds:
|
||||
publish:
|
||||
message: "No Ceph OSDs found in the overcloud definition ('ceph::profile::params::osds')."
|
||||
on-success: fail
|
||||
|
||||
set_failed_get_memory_mb:
|
||||
publish:
|
||||
message: "Unable to determine the amount of physical memory (no 'memory_mb' found in introspection_data)."
|
||||
on-success: fail
|
||||
|
||||
set_failed_get_num_cores:
|
||||
publish:
|
||||
message: "Unable to determine the number of CPU cores (no 'cpus' found in introspection_data)."
|
||||
on-success: fail
|
||||
|
||||
set_failed_insufficient_mem:
|
||||
publish:
|
||||
message: "<% $.memory_mb %> MB is not enough memory to run hyperconverged."
|
||||
on-success: fail
|
||||
|
||||
set_failed_insufficient_cpu:
|
||||
publish:
|
||||
message: "<% $.num_cores %> CPU cores are not enough to run hyperconverged."
|
||||
on-success: fail
|
||||
|
Loading…
x
Reference in New Issue
Block a user