tripleo-common/workbooks/derive_params_formulas.yaml
Jaganathan Palanisamy 741bb3fff7 NeutronPhysnetNUMANodesMapping parameter issue
The NUMA-aware vSwitch parameter 'NeutronPhysnetNUMANodesMapping' is
not derived correctly by the ease-of-deployment workflows.

Change-Id: I1f8462e30ccb871db67adbf67232839424fc12f3
Closes-Bug: #1842894
2019-09-05 14:39:30 +05:30

837 lines
34 KiB
YAML

---
# Mistral workbook header: workflow-language version, the workbook's
# fully-qualified name (workflows below are referenced as
# '<name>.<workflow>'), a description, and the workflow definitions.
version: '2.0'
name: tripleo.derive_params_formulas.v1
description: TripleO Workflows to derive deployment parameters from the introspected data
workflows:
dpdk_derive_params:
  description: >
    Workflow to derive parameters for DPDK service.
  # Inputs:
  #   plan               - handed to the network-config action as `container`
  #   role_name          - role the parameters are derived for; results are
  #                        published under '<role_name>Parameters'
  #   heat_resource_tree - source of the NeutronBridgeMappings and
  #                        NeutronNetworkType parameter defaults
  #   hw_data            - introspection data
  #   user_inputs        - read for 'num_phy_cores_per_numa_node_for_pmd'
  #                        and 'overhead'
  #   derived_parameters - parameters derived so far (defaults to {})
  input:
    - plan
    - role_name
    - heat_resource_tree
    - hw_data  # introspection data
    - user_inputs
    - derived_parameters: {}
  # Output: the incoming derived_parameters merged with dpdk_parameters
  # ({} is merged when dpdk_parameters was never published).
  output:
    derived_parameters: <% $.derived_parameters.mergeWith($.get('dpdk_parameters', {})) %>
  tags:
    - tripleo-common-managed
  tasks:
    # Fetch the role's network configuration from the plan.
    get_network_config:
      action: tripleo.parameters.get_network_config
      input:
        container: <% $.plan %>
        role_name: <% $.role_name %>
      publish:
        network_configs: <% task().result.get('network_config', []) %>
      on-success: get_dpdk_nics_numa_info
      on-error: set_status_failed_get_network_config

    # Resolve the DPDK NICs found in the network config against the
    # introspection data.
    get_dpdk_nics_numa_info:
      action: tripleo.derive_params.get_dpdk_nics_numa_info
      input:
        network_configs: <% $.network_configs %>
        inspect_data: <% $.hw_data %>
      publish:
        dpdk_nics_numa_info: <% task().result %>
      on-success:
        # TODO: Need to remove conditions here
        # adding condition and throw error in action for empty check
        - get_dpdk_nics_numa_nodes: <% $.dpdk_nics_numa_info %>
        - set_status_failed_get_dpdk_nics_numa_info: <% not $.dpdk_nics_numa_info %>
      on-error: set_status_failed_on_error_get_dpdk_nics_numa_info

    # Sorted list of the distinct NUMA nodes that have a DPDK NIC.
    get_dpdk_nics_numa_nodes:
      publish:
        dpdk_nics_numa_nodes: <% $.dpdk_nics_numa_info.groupBy($.numa_node).select($[0]).orderBy($) %>
      on-success:
        - get_numa_nodes: <% $.dpdk_nics_numa_nodes %>
        - set_status_failed_get_dpdk_nics_numa_nodes: <% not $.dpdk_nics_numa_nodes %>

    # Sorted list of all NUMA nodes, read from the numa_topology.ram entries.
    get_numa_nodes:
      publish:
        numa_nodes: <% $.hw_data.numa_topology.ram.select($.numa_node).orderBy($) %>
      on-success:
        - get_num_phy_cores_per_numa_for_pmd: <% $.numa_nodes %>
        - set_status_failed_get_numa_nodes: <% not $.numa_nodes %>

    # Validate the user-supplied PMD core count (0 when the key is absent).
    # The three transitions cover every value: a positive integer continues,
    # 0 means "not provided", and anything else (non-integer or negative
    # integer) is reported as invalid. Without the negative case a value such
    # as -1 matched no transition and the workflow ended without deriving
    # anything or reporting a failure.
    get_num_phy_cores_per_numa_for_pmd:
      publish:
        num_phy_cores_per_numa_node_for_pmd: <% $.user_inputs.get('num_phy_cores_per_numa_node_for_pmd', 0) %>
      on-success:
        - get_num_cores_per_numa_nodes: <% isInteger($.num_phy_cores_per_numa_node_for_pmd) and $.num_phy_cores_per_numa_node_for_pmd > 0 %>
        - set_status_failed_get_num_phy_cores_per_numa_for_pmd_invalid: <% not (isInteger($.num_phy_cores_per_numa_node_for_pmd) and $.num_phy_cores_per_numa_node_for_pmd >= 0) %>
        - set_status_failed_get_num_phy_cores_per_numa_for_pmd_not_provided: <% $.num_phy_cores_per_numa_node_for_pmd = 0 %>

    # For NUMA node with DPDK nic, number of cores should be used from user input
    # For NUMA node without DPDK nic, number of cores should be 1
    get_num_cores_per_numa_nodes:
      publish:
        num_cores_per_numa_nodes: <% let(dpdk_nics_nodes => $.dpdk_nics_numa_nodes, cores => $.num_phy_cores_per_numa_node_for_pmd) -> $.numa_nodes.select(switch($ in $dpdk_nics_nodes => $cores, not $ in $dpdk_nics_nodes => 1)) %>
      on-success: get_pmd_cpus

    # Pick the PMD CPUs from the per-NUMA-node core counts.
    get_pmd_cpus:
      action: tripleo.derive_params.get_dpdk_core_list
      input:
        inspect_data: <% $.hw_data %>
        numa_nodes_cores_count: <% $.num_cores_per_numa_nodes %>
      publish:
        pmd_cpus: <% task().result %>
      on-success:
        - get_pmd_cpus_range_list: <% $.pmd_cpus %>
        - set_status_failed_get_pmd_cpus: <% not $.pmd_cpus %>
      on-error: set_status_failed_on_error_get_pmd_cpus

    # Re-publish pmd_cpus in range-list form.
    get_pmd_cpus_range_list:
      action: tripleo.derive_params.convert_number_to_range_list
      input:
        num_list: <% $.pmd_cpus %>
      publish:
        pmd_cpus: <% task().result %>
      on-success: get_host_cpus
      on-error: set_status_failed_get_pmd_cpus_range_list

    # Delegate the host CPU list to the shared get_host_cpus sub-workflow.
    get_host_cpus:
      workflow: tripleo.derive_params_formulas.v1.get_host_cpus
      input:
        role_name: <% $.role_name %>
        hw_data: <% $.hw_data %>
      publish:
        host_cpus: <% task().result.get('host_cpus', '') %>
      on-success: get_sock_mem
      on-error: set_status_failed_get_host_cpus

    # Socket memory; 'overhead' falls back to 800 when the user gave none.
    get_sock_mem:
      action: tripleo.derive_params.get_dpdk_socket_memory
      input:
        dpdk_nics_numa_info: <% $.dpdk_nics_numa_info %>
        numa_nodes: <% $.numa_nodes %>
        overhead: <% $.user_inputs.get('overhead', 800) %>
        packet_size_in_buffer: <% 4096*64 %>
      publish:
        sock_mem: <% task().result %>
      on-success:
        - get_neutron_bridge_mappings: <% $.sock_mem %>
        - set_status_failed_get_sock_mem: <% not $.sock_mem %>
      on-error: set_status_failed_on_error_get_sock_mem

    # Read the NeutronBridgeMappings default; when it is empty the physnet
    # mapping steps are skipped and the flow jumps to the network type.
    get_neutron_bridge_mappings:
      publish:
        neutron_bridge_mappings: <% $.heat_resource_tree.parameters.get('NeutronBridgeMappings', {}).get('default', '') %>
      on-success:
        - get_phy_nw_bridge_mappings: <% $.neutron_bridge_mappings %>
        - get_neutron_network_type: <% not $.neutron_bridge_mappings %>

    # Gets the physical network and ovs bridge mappings
    # ('physnet:bridge,physnet:bridge' string -> {physnet: bridge} dict).
    get_phy_nw_bridge_mappings:
      publish:
        phy_nw_bridge_mappings: <% $.neutron_bridge_mappings.split(',').select(let(mapping => $.split(':')) -> dict($mapping[0] => $mapping[1])).sum() %>
      on-success: get_bridge_numa_nodes_mappings

    # Gets the ovs bridge and NUMA nodes mappings
    get_bridge_numa_nodes_mappings:
      publish:
        bridge_numa_nodes_mappings: <% $.dpdk_nics_numa_info.groupBy($.bridge_name).select(dict($[0]=>$[1].select($.numa_node).distinct())).sum() %>
      on-success: get_phy_nw_numa_nodes_mappings

    # Gets the physical network and NUMA nodes mappings
    # NOTE(review): the inner select() produces a sequence of dicts per
    # bridge, so the outer sum() presumably adds sequences rather than dicts;
    # confirm the published value has the shape NeutronPhysnetNUMANodesMapping
    # expects.
    get_phy_nw_numa_nodes_mappings:
      publish:
        phy_nw_numa_nodes_mappings: <% let(nw_bridge_mappings => $.phy_nw_bridge_mappings) -> $.bridge_numa_nodes_mappings.items().select(let(br => $[0], nodes => $[1]) -> $nw_bridge_mappings.items().where($[1]=$br).select(dict($[0] => $nodes))).sum() %>
      on-success: get_neutron_network_type

    # Tunnel NUMA nodes are only derived when the network type contains 'vxlan'.
    get_neutron_network_type:
      publish:
        neutron_network_type: <% $.heat_resource_tree.parameters.get('NeutronNetworkType', {}).get('default', '') %>
      on-success:
        - get_tunnel_numa_nodes_mappings: <% 'vxlan' in $.neutron_network_type %>
        - get_dpdk_parameters: <% not 'vxlan' in $.neutron_network_type %>

    # Gets the list of NUMA nodes associated to all tunneled networks
    # OVS-DPDK on VxLAN tunnel requires Tenant Network IP to be applied on the OVS User Bridge itself.
    # With this assumption, if the IP is set on the OVS User Bridge, then OVS-DPDK is used for VxLAN tunnels also.
    # Here dpdk_nics_numa_info will have the OVS User Bridges with DPDK ports only.
    get_tunnel_numa_nodes_mappings:
      publish:
        tunnel_numa_nodes_mappings: <% $.dpdk_nics_numa_info.where($.addresses.any($.ip_netmask)).select($.numa_node).distinct() %>
      on-success: get_dpdk_parameters

    # Assemble the base role parameters. The two optional follow-up tasks
    # only run when the corresponding mapping is non-empty, so empty
    # NUMA mapping parameters are never emitted.
    get_dpdk_parameters:
      publish:
        dpdk_parameters: <% dict(concat($.role_name, 'Parameters') => dict('OvsPmdCoreList' => $.get('pmd_cpus', ''), 'OvsDpdkCoreList' => $.get('host_cpus', ''), 'OvsDpdkSocketMemory' => $.get('sock_mem', ''))) %>
      on-success:
        - add_phy_nw_numa_nodes_mappings: <% $.get('phy_nw_numa_nodes_mappings', {}) %>
        - add_tunnel_numa_nodes_mappings: <% $.get('tunnel_numa_nodes_mappings', []) %>

    add_phy_nw_numa_nodes_mappings:
      publish:
        dpdk_parameters: <% $.dpdk_parameters.mergeWith(dict(concat($.role_name, 'Parameters') => dict('NeutronPhysnetNUMANodesMapping' => $.get('phy_nw_numa_nodes_mappings', {})))) %>

    add_tunnel_numa_nodes_mappings:
      publish:
        dpdk_parameters: <% $.dpdk_parameters.mergeWith(dict(concat($.role_name, 'Parameters') => dict('NeutronTunnelNUMANodes' => $.get('tunnel_numa_nodes_mappings', [])))) %>

    # Failure handlers: each publishes status/message and then fails the
    # workflow. The "on_error" variants forward the failed task's result.
    set_status_failed_get_network_config:
      publish:
        status: FAILED
        message: <% task(get_network_config).result %>
      on-success: fail

    set_status_failed_get_dpdk_nics_numa_info:
      publish:
        status: FAILED
        message: "Unable to determine DPDK NIC's NUMA information"
      on-success: fail

    set_status_failed_on_error_get_dpdk_nics_numa_info:
      publish:
        status: FAILED
        message: <% task(get_dpdk_nics_numa_info).result %>
      on-success: fail

    set_status_failed_get_dpdk_nics_numa_nodes:
      publish:
        status: FAILED
        message: "Unable to determine DPDK NIC's numa nodes"
      on-success: fail

    set_status_failed_get_numa_nodes:
      publish:
        status: FAILED
        message: 'Unable to determine available NUMA nodes'
      on-success: fail

    set_status_failed_get_num_phy_cores_per_numa_for_pmd_invalid:
      publish:
        status: FAILED
        message: <% "num_phy_cores_per_numa_node_for_pmd user input '{0}' is invalid".format($.num_phy_cores_per_numa_node_for_pmd) %>
      on-success: fail

    set_status_failed_get_num_phy_cores_per_numa_for_pmd_not_provided:
      publish:
        status: FAILED
        message: 'num_phy_cores_per_numa_node_for_pmd user input is not provided'
      on-success: fail

    set_status_failed_get_pmd_cpus:
      publish:
        status: FAILED
        message: 'Unable to determine OvsPmdCoreList parameter'
      on-success: fail

    set_status_failed_on_error_get_pmd_cpus:
      publish:
        status: FAILED
        message: <% task(get_pmd_cpus).result %>
      on-success: fail

    set_status_failed_get_pmd_cpus_range_list:
      publish:
        status: FAILED
        message: <% task(get_pmd_cpus_range_list).result %>
      on-success: fail

    set_status_failed_get_host_cpus:
      publish:
        status: FAILED
        message: <% task(get_host_cpus).result.get('message', '') %>
      on-success: fail

    set_status_failed_get_sock_mem:
      publish:
        status: FAILED
        message: 'Unable to determine OvsDpdkSocketMemory parameter'
      on-success: fail

    set_status_failed_on_error_get_sock_mem:
      publish:
        status: FAILED
        message: <% task(get_sock_mem).result %>
      on-success: fail
sriov_derive_params:
  description: >
    This workflow derives parameters for the SRIOV feature.
  tags: [tripleo-common-managed]
  # role_name selects the '<role_name>Parameters' key; hw_data is the
  # introspection data; derived_parameters carries earlier results.
  input:
    - role_name
    - hw_data
    - derived_parameters: {}
  # Earlier results merged with sriov_parameters (or {} if never published).
  output:
    derived_parameters: "<% $.derived_parameters.mergeWith($.get('sriov_parameters', {})) %>"
  tasks:
    # Host CPU list comes from the shared get_host_cpus sub-workflow.
    get_host_cpus:
      workflow: tripleo.derive_params_formulas.v1.get_host_cpus
      input:
        hw_data: '<% $.hw_data %>'
        role_name: '<% $.role_name %>'
      on-error:
        - set_status_failed_get_host_cpus
      on-success:
        - get_sriov_parameters
      publish:
        host_cpus: "<% task().result.get('host_cpus', '') %>"

    # Terminal task: wraps the host CPU list in the role parameters dict.
    get_sriov_parameters:
      publish:
        # SriovHostCpusList is only a temporary carrier; it is dropped again
        # from the derived parameters result later on.
        sriov_parameters: "<% dict(concat($.role_name, 'Parameters') => dict('SriovHostCpusList' => $.get('host_cpus', ''))) %>"

    # Reports the sub-workflow's message and fails this workflow.
    set_status_failed_get_host_cpus:
      on-success:
        - fail
      publish:
        message: "<% task(get_host_cpus).result.get('message', '') %>"
        status: FAILED
get_host_cpus:
  description: >
    Fetching the host CPU list from the introspection data, and then converting the raw list into a range list.
  # Inputs:
  #   hw_data   - introspection data, passed to the host-CPU action
  #   role_name - optional and unused here. It is declared because the
  #               callers in this workbook pass it together with hw_data,
  #               and an input that is not declared is rejected when the
  #               sub-workflow is started. The default keeps callers that
  #               pass only hw_data working.
  input:
    - hw_data  # introspection data
    - role_name: ''
  # Output: host_cpus in range-list form, or '' when it was never published.
  output:
    host_cpus: <% $.get('host_cpus', '') %>
  tags:
    - tripleo-common-managed
  tasks:
    # Raw host CPU list; an empty result is treated as a failure.
    get_host_cpus:
      action: tripleo.derive_params.get_host_cpus_list inspect_data=<% $.hw_data %>
      publish:
        host_cpus: <% task().result %>
      on-success:
        - get_host_cpus_range_list: <% $.host_cpus %>
        - set_status_failed_get_host_cpus: <% not $.host_cpus %>
      on-error: set_status_failed_on_error_get_host_cpus

    # Re-publish host_cpus in range-list form (terminal task on success).
    get_host_cpus_range_list:
      action: tripleo.derive_params.convert_number_to_range_list
      input:
        num_list: <% $.host_cpus %>
      publish:
        host_cpus: <% task().result %>
      on-error: set_status_failed_get_host_cpus_range_list

    # Failure handlers: publish status/message, then fail the workflow.
    set_status_failed_get_host_cpus:
      publish:
        status: FAILED
        message: 'Unable to determine host cpus'
      on-success: fail

    set_status_failed_on_error_get_host_cpus:
      publish:
        status: FAILED
        message: <% task(get_host_cpus).result %>
      on-success: fail

    set_status_failed_get_host_cpus_range_list:
      publish:
        status: FAILED
        message: <% task(get_host_cpus_range_list).result %>
      on-success: fail
host_derive_params:
  description: >
    This workflow derives parameters for the Host process, and is mainly associated with CPU pinning and huge memory pages.
    This workflow can be dependent on any feature or also can be invoked individually as well.
  # Inputs:
  #   role_name          - role whose '<role_name>Parameters' entry is read
  #                        and extended
  #   hw_data            - introspection data
  #   user_inputs        - read for 'host_mem_default' and
  #                        'huge_page_allocation_percentage'
  #   derived_parameters - parameters derived so far (defaults to {}); the
  #                        OvsDpdkCoreList, SriovHostCpusList and
  #                        OvsPmdCoreList role parameters are consumed here
  input:
    - role_name
    - hw_data  # introspection data
    - user_inputs
    - derived_parameters: {}
  # Output: derived_parameters merged with host_parameters ({} is merged
  # when host_parameters was never published).
  output:
    derived_parameters: <% $.derived_parameters.mergeWith($.get('host_parameters', {})) %>
  tags:
    - tripleo-common-managed
  tasks:
    # Per-NUMA-node CPU information from the introspection data.
    get_cpus:
      publish:
        cpus: <% $.hw_data.numa_topology.cpus %>
      on-success:
        - get_role_derive_params: <% $.cpus %>
        - set_status_failed_get_cpus: <% not $.cpus %>

    get_role_derive_params:
      publish:
        role_derive_params: <% $.derived_parameters.get(concat($.role_name, 'Parameters'), {}) %>
        # removing the role parameters (eg. ComputeParameters) in derived_parameters dictionary since already copied in role_derive_params.
        derived_parameters: <% $.derived_parameters.delete(concat($.role_name, 'Parameters')) %>
      on-success: get_host_cpus

    # Host CPUs come from OvsDpdkCoreList, falling back to SriovHostCpusList.
    get_host_cpus:
      publish:
        host_cpus: <% $.role_derive_params.get('OvsDpdkCoreList', '') or $.role_derive_params.get('SriovHostCpusList', '') %>
        # SriovHostCpusList parameter is added temporarily for host_cpus and not needed in derived_parameters result.
        # SriovHostCpusList parameter is deleted in derived_parameters list and adding the updated role parameters
        # back in the derived_parameters.
        derived_parameters: <% $.derived_parameters + dict(concat($.role_name, 'Parameters') => $.role_derive_params.delete('SriovHostCpusList')) %>
      on-success: get_host_dpdk_combined_cpus

    # Join the PMD CPUs (when present) and the host CPUs into one range string.
    get_host_dpdk_combined_cpus:
      publish:
        host_dpdk_combined_cpus: <% let(pmd_cpus => $.role_derive_params.get('OvsPmdCoreList', '')) -> switch($pmd_cpus => concat($pmd_cpus, ',', $.host_cpus), not $pmd_cpus => $.host_cpus) %>
        reserved_cpus: []
      on-success:
        - get_host_dpdk_combined_cpus_num_list: <% $.host_dpdk_combined_cpus %>
        - set_status_failed_get_host_dpdk_combined_cpus: <% not $.host_dpdk_combined_cpus %>

    # Expand the combined ranges; reserved_cpus is the same data split into
    # a list of strings.
    get_host_dpdk_combined_cpus_num_list:
      action: tripleo.derive_params.convert_range_to_number_list
      input:
        range_list: <% $.host_dpdk_combined_cpus %>
      publish:
        host_dpdk_combined_cpus: <% task().result %>
        reserved_cpus: <% task().result.split(',') %>
      on-success: get_nova_cpus
      on-error: set_status_failed_get_host_dpdk_combined_cpus_num_list

    # Every thread sibling that is not reserved goes to Nova. The str()
    # conversion is needed because reserved_cpus holds strings.
    get_nova_cpus:
      publish:
        nova_cpus: <% let(reserved_cpus => $.reserved_cpus) -> $.cpus.select($.thread_siblings).flatten().where(not (str($) in $reserved_cpus)).join(',') %>
      on-success:
        - get_isol_cpus: <% $.nova_cpus %>
        - set_status_failed_get_nova_cpus: <% not $.nova_cpus %>

    # concatenates OvsPmdCoreList range format and NovaVcpuPinSet in range format. it may not be in perfect range format.
    # example: concatenates '12-15,19' and '16-18' ranges into '12-15,19,16-18'
    get_isol_cpus:
      publish:
        isol_cpus: <% let(pmd_cpus => $.role_derive_params.get('OvsPmdCoreList','')) -> switch($pmd_cpus => concat($pmd_cpus, ',', $.nova_cpus), not $pmd_cpus => $.nova_cpus) %>
      on-success: get_isol_cpus_num_list

    # Gets the isol_cpus in the number list
    # example: '12-15,19,16-18' into '12,13,14,15,16,17,18,19'
    get_isol_cpus_num_list:
      action: tripleo.derive_params.convert_range_to_number_list
      input:
        range_list: <% $.isol_cpus %>
      publish:
        isol_cpus: <% task().result %>
      on-success: get_nova_cpus_range_list
      on-error: set_status_failed_get_isol_cpus_num_list

    # Re-publish nova_cpus in range-list form.
    get_nova_cpus_range_list:
      action: tripleo.derive_params.convert_number_to_range_list
      input:
        num_list: <% $.nova_cpus %>
      publish:
        nova_cpus: <% task().result %>
      on-success: get_isol_cpus_range_list
      on-error: set_status_failed_get_nova_cpus_range_list

    # converts number format isol_cpus into range format
    # example: '12,13,14,15,16,17,18,19' into '12-19'
    get_isol_cpus_range_list:
      action: tripleo.derive_params.convert_number_to_range_list
      input:
        num_list: <% $.isol_cpus %>
      publish:
        isol_cpus: <% task().result %>
      on-success: get_host_mem
      on-error: set_status_failed_get_isol_cpus_range_list

    # Reserved host memory: user value, or 4096 when none was given.
    get_host_mem:
      publish:
        host_mem: <% $.user_inputs.get('host_mem_default', 4096) %>
      on-success: check_default_hugepage_supported

    # 1GB huge pages require the 'pdpe1gb' CPU flag.
    check_default_hugepage_supported:
      publish:
        default_hugepage_supported: <% $.hw_data.get('inventory', {}).get('cpu', {}).get('flags', []).contains('pdpe1gb') %>
      on-success:
        - get_total_memory: <% $.default_hugepage_supported %>
        - set_status_failed_check_default_hugepage_supported: <% not $.default_hugepage_supported %>

    get_total_memory:
      publish:
        total_memory: <% $.hw_data.get('inventory', {}).get('memory', {}).get('physical_mb', 0) %>
      on-success:
        - get_hugepage_allocation_percentage: <% $.total_memory %>
        - set_status_failed_get_total_memory: <% not $.total_memory %>

    # Validate the user-supplied percentage (0 when the key is absent).
    # An integer in 1..100 continues, 0 means "not provided", and anything
    # else (non-integer, negative, or above 100) is reported as invalid.
    # Without the range check a negative value matched no transition and the
    # workflow ended without deriving anything, while a value above 100 asked
    # for more huge pages than there is memory.
    get_hugepage_allocation_percentage:
      publish:
        huge_page_allocation_percentage: <% $.user_inputs.get('huge_page_allocation_percentage', 0) %>
      on-success:
        - get_hugepages: <% isInteger($.huge_page_allocation_percentage) and $.huge_page_allocation_percentage > 0 and $.huge_page_allocation_percentage <= 100 %>
        - set_status_failed_get_hugepage_allocation_percentage_invalid: <% not (isInteger($.huge_page_allocation_percentage) and $.huge_page_allocation_percentage >= 0 and $.huge_page_allocation_percentage <= 100) %>
        - set_status_failed_get_hugepage_allocation_percentage_not_provided: <% $.huge_page_allocation_percentage = 0 %>

    # hugepages = (total memory in GB - 4) * percentage, truncated to int.
    get_hugepages:
      publish:
        hugepages: <% let(huge_page_perc => float($.huge_page_allocation_percentage)/100)-> int((($.total_memory/1024)-4) * $huge_page_perc) %>
      on-success:
        - get_cpu_model: <% $.hugepages %>
        - set_status_failed_get_hugepages: <% not $.hugepages %>

    get_cpu_model:
      publish:
        intel_cpu_model: <% $.hw_data.get('inventory', {}).get('cpu', {}).get('model_name', '').startsWith('Intel') %>
      on-success: get_iommu_info

    # IOMMU kernel arguments are only added for Intel CPU models.
    get_iommu_info:
      publish:
        iommu_info: <% switch($.intel_cpu_model => 'intel_iommu=on iommu=pt', not $.intel_cpu_model => '') %>
      on-success: get_kernel_args

    get_kernel_args:
      publish:
        kernel_args: <% concat('default_hugepagesz=1GB hugepagesz=1G ', 'hugepages=', str($.hugepages), ' ', $.iommu_info, ' isolcpus=', $.isol_cpus) %>
      on-success: get_host_parameters

    # Terminal task: collect the derived values under '<role_name>Parameters'.
    get_host_parameters:
      publish:
        host_parameters: <% dict(concat($.role_name, 'Parameters') => dict('NovaVcpuPinSet' => $.get('nova_cpus', ''), 'NovaReservedHostMemory' => $.get('host_mem', ''), 'KernelArgs' => $.get('kernel_args', ''), 'IsolCpusList' => $.get('isol_cpus', ''))) %>

    # Failure handlers: publish status/message, then fail the workflow.
    set_status_failed_get_cpus:
      publish:
        status: FAILED
        message: "Unable to determine CPU's on NUMA nodes"
      on-success: fail

    set_status_failed_get_host_dpdk_combined_cpus:
      publish:
        status: FAILED
        message: 'Unable to combine host and dpdk cpus list'
      on-success: fail

    set_status_failed_get_host_dpdk_combined_cpus_num_list:
      publish:
        status: FAILED
        message: <% task(get_host_dpdk_combined_cpus_num_list).result %>
      on-success: fail

    set_status_failed_get_nova_cpus:
      publish:
        status: FAILED
        message: 'Unable to determine nova vcpu pin set'
      on-success: fail

    set_status_failed_get_nova_cpus_range_list:
      publish:
        status: FAILED
        message: <% task(get_nova_cpus_range_list).result %>
      on-success: fail

    set_status_failed_get_isol_cpus_num_list:
      publish:
        status: FAILED
        message: <% task(get_isol_cpus_num_list).result %>
      on-success: fail

    set_status_failed_get_isol_cpus_range_list:
      publish:
        status: FAILED
        message: <% task(get_isol_cpus_range_list).result %>
      on-success: fail

    set_status_failed_check_default_hugepage_supported:
      publish:
        status: FAILED
        message: 'default huge page size 1GB is not supported'
      on-success: fail

    set_status_failed_get_total_memory:
      publish:
        status: FAILED
        message: 'Unable to determine total memory'
      on-success: fail

    set_status_failed_get_hugepage_allocation_percentage_invalid:
      publish:
        status: FAILED
        message: <% "huge_page_allocation_percentage user input '{0}' is invalid".format($.huge_page_allocation_percentage) %>
      on-success: fail

    set_status_failed_get_hugepage_allocation_percentage_not_provided:
      publish:
        status: FAILED
        message: 'huge_page_allocation_percentage user input is not provided'
      on-success: fail

    set_status_failed_get_hugepages:
      publish:
        status: FAILED
        message: 'Unable to determine huge pages'
      on-success: fail
hci_derive_params:
  description: Derive the deployment parameters for HCI
  # Inputs:
  #   role_name              - role the parameters are derived for
  #   environment_parameters - read for ExtraConfig, '<role_name>ExtraConfig',
  #                            '<role_name>Parameters' and NovaVcpuPinSet
  #   heat_resource_tree     - read for the CephAnsibleDisksConfig default
  #   introspection_data     - read for 'memory_mb' and 'cpus'
  #   user_inputs            - read for hci_profile, hci_profile_config,
  #                            gb_overhead_per_guest, gb_per_osd, cores_per_osd
  #   derived_parameters     - parameters derived so far (defaults to {})
  input:
    - role_name
    - environment_parameters
    - heat_resource_tree
    - introspection_data
    - user_inputs
    - derived_parameters: {}
  # Output: derived_parameters merged with hci_parameters ({} is merged
  # when hci_parameters was never published).
  output:
    derived_parameters: <% $.derived_parameters.mergeWith($.get('hci_parameters', {})) %>
  tags:
    - tripleo-common-managed
  tasks:
    get_hci_inputs:
      publish:
        hci_profile: <% $.user_inputs.get('hci_profile', '') %>
        hci_profile_config: <% $.user_inputs.get('hci_profile_config', {}) %>
        MB_PER_GB: 1024
      on-success:
        - get_average_guest_memory_size_in_mb: <% $.hci_profile and $.hci_profile_config.get($.hci_profile, {}) %>
        - set_failed_invalid_hci_profile: <% $.hci_profile and not $.hci_profile_config.get($.hci_profile, {}) %>
        # When no hci_profile is specified, the workflow terminates without deriving any HCI parameters.

    # The profile value defaults to 0 when missing, so it must be checked for
    # being positive as well as integral: a guest size of 0 would otherwise
    # flow into the guest-count calculation and produce a meaningless result.
    get_average_guest_memory_size_in_mb:
      publish:
        average_guest_memory_size_in_mb: <% $.hci_profile_config.get($.hci_profile, {}).get('average_guest_memory_size_in_mb', 0) %>
      on-success:
        - get_average_guest_cpu_utilization_percentage: <% isInteger($.average_guest_memory_size_in_mb) and $.average_guest_memory_size_in_mb > 0 %>
        - set_failed_invalid_average_guest_memory_size_in_mb: <% not (isInteger($.average_guest_memory_size_in_mb) and $.average_guest_memory_size_in_mb > 0) %>

    # Same reasoning as above, and more critical: this value becomes the
    # divisor in calc_step_2, so 0 (the default for a missing key) has to be
    # rejected here with a clear message instead of failing on the division.
    get_average_guest_cpu_utilization_percentage:
      publish:
        average_guest_cpu_utilization_percentage: <% $.hci_profile_config.get($.hci_profile, {}).get('average_guest_cpu_utilization_percentage', 0) %>
      on-success:
        - get_gb_overhead_per_guest: <% isInteger($.average_guest_cpu_utilization_percentage) and $.average_guest_cpu_utilization_percentage > 0 %>
        - set_failed_invalid_average_guest_cpu_utilization_percentage: <% not (isInteger($.average_guest_cpu_utilization_percentage) and $.average_guest_cpu_utilization_percentage > 0) %>

    # The next three tasks read optional tuning values (with defaults) and
    # only accept numbers.
    get_gb_overhead_per_guest:
      publish:
        gb_overhead_per_guest: <% $.user_inputs.get('gb_overhead_per_guest', 0.5) %>
      on-success:
        - get_gb_per_osd: <% isNumber($.gb_overhead_per_guest) %>
        - set_failed_invalid_gb_overhead_per_guest: <% not isNumber($.gb_overhead_per_guest) %>

    get_gb_per_osd:
      publish:
        gb_per_osd: <% $.user_inputs.get('gb_per_osd', 5) %>
      on-success:
        - get_cores_per_osd: <% isNumber($.gb_per_osd) %>
        - set_failed_invalid_gb_per_osd: <% not isNumber($.gb_per_osd) %>

    get_cores_per_osd:
      publish:
        cores_per_osd: <% $.user_inputs.get('cores_per_osd', 1.0) %>
      on-success:
        - get_extra_configs: <% isNumber($.cores_per_osd) %>
        - set_failed_invalid_cores_per_osd: <% not isNumber($.cores_per_osd) %>

    # Gather the configuration dictionaries consulted by later tasks.
    get_extra_configs:
      publish:
        extra_config: <% $.environment_parameters.get('ExtraConfig', {}) %>
        role_extra_config: <% $.environment_parameters.get(concat($.role_name, 'ExtraConfig'), {}) %>
        role_env_params: <% $.environment_parameters.get(concat($.role_name, 'Parameters'), {}) %>
        role_derive_params: <% $.derived_parameters.get(concat($.role_name, 'Parameters'), {}) %>
      on-success: calc_osds

    # Count the 'devices' and 'lvm_volumes' entries of CephAnsibleDisksConfig.
    calc_osds:
      publish:
        num_dev: <% $.heat_resource_tree.parameters.get('CephAnsibleDisksConfig', {}).get('default', {}).get('devices', []).count() %>
        num_lvm: <% $.heat_resource_tree.parameters.get('CephAnsibleDisksConfig', {}).get('default', {}).get('lvm_volumes', []).count() %>
      on-success: get_num_osds

    get_num_osds:
      publish:
        num_osds: <% int($.num_dev + $.num_lvm) %>
      on-success:
        - get_memory_mb: <% $.num_osds %>
        # If there's no CephAnsibleDisksConfig then look for OSD configuration in hiera data
        - get_num_osds_from_hiera: <% not $.num_osds %>

    # The role-specific ExtraConfig takes precedence over the global one.
    get_num_osds_from_hiera:
      publish:
        num_osds: <% $.role_extra_config.get('ceph::profile::params::osds', $.extra_config.get('ceph::profile::params::osds', {})).keys().count() %>
      on-success:
        - get_memory_mb: <% $.num_osds %>
        - set_failed_no_osds: <% not $.num_osds %>

    get_memory_mb:
      publish:
        memory_mb: <% $.introspection_data.get('memory_mb', 0) %>
      on-success:
        - get_nova_vcpu_pin_set: <% $.memory_mb %>
        - set_failed_get_memory_mb: <% not $.memory_mb %>

    # Determine the number of CPU cores available to Nova and Ceph. If
    # NovaVcpuPinSet is defined then use the number of vCPUs in the set,
    # otherwise use all of the cores identified in the introspection data.
    get_nova_vcpu_pin_set:
      publish:
        # NovaVcpuPinSet can be defined in multiple locations, and it's
        # important to select the value in order of precedence:
        #
        # 1) User specified value for this role
        # 2) User specified default value for all roles
        # 3) Value derived by another derived parameters workflow
        nova_vcpu_pin_set: <% $.role_env_params.get('NovaVcpuPinSet', $.environment_parameters.get('NovaVcpuPinSet', $.role_derive_params.get('NovaVcpuPinSet', ''))) %>
      on-success:
        - get_nova_vcpu_count: <% $.nova_vcpu_pin_set %>
        - get_num_cores: <% not $.nova_vcpu_pin_set %>

    # Expand the pin set ranges and count the individual CPUs.
    get_nova_vcpu_count:
      action: tripleo.derive_params.convert_range_to_number_list
      input:
        range_list: <% $.nova_vcpu_pin_set %>
      publish:
        num_cores: <% task().result.split(',').count() %>
      on-success: calculate_nova_parameters
      on-error: set_failed_get_nova_vcpu_count

    get_num_cores:
      publish:
        num_cores: <% $.introspection_data.get('cpus', 0) %>
      on-success:
        - calculate_nova_parameters: <% $.num_cores %>
        - set_failed_get_num_cores: <% not $.num_cores %>

    # HCI calculations are broken into multiple steps. This is necessary
    # because variables published by a Mistral task are not available
    # for use by that same task. Variables computed and published in a task
    # are only available in subsequent tasks.
    #
    # The HCI calculations compute two Nova parameters:
    # - reserved_host_memory
    # - cpu_allocation_ratio
    #
    # The reserved_host_memory calculation computes the amount of memory
    # that needs to be reserved for Ceph and the total amount of "guest
    # overhead" memory that is based on the anticipated number of guests.
    # Pseudo-code for the calculation (disregarding MB and GB units) is
    # as follows:
    #
    # ceph_memory = mem_per_osd * num_osds
    # nova_memory = total_memory - ceph_memory
    # num_guests = nova_memory /
    #              (average_guest_memory_size + overhead_per_guest)
    # reserved_memory = ceph_memory + (num_guests * overhead_per_guest)
    #
    # The cpu_allocation_ratio calculation is similar in that it takes into
    # account the number of cores that must be reserved for Ceph.
    #
    # ceph_cores = cores_per_osd * num_osds
    # guest_cores = num_cores - ceph_cores
    # guest_vcpus = guest_cores / average_guest_utilization
    # cpu_allocation_ratio = guest_vcpus / num_cores
    calculate_nova_parameters:
      publish:
        avg_guest_util: <% $.average_guest_cpu_utilization_percentage / 100.0 %>
        avg_guest_size_gb: <% $.average_guest_memory_size_in_mb / float($.MB_PER_GB) %>
        memory_gb: <% $.memory_mb / float($.MB_PER_GB) %>
        ceph_mem_gb: <% $.gb_per_osd * $.num_osds %>
        nonceph_cores: <% $.num_cores - int($.cores_per_osd * $.num_osds) %>
      on-success: calc_step_2

    calc_step_2:
      publish:
        num_guests: <% int(($.memory_gb - $.ceph_mem_gb) / ($.avg_guest_size_gb + $.gb_overhead_per_guest)) %>
        guest_vcpus: <% $.nonceph_cores / $.avg_guest_util %>
      on-success: calc_step_3

    calc_step_3:
      publish:
        reserved_host_memory: <% $.MB_PER_GB * int($.ceph_mem_gb + ($.num_guests * $.gb_overhead_per_guest)) %>
        cpu_allocation_ratio: <% $.guest_vcpus / $.num_cores %>
      on-success: validate_results

    validate_results:
      publish:
        # Verify whether HCI is viable:
        # - At most 80% of the memory may be reserved for Ceph and guest overhead
        # - The CPU allocation ratio must be at least 0.5
        mem_ok: <% $.reserved_host_memory <= ($.memory_mb * 0.8) %>
        cpu_ok: <% $.cpu_allocation_ratio >= 0.5 %>
      on-success:
        - set_failed_insufficient_mem: <% not $.mem_ok %>
        - set_failed_insufficient_cpu: <% not $.cpu_ok %>
        - publish_hci_parameters: <% $.mem_ok and $.cpu_ok %>

    publish_hci_parameters:
      publish:
        # TODO(abishop): Update this when the cpu_allocation_ratio can be set
        # via a THT parameter (no such parameter currently exists). Until a
        # THT parameter exists, use hiera data to set the cpu_allocation_ratio.
        hci_parameters: <% dict(concat($.role_name, 'Parameters') => dict('NovaReservedHostMemory' => $.reserved_host_memory)) + dict(concat($.role_name, 'ExtraConfig') => dict('nova::cpu_allocation_ratio' => $.cpu_allocation_ratio)) %>

    # Failure handlers: publish a message, then fail the workflow.
    set_failed_invalid_hci_profile:
      publish:
        message: "'<% $.hci_profile %>' is not a valid HCI profile."
      on-success: fail

    set_failed_invalid_average_guest_memory_size_in_mb:
      publish:
        message: "'<% $.average_guest_memory_size_in_mb %>' is not a valid average_guest_memory_size_in_mb value."
      on-success: fail

    set_failed_invalid_gb_overhead_per_guest:
      publish:
        message: "'<% $.gb_overhead_per_guest %>' is not a valid gb_overhead_per_guest value."
      on-success: fail

    set_failed_invalid_gb_per_osd:
      publish:
        message: "'<% $.gb_per_osd %>' is not a valid gb_per_osd value."
      on-success: fail

    set_failed_invalid_cores_per_osd:
      publish:
        message: "'<% $.cores_per_osd %>' is not a valid cores_per_osd value."
      on-success: fail

    set_failed_invalid_average_guest_cpu_utilization_percentage:
      publish:
        message: "'<% $.average_guest_cpu_utilization_percentage %>' is not a valid average_guest_cpu_utilization_percentage value."
      on-success: fail

    set_failed_no_osds:
      publish:
        message: "No Ceph OSDs found in the overcloud definition ('ceph::profile::params::osds')."
      on-success: fail

    set_failed_get_memory_mb:
      publish:
        message: "Unable to determine the amount of physical memory (no 'memory_mb' found in introspection_data)."
      on-success: fail

    set_failed_get_nova_vcpu_count:
      publish:
        message: <% task(get_nova_vcpu_count).result %>
      on-success: fail

    set_failed_get_num_cores:
      publish:
        message: "Unable to determine the number of CPU cores (no 'cpus' found in introspection_data)."
      on-success: fail

    set_failed_insufficient_mem:
      publish:
        message: "<% $.memory_mb %> MB is not enough memory to run hyperconverged."
      on-success: fail

    set_failed_insufficient_cpu:
      publish:
        message: "<% $.num_cores %> CPU cores are not enough to run hyperconverged."
      on-success: fail