Browse Source

NFV: Support for config-download to deploy node with kernel args

For NFV deployments, specific kernel args should be applied and
the nodes should be restarted before running the NetworkDeployment.
It is supported in the heat deployment via PreNetworkConfig. In the
config-download mechanism, ansible steps need to be improved
to handle the reboot and wait for the node.

Change-Id: I43b383ad0e04b8be6c321f8c5b05e628b2520141
changes/67/560767/13
Saravanan KR 4 years ago
parent
commit
a3e4a90636
  1. 1
      common/deploy-steps.j2
  2. 5
      environments/disable-config-download-environment.yaml
  3. 102
      extraconfig/pre_network/boot-params-service.yaml
  4. 38
      extraconfig/pre_network/boot_param_tasks.yaml
  5. 18
      extraconfig/pre_network/host_config_and_reboot.yaml
  6. 4
      overcloud-resource-registry-puppet.j2.yaml
  7. 1
      roles/ComputeOvsDpdk.yaml
  8. 1
      roles/ComputeOvsDpdkRT.yaml
  9. 1
      roles/ComputeRealTime.yaml
  10. 1
      roles/ComputeSriov.yaml
  11. 1
      roles/ComputeSriovRT.yaml
  12. 6
      tools/yaml-validate.py

1
common/deploy-steps.j2

@ -714,3 +714,4 @@ outputs:
with_sequence: start=0 end={{post_update_steps_max-1}}
loop_control:
loop_var: step
boot_param_tasks: {get_file: ../extraconfig/pre_network/boot_param_tasks.yaml}

5
environments/disable-config-download-environment.yaml

@ -4,3 +4,8 @@ resource_registry:
OS::TripleO::Ssh::HostPubKey: ../extraconfig/tasks/ssh/host_public_key.yaml
OS::TripleO::Ssh::KnownHostsDeployment: OS::Heat::StructuredDeployments
OS::TripleO::DeploymentSteps: OS::Heat::StructuredDeploymentGroup
# Used only for config-download deployments. For non-config-download deployments,
# the PreNetworkConfig (host_config_and_reboot.yaml) stack handles it instead.
OS::TripleO::Services::BootParams: OS::Heat::None
OS::TripleO::Reboot::SoftwareDeployment: OS::TripleO::SoftwareDeployment

102
extraconfig/pre_network/boot-params-service.yaml

@ -0,0 +1,102 @@
heat_template_version: rocky

description: Host config and reboot service which configures Kernel Args and reboot

parameters:
  # The parameters from RoleNetIpMap through EndpointMap are declared here but,
  # apart from RoleParameters, are not referenced anywhere in this template.
  RoleNetIpMap:
    default: {}
    type: json
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: Mapping of service_name -> network name. Typically set
                 via parameter_defaults in the resource registry. This
                 mapping overrides those in ServiceNetMapDefaults.
    type: json
  DefaultPasswords:
    default: {}
    type: json
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  EndpointMap:
    default: {}
    description: Mapping of service endpoint -> protocol. Typically set
                 via parameter_defaults in the resource registry.
    type: json
  # The three role_specific parameters below are declared at the top level,
  # but this template only ever reads their values through RoleParameters.
  KernelArgs:
    default: ""
    type: string
    description: Kernel Args to apply to the host
    tags:
      - role_specific
  TunedProfileName:
    default: ""
    type: string
    description: Tuned Profile to apply to the host
    tags:
      - role_specific
  IsolCpusList:
    default: ""
    type: string
    description: >
      List of logical CPU ids which need to be isolated from the host processes.
      This input is provided to the tuned profile cpu-partitioning to configure
      systemd and repin interrupts (IRQ repinning).
    tags:
      - role_specific
  NodeRebootWaitTimeout:
    default: 900
    type: number
    description: Timeout in seconds to specify the wait time for ansible node reboot

conditions:
  # True only when the role supplies a non-empty KernelArgs value; the tuned
  # parameters alone do not enable the tasks below.
  is_host_config_required: {not: {equals: [{get_param: [RoleParameters, KernelArgs]}, ""]}}

outputs:
  role_data:
    description: Role data for the Host Config Service
    value:
      service_name: boot_params_service
      deploy_steps_tasks:
        # Emit the task block only when host configuration is required;
        # otherwise the entry resolves to null.
        - if:
            - is_host_config_required
            - - name: Configure kernel args, tuned and reboot
                # Cast to int so the guard matches whether the step loop
                # variable arrives as a string or as a number.
                when: step|int == 0
                vars:
                  _KERNEL_ARGS_: {get_param: [RoleParameters, KernelArgs]}
                  _TUNED_PROFILE_NAME_: {get_param: [RoleParameters, TunedProfileName]}
                  _TUNED_CORES_: {get_param: [RoleParameters, IsolCpusList]}
                block:
                  - import_tasks: ../boot_param_tasks.yaml
                  # Reboot the node
                  - name: Reboot after kernel args update
                    shell: "sleep 2 && /sbin/shutdown -r now"
                    # Fire-and-forget (async with poll 0): the task does not
                    # wait on a command that is about to take the host down.
                    async: 1
                    poll: 0
                    ignore_errors: true
                    become: true
                    # Only reboot when reboot_required has been set to true.
                    when: reboot_required is defined and reboot_required
                  - name: Wait for the ssh port to be ready on the restarted node
                    # wait_for is run via local_action, i.e. not on the node
                    # that is rebooting.
                    local_action: wait_for
                    become: false
                    args:
                      host: "{{ inventory_hostname }}"
                      port: 22
                      state: started
                      delay: 30
                      timeout:
                        get_param: NodeRebootWaitTimeout
                    when: reboot_required is defined and reboot_required
            - null

38
extraconfig/pre_network/ansible_host_config.yaml → extraconfig/pre_network/boot_param_tasks.yaml

@ -1,9 +1,12 @@
---
- name: Configuration to be applied before rebooting the node
connection: local
hosts: localhost
- name: Get the command line args of the node
command: cat /proc/cmdline
register: cmdline
- name: Get the active tuned profile
command: tuned-adm active
become: true
register: tuned_active_profile
tasks:
# Kernel Args Configuration
- block:
- name: Ensure the kernel args ( {{ _KERNEL_ARGS_ }} ) is present as TRIPLEO_HEAT_TEMPLATE_KERNEL_ARGS
@ -19,8 +22,13 @@
insertafter: '^TRIPLEO_HEAT_TEMPLATE_KERNEL_ARGS.*'
- name: Generate grub config file
command: grub2-mkconfig -o /boot/grub2/grub.cfg
- name: Set reboot required fact
set_fact:
reboot_required: true
become: true
when: _KERNEL_ARGS_|default("") != ""
when:
- _KERNEL_ARGS_|default("") != ""
- _KERNEL_ARGS_|default("") not in cmdline.stdout_lines[0]
# Tune-d Configuration
- block:
@ -33,8 +41,20 @@
- name: Tune-d profile activation
shell: tuned-adm profile {{ _TUNED_PROFILE_NAME_ }}
- name: Set reboot required fact
set_fact:
reboot_required: true
become: true
when: _TUNED_PROFILE_NAME_|default("") != ""
when:
- _TUNED_PROFILE_NAME_|default("") != ""
- _TUNED_PROFILE_NAME_|default("") not in tuned_active_profile.stdout_lines[0]
# Check if os-net-config has run once, if yes, no need for the below workaround
- find:
paths: /etc/sysconfig/network-scripts/
patterns: ifcfg-*
contains: "# This file is autogenerated by os-net-config"
register: os_net_ifcfg_files
# Provisioning Network workaround
# The script will be executed before os-net-config, in which case, only Provisioning network will have IP
@ -51,8 +71,12 @@
replace: 'BOOTPROTO=none'
when:
- item.path | regex_replace('(^.*ifcfg-)(.*)', '\\2') != "lo"
# Ensure the interface information is available in the facts
- hostvars[inventory_hostname]['ansible_' + item.path | regex_replace('(^.*ifcfg-)(.*)', '\\2') ] is defined
# This condition will list all the interfaces except the one with valid IP (which is Provisioning network at this stage)
# Simpler Version - hostvars[inventory_hostname]['ansible_' + iface_name ]['ipv4'] is undefined
- hostvars[inventory_hostname]['ansible_' + item.path | regex_replace('(^.*ifcfg-)(.*)', '\\2') ]['ipv4'] is undefined
with_items:
- "{{ ifcfg_files.files }}"
become: true
when: os_net_ifcfg_files.matched == 0

18
extraconfig/pre_network/host_config_and_reboot.yaml

@ -179,10 +179,20 @@ resources:
outputs:
- name: result
config:
get_file: ansible_host_config.yaml
str_replace:
template: |
---
- name: Configuration to be applied before rebooting the node
connection: local
hosts: localhost
tasks:
_HOST_CONFIG_TASKS_
params:
_HOST_CONFIG_TASKS_:
get_file: boot_param_tasks.yaml
HostParametersDeployment:
type: OS::Heat::SoftwareDeployment
type: OS::TripleO::Reboot::SoftwareDeployment
condition: is_host_config_required
properties:
name: HostParametersDeployment
@ -247,7 +257,7 @@ resources:
/sbin/reboot
RebootDeployment:
type: OS::Heat::SoftwareDeployment
type: OS::TripleO::Reboot::SoftwareDeployment
depends_on: [HostParametersDeployment, DpdkVhostGroupDeployment]
condition: is_reboot_config_required
properties:
@ -280,7 +290,7 @@ resources:
echo "Reboot completed"
RebootEnsureDeployment:
type: OS::Heat::SoftwareDeployment
type: OS::TripleO::Reboot::SoftwareDeployment
depends_on: RebootDeployment
condition: is_reboot_config_required
properties:

4
overcloud-resource-registry-puppet.j2.yaml

@ -13,6 +13,8 @@ resource_registry:
OS::TripleO::DefaultPasswords: default_passwords.yaml
OS::TripleO::RandomString: OS::Heat::RandomString
OS::TripleO::Reboot::SoftwareDeployment: OS::Heat::None
{% for role in roles %}
OS::TripleO::{{role.name}}::PreNetworkConfig: OS::Heat::None
OS::TripleO::{{role.name}}PostDeploySteps: common/post.yaml
@ -364,6 +366,8 @@ resource_registry:
# Tempest
OS::TripleO::Services::Tempest: OS::Heat::None
OS::TripleO::Services::BootParams: extraconfig/pre_network/boot-params-service.yaml
# Deprecated, only defined to allow smooth transition of existing
# stacks. Can be removed in S release.
OS::TripleO::Tasks::UpdateWorkflow: OS::Heat::None

1
roles/ComputeOvsDpdk.yaml

@ -14,6 +14,7 @@
ServicesDefault:
- OS::TripleO::Services::Aide
- OS::TripleO::Services::AuditD
- OS::TripleO::Services::BootParams
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephClient
- OS::TripleO::Services::CephExternal

1
roles/ComputeOvsDpdkRT.yaml

@ -16,6 +16,7 @@
ServicesDefault:
- OS::TripleO::Services::Aide
- OS::TripleO::Services::AuditD
- OS::TripleO::Services::BootParams
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephClient
- OS::TripleO::Services::CephExternal

1
roles/ComputeRealTime.yaml

@ -22,6 +22,7 @@
ServicesDefault:
- OS::TripleO::Services::Aide
- OS::TripleO::Services::AuditD
- OS::TripleO::Services::BootParams
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephClient
- OS::TripleO::Services::CephExternal

1
roles/ComputeSriov.yaml

@ -13,6 +13,7 @@
ServicesDefault:
- OS::TripleO::Services::Aide
- OS::TripleO::Services::AuditD
- OS::TripleO::Services::BootParams
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephClient
- OS::TripleO::Services::CephExternal

1
roles/ComputeSriovRT.yaml

@ -16,6 +16,7 @@
ServicesDefault:
- OS::TripleO::Services::Aide
- OS::TripleO::Services::AuditD
- OS::TripleO::Services::BootParams
- OS::TripleO::Services::CACerts
- OS::TripleO::Services::CephClient
- OS::TripleO::Services::CephExternal

6
tools/yaml-validate.py

@ -240,6 +240,10 @@ WORKFLOW_TASKS_EXCLUSIONS = [
]
ANSIBLE_TASKS_YAMLS = [
'./extraconfig/pre_network/boot_param_tasks.yaml'
]
def exit_usage():
print('Usage %s <yaml file or directory>' % sys.argv[0])
sys.exit(1)
@ -1034,6 +1038,8 @@ def validate(filename, param_map):
retval |= validate_nic_config_file(filename, tpl)
except Exception:
if filename in ANSIBLE_TASKS_YAMLS:
return 0
print(traceback.format_exc())
return 1
# yaml is OK, now walk the parameters and output a warning for unused ones

Loading…
Cancel
Save