diff --git a/doc/source/roles/role-compute_tsx.rst b/doc/source/roles/role-compute_tsx.rst new file mode 100644 index 000000000..1b2ab344b --- /dev/null +++ b/doc/source/roles/role-compute_tsx.rst @@ -0,0 +1,9 @@ +=========== +compute_tsx +=========== + +.. literalinclude:: ../../../roles/compute_tsx/README.md + +.. ansibleautoplugin:: + :role: roles/compute_tsx + diff --git a/playbooks/compute-tsx.yaml b/playbooks/compute-tsx.yaml new file mode 100644 index 000000000..067d85f5a --- /dev/null +++ b/playbooks/compute-tsx.yaml @@ -0,0 +1,31 @@ +--- +- hosts: nova_libvirt + gather_facts: false + vars: + metadata: + name: RHEL8.x kernel flag for Compute nodes validation + description: | + RHEL-8.3 kernel disabled the Intel TSX (Transactional + Synchronization Extensions) feature by default as a preemptive + security measure, but it breaks live migration from RHEL-7.9 + (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. + + Operators are expected to explicitly define the TSX flag in + their KernelArgs for the compute role to prevent live-migration + issues during the upgrade process. + + This also impacts upstream CentOS systems. + groups: + - pre-upgrade + - pre-system-upgrade + - pre-overcloud-prepare + - pre-overcloud-upgrade + - pre-overcloud-converge + - pre-update + - pre-update-prepare + - pre-update-run + - pre-update-converge + compute_tsx_debug: false + compute_tsx_warning: false + roles: + - compute_tsx diff --git a/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml b/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml new file mode 100644 index 000000000..572018c6e --- /dev/null +++ b/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml @@ -0,0 +1,13 @@ +--- +features: + - | + RHEL-8.3 kernel disabled the Intel “TSX” (Transactional + Synchronization Extensions) feature by default as a preemptive + security measure, but it breaks live migration from RHEL-7.9 + (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. 
+ + Operators are expected to explicitly define the TSX flag in + their KernelArgs for the compute role to prevent live-migration + issues during the upgrade process. + + This also impacts upstream CentOS systems. diff --git a/roles/compute_tsx/README.md b/roles/compute_tsx/README.md new file mode 100644 index 000000000..63b2c574a --- /dev/null +++ b/roles/compute_tsx/README.md @@ -0,0 +1,64 @@ +Compute-TSX +=========== + +An Ansible role to verify that the compute nodes have the appropriate TSX flags before +proceeding with an upgrade. + +RHEL-8.3 kernel disabled the Intel TSX (Transactional Synchronization Extensions) +feature by default as a preemptive security measure, but it breaks live migration from +RHEL-7.9 (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. + +Operators are expected to explicitly define the TSX flag in their KernelArgs for the +compute role to prevent live-migration issues during the upgrade process. + +This role is intended to be called by tripleo via the kernel deployment templates. + +It's also possible to call the role standalone. + +This also impacts upstream CentOS systems. + +Requirements +------------ + +This role needs to be run on an Undercloud with a deployed Overcloud. + +Role Variables +-------------- + +- `compute_tsx_debug`: <'false'> -- Whether or not to print the computed variables during execution +- `compute_tsx_warning`: <'false'> -- When true, the problem is reported as a warning instead of failing the validation +- `compute_tsx_kernel_args`: <''> -- This is meant to be used when called by tripleo-heat-templates. +- `compute_tsx_8_3_version`: <'4.18.0-240'> -- Kernel version (RHEL-8.3) from which the TSX flag has to be set explicitly +- +Dependencies +------------ + +No dependencies. 
+ +Example Playbook +---------------- + +Standard playbook + + - hosts: nova_libvirt + roles: + - { role: compute_tsx} + + +Reporting playbook with no failure + + - hosts: nova_libvirt + vars: + compute_tsx_warning: true + roles: + - { role: compute_tsx} + +License +------- + +Apache + +Author Information +------------------ + +Red Hat TripleO DFG:Compute Deployment Squad diff --git a/roles/compute_tsx/defaults/main.yml b/roles/compute_tsx/defaults/main.yml new file mode 100644 index 000000000..c01ca878e --- /dev/null +++ b/roles/compute_tsx/defaults/main.yml @@ -0,0 +1,26 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# All variables intended for modification should be placed in this file. + +# All variables within this role should have a prefix of "compute_tsx" +compute_tsx_debug: false +compute_tsx_warning: false +compute_tsx_kernel_args: "" +compute_tsx_information_msg: | + For more information on why we must explicitly define the TSX flag, please visit: + https://access.redhat.com/solutions/6036141 diff --git a/roles/compute_tsx/molecule/default/converge.yml b/roles/compute_tsx/molecule/default/converge.yml new file mode 100644 index 000000000..974fc09a0 --- /dev/null +++ b/roles/compute_tsx/molecule/default/converge.yml @@ -0,0 +1,79 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +- name: Converge + hosts: all + vars: + tsx_assertion: {} + tasks: + - name: Assert a failure + block: + - name: Loading role with failure + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: false + tsx_cpu_support: true + tsx_grub: false + rescue: + - name: Fail if no failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - tsx_assertion.failed + + - name: Assert a failure, with warning only + block: + - name: Loading role with failure + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: false + tsx_cpu_support: true + tsx_grub: false + compute_tsx_warning: true + rescue: + - name: Fail if failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - not tsx_assertion.failed + + - name: Assert a success + block: + - name: Loading role with passed + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: true + tsx_cpu_support: true + tsx_grub: false + rescue: + - name: Fail if failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - not tsx_assertion.failed diff --git a/roles/compute_tsx/molecule/default/molecule.yml b/roles/compute_tsx/molecule/default/molecule.yml new file mode 100644 index 000000000..ba05cf07d --- /dev/null +++ b/roles/compute_tsx/molecule/default/molecule.yml @@ -0,0 +1,3 @@ +--- +# inherits 
tripleo-validations/.config/molecule/config.yml +# To override default values, please take a look at the config.yml. diff --git a/roles/compute_tsx/tasks/main.yml b/roles/compute_tsx/tasks/main.yml new file mode 100644 index 000000000..a79307f7b --- /dev/null +++ b/roles/compute_tsx/tasks/main.yml @@ -0,0 +1,83 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +- name: Gathering TSX information + shell: | + uname -r | grep -oP "^[\d]+\.[\d]+\.[\d-]+" + grep -qP "[^a-zA-Z]tsx=(on|off|auto)" /proc/cmdline && echo true || echo false + grep -qP "hle|rtm" /proc/cpuinfo && echo true || echo false + grep -qP "[^a-zA-Z]tsx=(on|off|auto)" /etc/default/grub && echo true || echo false + register: node_infos + check_mode: false + changed_when: false + +- name: Parse custom node facts + set_fact: + tsx_rhel_8_2: "{{ node_infos.stdout_lines[0] is version(compute_tsx_8_3_version, '<') }}" + tsx_cmdline: "{{ node_infos.stdout_lines[1] | bool }}" + tsx_cpu_support: "{{ node_infos.stdout_lines[2] | bool }}" + tsx_grub: "{{ node_infos.stdout_lines[3] | bool }}" + tsx_kernel_args: "{{ 'tsx' in compute_tsx_kernel_args }}" + +- name: Print facts + when: + - compute_tsx_debug | bool + debug: + msg: | + tsx_rhel_8_2: {{ tsx_rhel_8_2 }} + tsx_cmdline: {{ tsx_cmdline }} + tsx_cpu_support: {{ tsx_cpu_support }} + tsx_grub: {{ tsx_grub }} + tsx_kernel_args: {{ tsx_kernel_args }} + +# It's cleaner to assert only ANDs so we do a reverse assertion +- name: Validating facts + 
assert: + that: + - tsx_rhel_8_2 + - tsx_cpu_support + - not tsx_cmdline + - not tsx_grub + - not tsx_kernel_args + success_msg: | + {{ inventory_hostname }} doesn't have TSX flag configured + fail_msg: | + This is not a failure, assertion is successful. + {{ inventory_hostname }} has the right TSX setting according to its running or startup configuration + ignore_errors: true + register: tsx_assertion + +- name: Asserting errors + fail: + msg: | + {{ tsx_assertion.msg }} + + {{ compute_tsx_information_msg }} + + To prevent this validation from failing, you can run it with the compute_tsx_warning flag set to true like this: + openstack tripleo validator run --extra-vars compute_tsx_warning=true --validation compute-tsx + when: + - not tsx_assertion.failed + - not compute_tsx_warning | bool + +- name: Displaying errors + warn: + msg: | + {{ tsx_assertion.msg }} + + {{ compute_tsx_information_msg }} + when: + - not tsx_assertion.failed + - compute_tsx_warning | bool diff --git a/roles/compute_tsx/vars/main.yml b/roles/compute_tsx/vars/main.yml new file mode 100644 index 000000000..24dff119f --- /dev/null +++ b/roles/compute_tsx/vars/main.yml @@ -0,0 +1,23 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# While options found within the vars/ path can be overridden using extra +# vars, items within this path are considered part of the role and not +# intended to be modified. 
+ +# All variables within this role should have a prefix of "compute_tsx" +compute_tsx_8_3_version: "4.18.0-240" diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index 4a1ef99e9..2544f4bcf 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -9,6 +9,7 @@ - tripleo-validations-centos-8-molecule-check_rhsm_version - tripleo-validations-centos-8-molecule-check_uc_hostname - tripleo-validations-centos-8-molecule-check_undercloud_conf + - tripleo-validations-centos-8-molecule-compute_tsx - tripleo-validations-centos-8-molecule-controller_token - tripleo-validations-centos-8-molecule-controller_ulimits - tripleo-validations-centos-8-molecule-ctlplane_ip_range @@ -36,6 +37,7 @@ - tripleo-validations-centos-8-molecule-check_rhsm_version - tripleo-validations-centos-8-molecule-check_uc_hostname - tripleo-validations-centos-8-molecule-check_undercloud_conf + - tripleo-validations-centos-8-molecule-compute_tsx - tripleo-validations-centos-8-molecule-controller_token - tripleo-validations-centos-8-molecule-controller_ulimits - tripleo-validations-centos-8-molecule-ctlplane_ip_range @@ -547,3 +549,14 @@ parent: tripleo-validations-centos-8-base vars: tripleo_validations_role_name: validation_init +- job: + files: + - ^roles/compute_tsx/.* + - ^tests/prepare-test-host.yml + - ^ci/playbooks/pre.yml + - ^ci/playbooks/run.yml + - ^molecule-requirements.txt + name: tripleo-validations-centos-8-molecule-compute_tsx + parent: tripleo-validations-centos-8-base + vars: + tripleo_validations_role_name: compute_tsx