From 3c80b58b45625de00bc6d6fca8fe74af24f4690e Mon Sep 17 00:00:00 2001 From: David Vallee Delisle Date: Wed, 19 May 2021 04:05:38 +0000 Subject: [PATCH] Compute TSX validation RHEL-8.3 kernel disabled the Intel TSX (Transactional Synchronization Extensions) feature by default as a preemptive security measure, but it breaks live migration from RHEL-7.9 (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. Operators are expected to explicitly define the TSX flag in their KernelArgs for the compute role to prevent live-migration issues during the upgrade process. This also impacts upstream CentOS systems. Co-Authored-By: Martin Schuppert Related: https://bugzilla.redhat.com/1923165 Closes-Bug: #1916758 Change-Id: Icfcfb1c07bbfbe05d27d67187d941c0c34fad2b2 (cherry picked from commit ede25c3e36a751daf68ce151521b371bb25f50dc) (cherry picked from commit 74c30cf49147dedad2944da332b58079bc703200) (cherry picked from commit 1d8c110b52bf90db9efaf55b4f071a1233270163) --- doc/source/roles/role-compute_tsx.rst | 9 ++ playbooks/compute-tsx.yaml | 31 +++++++ ...mpute-tsx-validation-5d976a3fc5166536.yaml | 13 +++ roles/compute_tsx/README.md | 64 +++++++++++++++ roles/compute_tsx/defaults/main.yml | 26 ++++++ .../compute_tsx/molecule/default/converge.yml | 79 ++++++++++++++++++ .../compute_tsx/molecule/default/molecule.yml | 3 + roles/compute_tsx/tasks/main.yml | 82 +++++++++++++++++++ roles/compute_tsx/vars/main.yml | 23 ++++++ zuul.d/molecule.yaml | 12 +++ 10 files changed, 342 insertions(+) create mode 100644 doc/source/roles/role-compute_tsx.rst create mode 100644 playbooks/compute-tsx.yaml create mode 100644 releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml create mode 100644 roles/compute_tsx/README.md create mode 100644 roles/compute_tsx/defaults/main.yml create mode 100644 roles/compute_tsx/molecule/default/converge.yml create mode 100644 roles/compute_tsx/molecule/default/molecule.yml create mode 100644 roles/compute_tsx/tasks/main.yml create mode 100644 
roles/compute_tsx/vars/main.yml diff --git a/doc/source/roles/role-compute_tsx.rst b/doc/source/roles/role-compute_tsx.rst new file mode 100644 index 000000000..1b2ab344b --- /dev/null +++ b/doc/source/roles/role-compute_tsx.rst @@ -0,0 +1,9 @@ +=========== +compute_tsx +=========== + +.. literalinclude:: ../../../roles/compute_tsx/README.md + +.. ansibleautoplugin:: + :role: roles/compute_tsx + diff --git a/playbooks/compute-tsx.yaml b/playbooks/compute-tsx.yaml new file mode 100644 index 000000000..067d85f5a --- /dev/null +++ b/playbooks/compute-tsx.yaml @@ -0,0 +1,31 @@ +--- +- hosts: nova_libvirt + gather_facts: false + vars: + metadata: + name: RHEL8.x kernel flag for Compute nodes validation + description: | + RHEL-8.3 kernel disabled the Intel TSX (Transactional + Synchronization Extensions) feature by default as a preemptive + security measure, but it breaks live migration from RHEL-7.9 + (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. + + Operators are expected to explicitly define the TSX flag in + their KernelArgs for the compute role to prevent live-migration + issues during the upgrade process. + + This also impacts upstream CentOS systems. 
+ groups: + - pre-upgrade + - pre-system-upgrade + - pre-overcloud-prepare + - pre-overcloud-upgrade + - pre-overcloud-converge + - pre-update + - pre-update-prepare + - pre-update-run + - pre-update-converge + compute_tsx_debug: false + compute_tsx_warning: false + roles: + - compute_tsx diff --git a/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml b/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml new file mode 100644 index 000000000..572018c6e --- /dev/null +++ b/releasenotes/notes/compute-tsx-validation-5d976a3fc5166536.yaml @@ -0,0 +1,13 @@ +--- +features: + - | + RHEL-8.3 kernel disabled the Intel “TSX” (Transactional + Synchronization Extensions) feature by default as a preemptive + security measure, but it breaks live migration from RHEL-7.9 + (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. + + Operators are expected to explicitly define the TSX flag in + their KernelArgs for the compute role to prevent live-migration + issues during the upgrade process. + + This also impacts upstream CentOS systems. diff --git a/roles/compute_tsx/README.md b/roles/compute_tsx/README.md new file mode 100644 index 000000000..63b2c574a --- /dev/null +++ b/roles/compute_tsx/README.md @@ -0,0 +1,64 @@ +Compute-TSX +=========== + +An Ansible role to verify that the compute nodes have the appropriate TSX flags before +proceeding with an upgrade. + +RHEL-8.3 kernel disabled the Intel TSX (Transactional Synchronization Extensions) +feature by default as a preemptive security measure, but it breaks live migration from +RHEL-7.9 (or even RHEL-8.1 or RHEL-8.2) to RHEL-8.3. + +Operators are expected to explicitly define the TSX flag in their KernelArgs for the +compute role to prevent live-migration issues during the upgrade process. + +This role is intended to be called by TripleO via the kernel deployment templates. + +It's also possible to call the role standalone.
+ +This also impacts upstream CentOS systems. + +Requirements +------------ + +This role needs to be run on an Undercloud with a deployed Overcloud. + +Role Variables +-------------- + +- `compute_tsx_debug`: <'false'> -- Whether or not to print the computed variables during execution +- `compute_tsx_warning`: <'false'> -- When true, the role does not fail but simply prints the failure as a warning +- `compute_tsx_kernel_args`: <''> -- This is meant to be used when called by tripleo-heat-templates. +- `compute_tsx_8_3_version`: <'4.18.0-240'> -- This is the kernel version that requires the TSX flag to be explicitly defined + +Dependencies +------------ + +No dependencies. + +Example Playbook +---------------- + +Standard playbook + + - hosts: nova_libvirt + roles: + - { role: compute_tsx} + + +Reporting playbook with no failure + + - hosts: nova_libvirt + vars: + - compute_tsx_warning: true + roles: + - { role: compute_tsx} + +License +------- + +Apache + +Author Information +------------------ + +Red Hat TripleO DFG:Compute Deployment Squad diff --git a/roles/compute_tsx/defaults/main.yml b/roles/compute_tsx/defaults/main.yml new file mode 100644 index 000000000..c01ca878e --- /dev/null +++ b/roles/compute_tsx/defaults/main.yml @@ -0,0 +1,26 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# All variables intended for modification should be placed in this file.
+ +# All variables within this role should have a prefix of "compute_tsx" +compute_tsx_debug: false +compute_tsx_warning: false +compute_tsx_kernel_args: "" +compute_tsx_information_msg: | + For more information on why we must explicitly define the TSX flag, please visit: + https://access.redhat.com/solutions/6036141 diff --git a/roles/compute_tsx/molecule/default/converge.yml b/roles/compute_tsx/molecule/default/converge.yml new file mode 100644 index 000000000..974fc09a0 --- /dev/null +++ b/roles/compute_tsx/molecule/default/converge.yml @@ -0,0 +1,79 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +- name: Converge + hosts: all + vars: + tsx_assertion: {} + tasks: + - name: Assert a failure + block: + - name: Loading role with failure + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: false + tsx_cpu_support: true + tsx_grub: false + rescue: + - name: Fail if no failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - tsx_assertion.failed + + - name: Assert a failure, with warning only + block: + - name: Loading role with failure + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: false + tsx_cpu_support: true + tsx_grub: false + compute_tsx_warning: true + rescue: + - name: Fail if failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - not tsx_assertion.failed + + - name: Assert a success + block: + - name: Loading role with passed + include_role: + name: compute_tsx + vars: + tsx_rhel_8_2: true + tsx_cmdline: true + tsx_cpu_support: true + tsx_grub: false + rescue: + - name: Fail if failure + fail: + msg: | + {{ tsx_assertion }} + when: + # The logic is reversed here + - not tsx_assertion.failed diff --git a/roles/compute_tsx/molecule/default/molecule.yml b/roles/compute_tsx/molecule/default/molecule.yml new file mode 100644 index 000000000..ba05cf07d --- /dev/null +++ b/roles/compute_tsx/molecule/default/molecule.yml @@ -0,0 +1,3 @@ +--- +# inherits tripleo-validations/.config/molecule/config.yml +# To override default values, please take a look at the config.yml. diff --git a/roles/compute_tsx/tasks/main.yml b/roles/compute_tsx/tasks/main.yml new file mode 100644 index 000000000..a79307f7b --- /dev/null +++ b/roles/compute_tsx/tasks/main.yml @@ -0,0 +1,82 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +- name: Gathering TSX information + shell: | + uname -r | grep -oP "^[\d]+\.[\d]+\.[\d-]+" + grep -qP "[^a-zA-Z]tsx=(on|off|auto)" /proc/cmdline && echo true || echo false + grep -qP "hle|rtm" /proc/cpuinfo && echo true || echo false + grep -qP "[^a-zA-Z]tsx=(on|off|auto)" /etc/default/grub && echo true || echo false + register: node_infos + check_mode: false + changed_when: false + +- name: Parse custom node facts + set_fact: + tsx_rhel_8_2: "{{ node_infos.stdout_lines[0] is version(compute_tsx_8_3_version, '<') }}" + tsx_cmdline: "{{ node_infos.stdout_lines[1] | bool }}" + tsx_cpu_support: "{{ node_infos.stdout_lines[2] | bool }}" + tsx_grub: "{{ node_infos.stdout_lines[3] | bool }}" + tsx_kernel_args: "{{ 'tsx' in compute_tsx_kernel_args }}" + +- name: Print facts + when: + - compute_tsx_debug | bool + debug: + msg: | + tsx_rhel_8_2: {{ tsx_rhel_8_2 }} + tsx_cmdline: {{ tsx_cmdline }} + tsx_cpu_support: {{ tsx_cpu_support }} + tsx_grub: {{ tsx_grub }} + +# It's cleaner to assert only ANDs so we do a reverse assertion +- name: Validating facts + assert: + that: + - tsx_rhel_8_2 + - tsx_cpu_support + - not tsx_cmdline + - not tsx_grub + - not tsx_kernel_args + success_msg: | + {{ inventory_hostname }} doesn't have TSX flag configured + fail_msg: | + This is not a failure, assertion is successful. 
+ {{ inventory_hostname }} has the right TSX setting according to its running or startup configuration + ignore_errors: true + register: tsx_assertion + +- name: Asserting errors + fail: + msg: | + {{ tsx_assertion.msg }} + + {{ compute_tsx_information_msg }} + + To prevent this validation from failing, you can run it with the compute_tsx_warning flag set to true like this: + openstack tripleo validator run --extra-vars compute_tsx_warning=true --validation compute-tsx + when: + - not tsx_assertion.failed + - not compute_tsx_warning | bool + +- name: Displaying errors + warn: + msg: | + {{ tsx_assertion.msg }} + + {{ compute_tsx_information_msg }} + when: + - not tsx_assertion.failed + - compute_tsx_warning | bool diff --git a/roles/compute_tsx/vars/main.yml b/roles/compute_tsx/vars/main.yml new file mode 100644 index 000000000..24dff119f --- /dev/null +++ b/roles/compute_tsx/vars/main.yml @@ -0,0 +1,23 @@ +--- +# Copyright 2021 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# While options found within the vars/ path can be overridden using extra +# vars, items within this path are considered part of the role and not +# intended to be modified. 
+ +# All variables within this role should have a prefix of "compute_tsx" +compute_tsx_8_3_version: "4.18.0-240" diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index a99e9577d..2525a203d 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -8,6 +8,7 @@ - tripleo-validations-centos-8-molecule-check_rhsm_version - tripleo-validations-centos-8-molecule-check_undercloud_conf - tripleo-validations-centos-8-molecule-check_uc_hostname + - tripleo-validations-centos-8-molecule-compute_tsx - tripleo-validations-centos-8-molecule-controller_token - tripleo-validations-centos-8-molecule-controller_ulimits - tripleo-validations-centos-8-molecule-ctlplane_ip_range @@ -32,6 +33,7 @@ - tripleo-validations-centos-8-molecule-check_rhsm_version - tripleo-validations-centos-8-molecule-check_uc_hostname - tripleo-validations-centos-8-molecule-check_undercloud_conf + - tripleo-validations-centos-8-molecule-compute_tsx - tripleo-validations-centos-8-molecule-controller_token - tripleo-validations-centos-8-molecule-controller_ulimits - tripleo-validations-centos-8-molecule-ctlplane_ip_range @@ -479,3 +481,13 @@ parent: tripleo-validations-centos-8-base vars: tripleo_validations_role_name: validation_init +- job: + files: + - ^roles/compute_tsx/.* + - ^tests/prepare-test-host.yml + - ^ci/playbooks/pre.yml + - ^ci/playbooks/run.yml + name: tripleo-validations-centos-8-molecule-compute_tsx + parent: tripleo-validations-centos-8-base + vars: + tripleo_validations_role_name: compute_tsx