From 34c49b9dbed81c606c958ce95078178db1cc9492 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rados=C5=82aw=20Piliszek?=
Date: Mon, 30 Aug 2021 09:33:31 +0000
Subject: [PATCH] Restore libvirtd cgroupfs mount

It was removed in [1] as part of the cgroupsv2 cleanup.
However, testing did not catch the fact that the legacy cgroups
behaviour was still broken, despite using the latest Docker and
setting the container to use the host's cgroups namespace.

[1] 286a03bad20955aa4d3f7009cef5856d328b76f1

Closes-Bug: #1941706
Change-Id: I629bb9e70a3fd6bd1e26b2ca22ffcff5e9e8c731
---
 ansible/roles/nova-cell/defaults/main.yml          |  1 +
 .../notes/bug-1941706-a8f9e9544f1540e3.yaml        | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 releasenotes/notes/bug-1941706-a8f9e9544f1540e3.yaml

diff --git a/ansible/roles/nova-cell/defaults/main.yml b/ansible/roles/nova-cell/defaults/main.yml
index 4e0dc54688..c8e41cd05a 100644
--- a/ansible/roles/nova-cell/defaults/main.yml
+++ b/ansible/roles/nova-cell/defaults/main.yml
@@ -346,6 +346,7 @@ nova_libvirt_default_volumes:
   - "/lib/modules:/lib/modules:ro"
   - "/run/:/run/:shared"
   - "/dev:/dev"
+  - "/sys/fs/cgroup:/sys/fs/cgroup"
   - "kolla_logs:/var/log/kolla/"
   - "libvirtd:/var/lib/libvirt"
   - "{{ nova_instance_datadir_volume }}:/var/lib/nova/"
diff --git a/releasenotes/notes/bug-1941706-a8f9e9544f1540e3.yaml b/releasenotes/notes/bug-1941706-a8f9e9544f1540e3.yaml
new file mode 100644
index 0000000000..8eaf1aaa3b
--- /dev/null
+++ b/releasenotes/notes/bug-1941706-a8f9e9544f1540e3.yaml
@@ -0,0 +1,13 @@
+---
+critical:
+  - |
+    Fixes a critical bug which caused Nova instances (VMs) using libvirtd
+    (the default/usual choice) to get killed on libvirtd (``nova_libvirt``)
+    container stop (and thus any restart - either manual or done by running
+    Kolla Ansible). It was affecting Wallaby+ on CentOS, Ubuntu and Debian
+    Buster (not Bullseye). If your deployment is also affected, please read the
+    referenced Launchpad bug report, comment #22, for how to fix it without
+    risking data loss. In short: fixing requires redeploying and this will
+    trigger the bug so one has to first migrate important VMs away and only
+    then redeploy empty compute nodes.
+    `LP#1941706 <https://launchpad.net/bugs/1941706>`__
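
Note for operators who cannot immediately apply this patch: the same bind
mount can likely be injected through the role's extra volumes mechanism
instead of patching the role. This is a sketch, not part of the change; it
assumes the deployed branch supports the ``nova_libvirt_extra_volumes``
override, which should be verified against the branch's
ansible/roles/nova-cell/defaults/main.yml:

  # /etc/kolla/globals.yml (hypothetical workaround; variable name assumed,
  # check it exists in your branch before relying on it)
  nova_libvirt_extra_volumes:
    - "/sys/fs/cgroup:/sys/fs/cgroup"

As with the patched default, redeploying to apply this override restarts the
``nova_libvirt`` container and thereby triggers the bug itself, so per the
release note above, important VMs should be migrated away before redeploying.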