From 287f0295f268e45b019949b98de0de7adf90a2fe Mon Sep 17 00:00:00 2001 From: Sofer Athlan-Guyot Date: Tue, 5 Jan 2021 17:08:33 +0100 Subject: [PATCH] [Queens] Handle ceph service restart during update. Since this change[1] in ceph-ansible the ceph services are stopped if docker is stopped. This happens during the update steps when docker needs to be updated[2]. This in turn breaks the update in several ways. To avoid any problem, when docker needs a restart, we: 1. make sure we restart the ceph services if docker was stopped; 2. set the necessary flags so that no rebalance action happens when osd services are stopped. To achieve this we make sure to set the docker restart related facts in step 0 so that they are available for steps 1, 2 and 4. For the flags we also set "run_once" so that setting the flags happens only once if the ceph-osd are updated in a batch[3]. I reshuffle the "when" conditionals to the top of the blocks to work around lp#1910418. [1] https://github.com/ceph/ceph-ansible/pull/5444 [2] https://opendev.org/openstack/tripleo-heat-templates/src/branch/stable/queens/puppet/services/docker.yaml#L178-L184 [3] Shouldn't be the default as osd should be updated serially, but it's cleaner this way. Queens-Only because it happens only when using docker. 
Change-Id: If7ebd5b0fed0c84b720729e1e75f659bae2355fe Closes-Bug: #1910124 --- docker/services/ceph-ansible/ceph-osd.yaml | 29 ++++++++++++++++++++++ puppet/services/docker.yaml | 13 ++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/docker/services/ceph-ansible/ceph-osd.yaml b/docker/services/ceph-ansible/ceph-osd.yaml index bc32056534..b2cd8a3e23 100644 --- a/docker/services/ceph-ansible/ceph-osd.yaml +++ b/docker/services/ceph-ansible/ceph-osd.yaml @@ -60,6 +60,35 @@ outputs: tags: validation when: step|int == 0 shell: test "nil" == "$(hiera -c /etc/puppet/hiera.yaml ceph::profile::params::osds)" + update_tasks: + - name: Set noout flag + shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd set {{ item }}" + become: true + with_items: + - noout + - norecover + - nobackfill + - norebalance + - nodeep-scrub + when: + - step|int == 1 + - docker_rpm_needs_update + run_once: true + delegate_to: "{{ groups['ceph_mon']|first }}" + - name: Unset noout flag + shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd unset {{ item }}" + become: true + with_items: + - noout + - norecover + - nobackfill + - norebalance + - nodeep-scrub + when: + - step|int == 4 + - docker_rpm_needs_update + run_once: true + delegate_to: "{{ groups['ceph_mon']|first }}" puppet_config: config_image: '' config_volume: '' diff --git a/puppet/services/docker.yaml b/puppet/services/docker.yaml index 2963676a06..44bd6291d4 100644 --- a/puppet/services/docker.yaml +++ b/puppet/services/docker.yaml @@ -140,7 +140,8 @@ outputs: when: step|int == 3 yum: name=docker state=latest update_tasks: - block: + - when: step|int == 0 + block: - name: Detect if puppet on the docker profile would restart the service # Note that due to https://tickets.puppetlabs.com/browse/PUP-686 --noop # always exits 0, so we cannot rely on that to detect if puppet is going to change stuff @@ -162,6 +163,8 @@ outputs: set_fact: docker_rpm_needs_update={{ docker_check_update.rc == 100 }} - name: 
Set puppet_docker_is_outofsync fact set_fact: puppet_docker_is_outofsync={{ puppet_docker_noop_output.stdout|trim|int >= 1 }} + - when: step|int == 2 + block: - name: Stop all containers except ceph or neutron side containers # xargs is preferable to docker stop $(docker ps -q) as that might generate a # a too long command line @@ -193,4 +196,10 @@ outputs: register: puppet_docker_apply failed_when: puppet_docker_apply.rc not in [0, 2] changed_when: puppet_docker_apply.rc == 2 - when: step|int == 2 + - name: Ensure all ceph services are started + shell: | + systemctl --plain list-dependencies docker.service --reverse | \ + egrep 'ceph|rbd' | xargs systemctl start + ignore_errors: true + when: docker_rpm_needs_update +