[Queens] Handle ceph service restart during update.

Since this change[1] in ceph-ansible, the ceph services are stopped
whenever docker is stopped. This happens during the update steps when
docker itself needs to be updated[2].
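
In practice the ceph/rbd container units end up as reverse
dependencies of docker.service, so stopping or restarting docker takes
them down as well. They can be listed with roughly the same filter the
new "Ensure all ceph services are started" task uses:

    # Sketch: list the ceph/rbd units that get stopped along with docker.
    systemctl --plain list-dependencies docker.service --reverse | \
        egrep 'ceph|rbd'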

This in turn breaks the update in several ways.

To avoid any problems when docker needs a restart, we:
 1. make sure we restart the ceph services if docker was stopped;
 2. set the necessary flags so that no rebalance happens while the osd
    services are stopped (see the sketch below).
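
For illustration, the flag handling amounts to running something like
this on the first monitor before the osd services go down (step 1),
with the matching "ceph osd unset" once everything is back (step 4):

    # Rough equivalent of the new "Set noout flag" task.
    for flag in noout norecover nobackfill norebalance nodeep-scrub; do
        docker exec -u root "ceph-mon-${HOSTNAME%%.*}" ceph osd set "$flag"
    done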

To achieve this we make sure to set the docker-restart related facts
in step 0 so that they are available for steps 1, 2 and 4.
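
For reference, docker_rpm_needs_update is derived from yum's
check-update exit code, which is 100 when an update is pending;
roughly:

    # yum check-update exits 0 (nothing to do), 100 (update available)
    # or 1 (error); rc == 100 is what flips docker_rpm_needs_update.
    yum check-update -q docker; echo $?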

For the flags we also set "run_once" so that setting them happens only
once even if the ceph-osd nodes are updated in batches[3].
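
The flags are cluster-wide, so toggling them on a single monitor is
enough; if in doubt they can be checked with something like:

    # The osdmap lists the currently set flags (noout, norebalance, ...).
    docker exec -u root "ceph-mon-${HOSTNAME%%.*}" ceph osd dump | grep flags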

I reshuffle the "when" conditionals to the top of the blocks to work
around lp#1910418.

[1] https://github.com/ceph/ceph-ansible/pull/5444
[2] https://opendev.org/openstack/tripleo-heat-templates/src/branch/stable/queens/puppet/services/docker.yaml#L178-L184
[3] Updating in batches shouldn't be the default, as the osds should be
updated serially, but it's cleaner this way.

Queens-only, because this happens only when using docker.

Change-Id: If7ebd5b0fed0c84b720729e1e75f659bae2355fe
Closes-Bug: #1910124
Sofer Athlan-Guyot 2021-01-05 17:08:33 +01:00
parent 09e7915e00
commit 287f0295f2
2 changed files with 40 additions and 2 deletions

@@ -60,6 +60,35 @@ outputs:
           tags: validation
           when: step|int == 0
           shell: test "nil" == "$(hiera -c /etc/puppet/hiera.yaml ceph::profile::params::osds)"
+      update_tasks:
+        - name: Set noout flag
+          shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd set {{ item }}"
+          become: true
+          with_items:
+            - noout
+            - norecover
+            - nobackfill
+            - norebalance
+            - nodeep-scrub
+          when:
+            - step|int == 1
+            - docker_rpm_needs_update
+          run_once: true
+          delegate_to: "{{ groups['ceph_mon']|first }}"
+        - name: Unset noout flag
+          shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd unset {{ item }}"
+          become: true
+          with_items:
+            - noout
+            - norecover
+            - nobackfill
+            - norebalance
+            - nodeep-scrub
+          when:
+            - step|int == 4
+            - docker_rpm_needs_update
+          run_once: true
+          delegate_to: "{{ groups['ceph_mon']|first }}"
       puppet_config:
         config_image: ''
         config_volume: ''

@@ -140,7 +140,8 @@ outputs:
           when: step|int == 3
           yum: name=docker state=latest
       update_tasks:
-        block:
+        - when: step|int == 0
+          block:
           - name: Detect if puppet on the docker profile would restart the service
             # Note that due to https://tickets.puppetlabs.com/browse/PUP-686 --noop
             # always exits 0, so we cannot rely on that to detect if puppet is going to change stuff
@@ -162,6 +163,8 @@ outputs:
             set_fact: docker_rpm_needs_update={{ docker_check_update.rc == 100 }}
           - name: Set puppet_docker_is_outofsync fact
             set_fact: puppet_docker_is_outofsync={{ puppet_docker_noop_output.stdout|trim|int >= 1 }}
+        - when: step|int == 2
+          block:
           - name: Stop all containers except ceph or neutron side containers
             # xargs is preferable to docker stop $(docker ps -q) as that might generate a
             # a too long command line
@@ -193,4 +196,10 @@ outputs:
             register: puppet_docker_apply
             failed_when: puppet_docker_apply.rc not in [0, 2]
             changed_when: puppet_docker_apply.rc == 2
             when: step|int == 2
+          - name: Ensure all ceph services are started
+            shell: |
+              systemctl --plain list-dependencies docker.service --reverse | \
+                egrep 'ceph|rbd' | xargs systemctl start
+            ignore_errors: true
+            when: docker_rpm_needs_update