[Queens] Handle ceph service restart during update.

Since this change[1] in ceph-ansible, the ceph services are stopped
whenever docker is stopped. This happens during the update steps when
docker itself needs to be updated[2].
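
In practice the ceph/rbd container units end up as reverse
dependencies of docker.service, so stopping or restarting docker takes
them down as well. They can be listed with roughly the same filter the
new "Ensure all ceph services are started" task uses:

    # Sketch: list the ceph/rbd units that get stopped along with docker.
    systemctl --plain list-dependencies docker.service --reverse | \
        egrep 'ceph|rbd'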

This in turn breaks the update in several ways.

To avoid any problems when docker needs a restart, we:
 1. make sure we restart the ceph services if docker was stopped;
 2. set the necessary flags so that no rebalance happens while the osd
    services are stopped (see the sketch below).
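
For illustration, the flag handling amounts to running something like
this on the first monitor before the osd services go down (step 1),
with the matching "ceph osd unset" once everything is back (step 4):

    # Rough equivalent of the new "Set noout flag" task.
    for flag in noout norecover nobackfill norebalance nodeep-scrub; do
        docker exec -u root "ceph-mon-${HOSTNAME%%.*}" ceph osd set "$flag"
    done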

To achieve this we make sure to set the docker-restart related facts
in step 0 so that they are available for steps 1, 2 and 4.
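
For reference, docker_rpm_needs_update is derived from yum's
check-update exit code, which is 100 when an update is pending;
roughly:

    # yum check-update exits 0 (nothing to do), 100 (update available)
    # or 1 (error); rc == 100 is what flips docker_rpm_needs_update.
    yum check-update -q docker; echo $?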

For the flags we also set "run_once" so that setting them happens only
once even if the ceph-osd nodes are updated in batches[3].
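
The flags are cluster-wide, so toggling them on a single monitor is
enough; if in doubt they can be checked with something like:

    # The osdmap lists the currently set flags (noout, norebalance, ...).
    docker exec -u root "ceph-mon-${HOSTNAME%%.*}" ceph osd dump | grep flags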

I reshuffle the "when" conditionals to the top of the blocks to work
around lp#1910418.

[1] https://github.com/ceph/ceph-ansible/pull/5444
[2] https://opendev.org/openstack/tripleo-heat-templates/src/branch/stable/queens/puppet/services/docker.yaml#L178-L184
[3] Updating in batches shouldn't be the default, as the osds should be
updated serially, but it's cleaner this way.

Queens-only, because this happens only when using docker.

Change-Id: If7ebd5b0fed0c84b720729e1e75f659bae2355fe
Closes-Bug: #1910124
Sofer Athlan-Guyot 2021-01-05 17:08:33 +01:00
parent 09e7915e00
commit 287f0295f2
2 changed files with 40 additions and 2 deletions

@@ -60,6 +60,35 @@ outputs:
           tags: validation
           when: step|int == 0
           shell: test "nil" == "$(hiera -c /etc/puppet/hiera.yaml ceph::profile::params::osds)"
+      update_tasks:
+        - name: Set noout flag
+          shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd set {{ item }}"
+          become: true
+          with_items:
+            - noout
+            - norecover
+            - nobackfill
+            - norebalance
+            - nodeep-scrub
+          when:
+            - step|int == 1
+            - docker_rpm_needs_update
+          run_once: true
+          delegate_to: "{{ groups['ceph_mon']|first }}"
+        - name: Unset noout flag
+          shell: "docker exec -u root ceph-mon-${HOSTNAME%%.*} ceph osd unset {{ item }}"
+          become: true
+          with_items:
+            - noout
+            - norecover
+            - nobackfill
+            - norebalance
+            - nodeep-scrub
+          when:
+            - step|int == 4
+            - docker_rpm_needs_update
+          run_once: true
+          delegate_to: "{{ groups['ceph_mon']|first }}"
       puppet_config:
         config_image: ''
         config_volume: ''

@@ -140,7 +140,8 @@ outputs:
           when: step|int == 3
           yum: name=docker state=latest
       update_tasks:
-        block:
+        - when: step|int == 0
+          block:
           - name: Detect if puppet on the docker profile would restart the service
             # Note that due to https://tickets.puppetlabs.com/browse/PUP-686 --noop
             # always exits 0, so we cannot rely on that to detect if puppet is going to change stuff
@@ -162,6 +163,8 @@ outputs:
             set_fact: docker_rpm_needs_update={{ docker_check_update.rc == 100 }}
           - name: Set puppet_docker_is_outofsync fact
             set_fact: puppet_docker_is_outofsync={{ puppet_docker_noop_output.stdout|trim|int >= 1 }}
+        - when: step|int == 2
+          block:
           - name: Stop all containers except ceph or neutron side containers
             # xargs is preferable to docker stop $(docker ps -q) as that might generate a
             # a too long command line
@@ -193,4 +196,10 @@ outputs:
             register: puppet_docker_apply
             failed_when: puppet_docker_apply.rc not in [0, 2]
             changed_when: puppet_docker_apply.rc == 2
             when: step|int == 2
+          - name: Ensure all ceph services are started
+            shell: |
+              systemctl --plain list-dependencies docker.service --reverse | \
+                egrep 'ceph|rbd' | xargs systemctl start
+            ignore_errors: true
+            when: docker_rpm_needs_update