Add checks in ansible upgrade tasks for CephMon and CephOSD
Adds two checks borrowed from ceph-ansible: one for the CephMon upgrade tasks and one for the CephOSD upgrade tasks.

Change-Id: I0a0e60d277240130c6bd76a74ccc13354b87a30a
Co-Authored-By: Sebastien Han <seb@redhat.com>
This commit is contained in:
parent
d5b0c38f0b
commit
a3df16776d
@ -18,10 +18,6 @@ parameters:
|
||||
constraints:
|
||||
- allowed_values: ['auto', 'yes', 'no']
|
||||
default: 'auto'
|
||||
IgnoreCephUpgradeWarnings:
|
||||
type: boolean
|
||||
default: false
|
||||
description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
|
||||
KeepSaharaServicesOnUpgrade:
|
||||
type: boolean
|
||||
default: true
|
||||
|
@ -59,6 +59,14 @@ parameters:
|
||||
}
|
||||
default: {}
|
||||
type: json
|
||||
CephValidationRetries:
|
||||
type: number
|
||||
default: 5
|
||||
description: Number of retry attempts for Ceph validation
|
||||
CephValidationDelay:
|
||||
type: number
|
||||
default: 10
|
||||
description: Interval (in seconds) in between validation checks
|
||||
MonitoringSubscriptionCephMon:
|
||||
default: 'overcloud-ceph-mon'
|
||||
type: string
|
||||
@ -119,21 +127,32 @@ outputs:
|
||||
# rolling upgrade of all osd nodes in step1
|
||||
- name: Check status
|
||||
tags: step0,validation
|
||||
shell: ceph health | grep -qv HEALTH_ERR
|
||||
# FIXME(shardy) I suspect we can use heat or ansible facts here instead?
|
||||
- name: Get hostname
|
||||
shell: ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
|
||||
- name: Stop CephMon
|
||||
tags: step0
|
||||
shell: hostname -s
|
||||
register: mon_id
|
||||
- name: Stop Ceph Mon
|
||||
service:
|
||||
name: ceph-mon@{{ ansible_hostname }}
|
||||
state: stopped
|
||||
- name: Update Ceph packages
|
||||
tags: step0
|
||||
service: name=ceph-mon@{{mon_id.stdout}} pattern=ceph-mon state=stopped
|
||||
- name: Update ceph packages
|
||||
yum:
|
||||
name: ceph-mon
|
||||
state: latest
|
||||
- name: Start CephMon
|
||||
tags: step0
|
||||
yum: name=ceph-mon state=latest
|
||||
- name: Start ceph-mon service
|
||||
tags: step0
|
||||
service: name=ceph-mon@{{mon_id.stdout}} state=started
|
||||
service:
|
||||
name: ceph-mon@{{ ansible_hostname }}
|
||||
state: started
|
||||
# ceph-ansible
|
||||
# https://github.com/ceph/ceph-ansible/blob/master/infrastructure-playbooks/rolling_update.yml#L149-L157
|
||||
- name: Wait for the monitor to join the quorum...
|
||||
tags: step0,ceph_quorum_validation
|
||||
shell: |
|
||||
ceph -s | grep monmap | sed 's/.*quorum//' | egrep -sq {{ ansible_hostname }}
|
||||
register: ceph_quorum_nodecheck
|
||||
until: ceph_quorum_nodecheck.rc == 0
|
||||
retries: {get_param: CephValidationRetries}
|
||||
delay: {get_param: CephValidationDelay}
|
||||
- name: ceph osd crush tunables default
|
||||
tags: step0
|
||||
shell: ceph osd crush tunables default
|
||||
|
@ -21,6 +21,24 @@ parameters:
|
||||
MonitoringSubscriptionCephOsd:
|
||||
default: 'overcloud-ceph-osd'
|
||||
type: string
|
||||
CephValidationRetries:
|
||||
type: number
|
||||
default: 40
|
||||
description: Number of retry attempts for Ceph validation
|
||||
CephValidationDelay:
|
||||
type: number
|
||||
default: 30
|
||||
description: Interval (in seconds) in between validation checks
|
||||
IgnoreCephUpgradeWarnings:
|
||||
type: boolean
|
||||
default: false
|
||||
description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
|
||||
|
||||
parameter_groups:
|
||||
- label: deprecated
|
||||
description: Do not use deprecated params, they will be removed.
|
||||
parameters:
|
||||
- IgnoreCephUpgradeWarnings
|
||||
|
||||
resources:
|
||||
CephBase:
|
||||
@ -66,17 +84,37 @@ outputs:
|
||||
- name: ceph osd set noscrub
|
||||
tags: step1
|
||||
command: ceph osd set noscrub
|
||||
- name: Stop Ceph OSD
|
||||
- name: Stop CephOSD
|
||||
tags: step1
|
||||
service: name=ceph-osd@{{ item }} state=stopped
|
||||
service:
|
||||
name: ceph-osd@{{ item }}
|
||||
state: stopped
|
||||
with_items: "{{osd_ids.stdout.strip().split()}}"
|
||||
- name: Update ceph OSD packages
|
||||
- name: Update Ceph packages
|
||||
tags: step1
|
||||
yum: name=ceph-osd state=latest
|
||||
- name: Start ceph-osd service
|
||||
yum:
|
||||
name: ceph-osd
|
||||
state: latest
|
||||
- name: Start CephOSD
|
||||
tags: step1
|
||||
service: name=ceph-osd@{{ item }} state=started
|
||||
service:
|
||||
name: ceph-osd@{{ item }}
|
||||
state: started
|
||||
with_items: "{{osd_ids.stdout.strip().split()}}"
|
||||
# with awk we are meant to check if $2 and $4 are *the same* but it returns 1 when
|
||||
# they are, so the check is inverted to produce a useful exit code
|
||||
- name: Wait for clean pgs...
|
||||
tags: step1,ceph_pgs_clean_validation
|
||||
vars:
|
||||
ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
|
||||
shell: |
|
||||
ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
|
||||
register: ceph_pgs_healthcheck
|
||||
until: ceph_pgs_healthcheck.rc == 0
|
||||
retries: {get_param: CephValidationRetries}
|
||||
delay: {get_param: CephValidationDelay}
|
||||
when:
|
||||
- not ignore_warnings
|
||||
- name: ceph osd unset noout
|
||||
tags: step1
|
||||
command: ceph osd unset noout
|
||||
|
@ -87,4 +87,6 @@ outputs:
|
||||
tags: step0,validation
|
||||
- name: Stop RGW instance
|
||||
tags: step1
|
||||
service: name=ceph-radosgw@{{rgw_id.stdout}} state=stopped
|
||||
service:
|
||||
name: ceph-radosgw@{{rgw_id.stdout}}
|
||||
state: stopped
|
||||
|
Loading…
Reference in New Issue
Block a user