tripleo-heat-templates/puppet/services/ceph-osd.yaml
Giulio Fidente a3df16776d Add checks in ansible upgrade tasks for CephMon and CephOSD
Adds two checks, one for the CephMon and one for the CephOSD upgrade
tasks borrowed from ceph-ansible.

Change-Id: I0a0e60d277240130c6bd76a74ccc13354b87a30a
Co-Authored-By: Sebastien Han <seb@redhat.com>
2017-02-21 20:53:01 +00:00

130 lines
4.3 KiB
YAML

# Heat Orchestration Template header: template version and a short summary.
heat_template_version: ocata

description: >
  Ceph OSD service.
# Input parameters of this template.
parameters:
  # ServiceNetMap, DefaultPasswords and EndpointMap are forwarded unchanged to
  # the CephBase resource declared in the `resources` section.
  ServiceNetMap:
    default: {}
    description: >-
      Mapping of service_name -> network name. Typically set
      via parameter_defaults in the resource registry. This
      mapping overrides those in ServiceNetMapDefaults.
    type: json
  DefaultPasswords:
    default: {}
    type: json
  EndpointMap:
    default: {}
    description: >-
      Mapping of service endpoint -> protocol. Typically set
      via parameter_defaults in the resource registry.
    type: json
  # Emitted as `monitoring_subscription` in the role_data output.
  MonitoringSubscriptionCephOsd:
    default: 'overcloud-ceph-osd'
    type: string
  # Used as `retries` / `delay` of the "Wait for clean pgs..." upgrade task.
  CephValidationRetries:
    type: number
    default: 40
    description: Number of retry attempts for Ceph validation
  CephValidationDelay:
    type: number
    default: 30
    description: Interval (in seconds) in between validation checks
  # When true, the "Wait for clean pgs..." upgrade task is skipped.
  # Listed under the deprecated parameter group.
  IgnoreCephUpgradeWarnings:
    type: boolean
    default: false
    description: >-
      If enabled, Ceph upgrade will be forced even though cluster or PGs
      status is not clean
# Groups parameters for presentation; here it flags the deprecated ones.
parameter_groups:
  - label: deprecated
    description: Do not use deprecated params, they will be removed.
    parameters:
      - IgnoreCephUpgradeWarnings
# Nested stack shared by the Ceph services; its role_data config_settings are
# merged into this template's output.
resources:
  CephBase:
    type: ./ceph-base.yaml
    properties:
      ServiceNetMap: {get_param: ServiceNetMap}
      DefaultPasswords: {get_param: DefaultPasswords}
      EndpointMap: {get_param: EndpointMap}
# role_data: service name, monitoring subscription, hiera settings (CephBase
# settings merged with the OSD firewall rule), the puppet step_config and the
# Ansible tasks run when upgrading the node in batches.
outputs:
  role_data:
    description: Role data for the Ceph OSD service.
    value:
      service_name: ceph_osd
      monitoring_subscription: {get_param: MonitoringSubscriptionCephOsd}
      config_settings:
        map_merge:
          - get_attr: [CephBase, role_data, config_settings]
          - tripleo.ceph_osd.firewall_rules:
              '111 ceph_osd':
                dport:
                  - '6800-7300'
      step_config: |
        include ::tripleo::profile::base::ceph::osd
      upgrade_batch_tasks:
        # Abort early unless `ceph health` prints a line without HEALTH_ERR.
        - name: Check status
          tags: step1,validation
          shell: ceph health | grep -qv HEALTH_ERR
        # Collect the numeric suffix of every /var/lib/ceph/osd/<name>-<id>
        # entry; reused below to stop and start each ceph-osd@<id> unit.
        - name: Get OSD IDs
          tags: step1
          shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
          register: osd_ids
        # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
        - name: ceph osd set noout
          tags: step1
          command: ceph osd set noout
        - name: ceph osd set norebalance
          tags: step1
          command: ceph osd set norebalance
        - name: ceph osd set nodeep-scrub
          tags: step1
          command: ceph osd set nodeep-scrub
        - name: ceph osd set noscrub
          tags: step1
          command: ceph osd set noscrub
        - name: Stop CephOSD
          tags: step1
          service:
            name: "ceph-osd@{{ item }}"
            state: stopped
          with_items: "{{osd_ids.stdout.strip().split()}}"
        - name: Update Ceph packages
          tags: step1
          yum:
            name: ceph-osd
            state: latest
        - name: Start CephOSD
          tags: step1
          service:
            name: "ceph-osd@{{ item }}"
            state: started
          with_items: "{{osd_ids.stdout.strip().split()}}"
        # With awk we are meant to check that $2 and $4 are *the same*, but the
        # equality test evaluates to 1 when they are, so the check is inverted
        # to produce a useful exit code (0 when the two fields match).
        # The task is retried until it succeeds and is skipped entirely when
        # IgnoreCephUpgradeWarnings is true.
        - name: Wait for clean pgs...
          tags: step1,ceph_pgs_clean_validation
          vars:
            ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
          shell: |
            ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
          register: ceph_pgs_healthcheck
          until: ceph_pgs_healthcheck.rc == 0
          retries: {get_param: CephValidationRetries}
          delay: {get_param: CephValidationDelay}
          when:
            - not ignore_warnings
        # Clear the flags that were set before the OSDs were stopped.
        - name: ceph osd unset noout
          tags: step1
          command: ceph osd unset noout
        - name: ceph osd unset norebalance
          tags: step1
          command: ceph osd unset norebalance
        - name: ceph osd unset nodeep-scrub
          tags: step1
          command: ceph osd unset nodeep-scrub
        - name: ceph osd unset noscrub
          tags: step1
          command: ceph osd unset noscrub