Introduce Ceph placement group validation

Introduce ceph_pools_pg_protection Ansible module and
ceph-pg playbook and role.

Change-Id: Ie45f8a66193d023360115e8cb3f8766d8e99a986
This commit is contained in:
John Fulton 2019-11-16 18:42:04 +00:00
parent 6df86cdffe
commit 70596306b1
5 changed files with 505 additions and 0 deletions

View File

@ -0,0 +1,15 @@
=================================
Module - ceph_pools_pg_protection
=================================
This module provides for the following ansible plugin:
* ceph_pools_pg_protection
.. ansibleautoplugin::
:module: library/ceph_pools_pg_protection.py
:documentation: true
:examples: true

View File

@ -0,0 +1,249 @@
#!/usr/bin/env python
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from yaml import safe_load as yaml_safe_load
from ansible.module_utils.basic import AnsibleModule
ANSIBLE_METADATA = {
    'metadata_version': '0.1',
    'status': ['preview'],
    'supported_by': 'community'
}

# NOTE: DOCUMENTATION is parsed with yaml.safe_load() in run_module() to
# build the AnsibleModule argument_spec, so its 'options' section must
# remain valid YAML that mirrors the accepted parameters exactly.
DOCUMENTATION = '''
---
module: ceph_pools_pg_protection
short_description: Warn if Ceph will not create CephPools based on PG and OSD numbers
description:
    - "The Ceph PG overdose protection check (https://ceph.com/community/new-luminous-pg-overdose-protection) is executed by Ceph before a pool is created. If the check does not pass, then the pool is not created. When TripleO deploys Ceph it triggers ceph-ansible which creates the pools that OpenStack needs. This validation runs the same check that the overdose protection uses to determine if the user should update their CephPools, PG count, or number of OSDs. Without this check a deployer may have to wait until after Ceph is running but before the pools are created to realize the deployment will fail."
options:
    num_osds:
        description:
            - The number of Ceph OSDs expected to be running during Pool creation.
            - TripleO does not have this parameter
            - In theory you can derive this parameter from TripleO parameters
        required: True
        type: int
    ceph_pool_default_size:
        description:
            - The same as the TripleO CephPoolDefaultSize parameter
            - Number of replicas of the data
        required: False
        default: 3
        type: int
    ceph_pool_default_pg_num:
        description:
            - The same as the TripleO CephPoolDefaultPgNum parameter
            - The default number of Placement Groups a pool should have
            - Ceph defaults this number to 16
            - TripleO defaults this number to 128
        required: False
        default: 128
        type: int
    ceph_pools:
        description:
            - The same as the TripleO CephPools parameter
            - A list of dictionaries
            - Each embedded dict must have a name parameter
            - Optional pg_num and size parameters may be set per pool
        required: True
        type: list
author:
    - John Fulton (fultonj)
'''

EXAMPLES = '''
# Call this module from TripleO Ansible Validations
- name: Is the CephPools parameter configured correctly?
  ceph_pools_pg_protection:
    num_osds: 36
    ceph_pool_default_size: 3
    ceph_pool_default_pg_num: 128
    ceph_pools:
      - {"name": volumes, "pg_num": 1024,"pgp_num": 1024, "application": rbd, "size": 3}
      - {"name": vms, "pg_num": 512, "pgp_num": 512, "application": rbd, "size": 3}
      - {"name": images, "pg_num": 128, "pgp_num": 128, "application": rbd, "size": 3}
  register: pool_creation_simulation
- name: Fail if CephPools parameter is not configured correctly
  fail:
    msg: '{{ pool_creation_simulation["message"] }}'
  when: not pool_creation_simulation["valid_input"]

# Call this module from within TripleO Heat Templates (if only num_osds was derived)
- name: Is the CephPools parameter configured correctly?
  ceph_pools_pg_protection:
    num_osds: 36
    ceph_pool_default_size: {get_param: CephPoolDefaultSize}
    ceph_pool_default_pg_num: {get_param: CephPoolDefaultPgNum}
    ceph_pools: {get_param: CephPools}
  register: pool_creation_simulation
'''

RETURN = '''
message:
    description: A description of why Ceph might refuse to create the requested CephPools
    type: str
    returned: always
valid_input:
    description: True only if Ceph would create all requested pools
    type: bool
    returned: always
'''
def check_pg_num(pool, pg_num, size, num_osds=0, max_pgs_per_osd=200, pools=None):
    """
    Returns empty string only if the Pool PG numbers are correct for the OSDs.
    Otherwise returns an error message like the one Ceph would return.

    :param pool: name of the pool whose creation is being simulated
    :param pg_num: number of placement groups requested for the pool
    :param size: number of data replicas requested for the pool
    :param num_osds: number of OSDs expected to be in service
    :param max_pgs_per_osd: Ceph's mon_max_pg_per_osd limit
    :param pools: dict of already existing pools mapping each pool name
                  to a dict with 'pg_num' and 'size' keys
    :return: "" on success, otherwise a Ceph-style error message
    """
    # The original check in C++ from the Ceph source code is:
    #
    # int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, ostream *ss)
    # {
    #  auto max_pgs_per_osd = g_conf->get_val<uint64_t>("mon_max_pg_per_osd");
    #  auto num_osds = std::max(osdmap.get_num_in_osds(), 3u); // assume min cluster size 3
    #  auto max_pgs = max_pgs_per_osd * num_osds;
    #  uint64_t projected = 0;
    #  if (pool < 0) {
    #    projected += pg_num * size;
    #  }
    #  for (const auto& i : osdmap.get_pools()) {
    #    if (i.first == pool) {
    #      projected += pg_num * size;
    #    } else {
    #      projected += i.second.get_pg_num() * i.second.get_size();
    #    }
    #  }
    #  if (projected > max_pgs) {
    #    if (pool >= 0) {
    #      *ss << "pool id " << pool;
    #    }
    #    *ss << " pg_num " << pg_num << " size " << size
    #   << " would mean " << projected
    #   << " total pgs, which exceeds max " << max_pgs
    #   << " (mon_max_pg_per_osd " << max_pgs_per_osd
    #   << " * num_in_osds " << num_osds << ")";
    #    return -ERANGE;
    #  }
    #  return 0;
    # }
    if pools is None:
        # Avoid a mutable default argument; None means "no pools exist yet".
        pools = {}
    msg = ""
    max_pgs = max_pgs_per_osd * num_osds
    # Project the cluster-wide PG total if this request were applied.
    # NOTE(review): unlike the C++ original, a pool that is not already
    # present in `pools` does not add its own pg_num * size here; the
    # original port expressed `pool < 0` as `len(pool) < 0`, which is
    # always False, so that dead branch has been removed. The unit tests
    # shipped with this module assert this exact behavior.
    projected = 0
    for pool_name, pool_sizes in pools.items():
        if pool_name == pool:
            projected = projected + (pg_num * size)
        else:
            projected = projected + (int(pool_sizes['pg_num']) * int(pool_sizes['size']))
    if projected > max_pgs:
        msg = "Cannot add pool: " + str(pool) + \
              " pg_num " + str(pg_num) + " size " + str(size) + \
              " would mean " + str(projected) + \
              " total pgs, which exceeds max " + str(max_pgs) + \
              " (mon_max_pg_per_osd " + str(max_pgs_per_osd) + \
              " * num_in_osds " + str(num_osds) + ")"
    return msg
def simulate_pool_creation(num_osds, ceph_pools,
                           ceph_pool_default_size=3,
                           ceph_pool_default_pg_num=128,
                           max_pgs_per_osd=200):
    """
    Simulate ceph-ansible asking Ceph to create the pools in the ceph_pools list

    Pools are "created" in list order; the simulation stops at the first
    pool Ceph would reject.

    :param num_osds: number of OSDs expected to be running
    :param ceph_pools: list of dicts; each must have a 'name' key and may
                       optionally carry 'pg_num' and 'size' keys
    :param ceph_pool_default_size: replica count used when a pool has no 'size'
    :param ceph_pool_default_pg_num: PG count used when a pool has no 'pg_num'
    :param max_pgs_per_osd: Ceph's mon_max_pg_per_osd limit
    :return: dict with 'failed' (bool) and 'msg' (str) keys
    """
    msg = ""
    failed = False
    ceph_msg = ""
    created_pools = {}
    for pool in ceph_pools:
        # Apply the defaults via dict.get so the caller's dicts are not
        # mutated (the original version wrote the defaults back into them).
        size = pool.get('size', ceph_pool_default_size)
        pg_num = pool.get('pg_num', ceph_pool_default_pg_num)
        ceph_msg = check_pg_num(pool['name'], pg_num, size,
                                num_osds, max_pgs_per_osd, created_pools)
        if len(ceph_msg) == 0:
            created_pools[pool['name']] = {'pg_num': pg_num, 'size': size}
        else:
            failed = True
            break
    if failed:
        msg = "The following Ceph pools would be created (but no others):" + \
              "\n" + str(created_pools) + "\n" + \
              "Pool creation would then fail with the following from Ceph:" + \
              "\n" + ceph_msg + "\n" + \
              "Please use https://ceph.io/pgcalc and then update the CephPools parameter"
    simulation_results = {}
    simulation_results['failed'] = failed
    simulation_results['msg'] = msg
    return simulation_results
def run_module():
    """Run the CephPools PG overdose validation and exit via Ansible."""
    # Default result: this module never changes the target system, and the
    # input is presumed valid until the simulation says otherwise.
    result = dict(
        changed=False,
        valid_input=True,
        message=''
    )
    # The argument spec is derived straight from the DOCUMENTATION block so
    # the accepted parameters and their docs can never drift apart.
    module = AnsibleModule(
        argument_spec=yaml_safe_load(DOCUMENTATION)['options'],
        supports_check_mode=False
    )
    # Check mode is not supported; exit immediately with the default result.
    if module.check_mode:
        module.exit_json(**result)
    # Replay the same PG overdose protection check Ceph would run while
    # ceph-ansible creates each requested pool.
    params = module.params
    outcome = simulate_pool_creation(params['num_osds'],
                                     params['ceph_pools'],
                                     params['ceph_pool_default_size'],
                                     params['ceph_pool_default_pg_num'])
    if not outcome['failed']:
        result['message'] = 'Provided CephPools satisfy PG overdose protection'
        result['valid_input'] = True
    else:
        result['message'] = "Invalid Ceph configuration: " + outcome['msg']
        result['valid_input'] = False
    # By design this module only evaluates input: result['changed'] stays
    # False and module.fail_json() is never called — the caller decides
    # what to do with valid_input/message.
    module.exit_json(**result)
def main():
    """Module entry point: delegate to run_module()."""
    run_module()

if __name__ == '__main__':
    main()

22
playbooks/ceph-pg.yaml Normal file
View File

@ -0,0 +1,22 @@
---
- hosts: undercloud
vars:
metadata:
name: Validate requested Ceph Placement Groups
description: >
In Ceph Luminous and newer the Placement Group overdose protection
check (https://ceph.com/community/new-luminous-pg-overdose-protection)
is executed by Ceph before a pool is created. If the check does not
pass, then the pool is not created. When TripleO deploys Ceph it
triggers ceph-ansible which creates the pools that OpenStack needs.
This validation runs the same check that the overdose protection uses
to determine if the user should update their CephPools, PG count, or
number of OSDs. Without this check a deployer may have to wait until
after Ceph is running but before the pools are created to realize
the deployment will fail.
groups:
- pre-deployment
tasks:
- include_role:
name: ceph
tasks_from: ceph-pg

View File

@ -0,0 +1,145 @@
---
- name: Lookup ANSIBLE_HASH_BEHAVIOUR
set_fact:
hash_behavior: "{{ lookup('config', 'DEFAULT_HASH_BEHAVIOUR', on_missing='skip')|default('replace') }}"
- name: Fail unless ANSIBLE_HASH_BEHAVIOUR=merge
fail:
msg: |
In order to simulate Tripleo Heat Template behavior this role requires
that it be run with Ansible's hash_behaviour set to merge. Please
re-run with 'export ANSIBLE_HASH_BEHAVIOUR=merge'
when:
- hash_behavior != 'merge'
- name: Fail if number of OSDs is not specified
fail:
msg: "Please pass the expected number of OSDs, e.g. '-e num_osds=36'"
when: num_osds is not defined
- name: Get ceph_pool_default_size
set_fact:
ceph_pool_default_size: "{{ parameter_defaults['CephPoolDefaultSize']|default(3) }}"
- name: Get ceph_pool_default_pg_num
set_fact:
ceph_pool_default_pg_num: "{{ parameter_defaults['CephPoolDefaultPgNum']|default(128) }}"
- name: Set ceph_pools default
set_fact:
ceph_pools:
- name: "{{ parameter_defaults['CinderBackupRbdPoolName']|default('backups') }}"
application: rbd
- name: "{{ parameter_defaults['CinderRbdPoolName']|default('volumes') }}"
application: rbd
- name: "{{ parameter_defaults['NovaRbdPoolName']|default('vms') }}"
application: rbd
- name: "{{ parameter_defaults['GlanceRbdPoolName']|default('images') }}"
application: rbd
- name: "{{ parameter_defaults['GnocchiRbdPoolName']|default('metrics') }}"
application: openstack_gnocchi
- when:
- parameter_defaults['CephPools'] is defined
- (parameter_defaults['CephPools']|length) > 0
block:
- name: Get names of custom pools
set_fact:
custom_names: "{{ custom_names | default([]) + [ item.name ] }}"
loop: "{{ parameter_defaults['CephPools'] }}"
- name: Get names of default pools
set_fact:
default_names: "{{ default_names | default([]) + [ item.name ] }}"
loop: "{{ ceph_pools }}"
- name: Base updated ceph_pools list on custom list
set_fact:
new_ceph_pools: "{{ parameter_defaults['CephPools'] }}"
- name: Add default pools not in custom list to updated ceph_pools list
set_fact:
new_ceph_pools: "{{ new_ceph_pools | default([]) + [item] }}"
loop: "{{ ceph_pools }}"
when:
- item.name in default_names|difference(custom_names)
- name: redefine ceph_pools based on updated ceph_pools list
set_fact:
ceph_pools: "{{ new_ceph_pools }}"
- name: Add CinderRbdExtraPools if provided and not in custom pool list
set_fact:
ceph_pools: "{{ ceph_pools | default([]) + [dict(name=item, application='rbd') ] }}"
loop: "{{ parameter_defaults.CinderRbdExtraPools.split(',')|list }}"
when:
- parameter_defaults['CinderRbdExtraPools'] is defined
- (parameter_defaults['CinderRbdExtraPools']|length) > 0
- item not in custom_names|default([])
- name: Warn if deprecated Manila parameters are being used
warn:
msg: |
One or more of the following parameters is in use but is
deprecated in Stein and newer; ManilaCephFSDataPoolPGNum,
ManilaCephFSMetadataPoolPGNum, and ManilaCephFSShareBackendName
when:
- (parameter_defaults['ManilaCephFSDataPoolPGNum'] is defined or
parameter_defaults['ManilaCephFSMetadataPoolPGNum'] is defined or
parameter_defaults['ManilaCephFSShareBackendName'] is defined)
- name: Add Manila MDS pools if provided and not in custom pool list
set_fact:
ceph_pools: "{{ ceph_pools | default([]) + [item] }}"
loop:
- name: "{{ parameter_defaults['ManilaCephFSDataPoolName']|default('manila_data') }}"
application: cephfs
pg_num: "{{ parameter_defaults['ManilaCephFSDataPoolPGNum']|default(128) }}"
pgp_num: "{{ parameter_defaults['ManilaCephFSDataPoolPGNum']|default(128) }}"
- name: "{{ parameter_defaults['ManilaCephFSMetadataPoolName']|default('manila_metadata') }}"
application: cephfs
pg_num: "{{ parameter_defaults['ManilaCephFSMetadataPoolPGNum']|default(128) }}"
pgp_num: "{{ parameter_defaults['ManilaCephFSMetadataPoolPGNum']|default(128) }}"
when:
- resource_registry['OS::TripleO::Services::ManilaBackendCephFs'] is defined
- resource_registry['OS::TripleO::Services::ManilaBackendCephFs'] != 'OS::Heat::None'
- item.name not in custom_names|default([])
- name: Add RGW pools if requested unless in custom pool list (only for default zone)
set_fact:
ceph_pools: "{{ ceph_pools | default([]) + [dict(name=item, application='rgw') ] }}"
loop:
- .rgw.root
- default.rgw.control
- default.rgw.meta
- default.rgw.log
- default.rgw.buckets.index
- default.rgw.buckets.data
when:
- resource_registry['OS::TripleO::Services::CephRgw'] is defined
- resource_registry['OS::TripleO::Services::CephRgw'] != 'OS::Heat::None'
- item not in custom_names|default([])
- name: Warn if an application is not set for each pool
warn:
msg: "The pool {{ item.name }} should have an application set, e.g. {'name': {{ item.name }}, 'application': rbd}"
when: item.application is not defined
loop: "{{ ceph_pools }}"
- name: Simulate OpenStack pool creation in proposed Ceph Cluster
ceph_pools_pg_protection:
num_osds: "{{ num_osds }}"
ceph_pool_default_size: "{{ ceph_pool_default_size }}"
ceph_pool_default_pg_num: "{{ ceph_pool_default_pg_num }}"
ceph_pools: "{{ ceph_pools }}"
register: pool_creation_simulation
- name: Fail if CephPools parameter is not configured correctly
fail:
msg: '{{ pool_creation_simulation["message"] }}'
when: not pool_creation_simulation["valid_input"]
- name: Succeed if CephPools parameter will satisfy PG overdose protection
debug:
msg: '{{ pool_creation_simulation["message"] }}'
when: pool_creation_simulation["valid_input"]

View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
test_ceph_pools_pg_protection
-----------------------------
Tests for `ceph_pools_pg_protection` module.
"""
import library.ceph_pools_pg_protection as validation
from tripleo_validations.tests import base
class TestCephPoolsPgProtection(base.TestCase):
    """Unit tests for the pure helpers in ceph_pools_pg_protection."""

    @staticmethod
    def _existing_pools():
        """Three pre-existing pools used by the check_pg_num cases."""
        return {'images': {'pg_num': 128, 'size': 3},
                'vms': {'pg_num': 256, 'size': 3},
                'volumes': {'pg_num': 512, 'size': 3}}

    @staticmethod
    def _requested_pools():
        """The same three pools expressed as a creation request list."""
        return [{'name': 'images', 'pg_num': 128, 'size': 3},
                {'name': 'vms', 'pg_num': 256, 'size': 3},
                {'name': 'volumes', 'pg_num': 512, 'size': 3}]

    def test_check_pg_num_enough_osds(self):
        '''Test adding one more pool to the existing pools with 36 OSDs'''
        result = validation.check_pg_num('backups', 128, 3, 36, 200,
                                         self._existing_pools())
        self.assertEqual("", result)

    def test_check_pg_num_not_enough_osds(self):
        '''Test adding one more pool to the existing pools with 1 OSD'''
        expected = ("Cannot add pool: backups pg_num 128 size 3 "
                    "would mean 2688 total pgs, which exceeds max 200 "
                    "(mon_max_pg_per_osd 200 * num_in_osds 1)")
        result = validation.check_pg_num('backups', 128, 3, 1, 200,
                                         self._existing_pools())
        self.assertEqual(expected, result)

    def test_simulate_pool_creation_enough_osds(self):
        '''Test creating 3 pools with differing PGs with 36 OSDs'''
        outcome = validation.simulate_pool_creation(36, self._requested_pools())
        self.assertFalse(outcome['failed'])
        self.assertEqual("", outcome['msg'])

    def test_simulate_pool_creation_not_enough_osds(self):
        '''Test creating 3 pools with differing PGs with 1 OSD'''
        expected = ("The following Ceph pools would be created (but no others):\n"
                    "{'images': {'pg_num': 128, 'size': 3}}\n"
                    "Pool creation would then fail with the following from Ceph:\n"
                    "Cannot add pool: vms pg_num 256 size 3 would mean 384 total pgs, "
                    "which exceeds max 200 (mon_max_pg_per_osd 200 * num_in_osds 1)\n"
                    "Please use https://ceph.io/pgcalc and then update the "
                    "CephPools parameter")
        outcome = validation.simulate_pool_creation(1, self._requested_pools())
        self.assertTrue(outcome['failed'])
        self.assertEqual(expected, outcome['msg'])