diff --git a/doc/source/modules/modules-ceph_pools_pg_protection.rst b/doc/source/modules/modules-ceph_pools_pg_protection.rst new file mode 100644 index 000000000..d1a1f8797 --- /dev/null +++ b/doc/source/modules/modules-ceph_pools_pg_protection.rst @@ -0,0 +1,15 @@ +================================= +Module - ceph_pools_pg_protection +================================= + + +This module provides for the following ansible plugin: + + * ceph_pools_pg_protection + + +.. ansibleautoplugin:: + :module: library/ceph_pools_pg_protection.py + :documentation: true + :examples: true + diff --git a/library/ceph_pools_pg_protection.py b/library/ceph_pools_pg_protection.py new file mode 100644 index 000000000..861ff2fd8 --- /dev/null +++ b/library/ceph_pools_pg_protection.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python +# Copyright 2019 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from yaml import safe_load as yaml_safe_load +from ansible.module_utils.basic import AnsibleModule + + +ANSIBLE_METADATA = { + 'metadata_version': '0.1', + 'status': ['preview'], + 'supported_by': 'community' +} + +DOCUMENTATION = ''' +--- +module: ceph_pools_pg_protection +short_description: Warn if Ceph will not create CephPools based on PG and OSD numbers +description: + - "The Ceph PG overdose protection check (https://ceph.com/community/new-luminous-pg-overdose-protection) is executed by Ceph before a pool is created. 
If the check does not pass, then the pool is not created. When TripleO deploys Ceph it triggers ceph-ansible which creates the pools that OpenStack needs. This validation runs the same check that the overdose protection uses to determine if the user should update their CephPools, PG count, or number of OSDs. Without this check a deployer may have to wait until after Ceph is running but before the pools are created to realize the deployment will fail." +options: + num_osds: + description: + - The number of Ceph OSDs expected to be running during Pool creation. + - TripleO does not have this parameter + - In theory you can derive this parameter from TripleO parameters + required: True + type: int + ceph_pool_default_size: + description: + - The same as the TripleO CephPoolDefaultSize parameter + - Number of replicas of the data + required: False + default: 3 + type: int + ceph_pool_default_pg_num: + description: + - The same as the TripleO CephPoolDefaultPgNum parameter + - The default number of Placement Groups a pool should have + - Ceph defaults this number to 16 + - TripleO defaults this number to 128 + required: False + default: 128 + type: int + ceph_pools: + description: + - The same as the TripleO CephPools parameter + - A list of dictionaries + - Each embedded dict must have a name parameter + - Optional pg_num and size parameters may be set per pool + required: True + type: list +author: + - John Fulton (fultonj) +''' + +EXAMPLES = ''' +# Call this module from TripleO Ansible Validations + +- name: Is the CephPools parameter configured correctly? 
def check_pg_num(pool, pg_num, size, num_osds=0, max_pgs_per_osd=200, pools=None):
    """
    Simulate Ceph's PG overdose protection check for a single pool.

    Returns empty string only if the Pool PG numbers are correct for the OSDs.
    Otherwise returns an error message like the one Ceph would return.

    :param pool: name of the pool that is about to be created
    :param pg_num: number of placement groups requested for that pool
    :param size: replica count requested for that pool
    :param num_osds: number of OSDs expected to be in the cluster
    :param max_pgs_per_osd: value of Ceph's mon_max_pg_per_osd option
    :param pools: already existing pools as a dict of
                  ``{name: {'pg_num': ..., 'size': ...}}``; None means none
    :returns: "" if the pool fits, otherwise the error message
    :raises ValueError: if a numeric input cannot be converted with int()

    NOTE(review): the Ceph code quoted below assumes a minimum of 3 OSDs
    (``std::max(..., 3u)``); this port uses num_osds exactly as given, which
    is stricter for clusters with fewer than 3 OSDs. Confirm this is intended.
    """
    # The original check in C++ from the Ceph source code is:
    #
    # int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, ostream *ss)
    # {
    #   auto max_pgs_per_osd = g_conf->get_val("mon_max_pg_per_osd");
    #   auto num_osds = std::max(osdmap.get_num_in_osds(), 3u); // assume min cluster size 3
    #   auto max_pgs = max_pgs_per_osd * num_osds;
    #   uint64_t projected = 0;
    #   if (pool < 0) {
    #     projected += pg_num * size;
    #   }
    #   for (const auto& i : osdmap.get_pools()) {
    #     if (i.first == pool) {
    #       projected += pg_num * size;
    #     } else {
    #       projected += i.second.get_pg_num() * i.second.get_size();
    #     }
    #   }
    #   if (projected > max_pgs) {
    #     if (pool >= 0) {
    #       *ss << "pool id " << pool;
    #     }
    #     *ss << " pg_num " << pg_num << " size " << size
    #         << " would mean " << projected
    #         << " total pgs, which exceeds max " << max_pgs
    #         << " (mon_max_pg_per_osd " << max_pgs_per_osd
    #         << " * num_in_osds " << num_osds << ")";
    #     return -ERANGE;
    #   }
    #   return 0;
    # }
    if pools is None:
        pools = {}

    # Values may arrive as strings (stored pools were already being cast with
    # int()); normalise everything once so that "128" * 3 can never turn into
    # string repetition.
    pg_num = int(pg_num)
    size = int(size)
    num_osds = int(num_osds)
    max_pgs_per_osd = int(max_pgs_per_osd)

    max_pgs = max_pgs_per_osd * num_osds

    # Ceph marks a new pool with a negative id; here pools are identified by
    # name, so "new" simply means "not in pools". In both cases the pool under
    # test has to be counted exactly once with the requested values, and every
    # other existing pool with its stored values.
    projected = pg_num * size
    for pool_name, pool_sizes in pools.items():
        if pool_name != pool:
            projected += int(pool_sizes['pg_num']) * int(pool_sizes['size'])

    if projected <= max_pgs:
        return ""

    return ("Cannot add pool: {pool} pg_num {pg_num} size {size}"
            " would mean {projected} total pgs, which exceeds max {max_pgs}"
            " (mon_max_pg_per_osd {max_pgs_per_osd}"
            " * num_in_osds {num_osds})").format(
                pool=pool, pg_num=pg_num, size=size, projected=projected,
                max_pgs=max_pgs, max_pgs_per_osd=max_pgs_per_osd,
                num_osds=num_osds)


def simulate_pool_creation(num_osds, ceph_pools,
                           ceph_pool_default_size=3,
                           ceph_pool_default_pg_num=128,
                           max_pgs_per_osd=200):
    """
    Simulate ceph-ansible asking Ceph to create the pools in the ceph_pools list

    Pools are "created" in list order; the simulation stops at the first pool
    that check_pg_num() rejects. The dictionaries in ceph_pools are not
    modified.

    :param num_osds: number of OSDs expected to be in the cluster
    :param ceph_pools: list of dicts, each with a 'name' key and optional
                       'pg_num' and 'size' keys
    :param ceph_pool_default_size: size used for pools without a 'size' key
    :param ceph_pool_default_pg_num: pg_num used for pools without 'pg_num'
    :param max_pgs_per_osd: value of Ceph's mon_max_pg_per_osd option
    :returns: dict with 'failed' (bool) and 'msg' (str, empty on success)
    :raises KeyError: if a pool has no 'name' key
    """
    created_pools = {}
    ceph_msg = ""
    for pool in ceph_pools:
        name = pool['name']
        # Fall back to the cluster-wide defaults without touching the
        # caller's dictionaries.
        size = pool.get('size', ceph_pool_default_size)
        pg_num = pool.get('pg_num', ceph_pool_default_pg_num)
        ceph_msg = check_pg_num(name, pg_num, size,
                                num_osds, max_pgs_per_osd, created_pools)
        if ceph_msg:
            # Ceph would refuse this pool, so no later pool gets created.
            break
        created_pools[name] = {'pg_num': pg_num, 'size': size}

    failed = bool(ceph_msg)
    msg = ""
    if failed:
        msg = "The following Ceph pools would be created (but no others):" + \
              "\n" + str(created_pools) + "\n" + \
              "Pool creation would then fail with the following from Ceph:" + \
              "\n" + ceph_msg + "\n" + \
              "Please use https://ceph.io/pgcalc and then update the CephPools parameter"
    return {'failed': failed, 'msg': msg}


def run_module():
    """
    Ansible entry point: run the simulation on the module parameters.

    Always exits through module.exit_json() with 'changed' False, a
    'valid_input' boolean and a human readable 'message'; it never calls
    fail_json, leaving the decision to fail to the calling playbook.
    """
    # Seed the result dict in the object
    result = dict(
        changed=False,
        valid_input=True,
        message=''
    )

    # Use AnsibleModule object abstraction to work with Ansible.
    # The argument spec is taken from the 'options' of DOCUMENTATION so the
    # two cannot drift apart.
    module = AnsibleModule(
        argument_spec=yaml_safe_load(DOCUMENTATION)['options'],
        supports_check_mode=False
    )

    # Check mode not supported: hand back the seeded result untouched
    if module.check_mode:
        module.exit_json(**result)

    # Simulate Ceph pool creation
    simulation = simulate_pool_creation(module.params['num_osds'],
                                        module.params['ceph_pools'],
                                        module.params['ceph_pool_default_size'],
                                        module.params['ceph_pool_default_pg_num'])
    if simulation['failed']:
        result['message'] = "Invalid Ceph configuration: " + simulation['msg']
        result['valid_input'] = False
    else:
        result['message'] = 'Provided CephPools satisfy PG overdose protection'
        result['valid_input'] = True

    # This module never changes state of a target system, it only
    # evaluates if inputs will work when Ceph processes them, so
    # result['changed'] stays False.

    # This module does not currently have fail options. It should
    # only evaluate input and make the result of the evaluation available,
    # so by design it does not call module.fail_json().

    # Exit and pass the key/value results of the simulation
    module.exit_json(**result)


def main():
    """Script entry point used when Ansible executes the module."""
    run_module()


if __name__ == '__main__':
    main()
Please + re-run with 'export ANSIBLE_HASH_BEHAVIOUR=merge'" + when: + - hash_behavior != 'merge' + +- name: Fail if number of OSDs is not specified + fail: + msg: "Please pass the expected number of OSDs, e.g. '-e num_osds=36'" + when: num_osds is not defined + +- name: Get ceph_pool_default_size + set_fact: + ceph_pool_default_size: "{{ parameter_defaults['CephPoolDefaultSize']|default(3) }}" + +- name: Get ceph_pool_default_pg_num + set_fact: + ceph_pool_default_pg_num: "{{ parameter_defaults['CephPoolDefaultPgNum']|default(128) }}" + +- name: Set ceph_pools default + set_fact: + ceph_pools: + - name: "{{ parameter_defaults['CinderBackupRbdPoolName']|default('backups') }}" + application: rbd + - name: "{{ parameter_defaults['CinderRbdPoolName']|default('volumes') }}" + application: rbd + - name: "{{ parameter_defaults['NovaRbdPoolName']|default('vms') }}" + application: rbd + - name: "{{ parameter_defaults['GlanceRbdPoolName']|default('images') }}" + application: rbd + - name: "{{ parameter_defaults['GnocchiRbdPoolName']|default('metrics') }}" + application: openstack_gnocchi + +- when: + - parameter_defaults['CephPools'] is defined + - (parameter_defaults['CephPools']|length) > 0 + block: + - name: Get names of custom pools + set_fact: + custom_names: "{{ custom_names | default([]) + [ item.name ] }}" + loop: "{{ parameter_defaults['CephPools'] }}" + + - name: Get names of default pools + set_fact: + default_names: "{{ default_names | default([]) + [ item.name ] }}" + loop: "{{ ceph_pools }}" + + - name: Base updated ceph_pools list on custom list + set_fact: + new_ceph_pools: "{{ parameter_defaults['CephPools'] }}" + + - name: Add default pools not in custom list to updated ceph_pools list + set_fact: + new_ceph_pools: "{{ new_ceph_pools | default([]) + [item] }}" + loop: "{{ ceph_pools }}" + when: + - item.name in default_names|difference(custom_names) + + - name: redefine ceph_pools based on updated ceph_pools list + set_fact: + ceph_pools: "{{ new_ceph_pools 
}}" + +- name: Add CinderRbdExtraPools if provided and not in custom pool list + set_fact: + ceph_pools: "{{ ceph_pools | default([]) + [dict(name=item, application='rbd') ] }}" + loop: "{{ parameter_defaults.CinderRbdExtraPools.split(',')|list }}" + when: + - parameter_defaults['CinderRbdExtraPools'] is defined + - (parameter_defaults['CinderRbdExtraPools']|length) > 0 + - item not in custom_names|default([]) + +- name: Warn if deprecated Manila parameters are being used + warn: + msg: | + One or more of following parameters is in use but is + deprecated in Stein and newer; ManilaCephFSDataPoolPGNum, + ManilaCephFSMetadataPoolPGNum, and ManilaCephFSShareBackendName + when: + - (parameter_defaults['ManilaCephFSDataPoolPGNum'] is defined or + parameter_defaults['ManilaCephFSMetadataPoolPGNum'] is defined or + parameter_defaults['ManilaCephFSShareBackendName'] is defined) + +- name: Add Manila MDS pools if provided and not in custom pool list + set_fact: + ceph_pools: "{{ ceph_pools | default([]) + [item] }}" + loop: + - name: "{{ parameter_defaults['ManilaCephFSDataPoolName']|default('manila_data') }}" + application: cephfs + pg_num: "{{ parameter_defaults['ManilaCephFSDataPoolPGNum']|default(128) }}" + pgp_num: "{{ parameter_defaults['ManilaCephFSDataPoolPGNum']|default(128) }}" + - name: "{{ parameter_defaults['ManilaCephFSMetadataPoolName']|default('manila_metadata') }}" + application: cephfs + pg_num: "{{ parameter_defaults['ManilaCephFSMetadataPoolPGNum']|default(128) }}" + pgp_num: "{{ parameter_defaults['ManilaCephFSMetadataPoolPGNum']|default(128) }}" + when: + - resource_registry['OS::TripleO::Services::ManilaBackendCephFs'] is defined + - resource_registry['OS::TripleO::Services::ManilaBackendCephFs'] != 'OS::Heat::None' + - item.name not in custom_names|default([]) + +- name: Add RGW pools if requested unless in custom pool list (only for default zone) + set_fact: + ceph_pools: "{{ ceph_pools | default([]) + [dict(name=item, application='rgw') ] }}" + 
loop: + - .rgw.root + - default.rgw.control + - default.rgw.meta + - default.rgw.log + - default.rgw.buckets.index + - default.rgw.buckets.data + when: + - resource_registry['OS::TripleO::Services::CephRgw'] is defined + - resource_registry['OS::TripleO::Services::CephRgw'] != 'OS::Heat::None' + - item not in custom_names|default([]) + +- name: Warn if an application is not set for each pool + warn: + msg: "The pool {{ item.name }} should have an application set, e.g. {'name': {{ item.name }}, 'application': rbd}" + when: item.application is not defined + loop: "{{ ceph_pools }}" + +- name: Simulate OpenStack pool creation in proposed Ceph Cluster + ceph_pools_pg_protection: + num_osds: "{{ num_osds }}" + ceph_pool_default_size: "{{ ceph_pool_default_size }}" + ceph_pool_default_pg_num: "{{ ceph_pool_default_pg_num }}" + ceph_pools: "{{ ceph_pools }}" + register: pool_creation_simulation + +- name: Fail if CephPools parameter is not configured correctly + fail: + msg: '{{ pool_creation_simulation["message"] }}' + when: not pool_creation_simulation["valid_input"] + +- name: Succeed if CephPools parameter will satisfy PG overdoce protection + debug: + msg: '{{ pool_creation_simulation["message"] }}' + when: pool_creation_simulation["valid_input"] diff --git a/tripleo_validations/tests/library/test_ceph_pools_pg_protection.py b/tripleo_validations/tests/library/test_ceph_pools_pg_protection.py new file mode 100644 index 000000000..52e66f551 --- /dev/null +++ b/tripleo_validations/tests/library/test_ceph_pools_pg_protection.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
class TestCephPoolsPgProtection(base.TestCase):
    """Unit tests for the Ceph PG overdose protection simulation.

    Every pool in these tests uses size 3, so one pool accounts for
    pg_num * 3 PG replicas: images 384, vms 768, volumes 1536 and
    backups 384. The limit is 200 (mon_max_pg_per_osd) * num_osds, and the
    pool being added always counts towards the projected total, exactly as
    in Ceph's own check.
    """

    def test_check_pg_num_enough_osds(self):
        '''Test adding one more pool to the existing pools with 36 OSDs'''
        num_osds = 36
        pools = {'images': {'pg_num': 128, 'size': 3},
                 'vms': {'pg_num': 256, 'size': 3},
                 'volumes': {'pg_num': 512, 'size': 3}}
        # 2688 existing + 384 new = 3072 <= 7200
        msg = validation.check_pg_num('backups', 128, 3, num_osds, 200, pools)
        self.assertEqual(msg, "")

    def test_check_pg_num_not_enough_osds(self):
        '''Test adding one more pool to the existing pools with 1 OSD'''
        num_osds = 1
        # 2688 existing + 384 for the new backups pool = 3072 > 200
        error = "Cannot add pool: backups pg_num 128 size 3 "
        error += "would mean 3072 total pgs, which exceeds max 200 "
        error += "(mon_max_pg_per_osd 200 * num_in_osds 1)"
        pools = {'images': {'pg_num': 128, 'size': 3},
                 'vms': {'pg_num': 256, 'size': 3},
                 'volumes': {'pg_num': 512, 'size': 3}}
        msg = validation.check_pg_num('backups', 128, 3, num_osds, 200, pools)
        self.assertEqual(msg, error)

    def test_check_pg_num_single_pool_too_large(self):
        '''Test that one oversized pool is rejected without existing pools'''
        # 1024 * 3 = 3072 > 200, even though no other pool exists
        msg = validation.check_pg_num('volumes', 1024, 3, 1, 200, {})
        self.assertNotEqual(msg, "")

    def test_simulate_pool_creation_enough_osds(self):
        '''Test creating 3 pools with differing PGs with 36 OSDs'''
        num_osds = 36
        pools = [{'name': 'images', 'pg_num': 128, 'size': 3},
                 {'name': 'vms', 'pg_num': 256, 'size': 3},
                 {'name': 'volumes', 'pg_num': 512, 'size': 3}]
        sim = validation.simulate_pool_creation(num_osds, pools)
        self.assertEqual(sim['failed'], False)
        self.assertEqual(sim['msg'], "")

    def test_simulate_pool_creation_not_enough_osds(self):
        '''Test creating 3 pools with differing PGs with 2 OSDs'''
        # Limit is 400: images (384) fits, images + vms (1152) does not.
        num_osds = 2
        error = "The following Ceph pools would be created (but no others):\n"
        error += "{'images': {'pg_num': 128, 'size': 3}}\n"
        error += "Pool creation would then fail with the following from Ceph:\n"
        error += "Cannot add pool: vms pg_num 256 size 3 would mean 1152 total pgs, "
        error += "which exceeds max 400 (mon_max_pg_per_osd 200 * num_in_osds 2)\n"
        error += "Please use https://ceph.io/pgcalc and then update the "
        error += "CephPools parameter"
        pools = [{'name': 'images', 'pg_num': 128, 'size': 3},
                 {'name': 'vms', 'pg_num': 256, 'size': 3},
                 {'name': 'volumes', 'pg_num': 512, 'size': 3}]
        sim = validation.simulate_pool_creation(num_osds, pools)
        self.assertEqual(sim['failed'], True)
        self.assertEqual(sim['msg'], error)

    def test_simulate_pool_creation_first_pool_too_large(self):
        '''Test that nothing is created when the first pool exceeds 1 OSD'''
        # Limit is 200: the very first pool (384) already exceeds it.
        num_osds = 1
        error = "The following Ceph pools would be created (but no others):\n"
        error += "{}\n"
        error += "Pool creation would then fail with the following from Ceph:\n"
        error += "Cannot add pool: images pg_num 128 size 3 would mean 384 total pgs, "
        error += "which exceeds max 200 (mon_max_pg_per_osd 200 * num_in_osds 1)\n"
        error += "Please use https://ceph.io/pgcalc and then update the "
        error += "CephPools parameter"
        pools = [{'name': 'images', 'pg_num': 128, 'size': 3},
                 {'name': 'vms', 'pg_num': 256, 'size': 3},
                 {'name': 'volumes', 'pg_num': 512, 'size': 3}]
        sim = validation.simulate_pool_creation(num_osds, pools)
        self.assertEqual(sim['failed'], True)
        self.assertEqual(sim['msg'], error)