charm-ceph-mon/actions/pg_repair.py
Luciano Lo Giudice 1ee3d04fda First rewrite of ceph-mon with operator framework
This patchset implements the first rewrite of the charm using the
operator framework by simply calling into the hooks.

This change also includes functional validation of charm upgrades
from the previous stable charm to the locally built charm.

Fix tempest breakage for python < 3.8

Co-authored-by: Chris MacNaughton <chris.macnaughton@canonical.com>

Change-Id: I61308bb2900134ea163d9e92444066a3cb0de43d
func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/849
2022-08-19 19:00:56 -03:00

189 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from subprocess import check_output, CalledProcessError
from charmhelpers.core.hookenv import (
log,
function_fail,
function_set,
)
from charms_ceph.utils import list_pools
def get_rados_inconsistent_objs(pg):
    """Query rados for the inconsistent objects of a placement group.

    Runs ``rados list-inconsistent-obj <pg> --format=json-pretty`` and
    parses the command's standard output as JSON.

    :param pg: Name of a placement group
    :type pg: str
    :returns: the parsed JSON report; callers in this file read its
        "inconsistents" key, so it is expected to be a dict
    :rtype: dict
    :raises subprocess.CalledProcessError: if the rados command exits with
        a non-zero status
    """
    command = ["rados", "list-inconsistent-obj", pg, "--format=json-pretty"]
    raw_report = check_output(command)
    return json.loads(raw_report.decode("UTF-8"))
def get_rados_inconsistent_pgs(pool):
    """Query rados for the inconsistent placement groups of a pool.

    Runs ``rados list-inconsistent-pg <pool>`` and parses the command's
    standard output as JSON.

    :param pool: Name of a Ceph pool
    :type pool: str
    :returns: the parsed JSON output, expected to be a list of inconsistent
        placement group IDs
    :rtype: list[str]
    :raises subprocess.CalledProcessError: if the rados command exits with
        a non-zero status
    """
    command = ["rados", "list-inconsistent-pg", pool]
    raw_listing = check_output(command)
    return json.loads(raw_listing.decode("UTF-8"))
def get_inconsistent_pgs(ceph_pools):
    """Collect the inconsistent placement groups across several pools.

    Each pool is queried in turn via get_rados_inconsistent_pgs() and the
    results are merged, so a placement group ID appears at most once.

    :param ceph_pools: Names of the Ceph pools to inspect
    :type ceph_pools: list[str]
    :returns: inconsistent placement group IDs found in any of the pools
    :rtype: set[str]
    """
    return {
        pg_id
        for pool_name in ceph_pools
        for pg_id in get_rados_inconsistent_pgs(pool_name)
    }
def get_safe_pg_repairs(inconsistent_pgs):
    """Select the inconsistent placement groups that are safe to repair.

    Every candidate is checked with is_pg_safe_to_repair(); only those for
    which the check passes are kept.

    :param inconsistent_pgs: Inconsistent placement groups to examine
    :type inconsistent_pgs: list[str] or set[str]
    :returns: the subset of placement groups that can be repaired safely
    :rtype: set[str]
    """
    repairable = set()
    for candidate in inconsistent_pgs:
        if is_pg_safe_to_repair(candidate):
            repairable.add(candidate)
    return repairable
def is_pg_safe_to_repair(pg):
    """Decide whether an inconsistent placement group may be repaired.

    Currently the only recognised safe case is an inconsistency caused
    solely by a read error, as reported by has_read_error_only().

    :param pg: Name of an inconsistent placement group
    :type pg: str
    :returns: True when the placement group is safe to repair
    :rtype: bool
    """
    # Checks for further known-safe cases can be combined with this one.
    read_error_only = has_read_error_only(pg)
    return read_error_only
def has_read_error_only(pg):
    """Check whether a placement group's only problem is a single read error.

    The inconsistency report for the placement group is scanned shard by
    shard. The result is True only when exactly one shard reports errors
    and that shard's error list is exactly ["read_error"]. It is False when:

    * no shard reports a read error,
    * any shard reports an error list other than ["read_error"], or
    * more than one shard reports a read error.

    :param pg: ID of an inconsistent placement group
    :type pg: str
    :returns: placement group is safe to repair
    :rtype: bool
    """
    report = get_rados_inconsistent_objs(pg)
    read_error_shards = 0
    for inconsistent_obj in report.get("inconsistents", []):
        for shard in inconsistent_obj.get("shards", []):
            shard_errors = shard.get("errors", [])
            if not shard_errors:
                # Healthy shard, nothing to evaluate.
                continue
            if shard_errors != ["read_error"]:
                # Error other than "read_error" detected
                return False
            read_error_shards += 1
            if read_error_shards > 1:
                # A second shard with a read error is not a safe case.
                return False
    return read_error_shards == 1
def perform_pg_repairs(pgs):
    """Run ``ceph pg repair`` for each of the given placement groups.

    Callers must have confirmed that every placement group is safe to
    repair before passing it in; no safety check is made here.

    :param pgs: Safe-to-repair placement groups
    :type pgs: list[str]
    :raises subprocess.CalledProcessError: if a repair command exits with a
        non-zero status; remaining placement groups are then not processed
    """
    for pg_id in pgs:
        announcement = "Repairing ceph placement group {}".format(pg_id)
        log(announcement)
        repair_command = ["ceph", "pg", "repair", pg_id]
        check_output(repair_command)
def _log_and_set_message(msg):
    """Log a message and publish it as the action's "message" result."""
    log(msg)
    # function_set takes a mapping of result keys to values (as the success
    # path of pg_repair passes); a bare string is not a valid argument.
    function_set({"message": msg})


def pg_repair():
    """Repair all inconsistent placement groups caused by read errors.

    Steps:

    1. List the Ceph pools; stop if there are none.
    2. Collect the inconsistent placement groups of those pools; stop if
       there are none.
    3. Split them into safe and unsafe repairs. Unsafe ones are only logged.
    4. Run the repair for every safe placement group.

    In every case the outcome is published as the action result under the
    "message" key.

    :raises subprocess.CalledProcessError: if one of the underlying
        rados/ceph commands fails
    """
    ceph_pools = list_pools()
    if not ceph_pools:
        _log_and_set_message("No Ceph pools found.")
        return

    # Get inconsistent placement groups
    inconsistent_pgs = get_inconsistent_pgs(ceph_pools)
    if not inconsistent_pgs:
        _log_and_set_message("No inconsistent placement groups found.")
        return

    # Filter for known safe cases
    safe_pg_repairs = get_safe_pg_repairs(inconsistent_pgs)
    unsafe_pg_repairs = inconsistent_pgs.difference(safe_pg_repairs)

    # Unsafe repairs are reported in the log but never attempted.
    if unsafe_pg_repairs:
        log(
            "Ignoring unsafe placement group repairs: {}".format(
                unsafe_pg_repairs
            )
        )

    # Perform safe placement group repairs
    if not safe_pg_repairs:
        _log_and_set_message("No safe placement group repairs found.")
        return

    log("Safe placement group repairs found: {}".format(safe_pg_repairs))
    perform_pg_repairs(safe_pg_repairs)
    function_set(
        {
            # sorted() gives a stable, readable ordering for the set.
            "message": "placement groups repaired: {}".format(
                sorted(safe_pg_repairs)
            )
        }
    )
def main():
    """Entry point of the action: run pg_repair() and report failures.

    A CalledProcessError raised by any underlying command is logged and
    turned into an action failure instead of propagating.
    """
    try:
        pg_repair()
    except CalledProcessError as err:
        log(err)
        failure = "Safe placement group repair failed with error: {}".format(
            str(err)
        )
        function_fail(failure)


# Run the action only when executed as a script, not when imported.
if __name__ == "__main__":
    main()