Stephen Taylor 0aad6d05f0 [ceph-client] Correct check for too many OSDs in the pool job
The target OSD count and the final target OSD count may differ when
a deployment does not yet include all of the hardware it is expected
to include eventually. This change bases the check for more OSDs
running than expected on the final OSD count rather than the
intermediate one, avoiding false failures when the intermediate
target is exceeded but the final target is not.

Change-Id: I03a13cfe3b9053b6abc5d961426e7a8e92743808
2022-12-15 11:09:12 -07:00
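
As a minimal sketch of the corrected logic (not the chart's actual template code; the variable names below are hypothetical and only mirror the pool.target.osd / pool.target.final_osd values set in the script further down), the pool job's sanity check compares the number of running OSDs against the final target rather than the intermediate one:

# Sketch only, assuming the ceph CLI is reachable from the job pod.
NUM_OSDS=$(ceph osd ls | wc -l)   # OSDs currently known to the cluster
TARGET_OSDS=1                     # pool.target.osd (intermediate target)
FINAL_TARGET_OSDS=1               # pool.target.final_osd (count expected once all hardware is present)

# Comparing against TARGET_OSDS caused false failures whenever the intermediate
# target was exceeded but the final target was not; compare against the final
# target instead.
if [ "${NUM_OSDS}" -gt "${FINAL_TARGET_OSDS}" ]; then
  echo "ERROR: ${NUM_OSDS} OSDs running, more than the final target of ${FINAL_TARGET_OSDS}"
  exit 1
fi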

#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -xe
# setup loopback devices for ceph
./tools/deployment/common/setup-ceph-loopback-device.sh --ceph-osd-data \
  ${CEPH_OSD_DATA_DEVICE:=/dev/loop0} --ceph-osd-dbwal ${CEPH_OSD_DB_WAL_DEVICE:=/dev/loop1}
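# 'losetup -a' lists the configured loop devices if the setup needs to be verified.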
#NOTE: Lint and package chart
for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
  make "${CHART}"
done
#NOTE: Deploy command
: ${OSH_EXTRA_HELM_ARGS:=""}
[ -s /tmp/ceph-fs-uuid.txt ] || uuidgen > /tmp/ceph-fs-uuid.txt
CEPH_FS_ID="$(cat /tmp/ceph-fs-uuid.txt)"
#NOTE(portdirect): to use RBD devices with Ubuntu kernels < 4.5 this
# should be set to 'hammer'
. /etc/os-release
if [ "x${ID}" == "xcentos" ] || \
([ "x${ID}" == "xubuntu" ] && \
dpkg --compare-versions "$(uname -r)" "lt" "4.5"); then
CRUSH_TUNABLES=hammer
else
CRUSH_TUNABLES=null
fi
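# The tunables profile that ends up applied can be checked after deployment,
# e.g. with 'ceph osd crush show-tunables' from a mon pod.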
tee /tmp/ceph.yaml <<EOF
endpoints:
  ceph_mon:
    namespace: ceph
    port:
      mon:
        default: 6789
  ceph_mgr:
    namespace: ceph
    port:
      mgr:
        default: 7000
      metrics:
        default: 9283
network:
  public: 172.17.0.1/16
  cluster: 172.17.0.1/16
  port:
    mon: 6789
    rgw: 8088
    mgr: 7000
deployment:
  storage_secrets: true
  ceph: true
  rbd_provisioner: true
  csi_rbd_provisioner: true
  cephfs_provisioner: true
  client_secrets: false
  rgw_keystone_user_and_endpoints: false
bootstrap:
  enabled: true
conf:
  rgw_ks:
    enabled: false
  ceph:
    global:
      fsid: ${CEPH_FS_ID}
      mon_addr: :6789
      osd_pool_default_size: 1
    osd:
      osd_crush_chooseleaf_type: 0
  pool:
    crush:
      tunables: ${CRUSH_TUNABLES}
    target:
      osd: 1
      final_osd: 1
      pg_per_osd: 100
    default:
      crush_rule: same_host
    spec:
      # Health metrics pool
      - name: device_health_metrics
        application: mgr_devicehealth
        replication: 1
        percent_total_data: 5
      # RBD pool
      - name: rbd
        application: rbd
        replication: 1
        percent_total_data: 40
      # CephFS pools
      - name: cephfs_metadata
        application: cephfs
        replication: 1
        percent_total_data: 5
      - name: cephfs_data
        application: cephfs
        replication: 1
        percent_total_data: 10
      # RadosGW pools
      - name: .rgw.root
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.control
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.data.root
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.gc
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.log
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.intent-log
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.meta
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.usage
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.users.keys
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.users.email
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.users.swift
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.users.uid
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.buckets.extra
        application: rgw
        replication: 1
        percent_total_data: 0.1
      - name: default.rgw.buckets.index
        application: rgw
        replication: 1
        percent_total_data: 3
      - name: default.rgw.buckets.data
        application: rgw
        replication: 1
        percent_total_data: 29
  storage:
    osd:
      - data:
          type: bluestore
          location: ${CEPH_OSD_DATA_DEVICE}
        block_db:
          location: ${CEPH_OSD_DB_WAL_DEVICE}
          size: "5GB"
        block_wal:
          location: ${CEPH_OSD_DB_WAL_DEVICE}
          size: "2GB"
pod:
  replicas:
    mds: 1
    mgr: 1
    rgw: 1
jobs:
  ceph_defragosds:
    # Execute every 15 minutes for gates
    cron: "*/15 * * * *"
    history:
      # Number of successful job to keep
      successJob: 1
      # Number of failed job to keep
      failJob: 1
    concurrency:
      # Skip new job if previous job still active
      execPolicy: Forbid
    startingDeadlineSecs: 60
manifests:
  job_bootstrap: false
EOF
for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
  helm upgrade --install ${CHART} ./${CHART} \
    --namespace=ceph \
    --values=/tmp/ceph.yaml \
    ${OSH_INFRA_EXTRA_HELM_ARGS} \
    ${OSH_INFRA_EXTRA_HELM_ARGS_CEPH_DEPLOY:-$(./tools/deployment/common/get-values-overrides.sh ${CHART})}
  #NOTE: Wait for deploy
  ./tools/deployment/common/wait-for-pods.sh ceph
  #NOTE: Validate deploy
  MON_POD=$(kubectl get pods \
    --namespace=ceph \
    --selector="application=ceph" \
    --selector="component=mon" \
    --no-headers | awk '{ print $1; exit }')
  kubectl exec -n ceph ${MON_POD} -- ceph -s
done
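#NOTE: The ceph-client release deployed above runs the pool job referenced in
# the commit message; its "too many OSDs" check is now based on
# conf.pool.target.final_osd rather than the intermediate conf.pool.target.osd.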
# Delete the test pod if it still exists
kubectl delete pods -l application=ceph-osd,release_group=ceph-osd,component=test --namespace=ceph --ignore-not-found
helm test ceph-osd --namespace ceph --timeout 900s
# Delete the test pod if it still exists
kubectl delete pods -l application=ceph-client,release_group=ceph-client,component=test --namespace=ceph --ignore-not-found
helm test ceph-client --namespace ceph --timeout 900s