Enable optional AIO-DX floating monitor
This will enable integration of the floating monitor chart into the
application with:
- SM service monitor changes:
  - Add and remove floating monitor placement labels in the start/stop
    functions. This will ensure that when SM is transitioning activity,
    labels will align on the active controller.
  - The stop function will delete the pod to force a reschedule.
  - The status function will detect the presence of the DRBD mounted
    filesystem and adjust the labeling accordingly in case the start/stop
    functions did not label as desired.
- application plugin changes:
  - Add constants support for the 'rook-ceph-floating-monitor' helmrelease
  - Provide initial utility functions to detect if the DRBD controller
    filesystem is enabled and if the floating monitor is assigned (via a
    helm user override)
  - Add a new function to get the IP family from the cluster-pod network to
    set overrides and determine the IPv4/IPv6 static address
  - Update the ceph cluster plugin to use the new utility function for
    detecting the IP family
  - Add the floating monitor helm plugin to generate the ip_family and
    static ip_address based on that family. Initial support is provided for
    the cluster-pod network
  - Update the lifecycle plugin to optionally remove the floating monitor
    helm release on application remove
- application metadata changes:
  - Disable the 'rook-ceph-floating-monitor' chart by default
- FluxCD manifest changes:
  - Change the helmrepository API to v1 to clean up an error
  - Add manifests for the 'rook-ceph-floating-monitor' helm release
  - Temporarily set deletionPropagation in the rook-ceph-cluster,
    rook-ceph-provisioner, and rook-ceph-floating-monitor helmreleases to
    provide more predictable delete behavior
  - Update rook-ceph-cluster-static-overrides.yaml to add network defaults
    and disable the host network as the default provider. This was done to
    avoid port conflicts with the floating monitor. The cluster-pod network
    will now be the network used for the ceph cluster and its pods

Enable monitor at runtime:
- system helm-override-list rook-ceph -l
- system helm-override-show rook-ceph rook-ceph-floating-monitor rook-ceph
- system helm-override-update rook-ceph rook-ceph-floating-monitor \
    rook-ceph --set assigned="true"
- system helm-override-show rook-ceph rook-ceph-floating-monitor rook-ceph
- system application-apply rook-ceph

Disable monitor at runtime:
- system helm-override-list rook-ceph -l
- system helm-override-show rook-ceph rook-ceph-floating-monitor rook-ceph
- system helm-override-update rook-ceph rook-ceph-floating-monitor \
    rook-ceph --set assigned="false"
- system helm-override-show rook-ceph rook-ceph-floating-monitor rook-ceph
- system application-apply rook-ceph

Future Improvements:
- Pick up the desired network from the storage backend (cluster-pod,
  cluster-host, etc.) and:
  - update _get_ip_family() to use this value
  - update _get_static_floating_mon_ip() to get the address pool range and
    calculate an appropriate static IP address for the monitor

Test Plan:
PASS - Pkg build + ISO generation
PASS - Successful AIO-DX installation
PASS - Initial Rook deployment without floating monitor
PASS - Initial Rook deployment with floating monitor
PASS - Runtime override enable of Rook floating monitor + reapply
PASS - Runtime override disable of Rook floating monitor + reapply

Change-Id: Ie1ff75481b6c2f0d9d34eb228d3019465e36bc1e
Depends-On: https://review.opendev.org/c/starlingx/config/+/926374
Story: 2011066
Task: 50838
Signed-off-by: Robert Church <robert.church@windriver.com>
Parent: 435e3c6372
Commit: 41adbba935
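The runtime enable/disable procedure above hinges on a single chart user override, 'assigned'. A minimal sketch of how the plugin code in this change interprets that value, assuming the override is stored as the small YAML document shown in the comment (the exact stored form is an assumption; the real check is is_floating_monitor_assigned() in the diff below):

import yaml

# Assumed YAML produced by:
#   system helm-override-update rook-ceph rook-ceph-floating-monitor \
#       rook-ceph --set assigned="true"
stored_user_overrides = 'assigned: "true"'

user_overrides = yaml.safe_load(stored_user_overrides) or {}
assigned = user_overrides.get('assigned', False)

# Any truthy value enables the chart on the next 'system application-apply rook-ceph'.
print(bool(assigned))  # True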
@@ -12,15 +12,34 @@ HELM_NS_ROOK_CEPH = 'rook-ceph'
HELM_APP_ROOK_CEPH = 'rook-ceph'

# Helm: Supported charts:
# These values match the names in the chart package's Chart.yaml
# Helm: Supported charts:
# 1 - These are names representing the helm chart, they do not have to match
#     exactly
#
# 2 - This needs to be reflected in the helm plugin name in setup.conf so that
#     the name presented via the helm-overrides api corresponds to this plugin
#     systemconfig.helm_plugins.deployment_manager =
#       001_YYY = <plugin path>; where YYY == value of HELM_CHART_XXX
#
HELM_CHART_ROOK_CEPH = 'rook-ceph'
HELM_CHART_ROOK_CEPH_CLUSTER = 'rook-ceph-cluster'
HELM_CHART_ROOK_CEPH_FLOATING_MONITOR = 'rook-ceph-floating-monitor'
HELM_CHART_ROOK_CEPH_PROVISIONER = 'rook-ceph-provisioner'

# FluxCD
FLUXCD_HELMRELEASE_ROOK_CEPH = 'rook-ceph'
FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER = 'rook-ceph-cluster'
FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER = 'rook-ceph-provisioner'
# 1 - Current app framework limitation requires these names to be the same as the helm
#     chart above
#
# 2 - This needs to be reflected in:
#     - helmrelease.yaml : metadata.name; set this to FLUXCD_HELMRELEASE_XXX
#     - helmrelease.yaml : YYY-system-overrides.yaml; where YYY == value of
#       FLUXCD_HELMRELEASE_XXX
#     - kustomization.yaml: YYY-system-overrides.yaml; where YYY == value of
#       FLUXCD_HELMRELEASE_XXX
FLUXCD_HELMRELEASE_ROOK_CEPH = HELM_CHART_ROOK_CEPH
FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER = HELM_CHART_ROOK_CEPH_CLUSTER
FLUXCD_HELMRELEASE_ROOK_CEPH_FLOATING_MONITOR = HELM_CHART_ROOK_CEPH_FLOATING_MONITOR
FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER = HELM_CHART_ROOK_CEPH_PROVISIONER

# Secrets: Cluster
SECRET_NAMESPACE = 'rook-ceph'

@@ -99,3 +118,8 @@ CEPH_CLUSTER_OSD_FAIL_DOMAIN = 'osd'
ALARM_TYPE_MISSING_MONS = "monitors"
ALARM_TYPE_REPLICATION_MISSING_OSDS = "osds"
ALARM_TYPE_REPLICATION_MISSING_HOSTS = "osd-hosts"

# Supported network deployments
# - constants.NETWORK_TYPE_CLUSTER_HOST (not validated)
# - constants.NETWORK_TYPE_CLUSTER_POD
DEFAULT_NETWORK_TYPE = constants.NETWORK_TYPE_CLUSTER_POD
@@ -4,7 +4,10 @@
# SPDX-License-Identifier: Apache-2.0
#

import yaml

from k8sapp_rook_ceph.common import constants as app_constants
from sysinv.common import constants
from sysinv.common import exception

@@ -31,3 +34,31 @@ def get_ceph_rook_backend(dbapi):
    except exception.StorageBackendNotFoundByName:
        return None
    return sb


def is_ceph_controllerfs_provisioned(dbapi):
    try:
        dbapi.controller_fs_get_by_name(
            constants.FILESYSTEM_NAME_CEPH_DRBD)
    except exception.ControllerFSNameNotFound:
        return False

    return True


def is_floating_monitor_assigned(dbapi):
    # TODO (rchurch): replace this with function states from ceph
    # controllerfs

    try:
        overrides = dbapi.helm_override_get(
            dbapi.kube_app_get(app_constants.HELM_APP_ROOK_CEPH).id,
            app_constants.HELM_CHART_ROOK_CEPH_FLOATING_MONITOR,
            app_constants.HELM_NS_ROOK_CEPH,
        )
        user_overrides = {}
        if overrides.user_overrides:
            user_overrides = yaml.safe_load(overrides.user_overrides)
        return user_overrides['assigned'] if 'assigned' in user_overrides else False
    except exception.HelmOverrideNotFound:
        return False
@@ -324,12 +324,6 @@ class RookCephClusterHelm(storage.StorageBaseHelm):

    def _get_cluster_overrides(self):

        cluster_host_addr_name = utils.format_address_name(
            constants.CONTROLLER_HOSTNAME, constants.NETWORK_TYPE_CLUSTER_HOST)
        address = utils.get_primary_address_by_name(
            self.dbapi, cluster_host_addr_name,
            constants.NETWORK_TYPE_CLUSTER_HOST, True)

        cluster = {
            'mon': {
                'count': self._get_desired_mon_count(),
@@ -338,7 +332,7 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
                'count': self._get_desired_mon_count(),
            },
            'network': {
                'ipFamily': 'IPv' + str(address.family)
                'ipFamily': self._get_ip_family()
            },
            'storage': {
                'nodes': self._get_osds_by_node(),
@@ -0,0 +1,69 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

from k8sapp_rook_ceph.common import constants as app_constants
from k8sapp_rook_ceph.helm import storage

from oslo_log import log as logging
from sysinv.common import constants
from sysinv.common import exception

LOG = logging.getLogger(__name__)


class RookCephFloatingMonitorHelm(storage.StorageBaseHelm):
    """Class to encapsulate helm operations for the rook-ceph-floating-monitor
    chart
    """

    CHART = app_constants.HELM_CHART_ROOK_CEPH_FLOATING_MONITOR
    HELM_RELEASE = app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_FLOATING_MONITOR

    def _is_enabled(self, app_name, chart_name, namespace):
        """
        Determine if this chart should be deployed.

        This chart is disabled by default via application metadata and should
        only be enabled if a Ceph controller filesystem has been deployed.

        :param app_name: Application name
        :param chart_name: Chart supplied with the application
        :param namespace: Namespace where the chart will be executed
        """
        return (self._is_ceph_controllerfs_provisioned() and
                self._is_floating_monitor_assigned())

    def _get_static_floating_mon_ip(self, family):
        # TODO (rchurch): This needs to be updated to grab the pool info for the
        # network, calculate, and reserve (if needed) an IP in the static range
        # for the network:
        # https://kubernetes.io/docs/concepts/services-networking/cluster-ip-allocation/#avoid-ClusterIP-conflict

        if app_constants.DEFAULT_NETWORK_TYPE == constants.NETWORK_TYPE_CLUSTER_POD:
            if family == "IPv4":
                return '10.96.0.16'
            else:
                return '[aefd:207::16]'

    def get_overrides(self, namespace=None):
        ip_family = self._get_ip_family()

        overrides = {
            app_constants.HELM_NS_ROOK_CEPH: {
                'config': {
                    'ip_family': ip_family,
                    'ip_address': self._get_static_floating_mon_ip(ip_family),
                }
            }
        }

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        else:
            return overrides
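For reference, a sketch of the override structure this plugin is expected to emit for the default cluster-pod network on an IPv4 system; the literal values come from the constants in the file above, and the actual output depends on the primary pool family of the deployed network:

# Illustrative only; mirrors get_overrides() above with assumed inputs.
expected_overrides = {
    'rook-ceph': {                       # app_constants.HELM_NS_ROOK_CEPH
        'config': {
            'ip_family': 'IPv4',         # from _get_ip_family()
            'ip_address': '10.96.0.16',  # static cluster-pod address for the monitor
        }
    }
}

print(expected_overrides['rook-ceph']['config'])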
@@ -140,11 +140,22 @@ class StorageBaseHelm(BaseHelm):

        return replication

    def _is_ceph_controllerfs_provisioned(self):
        try:
            self.dbapi.controller_fs_get_by_name(
                constants.FILESYSTEM_NAME_CEPH_DRBD)
        except exception.ControllerFSNameNotFound:
            return False

        return True

    def _get_ip_family(self):

        network = None
        try:
            network = self.dbapi.network_get_by_type(
                app_constants.DEFAULT_NETWORK_TYPE)
        except exception.NetworkTypeNotFound:
            pass

        if network:
            return network.primary_pool_family

        return 'IPv4'

    def _is_ceph_controllerfs_provisioned(self):
        return app_utils.is_ceph_controllerfs_provisioned(self.dbapi)

    def _is_floating_monitor_assigned(self):
        return app_utils.is_floating_monitor_assigned(self.dbapi)
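A self-contained sketch of the fallback behaviour _get_ip_family() introduces, under the assumption that a sysinv network object exposes primary_pool_family as 'IPv4' or 'IPv6': if the default (cluster-pod) network cannot be looked up, the plugins fall back to IPv4.

# Hypothetical stand-ins for the sysinv network object and lookup.
class Network:
    def __init__(self, primary_pool_family):
        self.primary_pool_family = primary_pool_family


def get_ip_family(network):
    """Return the IP family of the network, defaulting to IPv4 when unknown."""
    return network.primary_pool_family if network else 'IPv4'


print(get_ip_family(Network('IPv6')))  # IPv6
print(get_ip_family(None))             # IPv4 fallback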
@@ -109,7 +109,7 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
        """

        LOG.info("Cleaning up the ceph cluster")
        self.cluster_cleanup()
        self.cluster_cleanup(app_op)

        LOG.info("Removing ceph alarms")
        self.remove_alarms(app_op)
@@ -145,7 +145,7 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):

        self.delete_crds()

    def cluster_cleanup(self):
    def cluster_cleanup(self, app_op):
        """ Run cluster cleanup

        This function set the cleanup confirmation in the ceph cluster
@@ -155,48 +155,99 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
        self._load_kube_config()

        kube_batch = client.BatchV1Api()
        kube_core = client.CoreV1Api()
        kube_custom = client.CustomObjectsApi()

        namespace = 'rook-ceph'

        # Patch the cluster to allow for cleanup
        # -------
        # Patch the cluster to allow for cleanup. Stops cluster orchestration
        # -------
        LOG.info("Cleanup: Patch the cluster to enable clean-up operations")
        try:
            patch = {'spec': {'cleanupPolicy': {'confirmation': 'yes-really-destroy-data'}}}
            kube_custom.patch_namespaced_custom_object('ceph.rook.io',
                                                       'v1',
                                                       namespace,
                                                       app_constants.HELM_NS_ROOK_CEPH,
                                                       'cephclusters',
                                                       'rook-ceph',
                                                       patch)
        except Exception as e:
            LOG.error("Error applying ceph cluster patch: %s" % e)

        # Delete the non-operator helm releases
        # -------
        # Delete the non-operator helm ceph provisioner release
        # -------
        LOG.info("Cleanup: Remove helmrelease: {}".format(
            app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER))
        try:
            kube_custom.delete_namespaced_custom_object('helm.toolkit.fluxcd.io',
                                                        'v2',
                                                        namespace,
                                                        'helmreleases',
                                                        'rook-ceph-provisioner')
            kube_custom.delete_namespaced_custom_object(
                constants.FLUXCD_CRD_HELM_REL_GROUP,
                constants.FLUXCD_CRD_HELM_REL_VERSION,
                app_constants.HELM_NS_ROOK_CEPH,
                constants.FLUXCD_CRD_HELM_REL_PLURAL,
                app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER,
                body=client.V1DeleteOptions(propagation_policy='Foreground'))
        except Exception as e:
            LOG.error("Error deleting 'rook-ceph-provisioner' helm release: %s" % e)
            LOG.error("Error deleting %s helm release: %s" % (
                app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER, e))

        # -------
        # Conditionally delete the floating monitor, if enabled
        # -------
        if (app_utils.is_ceph_controllerfs_provisioned(app_op._dbapi) and
                app_utils.is_floating_monitor_assigned(app_op._dbapi)):
            # Patch the floating monitor configmap to disable managed removal
            # and just tear down the floating monitor
            LOG.info("Cleanup: Patch rook-ceph-mon-float-inputs to disable managed removal")
            try:
                cm = kube_core.read_namespaced_config_map('rook-ceph-mon-float-inputs',
                                                          app_constants.HELM_NS_ROOK_CEPH,)
                cm.data['managed_remove'] = 'false'
                kube_core.patch_namespaced_config_map('rook-ceph-mon-float-inputs',
                                                      app_constants.HELM_NS_ROOK_CEPH,
                                                      cm)
            except Exception as e:
                LOG.error("Failed to patch configmap: %s" % (e))

            LOG.info("Cleanup: Remove helmrelease: {}".format(
                app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_FLOATING_MONITOR))
            try:
                kube_custom.delete_namespaced_custom_object(
                    constants.FLUXCD_CRD_HELM_REL_GROUP,
                    constants.FLUXCD_CRD_HELM_REL_VERSION,
                    app_constants.HELM_NS_ROOK_CEPH,
                    constants.FLUXCD_CRD_HELM_REL_PLURAL,
                    app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_FLOATING_MONITOR,
                    grace_period_seconds=60,
                    body=client.V1DeleteOptions(propagation_policy='Foreground'))
            except Exception as e:
                LOG.error("Error deleting %s helm release: %s" % (
                    app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_FLOATING_MONITOR, e))

        # -------
        # Delete the cluster to allow for cleanup
        # -------
        LOG.info("Cleanup: Remove helmrelease: {}".format(
            app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER))
        try:
            kube_custom.delete_namespaced_custom_object('helm.toolkit.fluxcd.io',
                                                        'v2',
                                                        namespace,
                                                        'helmreleases',
                                                        'rook-ceph-cluster')
            kube_custom.delete_namespaced_custom_object(
                constants.FLUXCD_CRD_HELM_REL_GROUP,
                constants.FLUXCD_CRD_HELM_REL_VERSION,
                app_constants.HELM_NS_ROOK_CEPH,
                constants.FLUXCD_CRD_HELM_REL_PLURAL,
                app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER,
                body=client.V1DeleteOptions(propagation_policy='Foreground'))
        except Exception as e:
            LOG.error("Error deleting 'rook-ceph-cluster' helm release: %s" % e)
            LOG.error("Error deleting %s helm release: %s" % (
                app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER, e))

        # wait for the cleanup jobs to be completed
        timeout = 120
        timeout_start = time()
        while time() < timeout_start + timeout:
            jobs = kube_batch.list_namespaced_job(namespace,
            jobs = kube_batch.list_namespaced_job(app_constants.HELM_NS_ROOK_CEPH,
                                                  label_selector='app=rook-ceph-cleanup')
            if not jobs.items:
                sleep(1)
                continue
            is_jobs_completed = True
            for job in jobs.items:
@@ -206,12 +257,12 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
            if is_jobs_completed:
                LOG.info("Jobs completed.")
                break
            LOG.info("Waiting jobs to be completed...")
            sleep(1)

        # remove the completed cleanup jobs/pods
        try:
            kube_batch.delete_collection_namespaced_job(namespace,
            LOG.info("Cleanup: Remove cleanup jobs")
            kube_batch.delete_collection_namespaced_job(app_constants.HELM_NS_ROOK_CEPH,
                                                        label_selector='app=rook-ceph-cleanup',
                                                        propagation_policy='Foreground')
        except Exception as e:
@@ -485,6 +536,9 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
        ceph_rook_backend = app_utils.get_ceph_rook_backend(dbapi)
        active_controller = utils.HostHelper.get_active_controller(dbapi)
        cephmon_label_count = dbapi.count_hosts_by_hostfs(constants.FILESYSTEM_NAME_CEPH)
        if (app_utils.is_ceph_controllerfs_provisioned(app_op._dbapi) and
                app_utils.is_floating_monitor_assigned(app_op._dbapi)):
            cephmon_label_count += 1

        # CHECK, RAISE, and ALLOW: See if there is a minimum amount of MONs
        if ((cephmon_label_count < 1) or
@@ -33,6 +33,7 @@ systemconfig.helm_plugins.rook_ceph =
    001_rook-ceph = k8sapp_rook_ceph.helm.rook_ceph:RookCephHelm
    002_rook-ceph-cluster = k8sapp_rook_ceph.helm.rook_ceph_cluster:RookCephClusterHelm
    003_rook-ceph-provisioner = k8sapp_rook_ceph.helm.rook_ceph_provisioner:RookCephClusterProvisionerHelm
    004_rook-ceph-floating-monitor = k8sapp_rook_ceph.helm.rook_ceph_floating_monitor:RookCephFloatingMonitorHelm

systemconfig.fluxcd.kustomize_ops =
    rook-ceph = k8sapp_rook_ceph.kustomize.kustomize_rook_ceph:RookCephFluxCDKustomizeOperator
@@ -3,10 +3,11 @@ Section: admin
Priority: optional
Maintainer: StarlingX Developers <starlingx-discuss@lists.starlingx.io>
Build-Depends: debhelper-compat (= 13),
               rook-ceph-helm,
               build-info,
               rook-ceph-provisioner-helm,
               python3-k8sapp-rook-ceph-wheels,
               rook-ceph-helm,
               rook-ceph-provisioner-helm,
               rook-ceph-floating-monitor-helm,
Standards-Version: 4.1.2
Homepage: https://www.starlingx.io
@@ -31,7 +31,7 @@ override_dh_auto_build:
	cp -Rv fluxcd-manifests $(STAGING)

	mkdir -p $(STAGING)/charts
	cp $(HELM_FOLDER)/rook-ceph*.tgz $(STAGING)/charts
	cp $(HELM_FOLDER)/rook-ceph-*.tgz $(STAGING)/charts

	# Adjust the helmrelease yamls based on the chart versions
	for c in $(STAGING)/charts/*; do \
@@ -7,46 +7,90 @@
#

RETVAL=0
DRBD_MOUNT="/var/lib/ceph/mon-float"
DRBD_MAJ_DEV_NUM="147"
REQUEST_TIMEOUT='5s'

################################################################################
# Start Action
################################################################################
function start {
    return
    # Add label for pod scheduling
    # NOTE: Because SM and k8s can be restarted independently the k8s API may not
    # be available at the time of the start action. Don't fail. Confirm label is
    # applied in the status check
    kubectl --kubeconfig=/etc/kubernetes/admin.conf \
        --request-timeout ${REQUEST_TIMEOUT} \
        label node $(hostname) \
        ceph-mon-float-placement=enabled
    RETVAL=0
}

################################################################################
# Stop Action
################################################################################
function stop {
    pgrep ceph-mon
    if [ x"$?" = x"0" ]; then
        kubectl --kubeconfig=/etc/kubernetes/admin.conf delete \
            deployments.apps -n rook-ceph rook-ceph-mon-a
        kubectl --kubeconfig=/etc/kubernetes/admin.conf delete po \
            -n rook-ceph --selector="app=rook-ceph-mon,mon=a"
    fi
    # Remove the label to prevent pod scheduling
    # NOTE: Because SM and k8s can be restarted independently the k8s API may not
    # be available at the time of the start action. Don't fail. Confirm label is
    # applied in the status check
    kubectl --kubeconfig=/etc/kubernetes/admin.conf \
        --request-timeout ${REQUEST_TIMEOUT} \
        label node $(hostname) \
        ceph-mon-float-placement-

    pgrep ceph-osd
    if [ x"$?" = x"0" ]; then
        kubectl --kubeconfig=/etc/kubernetes/admin.conf delete \
            deployments.apps -n rook-ceph \
            --selector="app=rook-ceph-osd,failure-domain=$(hostname)"
        kubectl --kubeconfig=/etc/kubernetes/admin.conf delete po \
            --selector="app=rook-ceph-osd,failure-domain=$(hostname)" \
            -n rook-ceph
    # Get floating monitor pod running on this node
    POD=$(kubectl --kubeconfig=/etc/kubernetes/admin.conf \
        --request-timeout ${REQUEST_TIMEOUT} \
        get pod -n rook-ceph \
        -l app="rook-ceph-mon,mon=float" --no-headers=true \
        --field-selector=spec.nodeName=$(hostname) \
        -o=custom-columns=NAME:.metadata.name)
    # Is there a floating monitor here?
    if [ ! -z "${POD}" ]; then
        # delete detected pod to force a reschedule
        kubectl --kubeconfig=/etc/kubernetes/admin.conf \
            --request-timeout ${REQUEST_TIMEOUT} \
            delete pod -n rook-ceph \
            ${POD}
    fi
    RETVAL=0
}

################################################################################
# Status Action
################################################################################
function status {
    pgrep sysinv-api

    RETVAL=$?
    # Status is based on if this host is labeled correctly to run the floating
    # monitor

    return
    # Is this host labeled for the floating monitor
    NODE_LABELED=$(kubectl --kubeconfig=/etc/kubernetes/admin.conf \
        --request-timeout ${REQUEST_TIMEOUT} \
        get nodes \
        -l ceph-mon-float-placement --no-headers=true \
        --field-selector=metadata.name=$(hostname) \
        -o=custom-columns=NAME:.metadata.name)

    mountpoint -d ${DRBD_MOUNT} | grep -q ^${DRBD_MAJ_DEV_NUM}
    if [ $? -eq 0 ]; then
        if [ -z "${NODE_LABELED}" ]; then
            kubectl --kubeconfig=/etc/kubernetes/admin.conf \
                --request-timeout ${REQUEST_TIMEOUT} \
                label node $(hostname) \
                ceph-mon-float-placement=enabled
        fi
        RETVAL=0
    else
        if [ ! -z "${NODE_LABELED}" ]; then
            kubectl --kubeconfig=/etc/kubernetes/admin.conf \
                --request-timeout ${REQUEST_TIMEOUT} \
                label node $(hostname) \
                ceph-mon-float-placement-
        fi
        RETVAL=1
    fi
}

################################################################################
@@ -2,6 +2,9 @@ app_name: APP_REPLACE_NAME
app_version: APP_REPLACE_VERSION
helm_repo: HELM_REPLACE_REPO

disabled_charts:
  - rook-ceph-floating-monitor

helm_toolkit_required: false
maintain_user_overrides: true
maintain_attributes: true
@@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0
#

apiVersion: source.toolkit.fluxcd.io/v1beta1
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: stx-platform
@@ -11,4 +11,5 @@ resources:
  - base
  - rook-ceph
  - rook-ceph-cluster
  - rook-ceph-floating-monitor
  - rook-ceph-provisioner
@@ -31,6 +31,7 @@ spec:
      disableHooks: false
    uninstall:
      disableHooks: false
      deletionPropagation: foreground
  valuesFrom:
    - kind: Secret
      name: rook-ceph-cluster-static-overrides
@@ -33,8 +33,15 @@ cephClusterSpec:
    image: quay.io/ceph/ceph:v18.2.2
    allowUnsupported: true
  network:
    provider: host
    #ipFamily: "IPv6"
    connections:
      encryption:
        enabled: false
      compression:
        enabled: false
      requireMsgr2: false
    # provider: host
    ipFamily: IPv4
    dualStack: false

  # Whether or not continue if PGs are not clean during an upgrade
  continueUpgradeAfterChecksEvenIfNotHealthy: false
@@ -0,0 +1,41 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

apiVersion: "helm.toolkit.fluxcd.io/v2"
kind: HelmRelease
metadata:
  name: rook-ceph-floating-monitor
  labels:
    chart_group: starlingx-rook-charts
spec:
  releaseName: rook-ceph-floating-monitor
  chart:
    spec:
      chart: rook-ceph-floating-monitor
      version: REPLACE_HELM_CHART_VERSION
      sourceRef:
        kind: HelmRepository
        name: stx-platform
  interval: 5m
  timeout: 30m
  dependsOn:
    - name: rook-ceph-cluster
  test:
    enable: false
  install:
    disableHooks: false
  upgrade:
    disableHooks: false
  uninstall:
    disableHooks: false
    deletionPropagation: foreground
  valuesFrom:
    - kind: Secret
      name: rook-ceph-floating-monitor-static-overrides
      valuesKey: rook-ceph-floating-monitor-static-overrides.yaml
    - kind: Secret
      name: rook-ceph-floating-monitor-system-overrides
      valuesKey: rook-ceph-floating-monitor-system-overrides.yaml
@@ -0,0 +1,18 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

namespace: rook-ceph
resources:
  - helmrelease.yaml
secretGenerator:
  - name: rook-ceph-floating-monitor-static-overrides
    files:
      - rook-ceph-floating-monitor-static-overrides.yaml
  - name: rook-ceph-floating-monitor-system-overrides
    files:
      - rook-ceph-floating-monitor-system-overrides.yaml
generatorOptions:
  disableNameSuffixHash: true
@@ -0,0 +1,11 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# NOTE: keep alphabetical so can easily compare with runtime env

images:
  tags:
    ceph: quay.io/ceph/ceph:v18.2.2
    kubectl: docker.io/bitnami/kubectl:1.29
@@ -0,0 +1,5 @@
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -31,6 +31,7 @@ spec:
      disableHooks: false
    uninstall:
      disableHooks: false
      deletionPropagation: foreground
  valuesFrom:
    - kind: Secret
      name: rook-ceph-provisioner-static-overrides