Add dynamic overrides in rook-ceph app

This change adds new dynamic overrides and enables/disables services
based on the storage backend.

Dynamic overrides added:
  Overrides based on how many hosts have host-fs ceph:
    - mds replicas size
    - mon count
    - mgr count
  Overrides based on host-stor
   - nodes
     - devices (osds)

Services that can be enabled:
 - CephFS (filesystem)
 - RBD (block or ecblock)
 - RGW (object)

Test Plan:
 - PASS: Load the rook-ceph app and check system-overrides for each
         chart
 - PASS: Apply the rook-ceph app and check that system-overrides change
         only if something was changed before applying the app
 - PASS: Check if the services are enabled correctly based on the
         storage-backend services column
 - PASS: Check that Ceph is in HEALTH_OK status

Depends-On: https://review.opendev.org/c/starlingx/config/+/921801

Story: 2011066
Task: 50298

Change-Id: Ib245b0f1195d4c6437ed45346fe00cf16a69f67f
Signed-off-by: Gustavo Ornaghi Antunes <gustavo.ornaghiantunes@windriver.com>
This commit is contained in:
Gustavo Ornaghi Antunes 2024-06-07 16:45:49 -03:00 committed by Robert Church
parent a9f3b1e3da
commit cd79d4443a
8 changed files with 574 additions and 369 deletions

View File

@ -82,6 +82,13 @@ spec:
app: ceph-mon-audit
app.starlingx.io/component: platform
spec:
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/master
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/control-plane
serviceAccountName: {{ .Values.rbac.serviceAccount }}
restartPolicy: OnFailure
hostNetwork: true

View File

@ -22,18 +22,53 @@ FLUXCD_HELMRELEASE_ROOK_CEPH = 'rook-ceph'
FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER = 'rook-ceph-cluster'
FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER = 'rook-ceph-provisioner'
ROOK_CEPH_CLUSTER_SECRET_NAMESPACE = 'rook-ceph'
SECRET_NAMESPACE = 'rook-ceph'
ROOK_CEPH_RDB_SECRET_NAME = 'rook-csi-rbd-provisioner'
ROOK_CEPH_RDB_NODE_SECRET_NAME = 'rook-csi-rbd-node'
RBD_SECRET_NAME = 'rook-csi-rbd-provisioner'
RBD_NODE_SECRET_NAME = 'rook-csi-rbd-node'
ROOK_CEPH_FS_SECRET_NAME = 'rook-csi-cephfs-provisioner'
ROOK_CEPH_FS_NODE_SECRET_NAME = 'rook-csi-cephfs-node'
ROOK_CEPH_CLUSTER_RDB_STORAGE_CLASS_NAME = 'general'
ROOK_CEPH_CLUSTER_CEPHFS_STORAGE_CLASS_NAME = 'cephfs'
ROOK_CEPH_CLUSTER_CEPHFS_FILE_SYSTEM_NAME = 'kube-cephfs'
FS_SECRET_NAME = 'rook-csi-cephfs-provisioner'
FS_NODE_SECRET_NAME = 'rook-csi-cephfs-node'
# Storage Backend Name
SB_NAME = constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH_ROOK]
# Node labels
LABEL_PLACEMENT_MON = "ceph-mon-placement"
LABEL_PLACEMENT_MGR = "ceph-mgr-placement"
LABEL_PLACEMENT_MDS = LABEL_PLACEMENT_MON
LABEL_PLACEMENT_OSD = "ceph-osd-placement"
# Deployment Models (values used in the storage backend)
DEP_MODEL_CONTROLLER = constants.CEPH_ROOK_DEPLOYMENT_CONTROLLER
DEP_MODEL_DEDICATED = constants.CEPH_ROOK_DEPLOYMENT_DEDICATED
DEP_MODEL_OPEN = constants.CEPH_ROOK_DEPLOYMENT_OPEN
# Services (values used in the storage backend)
SVC_BLOCK = constants.SB_SVC_CEPH_ROOK_BLOCK
SVC_ECBLOCK = constants.SB_SVC_CEPH_ROOK_ECBLOCK
SVC_FS = constants.SB_SVC_CEPH_ROOK_FILESYSTEM
SVC_OBJ = constants.SB_SVC_CEPH_ROOK_OBJECT
BLOCK_STORAGE_CLASS_NAME = 'general'
ECBLOCK_STORAGE_CLASS_NAME = 'general'
CEPHFS_STORAGE_CLASS_NAME = 'cephfs'
RGW_STORAGE_CLASS_NAME = 'cephrgw'
STORAGE_CLASS_NAMES = {
SVC_BLOCK: BLOCK_STORAGE_CLASS_NAME,
SVC_ECBLOCK: ECBLOCK_STORAGE_CLASS_NAME,
SVC_FS: CEPHFS_STORAGE_CLASS_NAME,
SVC_OBJ: RGW_STORAGE_CLASS_NAME
}
BLOCK_NAME = 'kube-rbd'
ECBLOCK_NAME = 'kube-rbd'
RGW_NAME = 'kube-rgw'
CEPHFS_NAME = 'kube-cephfs'
# Chart specific dynamic overrides
# -> HELM_CHART_ROOK_CEPH_CLUSTER
CEPH_CLUSTER_HOST_FAIL_DOMAIN = 'host'
CEPH_CLUSTER_OSD_FAIL_DOMAIN = 'osd'

View File

@ -12,14 +12,26 @@ from sysinv.common import exception
class RookCephHelm(storage.StorageBaseHelm):
"""Class to encapsulate helm operations for the rook-operator chart"""
CHART = app_constants.HELM_CHART_ROOK_CEPH
HELM_RELEASE = app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH
def _get_csi_overrides(self):
    """Build the ``csi`` section of the rook-ceph operator overrides.

    The RBD driver and its snapshotter are always enabled. The CephFS
    driver and its snapshotter follow the state of the filesystem service.

    :returns: dict holding the four CSI enable flags.
    """
    # Resolve the service state once so both CephFS flags are guaranteed
    # to agree and the lookup is not repeated.
    cephfs_enabled = self._is_service_enabled(app_constants.SVC_FS)

    return {
        'enableRbdDriver': True,
        'enableRBDSnapshotter': True,
        'enableCephfsDriver': cephfs_enabled,
        'enableCephfsSnapshotter': cephfs_enabled,
    }
def get_overrides(self, namespace=None):
secrets = [{"name": "default-registry-key"}]
overrides = {
app_constants.HELM_NS_ROOK_CEPH: {
'imagePullSecrets': secrets,
'csi': self._get_csi_overrides()
}
}

View File

@ -11,9 +11,9 @@ from k8sapp_rook_ceph.helm import storage
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
from sysinv.common import utils
import socket
import math
class RookCephClusterHelm(storage.StorageBaseHelm):
@ -22,43 +22,84 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
CHART = app_constants.HELM_CHART_ROOK_CEPH_CLUSTER
HELM_RELEASE = app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_CLUSTER
def get_overrides(self, namespace=None):
overrides = {
app_constants.HELM_NS_ROOK_CEPH: {
'cephClusterSpec': self._get_cluster_override(),
'cephBlockPools': self._get_rdb_override(),
'cephFileSystems': self._get_cephfs_override(),
'hook': self._get_hook_override(),
}
FLOATING_MON_ALLOWED = False
def _get_mon_count(self):
    """Return the number of Ceph monitors to deploy.

    The base value is the number of hosts labeled for monitor placement.
    One extra (floating) monitor is added only when the system is AIO-DX,
    the database reports exactly two hosts, and FLOATING_MON_ALLOWED is
    set on the class.
    """
    mon_total = self._get_host_count_with_label(
        app_constants.LABEL_PLACEMENT_MON)

    wants_floating_mon = (
        utils.is_aio_duplex_system(self.dbapi) and
        self.dbapi.count_hosts_matching_criteria() == 2 and
        self.FLOATING_MON_ALLOWED)
    if wants_floating_mon:
        # Bump for floating monitor on a AIO-DX only.
        mon_total += 1

    return mon_total
def _get_mds_count(self):
    """Return the number of active MDS daemons for the filesystem.

    On AIO-SX the monitor count is used as is. On every other system the
    monitor count is halved, rounding down. The result is never lower
    than 1.

    :returns: int, always >= 1.
    """
    mds_count = self._get_mon_count()

    if not utils.is_aio_simplex_system(self.dbapi):
        # Integer division so the value always stays an int.
        mds_count //= 2

    # A single monitor-labeled host on a non-simplex system would halve to
    # 0, which cannot be used as the number of active metadata servers.
    return max(1, mds_count)
def _get_failure_domain(self):
    """Return the failure domain for the Ceph pools.

    AIO-SX always uses the OSD failure domain. Otherwise the controller
    and dedicated deployment models use the host failure domain, and any
    other deployment model falls back to the OSD failure domain.
    """
    if utils.is_aio_simplex_system(self.dbapi):
        return app_constants.CEPH_CLUSTER_OSD_FAIL_DOMAIN

    model = self._get_deployment_model()
    host_level_models = (app_constants.DEP_MODEL_CONTROLLER,
                         app_constants.DEP_MODEL_DEDICATED)
    if model in host_level_models:
        return app_constants.CEPH_CLUSTER_HOST_FAIL_DOMAIN

    return app_constants.CEPH_CLUSTER_OSD_FAIL_DOMAIN
def _get_mon_hostname_list(self):
    """Return the UTF-8 encoded hostnames of the monitor-labeled hosts."""
    mon_hosts = self._get_hosts_with_label(
        app_constants.LABEL_PLACEMENT_MON)

    encoded_names = []
    for mon_host in mon_hosts:
        encoded_names.append(mon_host.hostname.encode('utf8', 'strict'))
    return encoded_names
def _get_duplex_preparation(self):
duplex = {
'enable': self.FLOATING_MON_ALLOWED
}
if namespace in self.SUPPORTED_NAMESPACES:
return overrides[namespace]
elif namespace:
raise exception.InvalidHelmNamespace(chart=self.CHART,
namespace=namespace)
else:
return overrides
if utils.is_aio_duplex_system(self.dbapi) and self.FLOATING_MON_ALLOWED:
# This code can only be executed on the active controller
duplex.update({'activeController':
utils.get_local_controller_hostname().encode(
'utf8', 'strict')})
def _get_cephfs_override(self):
if cutils.is_aio_simplex_system(self.dbapi):
replica = 1
else:
replica = 2
cluster_host_addr_name = utils.format_address_name(
constants.CONTROLLER_HOSTNAME, constants.NETWORK_TYPE_CLUSTER_HOST)
address = utils.get_primary_address_by_name(
self.dbapi, cluster_host_addr_name,
constants.NETWORK_TYPE_CLUSTER_HOST, True)
duplex.update({'floatIP': utils.format_url_address(address.address)})
return duplex
def _get_cephfs_overrides(self):
parameters = {
'csi.storage.k8s.io/provisioner-secret-name': app_constants.ROOK_CEPH_FS_SECRET_NAME,
'csi.storage.k8s.io/provisioner-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/controller-expand-secret-name': app_constants.ROOK_CEPH_FS_SECRET_NAME,
'csi.storage.k8s.io/controller-expand-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/node-stage-secret-name': app_constants.ROOK_CEPH_FS_NODE_SECRET_NAME,
'csi.storage.k8s.io/node-stage-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/provisioner-secret-name':
app_constants.FS_SECRET_NAME,
'csi.storage.k8s.io/provisioner-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/controller-expand-secret-name':
app_constants.FS_SECRET_NAME,
'csi.storage.k8s.io/controller-expand-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/node-stage-secret-name':
app_constants.FS_NODE_SECRET_NAME,
'csi.storage.k8s.io/node-stage-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/fstype': 'ext4'
}
storage_class = {
'enabled': True,
'name': app_constants.ROOK_CEPH_CLUSTER_CEPHFS_STORAGE_CLASS_NAME,
'name': app_constants.STORAGE_CLASS_NAMES[app_constants.SVC_FS],
'isDefault': False,
'pool': 'data',
'allowVolumeExpansion': True,
@ -66,6 +107,22 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
'parameters': parameters
}
anti_affinity = {
'requiredDuringSchedulingIgnoredDuringExecution': [{
'labelSelector': {
'matchExpressions': [{
'key': 'app',
'operator': 'In',
'values': ['rook-ceph-mds']
}]
},
'topologyKey': 'kubernetes.io/hostname'
}]
}
if utils.is_aio_simplex_system(self.dbapi):
anti_affinity = {}
placement = {
'nodeAffinity': {
'requiredDuringSchedulingIgnoredDuringExecution': {
@ -78,29 +135,29 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
}]
}
},
'podAntiAffinity': anti_affinity,
'tolerations': [{
'effect': 'NoSchedule',
'operator': 'Exists',
'key': 'node-role.kubernetes.io/master'
},
{
}, {
'effect': 'NoSchedule',
'operator': 'Exists',
'key': 'node-role.kubernetes.io/control-plane'
}]
}
ceph_fs_config = [{
'name': app_constants.ROOK_CEPH_CLUSTER_CEPHFS_FILE_SYSTEM_NAME,
fs_config = [{
'name': app_constants.CEPHFS_NAME,
'spec': {
'metadataPool': {
'replicated':
{'size': replica}},
{'size': self._get_data_replication_factor()}},
'metadataServer': {
'labels': {
'app.starlingx.io/component': "platform"
'app.starlingx.io/component': 'platform'
},
'activeCount': 1,
'activeCount': self._get_mds_count(),
'activeStandby': True,
'placement': placement,
'resources': {
@ -111,37 +168,39 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
'cpu': '0'}},
'priorityClassName': 'system-cluster-critical'},
'dataPools': [{
'failureDomain': 'host',
'failureDomain': self._get_failure_domain(),
'name': 'data',
'replicated':
{'size': replica}}],
{'size': self._get_data_replication_factor()}}],
},
'storageClass': storage_class
}]
return ceph_fs_config
return fs_config
def _get_rdb_override(self):
if cutils.is_aio_simplex_system(self.dbapi):
replica = 1
else:
replica = 2
def _get_block_overrides(self):
parameters = {
'imageFormat': '2',
'imageFeatures': 'layering',
'csi.storage.k8s.io/provisioner-secret-name': app_constants.ROOK_CEPH_RDB_SECRET_NAME,
'csi.storage.k8s.io/provisioner-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/controller-expand-secret-name': app_constants.ROOK_CEPH_RDB_SECRET_NAME,
'csi.storage.k8s.io/controller-expand-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/node-stage-secret-name': app_constants.ROOK_CEPH_RDB_NODE_SECRET_NAME,
'csi.storage.k8s.io/node-stage-secret-namespace': app_constants.ROOK_CEPH_CLUSTER_SECRET_NAMESPACE,
'csi.storage.k8s.io/provisioner-secret-name':
app_constants.RBD_SECRET_NAME,
'csi.storage.k8s.io/provisioner-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/controller-expand-secret-name':
app_constants.RBD_SECRET_NAME,
'csi.storage.k8s.io/controller-expand-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/node-stage-secret-name':
app_constants.RBD_NODE_SECRET_NAME,
'csi.storage.k8s.io/node-stage-secret-namespace':
app_constants.SECRET_NAMESPACE,
'csi.storage.k8s.io/fstype': 'ext4'
}
storage_class = {
'enabled': True,
'name': app_constants.ROOK_CEPH_CLUSTER_RDB_STORAGE_CLASS_NAME,
'name': app_constants.STORAGE_CLASS_NAMES[app_constants.SVC_BLOCK],
'isDefault': True,
'allowVolumeExpansion': True,
'reclaimPolicy': 'Delete',
@ -149,93 +208,224 @@ class RookCephClusterHelm(storage.StorageBaseHelm):
'parameters': parameters
}
rdb_config = [{
'name': 'kube-rbd',
block_config = [{
'name': app_constants.BLOCK_NAME,
'spec': {
'failureDomain': 'host',
'replicated': {'size': replica}
'failureDomain': self._get_failure_domain(),
'replicated': {'size': self._get_data_replication_factor()}
},
'storageClass': storage_class
}]
return rdb_config
return block_config
def _get_cluster_override(self):
def _get_ecblock_overrides(self):
cluster_host_addr_name = cutils.format_address_name(constants.CONTROLLER_HOSTNAME,
constants.NETWORK_TYPE_CLUSTER_HOST)
address = cutils.get_primary_address_by_name(self.dbapi, cluster_host_addr_name,
ec_block_config = [{
'name': app_constants.ECBLOCK_NAME,
'spec': {
'failureDomain': self._get_failure_domain(),
'replicated': {
'size': self._get_data_replication_factor()
}
}
}, {
'name': 'ec-data-pool',
'spec': {
'failureDomain': self._get_failure_domain(),
'replicated': {
'size': self._get_data_replication_factor()
},
'deviceClass': 'hdd'
}
}]
return ec_block_config
def _get_ecblocksc_overrides(self):
    """Build the storage class override used with the EC block pools.

    :returns: dict with the storage class name, its CSI parameters and
        the expansion / reclaim settings.
    """
    sc_name = app_constants.STORAGE_CLASS_NAMES[app_constants.SVC_ECBLOCK]
    secret_ns = app_constants.SECRET_NAMESPACE
    provisioner_secret = app_constants.RBD_SECRET_NAME
    node_secret = app_constants.RBD_NODE_SECRET_NAME

    # NOTE(review): clusterID receives the storage class name here;
    # confirm this is the value the chart expects for clusterID.
    csi_parameters = {
        'clusterID': sc_name,
        'dataPool': 'ec-data-pool',
        'pool': app_constants.ECBLOCK_NAME,
        'imageFormat': '2',
        'imageFeatures': 'layering',
        'csi.storage.k8s.io/provisioner-secret-name': provisioner_secret,
        'csi.storage.k8s.io/provisioner-secret-namespace': secret_ns,
        'csi.storage.k8s.io/controller-expand-secret-name':
            provisioner_secret,
        'csi.storage.k8s.io/controller-expand-secret-namespace': secret_ns,
        'csi.storage.k8s.io/node-stage-secret-name': node_secret,
        'csi.storage.k8s.io/node-stage-secret-namespace': secret_ns,
        'csi.storage.k8s.io/fstype': 'ext4'
    }

    return {
        'name': sc_name,
        'isDefault': True,
        'parameters': csi_parameters,
        'allowVolumeExpansion': True,
        'reclaimPolicy': 'Delete',
        'mountOptions': []
    }
def _get_rgw_overrides(self):
    """Build the object store (RGW) override list.

    :returns: a one-element list describing the object store: metadata
        and data pools, the gateway, the storage class and a disabled
        ingress.
    """
    def _replicated_pool():
        # Both pools share the same shape; the values are looked up again
        # for each pool.
        return {
            'failureDomain': self._get_failure_domain(),
            'replicated': {
                'size': self._get_data_replication_factor()
            }
        }

    metadata_pool = _replicated_pool()
    data_pool = _replicated_pool()

    gateway_spec = {
        'port': 9800,
        'hostNetwork': True,
        'resources': {
            'limits': {
                'memory': '4Gi'
            },
            'requests': {
                'cpu': 0,
                'memory': 0
            }
        },
        'instances': 1,
        'priorityClassName': 'system-cluster-critical'
    }

    secret_ns = app_constants.SECRET_NAMESPACE
    provisioner_secret = app_constants.RBD_SECRET_NAME
    node_secret = app_constants.RBD_NODE_SECRET_NAME

    # NOTE(review): these are the same RBD image/secret parameters used
    # for the block storage classes; confirm they are intended for the
    # object store storage class as well.
    sc_parameters = {
        'imageFormat': '2',
        'imageFeatures': 'layering',
        'csi.storage.k8s.io/provisioner-secret-name': provisioner_secret,
        'csi.storage.k8s.io/provisioner-secret-namespace': secret_ns,
        'csi.storage.k8s.io/controller-expand-secret-name':
            provisioner_secret,
        'csi.storage.k8s.io/controller-expand-secret-namespace': secret_ns,
        'csi.storage.k8s.io/node-stage-secret-name': node_secret,
        'csi.storage.k8s.io/node-stage-secret-namespace': secret_ns,
        'csi.storage.k8s.io/fstype': 'ext4'
    }

    rgw_storage_class = {
        'enabled': True,
        'name': app_constants.STORAGE_CLASS_NAMES[app_constants.SVC_OBJ],
        'isDefault': False,
        'allowVolumeExpansion': True,
        'volumeBindingMode': 'Immediate',
        'reclaimPolicy': 'Delete',
        'mountOptions': [],
        'parameters': sc_parameters
    }

    object_store = {
        'name': app_constants.RGW_NAME,
        'spec': {
            'metadataPool': metadata_pool,
            'dataPool': data_pool,
            'preservePoolsOnDelete': True,
            'gateway': gateway_spec
        },
        'storageClass': rgw_storage_class,
        'ingress': {
            'enabled': False
        }
    }
    return [object_store]
def _get_cluster_overrides(self):
    """Build the ``cephClusterSpec`` override.

    The IP family is taken from the controller's primary cluster-host
    address. The monitor and manager counts both come from
    _get_mon_count(), and the storage nodes from _get_nodes_osds().
    """
    addr_name = utils.format_address_name(
        constants.CONTROLLER_HOSTNAME, constants.NETWORK_TYPE_CLUSTER_HOST)
    cluster_host_addr = utils.get_primary_address_by_name(
        self.dbapi, addr_name,
        constants.NETWORK_TYPE_CLUSTER_HOST, True)

    mon_spec = {'count': self._get_mon_count()}
    mgr_spec = {'count': self._get_mon_count()}
    network_spec = {'ipFamily': 'IPv' + str(cluster_host_addr.family)}
    storage_spec = {'nodes': self._get_nodes_osds()}

    return {
        'mon': mon_spec,
        'mgr': mgr_spec,
        'network': network_spec,
        'storage': storage_spec,
    }
def _get_mon_count(self):
# change it with deployment configs:
# AIO simplex/duplex have 1 mon, multi-node has 3 mons,
# 2 controllers + first mon (and cannot reconfig)
if cutils.is_aio_system(self.dbapi):
return 1
else:
return 3
def _get_mds_override(self):
if cutils.is_aio_simplex_system(self.dbapi):
replica = 1
else:
replica = 2
mds = {
'replica': replica,
}
return mds
def _get_hook_override(self):
def _get_hook_overrides(self):
hook = {
'cleanup': {
'mon_hosts': self._get_mon_hosts(),
'mon_hosts': self._get_mon_hostname_list(),
},
'duplexPreparation': self._get_duplex_preparation(),
}
return hook
def _get_mon_hosts(self):
ceph_mon_label = "ceph-mon-placement=enabled"
mon_hosts = []
def get_overrides(self, namespace=None):
hosts = self.dbapi.ihost_get_list()
for h in hosts:
labels = self.dbapi.label_get_by_host(h.uuid)
for label in labels:
if (ceph_mon_label == str(label.label_key) + '=' + str(label.label_value)):
mon_hosts.append(h.hostname.encode('utf8', 'strict'))
return mon_hosts
def _get_duplex_preparation(self):
duplex = {
'enable': cutils.is_aio_duplex_system(self.dbapi)
overrides = {
app_constants.HELM_NS_ROOK_CEPH: {
'cephClusterSpec': self._get_cluster_overrides(),
'hook': self._get_hook_overrides(),
}
}
if cutils.is_aio_duplex_system(self.dbapi):
hosts = self.dbapi.ihost_get_by_personality(
constants.CONTROLLER)
for host in hosts:
if host['hostname'] == socket.gethostname():
duplex.update({'activeController': host['hostname'].encode('utf8', 'strict')})
# One of the block pools is required
# Get overrides based on the enabled block pool
if self._is_service_enabled(app_constants.SVC_BLOCK):
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephBlockPools': self._get_block_overrides()})
else:
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephECBlockPools': self._get_ecblock_overrides(),
'cephECStorageClass': self._get_ecblocksc_overrides()})
cluster_host_addr_name = cutils.format_address_name(constants.CONTROLLER_HOSTNAME,
constants.NETWORK_TYPE_CLUSTER_HOST)
address = cutils.get_primary_address_by_name(self.dbapi, cluster_host_addr_name,
constants.NETWORK_TYPE_CLUSTER_HOST, True)
duplex.update({'floatIP': cutils.format_url_address(address.address)})
# Enable optional filesystem store
if self._is_service_enabled(app_constants.SVC_FS):
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephFileSystems': self._get_cephfs_overrides()})
else:
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephFileSystems': []})
return duplex
# Enable optional object stores
if self._is_service_enabled(app_constants.SVC_OBJ):
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephObjectStores': self._get_rgw_overrides()})
else:
overrides[app_constants.HELM_NS_ROOK_CEPH].update(
{'cephObjectStores': []})
if namespace in self.SUPPORTED_NAMESPACES:
return overrides[namespace]
elif namespace:
raise exception.InvalidHelmNamespace(chart=self.CHART,
namespace=namespace)
else:
return overrides

View File

@ -8,11 +8,9 @@
from k8sapp_rook_ceph.common import constants as app_constants
from k8sapp_rook_ceph.helm import storage
from kubernetes.client.rest import ApiException
from oslo_log import log as logging
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import kubernetes
from sysinv.common import utils as cutils
LOG = logging.getLogger(__name__)
@ -24,34 +22,76 @@ class RookCephClusterProvisionerHelm(storage.StorageBaseHelm):
CHART = app_constants.HELM_CHART_ROOK_CEPH_PROVISIONER
HELM_RELEASE = app_constants.FLUXCD_HELMRELEASE_ROOK_CEPH_PROVISIONER
def _get_controller_hosts(self):
    """Return the UTF-8 encoded hostnames of all controller hosts."""
    controllers = self.dbapi.ihost_get_by_personality(constants.CONTROLLER)
    return [ctrl.hostname.encode('utf8', 'strict') for ctrl in controllers]
def _get_cluster_host_ip(self, hostname):
    """Return the primary cluster-host network address of ``hostname``.

    :param hostname: host name used to derive the address entry name.
    :returns: the ``address`` field of the matching address record.
    """
    network_type = constants.NETWORK_TYPE_CLUSTER_HOST
    entry_name = cutils.format_address_name(hostname, network_type)
    record = cutils.get_primary_address_by_name(
        self.dbapi, entry_name, network_type, True)
    return record.address
def _get_monitor_ips(self):
    """Return the monitor IPs as a comma-separated string.

    The list holds the cluster-host address of every monitor-labeled
    host. The controller floating address is appended when the system is
    AIO-DX, the database reports exactly two hosts, and the Ceph
    controller filesystem is provisioned.
    """
    # Get the IPs from the labeled monitors
    labeled_hosts = self._get_hosts_with_label(
        app_constants.LABEL_PLACEMENT_MON)
    ips = [self._get_cluster_host_ip(mon_host.hostname)
           for mon_host in labeled_hosts]

    # Add the floating monitor
    has_floating_mon = (
        cutils.is_aio_duplex_system(self.dbapi) and
        self.dbapi.count_hosts_matching_criteria() == 2 and
        self._is_ceph_controllerfs_provisioned())
    if has_floating_mon:
        ips.append(
            self._get_cluster_host_ip(constants.CONTROLLER_HOSTNAME))

    return ','.join(ips)
def _get_ceph_audit_jobs_overrides(self):
    """Build the ``ceph_audit_jobs`` override.

    Returns an empty dict unless the system is AIO-DX, the database
    reports exactly two hosts, and the Ceph controller filesystem is
    provisioned. In that case the dict carries the URL-formatted
    controller floating address under ``floatIP``.
    """
    floating_mon_expected = (
        cutils.is_aio_duplex_system(self.dbapi) and
        self.dbapi.count_hosts_matching_criteria() == 2 and
        self._is_ceph_controllerfs_provisioned())
    if not floating_mon_expected:
        return {}

    float_address = self._get_cluster_host_ip(constants.CONTROLLER_HOSTNAME)
    return {'floatIP': cutils.format_url_address(float_address)}
def get_overrides(self, namespace=None):
base_name = 'ceph-pool'
secret_name = base_name + '-' + constants.CEPH_POOL_KUBE_NAME
if cutils.is_aio_simplex_system(self.dbapi):
replica = 1
else:
replica = 2
audit = cutils.is_aio_duplex_system(self.dbapi)
overrides = {
app_constants.HELM_NS_ROOK_CEPH: {
"global": {
"job_ceph_mon_audit": audit,
"job_ceph_mon_audit":
True if self._get_host_count_with_label(
app_constants.LABEL_PLACEMENT_MON) > 1 else False
},
"provisionStorage": {
"defaultStorageClass": constants.K8S_RBD_PROV_STOR_CLASS_NAME,
"defaultStorageClass":
constants.K8S_RBD_PROV_STOR_CLASS_NAME,
"classdefaults": {
"monitors": self._get_monitors(),
"monitors": self._get_monitor_ips(),
"adminId": constants.K8S_RBD_PROV_USER_NAME,
"adminSecretName": constants.K8S_RBD_PROV_ADMIN_SECRET_NAME,
"adminSecretName":
constants.K8S_RBD_PROV_ADMIN_SECRET_NAME,
},
"classes": {
"name": constants.K8S_RBD_PROV_STOR_CLASS_NAME,
"pool": {
"pool_name": constants.CEPH_POOL_KUBE_NAME,
"replication": replica,
"replication": self._get_data_replication_factor(),
"crush_rule_name": "storage_tier_ruleset",
"chunk_size": 64,
},
@ -64,7 +104,7 @@ class RookCephClusterProvisionerHelm(storage.StorageBaseHelm):
"host_provision": {
"controller_hosts": self._get_controller_hosts(),
},
"ceph_audit_jobs": self._get_ceph_audit(),
"ceph_audit_jobs": self._get_ceph_audit_jobs_overrides(),
}
}
@ -75,68 +115,3 @@ class RookCephClusterProvisionerHelm(storage.StorageBaseHelm):
namespace=namespace)
else:
return overrides
def _get_rook_mon_ip(self):
try:
kube = kubernetes.KubeOperator()
mon_ip_name = 'rook-ceph-mon-endpoints'
configmap = kube.kube_read_config_map(mon_ip_name,
app_constants.HELM_NS_ROOK_CEPH)
if configmap is not None:
data = configmap.data['data']
LOG.info('rook configmap data is %s' % data)
mons = data.split(',')
lists = []
for mon in mons:
mon = mon.split('=')
lists.append(mon[1])
ip_str = ','.join(lists)
LOG.info('rook mon ip is %s' % ip_str)
return ip_str
except Exception as e:
LOG.error("Kubernetes exception in rook mon ip: %s" % e)
raise
return ''
def _is_rook_ceph(self):
try:
label = "mon_cluster=" + app_constants.HELM_NS_ROOK_CEPH
kube = kubernetes.KubeOperator()
pods = kube.kube_get_pods_by_selector(app_constants.HELM_NS_ROOK_CEPH, label, "")
if len(pods) > 0:
return True
except ApiException as ae:
LOG.error("get monitor pod exception: %s" % ae)
except exception.SysinvException as se:
LOG.error("get sysinv exception: %s" % se)
return False
def _get_monitors(self):
if self._is_rook_ceph():
return self._get_rook_mon_ip()
else:
return ''
def _get_controller_hosts(self):
controller_hosts = []
hosts = self.dbapi.ihost_get_by_personality(constants.CONTROLLER)
for h in hosts:
controller_hosts.append(h.hostname.encode('utf8', 'strict'))
return controller_hosts
def _get_ceph_audit(self):
audit = {}
if cutils.is_aio_duplex_system(self.dbapi):
mgmt_addr_name = cutils.format_address_name(constants.CONTROLLER_HOSTNAME,
constants.NETWORK_TYPE_CLUSTER_HOST)
address = cutils.get_primary_address_by_name(self.dbapi, mgmt_addr_name,
constants.NETWORK_TYPE_CLUSTER_HOST, True)
audit.update({'floatIP': cutils.format_url_address(address.address)})
return audit

View File

@ -4,9 +4,13 @@
# SPDX-License-Identifier: Apache-2.0
#
from sysinv.helm import base
from k8sapp_rook_ceph.common import constants as app_constants
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils
from sysinv.helm import base
class BaseHelm(base.FluxCDBaseHelm):
"""Class to encapsulate storage related service operations for helm"""
@ -51,3 +55,110 @@ class StorageBaseHelm(BaseHelm):
if not self._is_enabled(operator.APP, self.CHART,
app_constants.HELM_NS_ROOK_CEPH):
operator.helm_release_resource_delete(self.HELM_RELEASE)
def _get_hosts_with_label(self, label):
return self.dbapi.ihosts_get_by_label(label)
def _get_host_count_with_label(self, label):
return self.dbapi.count_hosts_by_label(label)
def _get_ceph_rook_backend(self):
    """Return the Ceph Rook storage backend, or None when it is not found."""
    try:
        return self.dbapi.storage_backend_get_by_name(app_constants.SB_NAME)
    except exception.StorageBackendNotFoundByName:
        return None
def _get_deployment_model(self):
    """Return the deployment model stored in the backend capabilities.

    :raises SysinvException: when there is no backend, or the deployment
        capability is absent or empty.
    """
    cap_name = constants.CEPH_ROOK_BACKEND_DEPLOYMENT_CAP
    backend = self._get_ceph_rook_backend()

    model = backend.capabilities.get(cap_name, None) if backend else None
    if model:
        return model

    raise exception.SysinvException(
        '{} missing from storage backend {}'.format(
            cap_name, app_constants.SB_NAME))
def _get_hosts(self):
    """Return the hosts selected by the deployment model.

    The controller model selects controller hosts, the dedicated model
    selects worker hosts, and any other model selects every host.
    """
    model = self._get_deployment_model()

    if model == app_constants.DEP_MODEL_CONTROLLER:
        personality = constants.CONTROLLER
    elif model == app_constants.DEP_MODEL_DEDICATED:
        personality = constants.WORKER
    else:
        return self.dbapi.ihost_get_list()

    return self.dbapi.ihost_get_by_personality(personality)
def _get_nodes_osds(self):
    """Build the per-node OSD device list.

    Every host returned by _get_hosts() gets an entry, even when it has
    no eligible device. A stor is eligible when its function is OSD and
    its state is configured or configuring-with-app; the device name is
    the device path of the disk backing the stor.
    """
    eligible_states = (constants.SB_STATE_CONFIGURED,
                       constants.SB_STATE_CONFIGURING_WITH_APP)

    storage_nodes = []
    for host in self._get_hosts():
        node_entry = {'name': host.hostname, 'devices': []}

        for stor in self.dbapi.istor_get_by_ihost(host.uuid):
            if stor.function != constants.STOR_FUNCTION_OSD:
                continue
            if stor.state not in eligible_states:
                continue
            disk = self.dbapi.idisk_get(stor.idisk_uuid)
            node_entry['devices'].append({'name': disk.device_path})

        storage_nodes.append(node_entry)

    return storage_nodes
def _get_data_replication_factor(self):
    """Return the data replication factor as an int.

    Without a Ceph Rook backend, the AIO-SX default is returned on
    simplex systems and the general default otherwise. With a backend,
    the value is read from its replication capability.

    :raises SysinvException: when the capability is absent or empty, or
        when int() rejects its value with ValueError.
    """
    backend = self._get_ceph_rook_backend()
    if not backend:
        if utils.is_aio_simplex_system(self.dbapi):
            return constants.AIO_SX_CEPH_REPLICATION_FACTOR_DEFAULT
        return constants.CEPH_REPLICATION_FACTOR_DEFAULT

    raw_value = backend.capabilities.get(
        constants.CEPH_BACKEND_REPLICATION_CAP, None)
    if not raw_value:
        raise exception.SysinvException(
            '{} missing from storage backend {}.'.format(
                constants.CEPH_BACKEND_REPLICATION_CAP,
                app_constants.SB_NAME))

    try:
        return int(raw_value)
    except ValueError:
        raise exception.SysinvException(
            '{} from storage backend {} must be a integer.'.format(
                constants.CEPH_BACKEND_REPLICATION_CAP,
                app_constants.SB_NAME))
def _is_ceph_controllerfs_provisioned(self):
    """Return True when the Ceph DRBD controller filesystem exists."""
    try:
        self.dbapi.controller_fs_get_by_name(
            constants.FILESYSTEM_NAME_CEPH_DRBD)
    except exception.ControllerFSNameNotFound:
        return False
    else:
        return True
def _get_services(self):
services_list = []
sb = self._get_ceph_rook_backend()
if sb:
services_list = sb.services.split(',')
return services_list
def _is_service_enabled(self, service):
services = self._get_services()
if services and service in services:
return True
return False

View File

@ -11,15 +11,36 @@ from sysinv.tests.db import base as dbbase
from sysinv.tests.db import utils as dbutils
from sysinv.tests.helm import base
from sysinv.common import constants
class RookTestCase(test_plugins.K8SAppRookAppMixin,
base.HelmTestCaseMixin):
def setUp(self):
super(RookTestCase, self).setUp()
self.app = dbutils.create_test_app(name=app_constants.HELM_APP_ROOK_CEPH)
self.dbapi = dbapi.get_instance()
# Create storage backend
self.backend = dbutils.create_ceph_rook_storage_backend()
# Create Ceph controllerfs
self.controller_fs = dbutils.create_test_controller_fs(
name=constants.FILESYSTEM_NAME_CEPH_DRBD)
# Create app
self.app = dbutils.create_test_app(name=app_constants.HELM_APP_ROOK_CEPH)
self.replication = int(self.backend.capabilities.get(constants.CEPH_BACKEND_REPLICATION_CAP, 1))
# Create Label hosts
hosts = self.dbapi.ihost_get_list()
for h in hosts:
dbutils.create_test_label(
host_id=h.id,
label_key=app_constants.LABEL_PLACEMENT_MON,
label_value="enabled")
class RookIPv4ControllerHostTestCase(RookTestCase,
dbbase.ProvisionedControllerHostTestCase):
@ -40,7 +61,7 @@ class RookIPv4ControllerHostTestCase(RookTestCase,
cnamespace=app_constants.HELM_NS_ROOK_CEPH)
self.assertOverridesParameters(e_overrides.get('cephFileSystems')[0].get('spec').
get('metadataPool').get('replicated').get('size'), 2)
get('metadataPool').get('replicated').get('size'), self.replication)
def test_rook_ceph_provisioner_overrides(self):
f_overrides = self.operator.get_helm_chart_overrides(
@ -74,7 +95,7 @@ class RookIPv6AIODuplexSystemTestCase(RookTestCase,
cnamespace=app_constants.HELM_NS_ROOK_CEPH)
self.assertOverridesParameters(b_overrides.get('cephFileSystems')[0].get('spec').
get('metadataPool').get('replicated').get('size'), 2)
get('metadataPool').get('replicated').get('size'), self.replication)
def test_rook_ceph_provisioner_overrides(self):
c_overrides = self.operator.get_helm_chart_overrides(
@ -107,7 +128,7 @@ class RookDualStackControllerIPv4TestCase(RookTestCase,
cnamespace=app_constants.HELM_NS_ROOK_CEPH)
self.assertOverridesParameters(h_overrides.get('cephFileSystems')[0].get('spec').
get('metadataPool').get('replicated').get('size'), 2)
get('metadataPool').get('replicated').get('size'), self.replication)
def test_rook_ceph_provisioner_overrides(self):
i_overrides = self.operator.get_helm_chart_overrides(

View File

@ -12,6 +12,9 @@ configOverride: |
[global]
osd_pool_default_size = 1
osd_pool_default_min_size = 1
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
[osd]
osd_mkfs_type = xfs
@ -147,6 +150,9 @@ cephClusterSpec:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/control-plane
- effect: NoExecute
operator: Exists
key: services
mon:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
@ -166,7 +172,6 @@ cephClusterSpec:
values:
- enabled
toolbox:
enabled: true
image: quay.io/ceph/ceph:v18.2.2
@ -191,169 +196,18 @@ monitoring:
cephFileSystems:
- name: cephfs
# see https://github.com/rook/rook/blob/master/Documentation/ceph-filesystem-crd.md#filesystem-settings for available configuration
spec:
metadataPool:
replicated:
size: 1
dataPools:
- failureDomain: osd # TODO
name: data
replicated:
size: 1
metadataServer:
labels:
app.starlingx.io/component: "platform"
activeCount: 1
activeStandby: true
resources:
limits:
memory: "4Gi"
requests:
cpu: 0
memory: 0
placement:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: ceph-mon-placement
operator: In
values:
- enabled
podAntiAffinity:
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/master
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/control-plane
priorityClassName: system-cluster-critical
storageClass:
enabled: true
isDefault: false
name: cephfs
pool: data
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
# see https://github.com/rook/rook/blob/master/Documentation/ceph-filesystem.md#provision-storage for available configuration
parameters:
# The secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
cephBlockPools:
- name: kube-rbd
# see https://github.com/rook/rook/blob/master/Documentation/ceph-pool-crd.md#spec for available configuration
spec:
failureDomain: osd
replicated:
size: 1
storageClass:
enabled: true
name: general
isDefault: true
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
allowedTopologies: []
# see https://github.com/rook/rook/blob/master/Documentation/ceph-block.md#provision-storage for available configuration
parameters:
# (optional) mapOptions is a comma-separated list of map options.
# For krbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
# mapOptions: lock_on_read,queue_depth=1024
# (optional) unmapOptions is a comma-separated list of unmap options.
# For krbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options
# For nbd options refer
# https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options
# unmapOptions: force
# RBD image format. Defaults to "2".
imageFormat: "2"
# RBD image features. Available for imageFormat: "2". CSI RBD currently supports only `layering` feature.
imageFeatures: layering
# The secrets contain Ceph admin credentials.
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
# Specify the filesystem type of the volume. If not specified, csi-provisioner
# will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
# in hyperconverged settings where the volume is mounted on the same node as the osds.
csi.storage.k8s.io/fstype: ext4
# -- A list of CephObjectStore configurations to deploy
# @default -- See [below](#ceph-object-stores)
cephObjectStores:
- name: ceph-objectstore
# see https://github.com/rook/rook/blob/master/Documentation/CRDs/Object-Storage/ceph-object-store-crd.md#object-store-settings for available configuration
spec:
metadataPool:
failureDomain: osd
replicated:
size: 0
dataPool:
failureDomain: osd
erasureCoded:
dataChunks: 0
codingChunks: 0
preservePoolsOnDelete: true
gateway:
port: 80
resources:
limits:
memory: "4Gi"
requests:
cpu: 0
memory: 0
# securePort: 443
# sslCertificateRef:
instances: 1
priorityClassName: system-cluster-critical
storageClass:
enabled: false
name: ceph-bucket
reclaimPolicy: Delete
volumeBindingMode: "Immediate"
# see https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim.md#storageclass for available configuration
parameters:
# note: objectStoreNamespace and objectStoreName are configured by the chart
region: us-east-1
ingress:
# Enable an ingress for the ceph-objectstore
enabled: false
# annotations: {}
# host:
# name: objectstore.example.com
# path: /
# tls:
# - hosts:
# - objectstore.example.com
# secretName: ceph-objectstore-tls
# ingressClassName: nginx
cephECBlockPools:
# cephECStorageClass also is disabled by default, please remove the comments and set desired values to enable it
# if cephECBlockPools are uncommented you must remove the comments of cephEcStorageClass as well
cephECStorageClass:
imagePullSecrets:
- name: default-registry-key