[rook-ceph] Constants, utils and image override

This change includes a set of constants and functions to be used by the
stx-openstack plugins and lifecycle in order to support the rook ceph
storage backend. It also adds a new override to the rook ceph client
image (ceph-config-helper) to enable the app to dynamically change the
client image and ensure client version compatibility with both the
baremetal and rook ceph deployments. Furthermore, the change adds unit
tests covering the new functions added for rook ceph.

The function is_rook_ceph_backend_available can be used by the app to
decide between the baremetal or rook ceph storage backends. It can also
be used to ensure that rook ceph app is successfully deployed and
configured.

The function is_rook_ceph_api_available can be used in combination with
the function is_rook_ceph_backend_available to protect the access to the
rook ceph REST API, ensuring that the REST API pods are running.

The function get_rook_ceph_uuid can be used to access the rook ceph API
and read the Rook Ceph secret UUID for storage backend configuration. It
relies on is_rook_ceph_api_available to protect the access to rook ceph
REST API.

The new override for the rook ceph client image was added in the clients
helm chart to make it simpler for the app/user to configure the ceph
client image. Based on this config, the app plugins can dynamically
change all the ceph-config-helper image config for all the charts using
it (glance, libvirt, nova, cinder, etc.). The default value and static
override were defined based on the ceph-config-helper image currently
used by rook ceph app [1].

[1]https://opendev.org/starlingx/app-rook-ceph/src/branch/master/stx-rook-ceph-helm/stx-rook-ceph-helm/fluxcd-manifests/rook-ceph-cluster/rook-ceph-cluster-static-overrides.yaml#L259

Test Plan:
[PASS] Build stx-openstack packages
[PASS] Unit tests

Since stx-openstack doesn't yet support even being uploaded to rook
ceph stx deployments, the full validation of the app and new functions
(including potential fixes) will occur in subsequent tasks.

Story: 2011388
Task: 51871

Change-Id: I00436f795c33f4f8fa9247a9dc10b053880cad9d
Signed-off-by: Alex Figueiredo <alex.fernandesfigueiredo@windriver.com>
This commit is contained in:
Alex Figueiredo 2025-04-15 17:04:24 -03:00
parent aaddbc515f
commit 05b355aee5
5 changed files with 296 additions and 0 deletions

View File

@ -95,6 +95,18 @@ NOVA_PCI_ALIAS_GPU_NVIDIA_TESLA_P40_NAME = "nvidia-tesla-p40"
NOVA_PCI_ALIAS_GPU_NVIDIA_TESLA_T4_PF_DEVICE = "1eb8"
NOVA_PCI_ALIAS_GPU_NVIDIA_TESLA_T4_PF_NAME = "nvidia-tesla-t4-pf"
# Ceph constants
# Name of the rook ceph application, re-exported from sysinv constants.
HELM_APP_ROOK_CEPH = constants.HELM_APP_ROOK_CEPH
# Kubernetes namespace used when listing the rook ceph manager pods and
# when building the in-cluster DNS name of the manager REST service.
HELM_NS_ROOK_CEPH = 'rook-ceph'
# Default storage backend name that sysinv associates with the rook ceph
# backend type.
CEPH_ROOK_BACKEND_NAME = constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH_ROOK]
# Default client image; repo and tag are joined as "<repo>:<tag>" by
# utils.get_image_rook_ceph().
CEPH_ROOK_IMAGE_DEFAULT_REPO = 'docker.io/openstackhelm/ceph-config-helper'
CEPH_ROOK_IMAGE_DEFAULT_TAG = 'ubuntu_jammy_18.2.2-1-20240312'
# Key looked up under "images.tags" of the clients chart overrides.
CEPH_ROOK_IMAGE_OVERRIDE = 'rook_ceph_config_helper'
# Value of the "app" label used to select the rook ceph manager pods.
CEPH_ROOK_MANAGER_APP = 'rook-ceph-mgr'
# Service name used as host part of the rook ceph REST API endpoint.
CEPH_ROOK_MANAGER_SVC = 'rook-ceph-mgr-restful'
# NOTE(review): "POLL" looks like a typo for "POOL" (crush rule for the
# pools, presumably) -- confirm with callers before renaming.
CEPH_ROOK_POLL_CRUSH_RULE = 'kube-rbd'
CEPH_POOL_IMAGES_NAME = 'images'
CEPH_POOL_IMAGES_CHUNK_SIZE = 256
@ -105,6 +117,7 @@ CEPH_POOL_VOLUMES_NAME = 'cinder-volumes'
CEPH_POOL_VOLUMES_APP_NAME = 'cinder-volumes'
CEPH_POOL_VOLUMES_CHUNK_SIZE = 512
CEPH_POOL_BACKUP_NAME = 'cinder.backups'
CEPH_POOL_BACKUP_APP_NAME = 'cinder-backup'
CEPH_POOL_BACKUP_CHUNK_SIZE = 256
@ -127,3 +140,6 @@ OPENSTACK_CERT_CA = "openstack-cert-ca"
FORCE_READ_CERT_FILES = False
SERVICES_FQDN_PATTERN = "{service_name}.{endpoint_domain}"
OPENSTACK_NETAPP_NAMESPACE = "trident"
# Kubernetes
# Field selector handed to the pod lookup in
# utils.is_rook_ceph_api_available() together with the manager label.
POD_SELECTOR_RUNNING = "status.phase==Running"

View File

@ -10,6 +10,7 @@ from sysinv.helm import common as helm_common
from sysinv.tests.db import base as dbbase
from k8sapp_openstack import utils as app_utils
from k8sapp_openstack.common import constants as app_constants
class UtilsTest(dbbase.ControllerHostTestCase):
@ -72,3 +73,193 @@ class UtilsTest(dbbase.ControllerHostTestCase):
return_value=True):
result = app_utils.is_openvswitch_enabled(hosts, labels_by_hostid)
self.assertFalse(result)
@mock.patch('k8sapp_openstack.utils._get_value_from_application',
            return_value=f'{app_constants.CEPH_ROOK_IMAGE_DEFAULT_REPO}:'
                         f'{app_constants.CEPH_ROOK_IMAGE_DEFAULT_TAG}')
def test_get_image_rook_ceph(self, mock_get_value_from_application):
    """Test get_image_rook_ceph for valid image override
    """
    # The patched lookup returns the default "<repo>:<tag>" image, so the
    # helper is expected to hand that exact string back.
    default_image = ':'.join((app_constants.CEPH_ROOK_IMAGE_DEFAULT_REPO,
                              app_constants.CEPH_ROOK_IMAGE_DEFAULT_TAG))

    image = app_utils.get_image_rook_ceph()

    self.assertEqual(image, default_image)
    mock_get_value_from_application.assert_called_once()
@mock.patch('cephclient.wrapper.CephWrapper')
@mock.patch('k8sapp_openstack.utils.is_rook_ceph_api_available',
            return_value=True)
def test_get_rook_ceph_uuid(self, mock_api_available, mock_ceph_wrapper):
    """Test get_rook_ceph_uuid for rook ceph api available and responding ok
    """
    expected_fsid = '89bd29e9-c505-4170-a097-04dc8e43c897'
    ok_response = mock.MagicMock(ok=True)
    # fsid() of the wrapper instance yields a (response, fsid) pair.
    fsid_call = mock_ceph_wrapper.return_value.fsid
    fsid_call.return_value = (ok_response, expected_fsid)

    uuid = app_utils.get_rook_ceph_uuid()

    self.assertEqual(uuid, expected_fsid)
    fsid_call.assert_called_once()
    mock_api_available.assert_called_once()
@mock.patch('cephclient.wrapper.CephWrapper')
@mock.patch('k8sapp_openstack.utils.is_rook_ceph_api_available',
            return_value=True)
def test_get_rook_ceph_uuid_nok(self,
                                mock_api_available,
                                mock_ceph_wrapper):
    """Test get_rook_ceph_uuid for rook ceph api available but not responding ok
    """
    failed_response = mock.MagicMock(ok=False)
    # The API answers, but with a non-ok response and no fsid.
    fsid_call = mock_ceph_wrapper.return_value.fsid
    fsid_call.return_value = (failed_response, None)

    uuid = app_utils.get_rook_ceph_uuid()

    self.assertEqual(uuid, None)
    fsid_call.assert_called_once()
    mock_api_available.assert_called_once()
@mock.patch('cephclient.wrapper.CephWrapper')
@mock.patch('k8sapp_openstack.utils.is_rook_ceph_api_available',
            return_value=False)
def test_get_rook_ceph_uuid_unavailable(self,
                                        mock_api_available,
                                        mock_ceph_wrapper):
    """Test get_rook_ceph_uuid for rook ceph api not available
    """
    failed_response = mock.MagicMock(ok=False)
    # fsid() is configured anyway; the assertion below checks that the
    # availability guard prevented it from being called at all.
    fsid_call = mock_ceph_wrapper.return_value.fsid
    fsid_call.return_value = (failed_response, None)

    uuid = app_utils.get_rook_ceph_uuid()

    self.assertEqual(uuid, None)
    fsid_call.assert_not_called()
    mock_api_available.assert_called_once()
@mock.patch('cephclient.wrapper.CephWrapper')
@mock.patch('k8sapp_openstack.utils.is_rook_ceph_api_available',
            return_value=True)
def test_get_rook_ceph_uuid_api_exception(self,
                                          mock_api_available,
                                          mock_ceph_wrapper):
    """Test get_rook_ceph_uuid for rook ceph api exception
    """
    # No response object is needed here: fsid() raises before anything
    # could be returned, and the function is expected to absorb the
    # error and report no UUID.
    ceph_instance = mock_ceph_wrapper.return_value
    ceph_instance.fsid.side_effect = Exception()

    result = app_utils.get_rook_ceph_uuid()

    self.assertIsNone(result)
    ceph_instance.fsid.assert_called_once()
    mock_api_available.assert_called_once()
@mock.patch('sysinv.db.api.get_instance')
def test_is_rook_backend_available(self, mock_dbapi_get_instance):
    """Test is_rook_ceph_backend_available for rook ceph configured and applied
    """
    backend = mock.MagicMock()
    backend.state = constants.SB_STATE_CONFIGURED
    backend.task = constants.APP_APPLY_SUCCESS
    list_backends = \
        mock_dbapi_get_instance.return_value.storage_backend_get_list_by_type
    list_backends.return_value = [backend]

    available = app_utils.is_rook_ceph_backend_available()

    self.assertEqual(available, True)
    list_backends.assert_called_once_with(
        backend_type=constants.SB_TYPE_CEPH_ROOK
    )
@mock.patch('sysinv.db.api.get_instance')
def test_is_rook_backend_available_not_applied(self,
                                               mock_dbapi_get_instance):
    """Test is_rook_ceph_backend_available for rook ceph not successfully applied
    """
    # Configured state, but the apply task reports a failure.
    backend = mock.MagicMock()
    backend.state = constants.SB_STATE_CONFIGURED
    backend.task = constants.APP_APPLY_FAILURE
    list_backends = \
        mock_dbapi_get_instance.return_value.storage_backend_get_list_by_type
    list_backends.return_value = [backend]

    available = app_utils.is_rook_ceph_backend_available()

    self.assertEqual(available, False)
    list_backends.assert_called_once_with(
        backend_type=constants.SB_TYPE_CEPH_ROOK
    )
@mock.patch('sysinv.db.api.get_instance')
def test_is_rook_backend_available_not_configured(self,
                                                  mock_dbapi_get_instance):
    """Test is_rook_ceph_backend_available for rook ceph not configured
    """
    # Apply task succeeded, but the backend is still in configuring state.
    backend = mock.MagicMock()
    backend.state = constants.SB_STATE_CONFIGURING
    backend.task = constants.APP_APPLY_SUCCESS
    list_backends = \
        mock_dbapi_get_instance.return_value.storage_backend_get_list_by_type
    list_backends.return_value = [backend]

    available = app_utils.is_rook_ceph_backend_available()

    self.assertEqual(available, False)
    list_backends.assert_called_once_with(
        backend_type=constants.SB_TYPE_CEPH_ROOK
    )
@mock.patch('sysinv.db.api.get_instance', return_value=None)
def test_is_rook_backend_available_none_db(self, mock_dbapi_get_instance):
    """Test is_rook_ceph_backend_available for dbapi failure
    """
    # get_instance() yields None, so no backend query can be made.
    available = app_utils.is_rook_ceph_backend_available()

    self.assertEqual(available, False)
    mock_dbapi_get_instance.assert_called_once_with()
@mock.patch('sysinv.db.api.get_instance')
def test_is_rook_backend_available_empty(self, mock_dbapi_get_instance):
    """Test is_rook_ceph_backend_available for empty list of rook ceph backends
    """
    list_backends = \
        mock_dbapi_get_instance.return_value.storage_backend_get_list_by_type
    list_backends.return_value = []

    available = app_utils.is_rook_ceph_backend_available()

    self.assertEqual(available, False)
    list_backends.assert_called_once_with(
        backend_type=constants.SB_TYPE_CEPH_ROOK
    )
@mock.patch('sysinv.common.kubernetes.KubeOperator')
def test_is_rook_ceph_api_available(self, mock_kube_operator):
    """Test is_rook_ceph_api_available for rook api pod in Running state
    """
    manager_pod = mock.MagicMock()
    manager_pod.metadata.name = \
        f'{app_constants.CEPH_ROOK_MANAGER_APP}-a-74cf47c859-8cgsx'
    get_pods = mock_kube_operator.return_value.kube_get_pods_by_selector
    get_pods.return_value = [manager_pod]

    available = app_utils.is_rook_ceph_api_available()

    self.assertEqual(available, True)
    get_pods.assert_called_once_with(
        app_constants.HELM_NS_ROOK_CEPH,
        f"app={app_constants.CEPH_ROOK_MANAGER_APP}",
        app_constants.POD_SELECTOR_RUNNING
    )
@mock.patch('sysinv.common.kubernetes.KubeOperator')
def test_is_rook_ceph_api_available_not_running(self, mock_kube_operator):
    """Test is_rook_ceph_api_available for rook api pod not in Running state
    """
    # The selector-based lookup returns no pods at all.
    get_pods = mock_kube_operator.return_value.kube_get_pods_by_selector
    get_pods.return_value = []

    available = app_utils.is_rook_ceph_api_available()

    self.assertEqual(available, False)
    get_pods.assert_called_once_with(
        app_constants.HELM_NS_ROOK_CEPH,
        f"app={app_constants.CEPH_ROOK_MANAGER_APP}",
        app_constants.POD_SELECTOR_RUNNING
    )

View File

@ -13,6 +13,7 @@ import re
import shutil
from typing import Generator
from cephclient import wrapper as ceph
from eventlet.green import subprocess
from kubernetes.client.rest import ApiException as KubeApiException
from oslo_log import log as logging
@ -434,6 +435,31 @@ def get_ceph_uuid():
return pattern.findall(line)[0]
def get_rook_ceph_uuid():
    """Get Rook Ceph secret UUID for storage backend configuration

    The value is the fsid reported by the Rook Ceph REST API.

    :returns: str -- The fsid with surrounding whitespace stripped, or
              None when the API is reported as unavailable, the response
              is not ok, or the request raises an exception
    """
    if not is_rook_ceph_api_available():
        LOG.error('Rook ceph API is not available')
        return None

    # In-cluster DNS name of the manager REST service.
    endpoint = (f'http://{app_constants.CEPH_ROOK_MANAGER_SVC}.'
                f'{app_constants.HELM_NS_ROOK_CEPH}.'
                f'svc.cluster.local:{constants.CEPH_MGR_PORT}')
    client = ceph.CephWrapper(endpoint=endpoint)

    try:
        response, fsid = client.fsid(body='text', timeout=10)
        if response.ok:
            return str(fsid.strip())
        LOG.error(f"CEPH uuid request failed: {response.reason}")
    except Exception as e:
        # Best effort: any client failure is logged and reported as "no UUID".
        LOG.error(f"CEPH uuid request failed: {str(e)}")
    return None
def is_subcloud():
db = dbapi.get_instance()
system = db.isystem_get_one()
@ -542,6 +568,54 @@ def is_netapp_available() -> bool:
return netapp_backends["nfs"] or netapp_backends["iscsi"]
def is_rook_ceph_backend_available() -> bool:
    """Check if Rook Ceph backend is available (configured and applied)

    Only the first backend returned by the database for the rook ceph
    backend type is inspected.

    Returns:
        bool: True if that backend is in configured state and its task
              reports a successful apply; False otherwise, including when
              the database API is unavailable or no backend exists
    """
    db = dbapi.get_instance()
    if db is None:
        LOG.error("Database API is not available")
        return False

    rook_backends = db.storage_backend_get_list_by_type(
        backend_type=constants.SB_TYPE_CEPH_ROOK)
    # Truthiness covers both None and an empty list.
    if not rook_backends:
        LOG.debug("No rook ceph backends available")
        return False

    backend = rook_backends[0]
    state = backend.state
    task = backend.task
    available = (state == constants.SB_STATE_CONFIGURED
                 and task == constants.APP_APPLY_SUCCESS)
    LOG.info(f"rook_ceph_backend_available={available}, "
             f"state={state}, task={task}")
    return available
def is_rook_ceph_api_available() -> bool:
    """Check if Rook Ceph REST API is available (running)

    Lists the pods in the rook ceph namespace that carry the manager
    "app" label, using the Running-phase field selector.

    Returns:
        bool: True if at least one such pod is returned; False when none
              is returned or when the Kubernetes query fails
    """
    label = f"app={app_constants.CEPH_ROOK_MANAGER_APP}"
    field_selector = app_constants.POD_SELECTOR_RUNNING

    try:
        kube = kubernetes.KubeOperator()
        pods = kube.kube_get_pods_by_selector(app_constants.HELM_NS_ROOK_CEPH,
                                              label,
                                              field_selector)
    except Exception as e:
        # Still best effort (callers only get False), but the failure is
        # logged so a broken query is distinguishable from "no pods".
        LOG.error(f"Failed to query rook ceph API pods: {e}")
        return False

    if pods:
        LOG.debug("Rook ceph API pods are available and in Running state")
        return True

    LOG.info("Rook ceph API pods are not available or not in Running state")
    return False
def is_openvswitch_enabled(hosts, labels_by_hostid) -> bool:
"""
Check if openvswitch is enabled.
@ -1018,3 +1092,16 @@ def delete_kubernetes_resource(resource_type, resource_name):
LOG.error(f"Failed to delete {resource_type}: {resource_name}, with error: {e}")
except Exception as e:
LOG.error(f"Unexpected error while deleting {resource_type}: {e}")
def get_image_rook_ceph():
    """Get client image to be used for rook ceph deployments

    Delegates to _get_value_from_application for the clients chart,
    passing the "images.tags.<override>" name and the built-in default
    image as default_value.

    :returns: str -- The image in the format <repository>:<tag>
    """
    default_image = (f'{app_constants.CEPH_ROOK_IMAGE_DEFAULT_REPO}:'
                     f'{app_constants.CEPH_ROOK_IMAGE_DEFAULT_TAG}')
    override_key = f'images.tags.{app_constants.CEPH_ROOK_IMAGE_OVERRIDE}'

    return _get_value_from_application(
        default_value=default_image,
        chart_name=app_constants.HELM_CHART_CLIENTS,
        override_name=override_key,
    )

View File

@ -17,6 +17,7 @@ images:
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
bootstrap: docker.io/openstackhelm/heat:wallaby-ubuntu_focal
pre_apply_cleanup: docker.io/starlingx/stx-vault-manager:master-debian-stable-latest
rook_ceph_config_helper: docker.io/openstackhelm/ceph-config-helper:ubuntu_jammy_18.2.2-1-20240312
pull_policy: "IfNotPresent"
local_registry:
active: false

View File

@ -13,6 +13,7 @@ images:
openstack_clients: docker.io/starlingx/stx-openstackclients:master-debian-stable-latest
bootstrap: docker.io/starlingx/stx-heat:master-debian-stable-latest
pre_apply_cleanup: docker.io/starlingx/stx-vault-manager:master-debian-stable-latest
rook_ceph_config_helper: docker.io/openstackhelm/ceph-config-helper:ubuntu_jammy_18.2.2-1-20240312
pod:
tolerations:
clients: