Add checks to deny adding secondary tiers before ceph is up

On controller-0, during initial configuration, adding secondary
tiers fails because ceph has to be up before secondary tiers
and secondary storage backends can be added. This operation is
now denied through a semantic check.

Another semantic check is added to verify that ceph is
functional before allowing creation of secondary tiers on
standard (non-AIO) systems.

Change-Id: I75c2e9540c98a01044bdd2666e0eac821f385cfa
Closes-Bug: 1830191
Signed-off-by: Ovidiu Poncea <ovidiu.poncea@windriver.com>
Ovidiu Poncea 2019-06-27 17:17:36 +03:00
parent 4f1dde2c9d
commit b94a088e1f
6 changed files with 67 additions and 21 deletions
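
For reference, a standalone sketch of the validation flow this change introduces, condensed from the hunks below; the function and its parameters are illustrative stand-ins rather than the actual sysinv helpers:

MIN_STOR_MONITORS_MULTINODE = 2
MIN_STOR_MONITORS_AIO = 1

def check_add_secondary_tier(initial_config_complete, is_aio,
                             primary_backend_configured,
                             num_monitors, required_monitors):
    # Return None when the tier may be added, otherwise the denial message.
    if not initial_config_complete:
        return ("Operation denied. Adding secondary tiers to a cluster requires "
                "initial configuration to be complete and controller node unlocked.")
    if is_aio:
        # AIO: the primary tier backend must already be configured.
        if not primary_backend_configured:
            return ("Operation denied. Adding secondary tiers to a cluster requires "
                    "primary tier storage backend of this cluster to be configured.")
    elif num_monitors < required_monitors:
        # Standard/storage deployments: ceph must be operational.
        return ("Operation denied. Ceph is not operational. Only %d storage "
                "monitor available. At least %s unlocked and enabled hosts "
                "with monitors are required." % (num_monitors, required_monitors))
    return None

# Example: a standard (non-AIO) system with one monitor in quorum is denied.
print(check_add_secondary_tier(True, False, True, 1, MIN_STOR_MONITORS_MULTINODE))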

View File

@@ -2283,7 +2283,7 @@ class HostController(rest.RestController):
if (personality is not None and
personality.find(constants.STORAGE_HOSTNAME) != -1 and
not skip_ceph_checks):
- num_monitors, required_monitors, quorum_names = \
+ num_monitors, required_monitors, __ = \
self._ceph.get_monitors_status(pecan.request.dbapi)
if num_monitors < required_monitors:
raise wsme.exc.ClientSideError(_(
@@ -2292,7 +2292,7 @@ class HostController(rest.RestController):
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled.") %
- (num_monitors, constants.MIN_STOR_MONITORS))
+ (num_monitors, required_monitors))
# If it is the last storage node to delete, we need to delete
# ceph osd pools and update additional tier status to "defined"
@@ -5180,7 +5180,7 @@ class HostController(rest.RestController):
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled.") %
- (num_monitors, constants.MIN_STOR_MONITORS))
+ (num_monitors, required_monitors))
if not force:
# sm-lock-pre-check
@@ -5409,7 +5409,7 @@ class HostController(rest.RestController):
_("Can not unlock a storage node without any Storage Volumes configured"))
ceph_helper = ceph.CephApiOperator()
- num_monitors, required_monitors, quorum_names = \
+ num_monitors, required_monitors, __ = \
ceph_helper.get_monitors_status(pecan.request.dbapi)
if num_monitors < required_monitors:
raise wsme.exc.ClientSideError(
@@ -5418,7 +5418,7 @@ class HostController(rest.RestController):
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled.") %
- (num_monitors, constants.MIN_STOR_MONITORS))
+ (num_monitors, required_monitors))
# Check Ceph configuration, if it is wiped out (in the Backup & Restore
# process) then restore the configuration.
@@ -5714,7 +5714,7 @@ class HostController(rest.RestController):
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled.") %
- (num_monitors, constants.MIN_STOR_MONITORS))
+ (num_monitors, required_monitors))
storage_nodes = pecan.request.dbapi.ihost_get_by_personality(
constants.STORAGE)
@@ -5861,7 +5861,7 @@ class HostController(rest.RestController):
"enabled hosts with monitors are required. "
"Please ensure hosts with monitors are "
"unlocked and enabled.") %
- (num_monitors, constants.MIN_STOR_MONITORS))
+ (num_monitors, required_monitors))
def check_unlock_interfaces(self, hostupdate):
"""Semantic check for interfaces on host-unlock."""

View File

@@ -575,7 +575,7 @@ def _check_host(stor):
# semantic check: whether at least 2 unlocked hosts are monitors
if not utils.is_aio_system(pecan.request.dbapi):
ceph_helper = ceph.CephApiOperator()
- num_monitors, required_monitors, quorum_names = \
+ num_monitors, required_monitors, __ = \
ceph_helper.get_monitors_status(pecan.request.dbapi)
# CGTS 503 for now update monitors requirement until controller-0 is
# inventoried

View File

@@ -305,7 +305,7 @@ class StorageTierController(rest.RestController):
raise exception.PatchError(patch=patch, reason=e)
# Semantic Checks
_check("modify", tier.as_dict())
_check(self, "modify", tier.as_dict())
try:
# Update only the fields that have changed
for field in objects.storage_tier.fields:
@@ -396,7 +396,7 @@ def _pre_patch_checks(tier_obj, patch_obj):
"cannot be changed.") % tier_obj.name)
- def _check(op, tier):
+ def _check(self, op, tier):
# Semantic checks
LOG.debug("storage_tier: Semantic check for %s operation" % op)
@@ -410,10 +410,16 @@ def _check(op, tier):
raise wsme.exc.ClientSideError(_("Storage tier (%s) "
"already present." %
tier['name']))
+ # Deny adding secondary tier if initial configuration is not done.
+ if not cutils.is_initial_config_complete():
+ msg = _("Operation denied. Adding secondary tiers to a cluster requires "
+ "initial configuration to be complete and controller node unlocked.")
+ raise wsme.exc.ClientSideError(msg)
if utils.is_aio_system(pecan.request.dbapi):
# Deny adding secondary tiers if primary tier backend is not configured
- # for cluster. When secondary tier is added we also query ceph to create
- # pools and set replication therefore cluster has to be up.
+ # for cluster.
clusterId = tier.get('forclusterid') or tier.get('cluster_uuid')
cluster_tiers = pecan.request.dbapi.storage_tier_get_by_cluster(clusterId)
configured = False if cluster_tiers else True
@@ -430,9 +436,20 @@ def _check(op, tier):
msg = _("Operation denied. Adding secondary tiers to a cluster requires "
"primary tier storage backend of this cluster to be configured.")
raise wsme.exc.ClientSideError(msg)
+ else:
+ # Deny adding secondary tier if ceph is down on standard
+ num_monitors, required_monitors, __ = \
+ self._ceph.get_monitors_status(pecan.request.dbapi)
+ if num_monitors < required_monitors:
+ raise wsme.exc.ClientSideError(_(
+ "Operation denied. Ceph is not operational. "
+ "Only %d storage monitor available. "
+ "At least %s unlocked and enabled hosts with "
+ "monitors are required. Please ensure hosts "
+ "with monitors are unlocked and enabled.") %
+ (num_monitors, required_monitors))
elif op == "delete":
if tier['name'] == constants.SB_TIER_DEFAULT_NAMES[
constants.SB_TIER_TYPE_CEPH]:
raise wsme.exc.ClientSideError(_("Storage Tier %s cannot be "
@@ -484,7 +501,7 @@ def _create(self, tier, iprofile=None):
tier = _set_defaults(tier)
# Semantic checks
tier = _check("add", tier)
tier = _check(self, "add", tier)
LOG.info("storage_tier._create with validated params: %s" % tier)
@@ -524,7 +541,7 @@ def _delete(self, tier_uuid):
tier = objects.storage_tier.get_by_uuid(pecan.request.context, tier_uuid)
# Semantic checks
_check("delete", tier.as_dict())
_check(self, "delete", tier.as_dict())
# update the crushmap by removing the tier
try:

View File

@@ -629,13 +629,15 @@ class CephApiOperator(object):
return True
def get_monitors_status(self, db_api):
- # first check that the monitors are available in sysinv
num_active_monitors = 0
num_inv_monitors = 0
- required_monitors = constants.MIN_STOR_MONITORS
+ if utils.is_aio_system(pecan.request.dbapi):
+ required_monitors = constants.MIN_STOR_MONITORS_AIO
+ else:
+ required_monitors = constants.MIN_STOR_MONITORS_MULTINODE
quorum_names = []
inventory_monitor_names = []
+ # first check that the monitors are available in sysinv
monitor_list = db_api.ceph_mon_get_list()
for mon in monitor_list:
ihost = db_api.ihost_get(mon['forihostid'])
@@ -670,9 +672,16 @@ class CephApiOperator(object):
# the intersection of the sysinv reported unlocked-available monitor
# hosts and the monitors reported in the quorum via the ceph API.
active_monitors = list(set(inventory_monitor_names) & set(quorum_names))
LOG.info("Active ceph monitors = %s" % str(active_monitors))
num_active_monitors = len(active_monitors)
+ if (num_inv_monitors and num_active_monitors == 0 and
+ cutils.is_initial_config_complete() and
+ not utils.is_aio_system(pecan.request.dbapi)):
+ # The active controller always has a monitor.
+ # We are on standard or storage, initial configuration
+ # was completed and Ceph is down so we can't check if
+ # it is working. Assume it is.
+ num_active_monitors = 1
LOG.info("Active ceph monitors = %s" % str(active_monitors))
return num_active_monitors, required_monitors, active_monitors
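
Callers unpack the returned tuple and, with this change, report the
context-dependent required_monitors in their error messages instead of the
old MIN_STOR_MONITORS constant. A minimal sketch of that caller pattern,
using a stub in place of the real operator:

def get_monitors_status_stub():
    # Stand-in for CephApiOperator.get_monitors_status(); the real method
    # returns (num_active_monitors, required_monitors, active_monitors).
    return 1, 2, ['controller-0']

num_monitors, required_monitors, __ = get_monitors_status_stub()
if num_monitors < required_monitors:
    print("Only %d storage monitor available. At least %s unlocked and "
          "enabled hosts with monitors are required." %
          (num_monitors, required_monitors))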

View File

@@ -480,7 +480,8 @@ CEPH_AIO_SX_MODEL = 'aio-sx'
CEPH_UNDEFINED_MODEL = 'undefined'
# Storage: Minimum number of monitors
- MIN_STOR_MONITORS = 2
+ MIN_STOR_MONITORS_MULTINODE = 2
+ MIN_STOR_MONITORS_AIO = 1
BACKUP_OVERHEAD = 20

View File

@@ -33,6 +33,7 @@ from sysinv.conductor import manager
from sysinv.conductor import rpcapi
from sysinv.common import ceph as ceph_utils
from sysinv.common import constants
+ from sysinv.common import utils as cutils
from sysinv.common.storage_backend_conf import StorageBackendConfig
from sysinv.db import api as dbapi
from sysinv.openstack.common import context
@@ -44,10 +45,17 @@ from sysinv.tests.db import utils as dbutils
class StorageTierIndependentTCs(base.FunctionalTest):
set_crushmap_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'set_crushmap')
+ set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status')
+ set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete')
def setUp(self):
super(StorageTierIndependentTCs, self).setUp()
self.mock_set_crushmap = self.set_crushmap_patcher.start()
+ self.set_monitors_status_patcher = self.set_monitors_status_patcher.start()
+ self.set_monitors_status_patcher.return_value = \
+ [3, 2, ['controller-0', 'controller-1', 'storage-0']]
+ self.set_is_initial_config_patcher = self.set_is_initial_config_patcher.start()
+ self.set_is_initial_config_patcher.return_value = True
self.system = dbutils.create_test_isystem()
self.cluster = dbutils.create_test_cluster(system_id=self.system.id, name='ceph_cluster')
self.load = dbutils.create_test_load()
@@ -56,6 +64,8 @@ class StorageTierIndependentTCs(base.FunctionalTest):
def tearDown(self):
super(StorageTierIndependentTCs, self).tearDown()
self.set_crushmap_patcher.stop()
+ self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop()
+ self.set_is_initial_config_patcher.stop()
def assertDeleted(self, fullPath):
self.get_json(fullPath, expect_errors=True) # Make sure this line raises an error
@@ -531,10 +541,17 @@ class StorageTierIndependentTCs(base.FunctionalTest):
class StorageTierDependentTCs(base.FunctionalTest):
set_crushmap_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'set_crushmap')
+ set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status')
+ set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete')
def setUp(self):
super(StorageTierDependentTCs, self).setUp()
self.mock_set_crushmap = self.set_crushmap_patcher.start()
+ self.set_monitors_status_patcher = self.set_monitors_status_patcher.start()
+ self.set_monitors_status_patcher.return_value = \
+ [3, 2, ['controller-0', 'controller-1', 'storage-0']]
+ self.set_is_initial_config_patcher = self.set_is_initial_config_patcher.start()
+ self.set_is_initial_config_patcher.return_value = True
self.service = manager.ConductorManager('test-host', 'test-topic')
self.service.dbapi = dbapi.get_instance()
self.context = context.get_admin_context()
@@ -547,6 +564,8 @@ class StorageTierDependentTCs(base.FunctionalTest):
def tearDown(self):
super(StorageTierDependentTCs, self).tearDown()
self.set_crushmap_patcher.stop()
+ self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop()
+ self.set_is_initial_config_patcher.stop()
def assertDeleted(self, fullPath):
self.get_json(fullPath, expect_errors=True) # Make sure this line raises an error