Fix mon state of workers in the database

When a monitor is created on a worker that is still LOCKED, the
runtime puppet manifest cannot be applied on that worker, which in
turn makes it impossible to update the monitor state in the database.

To resolve this, when a monitor is created on a LOCKED host, its
initial state in the database is set to 'configuring-on-unlock'.
After the host is unlocked, it is checked whether the host is
'available', and the mon state is then changed to 'configured'.

Test Plan:
- PASS: Fresh install on STD with DM and check
        output of 'system ceph-mon-list'
- PASS: Move monitor from worker-0 to worker-1

Closes-Bug: 2083558

Change-Id: I40ed513118b2a9b5414907c565b56a3d8adb6962
Signed-off-by: Erickson Silva de Oliveira <Erickson.SilvadeOliveira@windriver.com>
This commit is contained in:
Erickson Silva de Oliveira
2024-10-02 17:41:56 -03:00
parent 1a7444daae
commit 64f4eee452
2 changed files with 24 additions and 30 deletions

View File

@@ -482,6 +482,7 @@ def _create(ceph_mon):
_("Host not found uuid: %s ." % ceph_mon['ihost_uuid']))
ceph_mon['forihostid'] = chost['id']
ceph_mon['hostname'] = chost['hostname']
ceph_mon['device_path'] = chost['rootfs_device']
# check if ceph monitor is already configured
@@ -539,21 +540,15 @@ def _create(ceph_mon):
# In case we add the monitor on a worker node, the state
# and task must be set properly.
if chost.personality == constants.WORKER:
ceph_mon['state'] = constants.SB_STATE_CONFIGURING
ctrls = pecan.request.dbapi.ihost_get_by_personality(
constants.CONTROLLER)
valid_ctrls = [
ctrl for ctrl in ctrls if
(ctrl.administrative == constants.ADMIN_LOCKED and
ctrl.availability == constants.AVAILABILITY_ONLINE) or
(ctrl.administrative == constants.ADMIN_UNLOCKED and
ctrl.operational == constants.OPERATIONAL_ENABLED)]
if (chost.administrative == constants.ADMIN_UNLOCKED and
chost.operational == constants.OPERATIONAL_ENABLED):
ceph_mon['state'] = constants.SB_STATE_CONFIGURING
elif (chost.administrative == constants.ADMIN_LOCKED and
chost.availability == constants.AVAILABILITY_ONLINE):
ceph_mon['state'] = constants.SB_STATE_CONFIGURING_ON_UNLOCK
tasks = {}
for ctrl in valid_ctrls:
tasks[ctrl.hostname] = constants.SB_STATE_CONFIGURING
ceph_mon['task'] = str(tasks)
task = {ceph_mon['hostname']: ceph_mon['state']}
ceph_mon['task'] = str(task)
LOG.info("Creating ceph-mon DB entry for host uuid %s: %s" %
(ceph_mon['ihost_uuid'], str(ceph_mon)))
@@ -562,11 +557,12 @@ def _create(ceph_mon):
# We update the base config when adding a dynamic monitor.
# At this moment the only possibility to add a dynamic monitor
# is on a worker node, so we check for that.
if chost.personality == constants.WORKER:
if (chost.personality == constants.WORKER and
new_ceph_mon['state'] == constants.SB_STATE_CONFIGURING):
try:
# Storage nodes are not supported on a controller based
# storage model.
personalities = [constants.CONTROLLER, constants.WORKER]
personalities = [constants.WORKER]
pecan.request.rpcapi.update_ceph_base_config(
pecan.request.context,
personalities)

View File

@@ -6605,6 +6605,16 @@ class ConductorManager(service.PeriodicService):
values = {'state': constants.SB_STATE_CONFIGURED}
self.dbapi.istor_update(stor.uuid, values)
# Reset the ceph monitor records of the given host to the configured state.
# Looks up the host's monitors with dbapi.ceph_mon_get_by_ihost() and, for a
# monitor whose state is not SB_STATE_CONFIGURED, logs the transition and
# writes state=SB_STATE_CONFIGURED / task=SB_TASK_NONE via
# dbapi.ceph_mon_update().
# NOTE(review): indentation is not preserved in this view, so whether the
# update is nested under the `if` (as the log message suggests) or runs for
# every monitor must be confirmed against the actual file.
def _clear_ceph_mon_state(self, ihost_uuid):
mons = self.dbapi.ceph_mon_get_by_ihost(ihost_uuid)
for mon in mons:
# Monitors already marked configured do not take this branch.
if mon.state != constants.SB_STATE_CONFIGURED:
LOG.info("State of ceph_mon: '%s' is '%s', resetting to '%s'." %
(mon.uuid, mon.state,
constants.SB_STATE_CONFIGURED))
# Clear the task field together with the state change.
values = {'state': constants.SB_STATE_CONFIGURED, 'task': constants.SB_TASK_NONE}
self.dbapi.ceph_mon_update(mon.uuid, values)
def iplatform_update_by_ihost(self, context,
ihost_uuid, imsg_dict):
"""Update node data when sysinv-agent is started after a boot.
@@ -6703,6 +6713,7 @@ class ConductorManager(service.PeriodicService):
constants.SB_TYPE_CEPH):
# This should be run once after a node boot
self._clear_ceph_stor_state(ihost_uuid)
self._clear_ceph_mon_state(ihost_uuid)
# On first_report which occurs on restart, check if the
# reboot flag matches the applied config; as it is possible
@@ -11556,20 +11567,7 @@ class ConductorManager(service.PeriodicService):
LOG.error("Host %s does not have ceph monitor!" % host_uuid)
return
tasks = {host.hostname: constants.SB_STATE_CONFIGURED}
values = None
# Get the hosts that have ceph_mons
ceph_mon_hosts = self.dbapi.ceph_mon_get_list()
for host in ceph_mon_hosts:
if (tasks.get(host.hostname, '') != constants.SB_STATE_CONFIGURED):
# There are other hosts to get configured.
# Updating with current progress.
values = {'state': constants.SB_STATE_CONFIGURED, 'task': str(tasks)}
break
else:
# All hosts have completed configuration.
# Update the state and cleanup the tasks field.
values = {'state': constants.SB_STATE_CONFIGURED, 'task': None}
values = {'state': constants.SB_STATE_CONFIGURED, 'task': constants.SB_TASK_NONE}
self.dbapi.ceph_mon_update(monitor.uuid, values)
LOG.info("Ceph monitor update succeeded on host: %s" % host_uuid)