Fix mon state of workers in the database

When a monitor is created on a worker that is still LOCKED, the runtime puppet manifest cannot be applied on that host, so the monitor's state in the database can never be updated. To resolve this, when a monitor is created on a LOCKED host, its initial state in the database is set to 'configuring-on-unlock'. After the host is unlocked, it is checked whether the host is 'available', and the monitor state is then changed to 'configured'.

Test Plan:
- PASS: Fresh install on STD with DM and check output of 'system ceph-mon-list'
- PASS: Move monitor from worker-0 to worker-1

Closes-Bug: 2083558
Change-Id: I40ed513118b2a9b5414907c565b56a3d8adb6962
Signed-off-by: Erickson Silva de Oliveira <Erickson.SilvadeOliveira@windriver.com>
2024-10-02 17:41:56 -03:00
parent 1a7444daae
commit 64f4eee452
2 changed files with 24 additions and 30 deletions
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py
@@ -482,6 +482,7 @@ def _create(ceph_mon):
            _("Host not found uuid: %s ." % ceph_mon['ihost_uuid']))

    ceph_mon['forihostid'] = chost['id']
+    ceph_mon['hostname'] = chost['hostname']
    ceph_mon['device_path'] = chost['rootfs_device']

    # check if ceph monitor is already configured
@@ -539,21 +540,15 @@ def _create(ceph_mon):
    # In case we add the monitor on a worker node, the state
    # and task must be set properly.
    if chost.personality == constants.WORKER:
-        ceph_mon['state'] = constants.SB_STATE_CONFIGURING
-        ctrls = pecan.request.dbapi.ihost_get_by_personality(
-             constants.CONTROLLER)
-        valid_ctrls = [
-                ctrl for ctrl in ctrls if
-                (ctrl.administrative == constants.ADMIN_LOCKED and
-                 ctrl.availability == constants.AVAILABILITY_ONLINE) or
-                (ctrl.administrative == constants.ADMIN_UNLOCKED and
-                 ctrl.operational == constants.OPERATIONAL_ENABLED)]
+        if (chost.administrative == constants.ADMIN_UNLOCKED and
+                chost.operational == constants.OPERATIONAL_ENABLED):
+            ceph_mon['state'] = constants.SB_STATE_CONFIGURING
+        elif (chost.administrative == constants.ADMIN_LOCKED and
+                chost.availability == constants.AVAILABILITY_ONLINE):
+            ceph_mon['state'] = constants.SB_STATE_CONFIGURING_ON_UNLOCK

-        tasks = {}
-        for ctrl in valid_ctrls:
-            tasks[ctrl.hostname] = constants.SB_STATE_CONFIGURING
-
-        ceph_mon['task'] = str(tasks)
+        task = {ceph_mon['hostname']: ceph_mon['state']}
+        ceph_mon['task'] = str(task)

    LOG.info("Creating ceph-mon DB entry for host uuid %s: %s" %
             (ceph_mon['ihost_uuid'], str(ceph_mon)))
@@ -562,11 +557,12 @@ def _create(ceph_mon):
    # We update the base config when adding a dynamic monitor.
    # At this moment the only possibility to add a dynamic monitor
    # is on a worker node, so we check for that.
-    if chost.personality == constants.WORKER:
+    if (chost.personality == constants.WORKER and
+            new_ceph_mon['state'] == constants.SB_STATE_CONFIGURING):
        try:
            # Storage nodes are not supported on a controller based
            # storage model.
-            personalities = [constants.CONTROLLER, constants.WORKER]
+            personalities = [constants.WORKER]
            pecan.request.rpcapi.update_ceph_base_config(
                pecan.request.context,
                personalities)
--- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
@@ -6605,6 +6605,16 @@ class ConductorManager(service.PeriodicService):
                values = {'state': constants.SB_STATE_CONFIGURED}
                self.dbapi.istor_update(stor.uuid, values)

+    def _clear_ceph_mon_state(self, ihost_uuid):
+        mons = self.dbapi.ceph_mon_get_by_ihost(ihost_uuid)
+        for mon in mons:
+            if mon.state != constants.SB_STATE_CONFIGURED:
+                LOG.info("State of ceph_mon: '%s' is '%s', resetting to '%s'." %
+                         (mon.uuid, mon.state,
+                          constants.SB_STATE_CONFIGURED))
+                values = {'state': constants.SB_STATE_CONFIGURED, 'task': constants.SB_TASK_NONE}
+                self.dbapi.ceph_mon_update(mon.uuid, values)
+
    def iplatform_update_by_ihost(self, context,
                                  ihost_uuid, imsg_dict):
        """Update node data when sysinv-agent is started after a boot.
@@ -6703,6 +6713,7 @@ class ConductorManager(service.PeriodicService):
                        constants.SB_TYPE_CEPH):
                    # This should be run once after a node boot
                    self._clear_ceph_stor_state(ihost_uuid)
+                    self._clear_ceph_mon_state(ihost_uuid)

                # On first_report which occurs on restart, check if the
                # reboot flag matches the applied config; as it is possible
@@ -11556,20 +11567,7 @@ class ConductorManager(service.PeriodicService):
            LOG.error("Host %s does not have ceph monitor!" % host_uuid)
            return

-        tasks = {host.hostname: constants.SB_STATE_CONFIGURED}
-        values = None
-        # Get the hosts that have ceph_mons
-        ceph_mon_hosts = self.dbapi.ceph_mon_get_list()
-        for host in ceph_mon_hosts:
-            if (tasks.get(host.hostname, '') != constants.SB_STATE_CONFIGURED):
-                # There are other hosts to get configured.
-                # Updating with current progress.
-                values = {'state': constants.SB_STATE_CONFIGURED, 'task': str(tasks)}
-                break
-        else:
-            # All hosts have completed configuration.
-            # Update the state and cleanup the tasks field.
-            values = {'state': constants.SB_STATE_CONFIGURED, 'task': None}
+        values = {'state': constants.SB_STATE_CONFIGURED, 'task': constants.SB_TASK_NONE}

        self.dbapi.ceph_mon_update(monitor.uuid, values)
        LOG.info("Ceph monitor update succeeded on host: %s" % host_uuid)