Remove additional ceph-mon on USM rollback

On stx-10 two additional monitors were added for AIO-DX, one
for each controller, so when rolling back from stx-10 to a
previous release, these additional monitors must be removed
from ceph, along with the corresponding pmon symlink, otherwise
multiple ceph failures are observed after rolling back the system.

This commit adds these tasks to USM rollback, so that when a
controller is rolled back (host-rollback) it removes its own
monitor and the corresponding pmon symlink as well.

Test Plan
PASS: AIO-DX/AIO-SX - rollback controllers and verify no
      failures and no active alarms

Story: 2010676
Task: 50820

Signed-off-by: Heitor Matsui <heitorvieira.matsui@windriver.com>
Change-Id: I128c5ab72d315f5e4bb401748ced7cc47fa89d8d
This commit is contained in:
Heitor Matsui
2024-08-12 15:58:30 -03:00
parent dbbc1d8747
commit 2e4e579e8c
2 changed files with 29 additions and 0 deletions

View File

@@ -8,6 +8,7 @@ import filecmp
import glob
import os
import shutil
import socket
import subprocess
import software.constants as constants
@@ -182,6 +183,32 @@ class CreateUSMUpgradeInProgressFlag(BaseHook):
LOG.info("Created %s flag" % flag_file)
class RemoveCephMonHook(BaseHook):
    """
    Remove the additional ceph-mon added for each controller.

    On an AIO-DX controller this hook removes the monitor named after the
    local hostname from the ceph cluster and then deletes the matching
    pmon entry. On any other configuration it does nothing.
    """
    # pmon entry of the additional monitor, located under the /ostree/1 tree
    PMON_FILE = "/ostree/1/etc/pmon.d/ceph-fixed-mon.conf"

    def run(self):
        """
        Remove this host's monitor from ceph along with its pmon entry.

        Raises:
            subprocess.CalledProcessError: if "ceph mon rm" exits non-zero.
            OSError: if the pmon entry exists but cannot be removed.
        """
        system_type = utils.get_platform_conf("system_type")
        system_mode = utils.get_platform_conf("system_mode")
        nodetype = utils.get_platform_conf("nodetype")
        hostname = socket.gethostname()

        # additional monitors were added only for AIO-DX
        is_aio_dx_controller = (
            system_type == constants.SYSTEM_TYPE_ALL_IN_ONE and
            system_mode != constants.SYSTEM_MODE_SIMPLEX and
            nodetype == constants.CONTROLLER
        )
        if not is_aio_dx_controller:
            return

        cmd = ["ceph", "mon", "rm", hostname]
        try:
            subprocess.check_call(cmd)
            LOG.info("Removed mon.%s from ceph cluster." % hostname)
        except subprocess.CalledProcessError as e:
            LOG.exception("Failure removing mon.%s from ceph cluster: %s" % (hostname, str(e)))
            raise

        # A missing entry means a previous run already removed it; treating
        # that as success keeps the hook safe to re-run on a retried rollback.
        try:
            os.unlink(self.PMON_FILE)
        except FileNotFoundError:
            LOG.info("%s not found, nothing to remove from pmon." % self.PMON_FILE)
        else:
            LOG.info("Removed %s from pmon." % self.PMON_FILE)
# pre and post keywords
# POST is used as a key in AGENT_HOOKS to group the hooks of that phase;
# PRE is presumably its counterpart for the earlier phase (confirm against
# the full AGENT_HOOKS definition).
PRE = "pre"
POST = "post"
@@ -209,6 +236,7 @@ AGENT_HOOKS = {
],
POST: [
ReconfigureKernelHook,
RemoveCephMonHook,
],
},
}

View File

@@ -123,6 +123,7 @@ LAST_IN_SYNC = "last_in_sync"
# Alarm instance id built as "<host entity type>=<floating controller hostname>"
ALARM_INSTANCE_ID_OUT_OF_SYNC = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
CONTROLLER_FLOATING_HOSTNAME)
# Value compared against the "system_type" platform configuration entry
SYSTEM_TYPE_ALL_IN_ONE = "All-in-one"
# Values for the "system_mode" platform configuration entry
SYSTEM_MODE_SIMPLEX = "simplex"
SYSTEM_MODE_DUPLEX = "duplex"