Update config tracking for reboot required config

The reboot config tracking needs to be updated to ensure
it aligns the target with the actual applied config.
It must also initialize and only clear the config for
the active controller if a reboot has actually occurred.

On host-swact, only the target controller is checked for the
config up-to-date condition, rather than both the source and
target controllers, as the source active controller would still
track the config requirement in the persisted database.

Improve traceback of config requests.

Tests Performed:
  Perform host-swact with active controller config out of date
  Perform host-swact with standby controller config out of date
  Verify AIO-SX and Duplex config operations
  Restart sysinv with reboot config set and perform runtime config
  Perform runtime config while reboot required config set
  Perform runtime config while reboot required config cleared

Change-Id: I339ad82a2c7b37ac1c97c3eb790a231a40914250
Closes-Bug: 1914085
Signed-off-by: John Kung <john.kung@windriver.com>
This commit is contained in:
John Kung 2021-02-01 15:23:04 -06:00
parent 546dea3be3
commit 4cd77f035a
5 changed files with 180 additions and 31 deletions

View File

@ -5924,22 +5924,29 @@ class HostController(rest.RestController):
self._check_swact_device_image_update(hostupdate.ihost_orig,
ihost_ctr, force_swact)
if ihost_ctr.config_target:
if ihost_ctr.config_target != ihost_ctr.config_applied:
try:
upgrade = \
pecan.request.dbapi.software_upgrade_get_one()
except exception.NotFound:
upgrade = None
if upgrade and upgrade.state == \
constants.UPGRADE_ABORTING_ROLLBACK:
pass
else:
raise wsme.exc.ClientSideError(
_("%s target Config %s not yet applied."
" Apply target Config via Lock/Unlock prior"
" to Swact") %
(ihost_ctr.hostname, ihost_ctr.config_target))
if ihost_ctr.config_target and\
ihost_ctr.config_target != ihost_ctr.config_applied:
try:
upgrade = \
pecan.request.dbapi.software_upgrade_get_one()
except exception.NotFound:
upgrade = None
if upgrade and upgrade.state == \
constants.UPGRADE_ABORTING_ROLLBACK:
pass
elif not utils.is_host_active_controller(ihost_ctr):
# This condition occurs when attempting to host-swact
# away from "active" (platform services) controller.
#
# Since api (sysinv, sm) allows for host-swact
# services away from a "standby" controller, this enforcement
# is not required for host-swact to the already
# active controller.
raise wsme.exc.ClientSideError(
_("%s target Config %s not yet applied."
" Apply target Config via Lock/Unlock prior"
" to Swact") %
(ihost_ctr.hostname, ihost_ctr.config_target))
self._semantic_check_swact_upgrade(hostupdate.ihost_orig,
ihost_ctr,

View File

@ -1323,9 +1323,9 @@ INSTALL_STATE_COMPLETED = "completed"
tox_work_dir = os.environ.get("TOX_WORK_DIR")
if tox_work_dir:
SYSINV_LOCK_PATH = tox_work_dir
SYSINV_VOLATILE_PATH = tox_work_dir
else:
SYSINV_LOCK_PATH = os.path.join(tsc.VOLATILE_PATH, "sysinv")
SYSINV_VOLATILE_PATH = os.path.join(tsc.VOLATILE_PATH, "sysinv")
NETWORK_CONFIG_LOCK_FILE = os.path.join(
tsc.VOLATILE_PATH, "apply_network_config.lock")

View File

@ -1281,25 +1281,25 @@ def bytes_to_MiB(bytes_number):
def check_lock_path():
if os.path.isdir(constants.SYSINV_LOCK_PATH):
if os.path.isdir(constants.SYSINV_VOLATILE_PATH):
return
try:
uid = pwd.getpwnam(constants.SYSINV_USERNAME).pw_uid
gid = grp.getgrnam(constants.SYSINV_GRPNAME).gr_gid
os.makedirs(constants.SYSINV_LOCK_PATH)
os.chown(constants.SYSINV_LOCK_PATH, uid, gid)
os.makedirs(constants.SYSINV_VOLATILE_PATH)
os.chown(constants.SYSINV_VOLATILE_PATH, uid, gid)
LOG.info("Created directory=%s" %
constants.SYSINV_LOCK_PATH)
constants.SYSINV_VOLATILE_PATH)
except OSError as e:
LOG.exception("makedir %s OSError=%s encountered" %
(constants.SYSINV_LOCK_PATH, e))
(constants.SYSINV_VOLATILE_PATH, e))
def synchronized(name, external=True):
if external:
check_lock_path()
lock_path = constants.SYSINV_LOCK_PATH
lock_path = constants.SYSINV_VOLATILE_PATH
else:
lock_path = None
return lockutils.synchronized(name,

View File

@ -43,6 +43,7 @@ import shutil
import socket
import tempfile
import time
import traceback
import uuid
import xml.etree.ElementTree as ElementTree
from contextlib import contextmanager
@ -162,6 +163,9 @@ CONFIG_CONTROLLER_FINI_FLAG = os.path.join(tsc.VOLATILE_PATH,
".config_controller_fini")
CONFIG_FAIL_FLAG = os.path.join(tsc.VOLATILE_PATH, ".config_fail")
ACTIVE_CONFIG_REBOOT_REQUIRED = os.path.join(
constants.SYSINV_VOLATILE_PATH, ".reboot_required")
# configuration UUID reboot required flag (bit)
CONFIG_REBOOT_REQUIRED = (1 << 127)
@ -187,6 +191,7 @@ class ConductorManager(service.PeriodicService):
self.dbapi = None
self.fm_api = None
self.fm_log = None
self.host_uuid = None
self._app = None
self._ceph = None
self._ceph_api = ceph.CephWrapper(
@ -230,6 +235,7 @@ class ConductorManager(service.PeriodicService):
self.dbapi = dbapi.get_instance()
self.fm_api = fm_api.FaultAPIs()
self.fm_log = fm.FmCustomerLog()
self.host_uuid = self._get_active_controller_uuid()
self._openstack = openstack.OpenStackOperator(self.dbapi)
self._puppet = puppet.PuppetOperator(self.dbapi)
@ -237,6 +243,7 @@ class ConductorManager(service.PeriodicService):
# create /var/run/sysinv if required. On DOR, the manifests
# may not run to create this volatile directory.
cutils.check_lock_path()
self._initialize_active_controller_reboot_config()
system = self._create_default_system()
@ -267,6 +274,21 @@ class ConductorManager(service.PeriodicService):
# Save our start time for time limited init actions
self._start_time = timeutils.utcnow()
def _get_active_controller_uuid(self):
    """Return the uuid of the active controller host.

    The host is looked up through utils.HostHelper.get_active_controller
    using this manager's dbapi.

    :returns: the uuid of the host found, or None when the lookup
              yields no host.
    """
    active_host = utils.HostHelper.get_active_controller(self.dbapi)
    return active_host.uuid if active_host else None
def _initialize_active_controller_reboot_config(self):
    """Seed the reboot config tracking for the active controller.

    Covers the case where this process has been restarted: when
    self.host_uuid is set and the ACTIVE_CONFIG_REBOOT_REQUIRED flag file
    exists, the host's current config_target is read from the database
    and stored as the only entry of its reboot config list.
    """
    if not self.host_uuid:
        # no active controller uuid was recorded, nothing to initialize
        return
    if not os.path.exists(ACTIVE_CONFIG_REBOOT_REQUIRED):
        # flag file is absent, leave the tracking untouched
        return

    active_host = self.dbapi.ihost_get(self.host_uuid)
    reboot_configs = [active_host.config_target]
    self._host_reboot_config_uuid[self.host_uuid] = reboot_configs
def periodic_tasks(self, context, raise_on_error=False):
    """Run the periodic tasks, which execute at pre-specified intervals.

    :param context: request context handed through to run_periodic_tasks
    :param raise_on_error: forwarded unchanged to run_periodic_tasks
    :returns: whatever run_periodic_tasks returns
    """
    outcome = self.run_periodic_tasks(context,
                                      raise_on_error=raise_on_error)
    return outcome
@ -6122,7 +6144,7 @@ class ConductorManager(service.PeriodicService):
if (host.clock_synchronization == constants.PTP and
host.administrative == constants.ADMIN_UNLOCKED and
host.operational == constants.OPERATIONAL_ENABLED and
not (self._config_out_of_date(host) and
not (self._config_out_of_date(context, host) and
self._config_is_reboot_required(host.config_target))):
runtime_hosts.append(host.uuid)
@ -8677,7 +8699,7 @@ class ConductorManager(service.PeriodicService):
self._host_reboot_config_uuid[ihost_uuid].remove(config_uuid)
except ValueError:
LOG.info("_remove_config_from_reboot_config_list fail"
" host:%s", ihost_uuid)
" host:%s config_uuid %s" % (ihost_uuid, config_uuid))
pass
def _clear_config_from_reboot_config_list(self, ihost_uuid):
@ -8690,10 +8712,19 @@ class ConductorManager(service.PeriodicService):
" host: %s", ihost_uuid)
pass
def _config_out_of_date(self, ihost_obj):
def _config_out_of_date(self, context, ihost_obj):
def _align_config_target(context, ihost_obj, applied):
LOG.info("Config target with no reboot required, "
"align host_uuid=%s target applied=%s" %
(ihost_obj.uuid, applied))
ihost_obj.config_target = applied
ihost_obj.save(context)
target = ihost_obj.config_target
applied = ihost_obj.config_applied
applied_reboot = None
if applied is not None:
try:
applied_reboot = self._config_set_reboot_required(applied)
@ -8731,10 +8762,21 @@ class ConductorManager(service.PeriodicService):
elif target == applied_reboot:
if ihost_obj.uuid in self._host_reboot_config_uuid:
if len(self._host_reboot_config_uuid[ihost_obj.uuid]) == 0:
# There is no further config required for host, update config_target
_align_config_target(context, ihost_obj, applied)
return False
else:
LOG.info("%s: %s reboot required config_applied %s host_reboot_config %s " %
(ihost_obj.hostname, ihost_obj.uuid, applied,
self._host_reboot_config_uuid[ihost_obj.uuid]))
return True
else:
return False
if self.host_uuid == ihost_obj.uuid:
# In the active controller case, can clear if no reboot required config.
# This is tracked on initialization and protected from host-swact semantic.
_align_config_target(context, ihost_obj, applied)
return False
return True
else:
LOG.warn("%s: iconfig out of date: target %s, applied %s " %
(hostname, target, applied))
@ -8790,7 +8832,7 @@ class ConductorManager(service.PeriodicService):
entity_instance_id = self._get_fm_entity_instance_id(ihost_obj)
save_required = False
if self._config_out_of_date(ihost_obj) or \
if self._config_out_of_date(context, ihost_obj) or \
status == constants.CONFIG_STATUS_REINSTALL:
LOG.warn("SYS_I Raise system config alarm: host %s "
"config applied: %s vs. target: %s." %
@ -8955,6 +8997,11 @@ class ConductorManager(service.PeriodicService):
: update
:return The UUID of the configuration generation
"""
def _trace_caller(personalities, host_uuids, reboot, config_uuid):
tb = traceback.format_stack()
LOG.info("_config_update_hosts personalities=%s host_uuids=%s reboot=%s "
"config_uuid=%s tb=%s" %
(personalities, host_uuids, reboot, config_uuid, tb[-3]))
# generate a new configuration identifier for this update
config_uuid = uuid.uuid4()
@ -8968,6 +9015,8 @@ class ConductorManager(service.PeriodicService):
else:
config_uuid = self._config_clear_reboot_required(config_uuid)
_trace_caller(personalities, host_uuids, reboot, config_uuid)
if not host_uuids:
hosts = self.dbapi.ihost_get_list()
else:
@ -8981,6 +9030,10 @@ class ConductorManager(service.PeriodicService):
else:
self._host_reboot_config_uuid[host.uuid] = []
self._host_reboot_config_uuid[host.uuid].append(config_uuid)
if host.uuid == self.host_uuid:
# This ensures that the host_reboot_config_uuid tracking
# on this controller is aware that a reboot is required
cutils.touch(ACTIVE_CONFIG_REBOOT_REQUIRED)
self._update_host_config_target(context, host, config_uuid)
LOG.info("_config_update_hosts config_uuid=%s" % config_uuid)

View File

@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*-
#
#
# Copyright (c) 2013-2020 Wind River Systems, Inc.
# Copyright (c) 2013-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -2321,7 +2321,9 @@ class TestPatchStdDuplexControllerAction(TestHost):
invprovision=constants.PROVISIONED,
administrative=constants.ADMIN_UNLOCKED,
operational=constants.OPERATIONAL_ENABLED,
availability=constants.AVAILABILITY_ONLINE)
availability=constants.AVAILABILITY_ONLINE,
config_target=None,
config_applied=None)
# Swact to controller-0
response = self._patch_host_action(c1_host['hostname'],
@ -2345,6 +2347,93 @@ class TestPatchStdDuplexControllerAction(TestHost):
result = self.get_json('/ihosts/%s' % c1_host['hostname'])
self.assertEqual(constants.NONE_ACTION, result['action'])
def test_swact_action_config_out_of_date_on_active(self):
    """Swact patched on controller-1 returns OK.

    controller-0 is created with a config_target that differs from its
    config_applied, while controller-1 is created with both equal.
    """
    # State shared by both controllers: provisioned, unlocked, enabled
    # and online.
    host_state = {
        'invprovision': constants.PROVISIONED,
        'administrative': constants.ADMIN_UNLOCKED,
        'operational': constants.OPERATIONAL_ENABLED,
        'availability': constants.AVAILABILITY_ONLINE,
    }

    # Create controller-0 with its config out of date
    c0_values = dict(
        host_state,
        config_target='89fbefe7-7b43-4bd2-9500-663b33df2e57',
        config_applied='f9fbefe7-7b43-4bd2-9500-663b33df2e57')
    self._create_controller_0(**c0_values)

    # Create controller-1 with its config up to date
    c1_values = dict(
        host_state,
        config_target='b447d703-b581-4bf6-bcbd-f99ddcbe4663',
        config_applied='b447d703-b581-4bf6-bcbd-f99ddcbe4663')
    c1_host = self._create_controller_1(**c1_values)

    # controller-0 already active, per comment 'Behave as if the API is
    # running on controller-0'; so swact from controller-1 is allowed
    response = self._patch_host_action(
        c1_host['hostname'],
        constants.SWACT_ACTION,
        'sysinv-test',
        expect_errors=True)

    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
def test_swact_action_from_config_out_of_date(self):
    """Swact patched on a config out-of-date controller-0 returns OK.

    controller-0 is created with a config_target that differs from its
    config_applied, while controller-1 is created with both equal.
    """
    # State shared by both controllers: provisioned, unlocked, enabled
    # and online.
    host_state = {
        'invprovision': constants.PROVISIONED,
        'administrative': constants.ADMIN_UNLOCKED,
        'operational': constants.OPERATIONAL_ENABLED,
        'availability': constants.AVAILABILITY_ONLINE,
    }

    # Create active controller-0 with config out of date
    c0_values = dict(
        host_state,
        config_target='89fbefe7-7b43-4bd2-9500-663b33df2e57',
        config_applied='f9fbefe7-7b43-4bd2-9500-663b33df2e57')
    c0_host = self._create_controller_0(**c0_values)

    # Create controller-1 with its config up to date
    c1_values = dict(
        host_state,
        config_target='b447d703-b581-4bf6-bcbd-f99ddcbe4663',
        config_applied='b447d703-b581-4bf6-bcbd-f99ddcbe4663')
    self._create_controller_1(**c1_values)

    # Swact from active controller-0 to controller-1
    response = self._patch_host_action(
        c0_host['hostname'],
        constants.SWACT_ACTION,
        'sysinv-test')

    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
def test_swact_action_to_config_out_of_date(self):
    """Swact patched on controller-0 is rejected with BAD_REQUEST.

    controller-0 is created with config_target equal to config_applied,
    while controller-1 is created with the two differing. The error
    message must name controller-1 and its config_target.
    """
    # State shared by both controllers: provisioned, unlocked, enabled
    # and online.
    host_state = {
        'invprovision': constants.PROVISIONED,
        'administrative': constants.ADMIN_UNLOCKED,
        'operational': constants.OPERATIONAL_ENABLED,
        'availability': constants.AVAILABILITY_ONLINE,
    }

    # Create controller-0 with its config up to date
    c0_values = dict(
        host_state,
        config_target='d1fd40ca-9306-44c8-a100-671f22111114',
        config_applied='d1fd40ca-9306-44c8-a100-671f22111114')
    c0_host = self._create_controller_0(**c0_values)

    # Create controller-1 with its config out of date
    c1_values = dict(
        host_state,
        config_target='89fbefe7-7b43-4bd2-9500-663b33df2e57',
        config_applied='f9fbefe7-7b43-4bd2-9500-663b33df2e57')
    c1_host = self._create_controller_1(**c1_values)

    # controller-0 already active, swact from controller-0
    response = self._patch_host_action(
        c0_host['hostname'],
        constants.SWACT_ACTION,
        'sysinv-test',
        expect_errors=True)

    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(http_client.BAD_REQUEST, response.status_int)
    self.assertTrue(response.json['error_message'])

    # The rejection must identify the host whose config is not applied
    expected_fragment = "%s target Config %s not yet applied. " % (
        c1_host['hostname'], c1_host['config_target'])
    self.assertIn(expected_fragment, response.json['error_message'])
def test_force_swact_action(self):
# Create controller-0 in disabled state so force swact is required
self._create_controller_0(