Upgrade changes to support MGMT FQDN

The release stx.9 with FQDN support for MGMT network
uses the hieradata with the new pattern:
<hostname>.yaml
But the release stx.8 is still using the old name:
<mgmt_ip>.yaml
During an upgrade, controller-0 wants to update
<mgmt_ip>.yaml while controller-1 wants to use
<hostname>.yaml, so it is necessary to change
the code to use/update the right hieradata.
Additionally, during an upgrade the active
controller running the old release can't resolve
the FQDN (e.g. controller.internal); for this
reason the FQDN cannot be used during the
controller-1 upgrade.

Test Plan:
IPv6 AIO-SX fresh install
IPv6 AIO-DX fresh install
IPv4 AIO-SX upgrade from previous release
    without story 2010722 to new release
    that has the story 2010722 (not master)
IPv4 AIO-DX upgrade from previous release
    without story 2010722 to new release
    that has the story 2010722 (not master)
IPv4 STANDARD upgrade from previous release
    without story 2010722 to new release
    that has the story 2010722 (not master)
IPv6 AIO-DX upgrade from previous release
    without story 2010722 to new release
    that has the story 2010722 (not master)
IPv6 DC lab upgrade from previous release
    without story 2010722 to new release
    that has the story 2010722 (not master)

Story: 2010722
Task: 48609

Signed-off-by: Fabiano Correa Mercer <fabiano.correamercer@windriver.com>
Change-Id: I555185bea7fadb772a4023b6ecb4379e01e0f16c
This commit is contained in:
Fabiano Correa Mercer 2024-02-21 14:34:02 -03:00
parent 2b1cbc169c
commit d449622f4a
13 changed files with 263 additions and 48 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2022 Wind River Systems, Inc.
# Copyright (c) 2016-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -54,8 +54,8 @@ LOG = logging.getLogger(__name__)
POSTGRES_BIN = utils.get_postgres_bin()
POSTGRES_MOUNT_PATH = '/mnt/postgresql'
POSTGRES_DUMP_MOUNT_PATH = '/mnt/db_dump'
DB_CONNECTION_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n"
DB_BARBICAN_CONNECTION_FORMAT = "postgresql://%s:%s@127.0.0.1/%s"
DB_CONNECTION_CONF_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n"
DB_CONNECTION_EXEC_FORMAT = "postgresql://%s:%s@127.0.0.1/%s"
restore_patching_complete = '/etc/platform/.restore_patching_complete'
restore_compute_ready = '/var/run/.restore_compute_ready'
@ -159,14 +159,21 @@ def get_shared_services():
return shared_services
def get_connection_string(db_credentials, database):
""" Generates a connection string for a given database"""
def get_connection_string(db_credentials, database, exec_format=False):
""" Generates a connection string for a given database
exec_format
True: the connection string can be used in line command
( ex: barbican ) or in psycopg2.connect
False: the connection string is to be used in .conf files
"""
username = db_credentials[database]['username']
password = db_credentials[database]['password']
if database == 'barbican':
return DB_BARBICAN_CONNECTION_FORMAT % (username, password, database)
if exec_format:
return DB_CONNECTION_EXEC_FORMAT % (username, password, database)
else:
return DB_CONNECTION_FORMAT % (username, password, database)
# use format to be used in .conf files
return DB_CONNECTION_CONF_FORMAT % (username, password, database)
def create_temp_filesystem(vgname, lvname, mountpoint, size):
@ -660,7 +667,8 @@ def migrate_databases(from_release, shared_services, db_credentials,
# Migrate barbican
('barbican',
'barbican-manage db upgrade ' +
'--db-url %s' % get_connection_string(db_credentials, 'barbican')),
'--db-url %s' % get_connection_string(db_credentials, 'barbican',
True)),
]
# Migrate fm
@ -833,12 +841,9 @@ def apply_sriov_config(db_credentials, hostname):
# If controller-1 has any FEC devices or sriov vfs configured, apply the
# sriov runtime manifest. We can't apply it from controller-0 during the
# host-unlock process as controller-1 is running the new release.
database = 'sysinv'
username = db_credentials[database]['username']
password = db_credentials[database]['password']
# psycopg2 can connect with the barbican string eg postgresql:// ...
connection_string = DB_BARBICAN_CONNECTION_FORMAT % (
username, password, database)
connection_string = get_connection_string(db_credentials, 'sysinv', True)
conn = psycopg2.connect(connection_string)
cur = conn.cursor()
cur.execute(
@ -874,6 +879,32 @@ def apply_sriov_config(db_credentials, hostname):
os.remove(tmpfile)
def get_db_host_mgmt_ip(db_credentials, hostname):
    """Return the management IP address of a host from the sysinv DB.

    The address is looked up in the ``addresses`` table under the name
    ``<hostname>-mgmt``.

    :param db_credentials: per-database credentials, passed to
        get_connection_string() to build the 'sysinv' connection string
    :param hostname: host name without the "-mgmt" suffix
    :returns: the ``address`` column of the first matching row
    :raises Exception: if no row matches; any other error is logged and
        propagated unchanged
    """
    # the postgres server was stopped during the upgrade_controller
    # need to use db_credentials to access the DB
    connection_string = get_connection_string(db_credentials, 'sysinv', True)
    conn = psycopg2.connect(connection_string)
    db_hostname = hostname + "-mgmt"
    try:
        cur = conn.cursor()
        # let the driver quote the value instead of formatting it into SQL
        cur.execute("SELECT address FROM addresses WHERE name=%s;",
                    (db_hostname,))
        row = cur.fetchone()
        if row is None:
            msg = "MGMT IP not found for: '{}'".format(db_hostname)
            raise Exception(msg)
        return row[0]
    except Exception:
        LOG.error("Failed to get MGMT IP for: '%s'" % db_hostname)
        # bare raise keeps the original traceback
        raise
    finally:
        # always release the connection, also on the error paths
        conn.close()
def upgrade_controller(from_release, to_release):
""" Executed on the release N+1 side upgrade controller-1. """
@ -1047,6 +1078,33 @@ def upgrade_controller(from_release, to_release):
LOG.info("Failed to update hiera configuration")
raise
# this is only necessary for 22.12,
# since the old release uses hieradata/<mgmt_ip>.yaml
# and the new one uses hieradata/<hostname>.yaml.
# during the AIO-DX upgrade, controller-0 runs the old
# release to upgrade controller-1:
# controller-0 still wants to use hieradata/<mgmt_ip>.yaml
# but controller-1 wants to use hieradata/<hostname>.yaml,
# so rename <hostname>.yaml to <mgmt_ip>.yaml
# and create a symlink: <hostname>.yaml -> <mgmt_ip>.yaml
try:
ctrl1_mgmt_ip = get_db_host_mgmt_ip(db_credentials,
utils.CONTROLLER_1_HOSTNAME)
except Exception as e:
LOG.exception(e)
LOG.info("Failed to get MGMT IP for controller-1 during upgrade")
raise
ctrl1_hostname_hieradata = constants.HIERADATA_PERMDIR + "/" \
+ utils.CONTROLLER_1_HOSTNAME + ".yaml"
ctrl1_ipaddr_hieradata_file = ctrl1_mgmt_ip + ".yaml"
ctrl1_ipaddr_hieradata = constants.HIERADATA_PERMDIR + "/" \
+ ctrl1_ipaddr_hieradata_file
os.rename(ctrl1_hostname_hieradata, ctrl1_ipaddr_hieradata)
os.symlink(ctrl1_ipaddr_hieradata_file, ctrl1_hostname_hieradata)
apply_sriov_config(db_credentials, utils.CONTROLLER_1_HOSTNAME)
# Remove /etc/kubernetes/admin.conf after it is used to generate

View File

@ -709,10 +709,21 @@ start()
# Apply the puppet manifest
HIERADATA_PATH=${PUPPET_CACHE}/hieradata
HOST_HIERA=${HIERADATA_PATH}/${HOST}.yaml
IP_HIERA=${HIERADATA_PATH}/${IPADDR}.yaml
PUPPET_YML=${HOST}
# if IP_HIERA exists, it means an upgrade
# must use the <mgmt_ip>.yaml because the active controller
# running the previous version doesn't update the hostname.yaml
if [ -e $ETC_PLATFORM_DIR/.upgrade_do_not_use_fqdn ] && \
[ -f ${IP_HIERA} ]; then
HOST_HIERA=${IP_HIERA}
PUPPET_YML=${IPADDR}
fi
if [ -f ${HOST_HIERA} ]; then
echo "$0: Running puppet manifest apply"
puppet-manifest-apply.sh ${HIERADATA_PATH} ${HOST} ${subfunction}
echo "$0: Running puppet manifest apply for: ${PUPPET_YML}"
puppet-manifest-apply.sh ${HIERADATA_PATH} ${PUPPET_YML} ${subfunction}
RC=$?
if [ $RC -ne 0 ]
then

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This migration script is used to disable the FQDN during an upgrade.
# The flag it creates must be deleted during the upgrade complete/abort.
# During the upgrade controller-0 runs version X
# and controller-1 runs version X+1.
# To use the FQDN the active controller must run dnsmasq
# with the FQDN entries, which doesn't happen during an upgrade.
#
# During the migrate stage of the platform upgrade it will:
# - create a flag that will be used by sysinv and puppet code to not
# use FQDN entries
import sys
import subprocess
import os.path
import socket
from controllerconfig.common import log
PLATFORM_CONF_PATH = '/etc/platform'
PLATFORM_SIMPLEX_FLAG = '/etc/platform/simplex'
UPGRADE_DO_NOT_USE_FQDN = PLATFORM_CONF_PATH + \
'/.upgrade_do_not_use_fqdn'
LOG = log.get_logger(__name__)
def remove_unused_files_from_hieradata(to_release):
    """Remove controller-1's IP-named hieradata file of the target release.

    After the upgrade the file
    /opt/platform/puppet/<to_release>/hieradata/<controller-1 address>.yaml
    is no longer needed because it was replaced by <hostname>.yaml
    (i.e. controller-1.yaml).

    :param to_release: release whose hieradata directory is cleaned up
    :raises Exception: if the "rm" command exits with a non-zero status
    """
    # first address that "controller-1" resolves to
    ctrl1_mgmt_ip = socket.getaddrinfo("controller-1", None)[0][4][0]
    ctrl1_old_hiera = "/opt/platform/puppet/{}/hieradata/{}.yaml".format(
        to_release, ctrl1_mgmt_ip)
    # argument list without a shell: the path reaches rm verbatim and is
    # never interpreted as shell syntax
    command = ["rm", "-f", ctrl1_old_hiera]
    sub = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    stdout, stderr = sub.communicate()
    if sub.returncode != 0:
        LOG.error('Cmd Failed:\n%s\n.%s\n%s' %
                  (" ".join(command), stdout, stderr))
        raise Exception('Error removing unused file: {} '.format(
            ctrl1_old_hiera))
def main():
# Script entry point.
# Expected command line: <from_release> <to_release> <action>
# Returns 1 if more than three arguments are given, otherwise 0.
action = None
from_release = None
to_release = None
arg = 1
res = 0
log.configure()
# positional parsing: argv[1], argv[2] and argv[3] are mapped by index,
# any further argument is rejected
while arg < len(sys.argv):
if arg == 1:
from_release = sys.argv[arg]
elif arg == 2:
to_release = sys.argv[arg]
elif arg == 3:
action = sys.argv[arg]
else:
LOG.error("Invalid option %s." % sys.argv[arg])
return 1
arg += 1
LOG.info("%s invoked with from_release = %s to_release = %s action = %s"
% (sys.argv[0], from_release, to_release, action))
# create a flag to not use FQDN during a SW upgrade
# this flag must be deleted during the upgrade complete/abort
# during the activate, remove the unused file in hieradata
# the flag is only created when the simplex flag file does not exist
if not os.path.exists(PLATFORM_SIMPLEX_FLAG):
if action in ['start', 'migrate'] and \
from_release in ['21.12', '22.12']:
# create (or truncate) the empty flag file
open(UPGRADE_DO_NOT_USE_FQDN, 'w').close()
elif action in ['activate'] and to_release in ['24.09']:
remove_unused_files_from_hieradata(to_release)
return res
if __name__ == "__main__":
sys.exit(main())

View File

@ -850,21 +850,7 @@ class AgentManager(service.PeriodicService):
while (timeutils.utcnow() - wait_time).total_seconds() < MAXSLEEP:
# wait for controller to come up first may be a DOR
try:
ihost, mgmt_addr = rpcapi.get_ihost_by_macs(icontext, host_macs)
except ValueError:
# Retry because the N-1 host does not support the mgmt_addr
# parameter during upgrade
try:
ihost = rpcapi.get_ihost_by_macs(icontext, host_macs)
except Timeout:
if not rpc_timeout:
rpc_timeout = True
LOG.info("get_ihost_by_macs rpc Timeout.")
time.sleep(5) # avoid calling timedout RPC in sequence
continue
except Exception:
LOG.warn("Conductor RPC get_ihost_by_macs exception "
"response")
ihost = rpcapi.get_ihost_by_macs(icontext, host_macs)
except Timeout:
if not rpc_timeout:
rpc_timeout = True
@ -903,6 +889,17 @@ class AgentManager(service.PeriodicService):
LOG.info("get_address_by_host_networktype rpc Timeout.")
time.sleep(5) # avoid calling timedout RPC in sequence
continue
except RemoteError:
try:
# active controller is running an old release
# without get_address_by_host_networktype RPC
mgmt_addr = ihost['mgmt_ip']
LOG.info("get_address_by_host_networktype rpc RemoteError."
"using mgmt_ip from ihost: {}".format(mgmt_addr))
except Exception:
LOG.warn("ihost_inv_get_and_report: ihost does not have "
"mgmt_ip")
except Exception as ex:
LOG.warn("Conductor RPC get_address_by_host_networktype "
"exception response %s" % ex)

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2013-2023 Wind River Systems, Inc.
# Copyright (c) 2013-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -2121,6 +2121,10 @@ DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local'
ANSIBLE_BOOTSTRAP_FLAG = os.path.join(tsc.VOLATILE_PATH, ".ansible_bootstrap")
ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH,
".bootstrap_completed")
# just used for upgrade purposes
OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.CONFIG_PATH,
".bootstrap_completed")
UNLOCK_READY_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH, ".unlock_ready")
INVENTORY_WAIT_TIMEOUT_IN_SECS = 120
DEFAULT_RPCAPI_TIMEOUT_IN_SECS = 60

View File

@ -2406,13 +2406,18 @@ def is_inventory_config_complete(dbapi, forihostid):
return False
def is_fqdn_ready_to_use():
def is_fqdn_ready_to_use(ignore_upgrade=False):
"""
Return true if FQDN can be used instead of IP ADDRESS
The use of FQDN is limited to management network
after the bootstrap.
During a duplex/standard upgrade the FQDN can't be used
since the old release doesn't support it.
"""
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)):
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and
(os.path.isfile(tsc.PLATFORM_SIMPLEX_FLAG) or
(not os.path.isfile(tsc.UPGRADE_DO_NOT_USE_FQDN) or
ignore_upgrade))):
return True
return False

View File

@ -1324,6 +1324,11 @@ class ConductorManager(service.PeriodicService):
hostname = re.sub("-%s$" % constants.NETWORK_TYPE_MGMT,
'', str(address.name))
# during an upgrade the DB can have the unused
# controller-platform-nfs entry that must be ignored
if (hostname == 'controller-platform-nfs'):
continue
if (hostname != constants.SYSTEM_CONTROLLER_GATEWAY_IP_NAME):
controller_alias = [constants.CONTROLLER_HOSTNAME,
constants.DOCKER_REGISTRY_HOST,
@ -6477,7 +6482,7 @@ class ConductorManager(service.PeriodicService):
:param context: an admin context
:param ihost_macs: list of mac addresses
:returns: ihost object, including all fields and mgmt address.
:returns: ihost object, including all fields.
"""
ihosts = self.dbapi.ihost_get_list()
@ -6494,11 +6499,7 @@ class ConductorManager(service.PeriodicService):
for host in ihosts:
if host.mgmt_mac == mac:
LOG.info("Host found ihost db for macs: %s" % host.hostname)
mgmt_addr = None
mgmt_addr = self.get_address_by_host_networktype(
context, host.hostname,
constants.NETWORK_TYPE_MGMT)
return host, mgmt_addr
return host
LOG.debug("RPC get_ihost_by_macs called but found no ihost.")
def get_ihost_by_hostname(self, context, ihost_hostname):
@ -14281,6 +14282,18 @@ class ConductorManager(service.PeriodicService):
LOG.info("Deleting Sysinv Hybrid state")
rpcapi.delete_sysinv_hybrid_state(context, controller_1['uuid'])
# TODO(fcorream): This is just needed for upgrade from R7 to R8
# need to remove the flag that disables the use of FQDN during the
# upgrade
if (tsc.system_mode != constants.SYSTEM_MODE_SIMPLEX):
personalities = [constants.CONTROLLER]
config_uuid = self._config_update_hosts(context, personalities)
config_dict = {
"personalities": personalities,
"classes": ['platform::network::upgrade_fqdn_cleanup::runtime'],
}
self._config_apply_runtime_manifest(context, config_uuid, config_dict)
# Clear upgrades alarm
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
constants.CONTROLLER_HOSTNAME)

View File

@ -45,8 +45,14 @@ class LdapPuppet(base.BasePuppet):
def _is_openldap_certificate_created(self):
""" Returns True when it's safe to read the openldap certificate.
"""
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9 ( 24.09 )
is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
return bootstrap_completed

View File

@ -61,7 +61,7 @@ class NetworkingPuppet(base.BasePuppet):
})
# create flag for the mate controller to use FQDN or not
if utils.is_fqdn_ready_to_use():
if utils.is_fqdn_ready_to_use(True):
fqdn_ready = True
else:
fqdn_ready = False

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -108,7 +108,17 @@ class OpenstackBasePuppet(base.BasePuppet):
# (by services' endpoint reconfiguration), the system commands
# to add networks etc during ansible bootstrap will fail as
# haproxy has not been configured yet.
if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and \
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9
is_upgrading = cutils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
if bootstrap_completed and \
(self._distributed_cloud_role() ==
constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER or
self._distributed_cloud_role() ==

View File

@ -956,7 +956,16 @@ class PlatformPuppet(base.BasePuppet):
def _get_dc_root_ca_config(self):
config = {}
system = self._get_system()
if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG):
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9
is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
if bootstrap_completed:
cert_data = utils.get_admin_ep_cert(
system.distributed_cloud_role)

View File

@ -2949,7 +2949,7 @@ class ManagerTestCase(base.DbTestCase):
'address': '192.168.204.3'}
utils.create_test_address(**address)
ihost_macs = ['22:44:33:55:11:66', '22:44:33:88:11:66']
ihost, mgmt_addr = self.service.get_ihost_by_macs(self.context, ihost_macs)
ihost = self.service.get_ihost_by_macs(self.context, ihost_macs)
self.assertEqual(ihost.mgmt_mac, '22:44:33:55:11:66')
def test_get_ihost_by_macs_no_match(self):

View File

@ -1,5 +1,5 @@
"""
Copyright (c) 2014-2023 Wind River Systems, Inc.
Copyright (c) 2014-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
@ -223,6 +223,11 @@ MGMT_NETWORK_RECONFIGURATION_UNLOCK = os.path.join(
MGMT_NETWORK_RECONFIG_UPDATE_HOST_FILES = os.path.join(
PLATFORM_CONF_PATH, ".mgmt_reconfig_update_hosts_file")
# Set in the upgrade script to not use FQDN during SW upgrade
# must be deleted after upgrade complete/abort
UPGRADE_DO_NOT_USE_FQDN = os.path.join(
PLATFORM_CONF_PATH, ".upgrade_do_not_use_fqdn")
# Worker configuration flags
# Set after initial application of node manifest