From 960008e91a8ec31edc63e52151cdb70324bf699e Mon Sep 17 00:00:00 2001
From: Bin Qian
Date: Tue, 26 Sep 2023 20:02:28 +0000
Subject: [PATCH] data migration worker

This commit adds a worker process that performs data migration. The
process runs on the active controller under the from-release software,
in a chroot environment containing the to-release software. The
subprocess takes the from-release, the to-release, and the port of the
second PostgreSQL service instance (which runs the to-release software
and environment) as command line arguments. The worker process creates
the PostgreSQL databases and tables, imports data from the existing
system, and migrates keyring, pxeboot, helm, armada, sysinv, fluxcd and
hieradata.

TCs passed: successfully completed data migration; verified that the
data mentioned above is created correctly.

Story: 2010676
Task: 48837

Change-Id: I8c9205df27e68f7185ec154ee365101e33610a69
---
 software/debian/deb_folder/rules          |   1 +
 software/setup.cfg                        |   1 +
 software/software/utilities/__init__.py   |   0
 software/software/utilities/constants.py  |  40 ++
 software/software/utilities/migrate.py    | 803 +++++++++++++++++++++++
 software/software/utilities/utils.py      | 483 ++++++++++++++
 6 files changed, 1328 insertions(+)
 create mode 100644 software/software/utilities/__init__.py
 create mode 100644 software/software/utilities/constants.py
 create mode 100644 software/software/utilities/migrate.py
 create mode 100644 software/software/utilities/utils.py

diff --git a/software/debian/deb_folder/rules b/software/debian/deb_folder/rules
index 28242507..837d1172 100755
--- a/software/debian/deb_folder/rules
+++ b/software/debian/deb_folder/rules
@@ -14,6 +14,7 @@ export METADATA_FILE="STX_${PLATFORM_RELEASE}_GA-metadata.xml"
 override_dh_auto_build:
 	cp service-files/STX_GA-metadata.xml ${METADATA_FILE}
 	sed -i "s/xxxPLATFORM_RELEASExxx/${PLATFORM_RELEASE}/g" ${METADATA_FILE}
+	sed -i "s/xxxPLATFORM_RELEASExxx/${PLATFORM_RELEASE}/g" software/utilities/constants.py
 
 override_dh_install:
 	python3 setup.py install -f --install-layout=deb --root=$(ROOT)

diff --git a/software/setup.cfg b/software/setup.cfg
index ab153c8b..e48713b0 100644
--- a/software/setup.cfg
+++ b/software/setup.cfg
@@ -36,6 +36,7 @@ console_scripts =
     software = software.software_client:main
     software-controller-daemon = software.software_controller:main
     software-agent = software.software_agent:main
+    software-migrate = software.utilities.migrate:migrate
 
 [wheel]

diff --git a/software/software/utilities/__init__.py b/software/software/utilities/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/software/software/utilities/constants.py b/software/software/utilities/constants.py
new file mode 100644
index 00000000..7ca19496
--- /dev/null
+++ b/software/software/utilities/constants.py
@@ -0,0 +1,40 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# SW_VERSION should be built-in from build-tools +SW_VERSION = 'xxxPLATFORM_RELEASExxx' + +DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER = 'systemcontroller' +WORKER = 'worker' +SERVICE_TYPE_IDENTITY = 'identity' + +CONFIG_WORKDIR = '/tmp/config' +CGCS_CONFIG_FILE = CONFIG_WORKDIR + '/cgcs_config' + +PLATFORM_PATH = "/opt/platform" +PLATFORM_CONFIG_PATH = PLATFORM_PATH + "/config/" + SW_VERSION + "/" +PLATFORM_CONF_FILE=PLATFORM_CONFIG_PATH + "/platform.conf" +CONFIG_PATH=PLATFORM_PATH + "/config/" + SW_VERSION + "/" +PLATFORM_CONFIG_PERMDIR = CONFIG_PATH + +HIERADATA_WORKDIR = '/tmp/hieradata' +PUPPET_PATH = PLATFORM_PATH + "/puppet/" + SW_VERSION + "/" +HIERADATA_PERMDIR = PUPPET_PATH + 'hieradata' + +KEYRING_WORKDIR = '/tmp/python_keyring' +KEYRING_PATH = PLATFORM_PATH + "/.keyring/" + SW_VERSION +KEYRING_PERMDIR = KEYRING_PATH + +VOLATILE_PXEBOOT_PATH = "/var/pxeboot" +# INITIAL_CONFIG_COMPLETE_FILE = '/etc/platform/.initial_config_complete' + +KUBERNETES_CONF_PATH = "/etc/kubernetes" +KUBERNETES_ADMIN_CONF_FILE = "admin.conf" +LOG_LOCAL1 = 'local1' + +CONTROLLER = 'controller' +CONTROLLER_1_HOSTNAME = 'controller-1' +IPV6_FAMILY = 6 diff --git a/software/software/utilities/migrate.py b/software/software/utilities/migrate.py new file mode 100644 index 00000000..afcba3b8 --- /dev/null +++ b/software/software/utilities/migrate.py @@ -0,0 +1,803 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import argparse +import glob +import json +import os +import psycopg2 +import shutil +import socket +import subprocess +import yaml + + +from oslo_log import log + +from software.utilities.constants import HIERADATA_PERMDIR +from software.utilities.constants import KEYRING_PATH +from software.utilities.constants import PLATFORM_PATH +from software.utilities.constants import SW_VERSION +from software.utilities.constants import VOLATILE_PXEBOOT_PATH +from software.utilities.constants import KUBERNETES_CONF_PATH +from software.utilities.constants import KUBERNETES_ADMIN_CONF_FILE +from software.utilities.constants import DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER +from software.utilities.constants import SERVICE_TYPE_IDENTITY +from software.utilities import utils + + +def get_postgres_bin(): + """ Get the path to the postgres binaries""" + + try: + return subprocess.check_output( + ['pg_config', '--bindir']).decode().rstrip('\n') + except subprocess.CalledProcessError: + LOG.exception("Failed to get postgres bin directory.") + raise + + +POSTGRES_BIN = get_postgres_bin() +LOG = log.getLogger(__name__) +POSTGRES_PATH = '/var/lib/postgresql' +POSTGRES_DATA_DIR = os.path.join(POSTGRES_PATH, SW_VERSION) +DB_CONNECTION_FORMAT = "connection=postgresql://%s:%s@127.0.0.1:%s/%s\n" +DB_BARBICAN_CONNECTION_FORMAT = "postgresql://%s:%s@127.0.0.1:%s/%s" + + +def migrate_keyring_data(from_release, to_release): + """ Migrates keyring data. """ + + LOG.info("Migrating keyring data") + # First delete any keyring files for the to_release - they can be created + # if release N+1 nodes are incorrectly left powered up when the release N + # load is installed. + target_path = os.path.join(PLATFORM_PATH, ".keyring", to_release) + shutil.rmtree(target_path, ignore_errors=True) + shutil.copytree(os.path.join(PLATFORM_PATH, ".keyring", from_release), target_path) + + + +def migrate_pxeboot_config(from_release, to_release): + """ Migrates pxeboot configuration. 
""" + LOG.info("Migrating pxeboot config") + + # Copy the entire pxelinux.cfg directory to pick up any changes made + # after the data was migrated (i.e. updates to the controller-1 load). + source_pxelinux = os.path.join(PLATFORM_PATH, "config", from_release, + "pxelinux.cfg", "") + dest_pxelinux = os.path.join(PLATFORM_PATH, "config", to_release, + "pxelinux.cfg") + + try: + subprocess.check_call( + ["rsync", + "-a", + os.path.join(source_pxelinux), + os.path.join(dest_pxelinux)], + stdout=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to migrate %s" % source_pxelinux) + raise + + to_release_symlink_target = os.path.join(VOLATILE_PXEBOOT_PATH, + "pxelinux.cfg.files", "grub.cfg") + + dest_symlink_exists = os.path.islink(dest_pxelinux + "/grub.cfg") + if dest_symlink_exists: + os.unlink(dest_pxelinux + "/grub.cfg") + os.symlink(to_release_symlink_target, dest_pxelinux + "/grub.cfg") + + +def migrate_armada_config(from_release, to_release): + """ Migrates armada configuration. """ + + # Check if the folder exist before migration + if not os.path.exists(os.path.join(PLATFORM_PATH, "armada")): + LOG.info("Skipping armada migration, the directory doesn't exist") + return + + LOG.info("Migrating armada config") + # Copy the entire armada.cfg directory to pick up any changes made + # after the data was migrated (i.e. updates to the controller-1 load). + source_armada = os.path.join(PLATFORM_PATH, "armada", from_release, "") + dest_armada = os.path.join(PLATFORM_PATH, "armada", to_release) + try: + subprocess.check_call( + ["rsync", + "-a", + os.path.join(source_armada), + os.path.join(dest_armada)], + stdout=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to migrate %s" % source_armada) + raise + + +def migrate_fluxcd_config(from_release, to_release): + """ Migrates fluxcd configuration. """ + + # Check if the folder exists before migration + if not os.path.exists(os.path.join(PLATFORM_PATH, "fluxcd")): + LOG.info("Skipping fluxcd migration, the directory doesn't exist") + return + + LOG.info("Migrating fluxcd config") + + # Copy the entire fluxcd.cfg directory to pick up any changes made + # after the data was migrated. + source_fluxcd = os.path.join(PLATFORM_PATH, "fluxcd", from_release, "") + dest_fluxcd = os.path.join(PLATFORM_PATH, "fluxcd", to_release) + try: + subprocess.check_call( + ["rsync", + "-a", + os.path.join(source_fluxcd), + os.path.join(dest_fluxcd)], + stdout=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to migrate %s" % source_fluxcd) + raise + + +def migrate_helm_config(from_release, to_release): + """ Migrates helm configuration. """ + + LOG.info("Migrating helm config") + + # Copy the entire helm.cfg directory to pick up any changes made + # after the data was migrated (i.e. updates to the controller-1 load). + source_helm = os.path.join(PLATFORM_PATH, "helm", from_release, "") + dest_helm = os.path.join(PLATFORM_PATH, "helm", to_release) + try: + subprocess.check_call( + ["rsync", + "-a", + os.path.join(source_helm), + os.path.join(dest_helm)], + stdout=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to migrate %s" % source_helm) + raise + +def migrate_sysinv_data(from_release, to_release, port): + """ Migrates sysinv data. """ + + LOG.info("Migrating sysinv data") + + # If the /opt/platform/sysinv//sysinv.conf.default file has + # changed between releases it must be modified at this point. 
+ try: + subprocess.check_call( + ["rsync", + "-a", + os.path.join(PLATFORM_PATH, "sysinv", from_release, ""), + os.path.join(PLATFORM_PATH, "sysinv", to_release)], + stdout=result) + + except subprocess.CalledProcessError: + LOG.exception("Failed to copy sysinv platform dir to new version") + raise + + # Get the hiera data for the from release + hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release, + "hieradata") + static_file = os.path.join(hiera_path, "static.yaml") + with open(static_file, 'r') as file: + static_config = yaml.load(file, Loader=yaml.Loader) + + username = static_config["sysinv::db::postgresql::user"] + password = utils.get_password_from_keyring("sysinv", "database") + + # We need a bare bones /etc/sysinv/sysinv.conf file in order to do the + # sysinv database migration and then generate the upgrades manifests. + with open("/etc/sysinv/sysinv.conf", "w") as f: + f.write("[DEFAULT]\n") + f.write("logging_context_format_string=sysinv %(asctime)s.%" + "(msecs)03d %(process)d %(levelname)s %" + "(name)s [%(request_id)s %(user)s %" + "(tenant)s] %(instance)s%(message)s\n") + f.write("verbose=True\n") + f.write("syslog_log_facility=local6\n") + f.write("use_syslog=True\n") + f.write("logging_default_format_string=sysinv %(asctime)s.%" + "(msecs)03d %(process)d %(levelname)s %(name)s [-] %" + "(instance)s%(message)s\n") + f.write("debug=False\n") + f.write('sql_connection=postgresql://%s:%s@127.0.0.1:%s/sysinv\n' % + (username, password, port)) + + +def create_database(target_port): + """ Creates empty postgres database. """ + + + LOG.info("Creating postgres database") + + db_create_commands = [ + # Configure new data directory for postgres + 'rm -rf {}'.format(POSTGRES_DATA_DIR), + 'mkdir -p {}'.format(POSTGRES_DATA_DIR), + 'chown postgres {}'.format(POSTGRES_DATA_DIR), + 'sudo -u postgres {} -D {}'.format( + os.path.join(POSTGRES_BIN, 'initdb'), + POSTGRES_DATA_DIR), + 'chmod -R 700 ' + POSTGRES_DATA_DIR, + 'chown -R postgres ' + POSTGRES_DATA_DIR, + "sed -i 's/#port = 5432/port = {}/g' {}/postgresql.conf".format(target_port, POSTGRES_DATA_DIR), + 'mkdir -p /var/run/postgresql/', + 'chown -R postgres /var/run/postgresql', + ] + + # Execute db creation commands + for cmd in db_create_commands: + try: + LOG.info("Executing db create command: %s" % cmd) + subprocess.check_call([cmd], + shell=True, stdout=result, stderr=result) + except subprocess.CalledProcessError as ex: + LOG.exception("Failed to execute command: '%s' during upgrade " + "processing, return code: %d" % (cmd, ex.returncode)) + raise + + +def import_databases(target_port, from_path=None): + """ Imports databases. 
""" + + if not from_path: + from_dir = '/var/lib/postgresql/upgrade/' + + LOG.info("Importing databases") + try: + postgres_config_path = os.path.join( + from_dir, 'postgres.postgreSql.config') + # Do postgres schema import (suppress stderr due to noise) + subprocess.check_call(['sudo -u postgres psql --port=%s -f ' % target_port + + postgres_config_path + ' postgres'], + shell=True, + stdout=result, + stderr=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to import schemas.") + raise + + import_commands = [] + + # Do postgres data import + for data in glob.glob(from_dir + '/*.*Sql.data'): + db_elem = data.split('/')[-1].split('.')[0] + LOG.info("importing {}".format(db_elem)) + import_commands.append((db_elem, + "sudo -u postgres psql --port=%s -f " % target_port + data + + " " + db_elem)) + + VIM_DB_NAME = 'vim_db_v1' + temp_db_path = '/tmp/' + db_dir = os.path.join(PLATFORM_PATH, 'nfv/vim', SW_VERSION) + db_path = os.path.join(db_dir, VIM_DB_NAME) + + import_commands.append( + ("remove %s" % db_dir, + "rm %s -rf" % db_dir)) + + import_commands.append( + ("create %s" % db_dir, + "mkdir %s -p" % db_dir)) + + import_commands.append( + ("nfv-vim", + "nfv-vim-manage db-load-data -d %s -f %s" % + (temp_db_path, os.path.join(from_dir, 'vim.data')))) + + # copy the vim db + import_commands.append( + ('move database to %s' % db_path, + ("mv %s %s" % (os.path.join(temp_db_path, VIM_DB_NAME), + db_path)))) + + # Execute import commands + for cmd in import_commands: + try: + print("Importing %s" % cmd[0]) + LOG.info("Executing import command: %s" % cmd[1]) + subprocess.check_call([cmd[1]], + shell=True, stdout=result) + + except subprocess.CalledProcessError as ex: + LOG.exception("Failed to execute command: '%s' during upgrade " + "processing, return code: %d" % + (cmd[1], ex.returncode)) + raise + + +def get_system_role(target_port): + """ Get the system role from the sysinv database""" + + conn = psycopg2.connect("dbname=sysinv user=postgres port=%s" % target_port) + cur = conn.cursor() + cur.execute("select distributed_cloud_role from i_system;") + row = cur.fetchone() + if row is None: + LOG.error("Failed to fetch i_system data") + raise psycopg2.ProgrammingError("Failed to fetch i_system data") + + role = row[0] + + return role + +def get_shared_services(target_port): + """ Get the list of shared services from the sysinv database""" + + shared_services = [] + DEFAULT_SHARED_SERVICES = [] + + conn = psycopg2.connect("dbname=sysinv user=postgres port=%s" % target_port) + cur = conn.cursor() + cur.execute("select capabilities from i_system;") + row = cur.fetchone() + if row is None: + LOG.error("Failed to fetch i_system data") + raise psycopg2.ProgrammingError("Failed to fetch i_system data") + + cap_obj = json.loads(row[0]) + region_config = cap_obj.get('region_config', None) + if region_config: + shared_services = cap_obj.get('shared_services', + DEFAULT_SHARED_SERVICES) + + return shared_services + + +def migrate_hiera_data(from_release): + """ Migrate static hiera data. """ + + LOG.info("Migrating hiera data") + from_hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release, + "hieradata") + to_hiera_path = HIERADATA_PERMDIR + + shutil.rmtree(to_hiera_path, ignore_errors=True) + os.makedirs(to_hiera_path) + + # Copy only the static yaml files. The other yaml files will be generated + # when required. 
+ for f in ['secure_static.yaml', 'static.yaml']: + shutil.copy(os.path.join(from_hiera_path, f), to_hiera_path) + + # Make any necessary updates to the static yaml files. + # Update the static.yaml file + static_file = os.path.join(HIERADATA_PERMDIR, "static.yaml") + with open(static_file, 'r') as yaml_file: + static_config = yaml.load(yaml_file, Loader=yaml.Loader) + static_config.update({ + 'platform::params::software_version': SW_VERSION, + 'platform::client::credentials::params::keyring_directory': + KEYRING_PATH, + 'platform::client::credentials::params::keyring_file': + os.path.join(KEYRING_PATH, '.CREDENTIAL'), + }) + + with open(static_file, 'w') as yaml_file: + yaml.dump(static_config, yaml_file, default_flow_style=False) + + secure_static_file = os.path.join( + HIERADATA_PERMDIR, "secure_static.yaml") + with open(secure_static_file, 'r') as yaml_file: + secure_static_config = yaml.load(yaml_file, Loader=yaml.Loader) + + with open(secure_static_file, 'w') as yaml_file: + yaml.dump(secure_static_config, yaml_file, default_flow_style=False) + + +def get_db_credentials(shared_services, from_release, role=None): + """ + Returns the database credentials using the provided shared services, + from_release and role. + """ + db_credential_keys = \ + {'barbican': {'hiera_user_key': 'barbican::db::postgresql::user', + 'keyring_password_key': 'barbican', + }, + 'sysinv': {'hiera_user_key': 'sysinv::db::postgresql::user', + 'keyring_password_key': 'sysinv', + }, + 'fm': {'hiera_user_key': 'fm::db::postgresql::user', + 'keyring_password_key': 'fm', + }, + } + + if SERVICE_TYPE_IDENTITY not in shared_services: + db_credential_keys.update( + {'keystone': {'hiera_user_key': + 'keystone::db::postgresql::user', + 'keyring_password_key': 'keystone', + }}) + + if role == DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER: + db_credential_keys.update( + {'dcmanager': {'hiera_user_key': 'dcmanager::db::postgresql::user', + 'keyring_password_key': 'dcmanager', + }, + 'dcorch': {'hiera_user_key': 'dcorch::db::postgresql::user', + 'keyring_password_key': 'dcorch', + }, + }) + + # Get the hiera data for the from release + hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release, + "hieradata") + static_file = os.path.join(hiera_path, "static.yaml") + with open(static_file, 'r') as file: + static_config = yaml.load(file, Loader=yaml.Loader) + + db_credentials = dict() + for database, values in db_credential_keys.items(): + username = static_config[values['hiera_user_key']] + password = utils.get_password_from_keyring( + values['keyring_password_key'], "database") + db_credentials[database] = {'username': username, 'password': password} + + return db_credentials + + +def create_databases(db_credentials): + """ Creates databases. 
""" + LOG.info("Creating new databases") + + # Create databases that are new in this release + + conn = psycopg2.connect('dbname=postgres user=postgres port=6666') + + # Postgres won't allow transactions around database create operations + # so we set the connection to autocommit + conn.set_isolation_level( + psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) + + databases_to_create = [] + if not databases_to_create: + return + + with conn: + with conn.cursor() as cur: + for database in databases_to_create: + print("Creating %s database" % database) + username = psycopg2.extensions.AsIs( + '\"%s\"' % db_credentials[database]['username']) + db_name = psycopg2.extensions.AsIs('\"%s\"' % database) + password = db_credentials[database]['password'] + + try: + # Here we create the new database and the role for it + # The role will be used by the dbsync command to + # connect to the database. This ensures any new tables + # are added with the correct owner + cur.execute('CREATE DATABASE %s', (db_name,)) + cur.execute('CREATE ROLE %s', (username,)) + cur.execute('ALTER ROLE %s LOGIN PASSWORD %s', + (username, password)) + cur.execute('GRANT ALL ON DATABASE %s TO %s', + (db_name, username)) + except Exception as ex: + LOG.exception("Failed to create database and role. " + + "(%s : %s) Exception: %s" % + (database, username, ex)) + raise + + +def migrate_sysinv_database(): + """ Migrates the sysinv database. """ + + sysinv_cmd = 'sysinv-dbsync' + try: + print("Migrating sysinv") + LOG.info("Executing migrate command: %s" % sysinv_cmd) + subprocess.check_call(sysinv_cmd, + shell=True, stdout=result, stderr=result) + + except subprocess.CalledProcessError as ex: + LOG.exception("Failed to execute command: '%s' during upgrade " + "processing, return code: %d" + % (sysinv_cmd, ex.returncode)) + raise + + +def migrate_databases(shared_services, db_credentials, port, + simplex=False, role=None): + """ Migrates databases. """ + + # Create minimal config files for each OpenStack service so they can + # run their database migration. + if SERVICE_TYPE_IDENTITY not in shared_services: + with open("/etc/keystone/keystone-dbsync.conf", "w") as f: + f.write("[database]\n") + f.write(get_connection_string(db_credentials, port, 'keystone')) + + migrate_commands = [ + # Migrate barbican + ('barbican', + 'barbican-manage db upgrade ' + + '--db-url %s' % get_connection_string(db_credentials, port, 'barbican')), + ] + + # Migrate fm + # append the migrate command for dcmanager db + with open("/etc/fm/fm.conf", "w") as f: + f.write("[database]\n") + f.write(get_connection_string(db_credentials, port, 'fm')) + + migrate_commands += [ + ('fm', + 'fm-dbsync') + ] + + if SERVICE_TYPE_IDENTITY not in shared_services: + # To avoid a deadlock during keystone contract we will use offline + # migration for simplex upgrades. Other upgrades will have to use + # another method to resolve the deadlock + if not simplex: + migrate_commands += [ + # Migrate keystone + # + # EXPAND - we will first expand the database scheme to a + # superset of what both the previous and next release can + # utilize, and create triggers to facilitate the live + # migration process. 
+ # + # MIGRATE - will perform the data migration, while still] + # preserving the old schema + ('keystone', + 'keystone-manage --config-file ' + + '/etc/keystone/keystone-dbsync.conf db_sync --expand'), + ('keystone', + 'keystone-manage --config-file ' + + '/etc/keystone/keystone-dbsync.conf db_sync --migrate'), + ] + else: + migrate_commands += [ + # In simplex we're the only node so we can do an offline + # migration + ('keystone', + 'keystone-manage --config-file ' + + '/etc/keystone/keystone-dbsync.conf db_sync') + ] + + if role == DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER: + # append the migrate command for dcmanager db + with open("/etc/dcmanager/dcmanager.conf", "w") as f: + f.write("[database]\n") + f.write(get_connection_string(db_credentials, port, 'dcmanager')) + + migrate_commands += [ + ('dcmanager', + 'dcmanager-manage db_sync') + ] + + # append the migrate command for dcorch db + with open("/etc/dcorch/dcorch.conf", "w") as f: + f.write("[database]\n") + f.write(get_connection_string(db_credentials, port, 'dcorch')) + + migrate_commands += [ + ('dcorch', + 'dcorch-manage db_sync') + ] + + # Execute migrate commands + for cmd in migrate_commands: + try: + print("Migrating %s" % cmd[0]) + LOG.info("Executing migrate command: %s" % cmd[1]) + subprocess.check_call([cmd[1]], + shell=True, stdout=result, stderr=result) + + except subprocess.CalledProcessError as ex: + LOG.exception("Failed to execute command: '%s' during upgrade " + "processing, return code: %d" % + (cmd[1], ex.returncode)) + raise + + # The database entry for controller-1 will be set to whatever it was when + # the sysinv database was dumped on controller-0. Update the state and + # from/to load to what it should be when it becomes active. + try: + subprocess.check_call( + ["/usr/bin/sysinv-upgrade", + "update_controller_state"], + stdout=result, stderr=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to update state of %s" % + utils.CONTROLLER_1_HOSTNAME) + raise + + +def gethostaddress(hostname): + """ Get the IP address for a hostname, supporting IPv4 and IPv6. """ + return socket.getaddrinfo(hostname, None)[0][4][0] + + +def get_connection_string(db_credentials, port, database): + """ Generates a connection string for a given database""" + username = db_credentials[database]['username'] + password = db_credentials[database]['password'] + if database == 'barbican': + return DB_BARBICAN_CONNECTION_FORMAT % (username, password, port, database) + else: + return DB_CONNECTION_FORMAT % (username, password, port, database) + + +def upgrade_controller(from_release, to_release, target_port): + """ Executed on controller-0, under chroot N+1 deployment and N runtime. """ + + LOG.info("Upgrading controller from %s to %s" % (from_release, to_release)) + LOG.info("Mounting filesystems already done before chroot") + + # Migrate keyring data + print("Migrating keyring data...") + migrate_keyring_data(from_release, to_release) + + # Migrate pxeboot config + print("Migrating pxeboot configuration...") + migrate_pxeboot_config(from_release, to_release) + + # Migrate armada config + print("Migrating armada configuration...") + migrate_armada_config(from_release, to_release) + + # Migrate fluxcd config + print("Migrating fluxcd configuration...") + migrate_fluxcd_config(from_release, to_release) + + # Migrate helm config + print("Migrating helm configuration...") + migrate_helm_config(from_release, to_release) + + # Migrate sysinv data. 
+ print("Migrating sysinv configuration...") + migrate_sysinv_data(from_release, to_release, target_port) + + # Prepare for database migration + print("Preparing for database migration...") + # prepare_postgres_filesystems() + + # Import databases + print("Importing databases...") + import_databases(target_port) + + role = get_system_role(target_port) + shared_services = get_shared_services(target_port) + + # Create /tmp/python_keyring - used by keystone manifest. + tmp_keyring_path = "/tmp/python_keyring" + key_ring_path = os.path.join(PLATFORM_PATH, ".keyring", to_release, + "python_keyring") + shutil.rmtree(tmp_keyring_path, ignore_errors=True) + shutil.copytree(key_ring_path, tmp_keyring_path) + + # Copy admin.conf file from /opt/platform to /etc/kubernetes/admin.conf + # during upgrade + try: + subprocess.check_call( + ["cp", + os.path.join(PLATFORM_PATH, "config", to_release, + "kubernetes", KUBERNETES_ADMIN_CONF_FILE), + os.path.join(KUBERNETES_CONF_PATH, + KUBERNETES_ADMIN_CONF_FILE)], + stdout=result) + except subprocess.CalledProcessError: + LOG.exception("Failed to copy %s" % + os.path.join(KUBERNETES_CONF_PATH, + KUBERNETES_ADMIN_CONF_FILE)) + raise + + # Migrate hiera data + migrate_hiera_data(from_release) + utils.add_upgrade_entries_to_hiera_data(from_release) + + # Get database credentials + db_credentials = get_db_credentials( + shared_services, from_release, role=role) + + # Create any new databases + print("Creating new databases...") + create_databases(db_credentials) + + print("Migrating databases...") + # Migrate sysinv database + migrate_sysinv_database() + + # Migrate databases + migrate_databases(shared_services, db_credentials, target_port, role=role) + + # TODO: bqian generate mainifest for controller-1 + print("Applying configuration...") + + # TODO: bqian reenable migration script after adding optional port parameter + # to migration scripts + # Execute migration scripts + utils.execute_migration_scripts( + from_release, to_release, utils.ACTION_MIGRATE, target_port) + + # TODO: bqian verify it will work with upgrade mainifest applied + # on first controller become active with to-release software + # Apply "upgrades" manifest + LOG.info("Skip applying upgrades manifest") + # myip = gethostaddress("controller-0") + # utils.apply_upgrade_manifest(myip) + + # Remove manifest and keyring files + # shutil.rmtree("/tmp/puppet", ignore_errors=True) + shutil.rmtree("/tmp/python_keyring") + + # Generate config to be used by "regular" manifest + print("Generating config for %s" % utils.CONTROLLER_1_HOSTNAME) + LOG.info("Generating config for %s" % utils.CONTROLLER_1_HOSTNAME) + try: + utils.create_system_config() + utils.create_host_config(utils.CONTROLLER_1_HOSTNAME) + except Exception as e: + LOG.exception(e) + LOG.info("Failed to update hiera configuration") + raise + + print("Generating config for %s ..done" % utils.CONTROLLER_1_HOSTNAME) + # TODO: this needs review to see if such work is needed in new upgrade scenario + # if it does, it needs to be done on deploy host controller-1 + # apply_sriov_config(db_credentials, utils.CONTROLLER_1_HOSTNAME) + + # don't need to remove /etc/kubernetes/admin.conf + # after it is used to generate + # the hiera data + + # Stop postgres server + # TODO: shutdown postgresql once debug complete + LOG.info("Shutting down PostgreSQL...") + # try: + # subprocess.check_call([ + # 'sudo', + # '-u', + # 'postgres', + # os.path.join(POSTGRES_BIN, 'pg_ctl'), + # '-D', + # POSTGRES_DATA_DIR, + # 'stop'], + # stdout=result) + # except 
subprocess.CalledProcessError:
+    #     LOG.exception("Failed to stop postgres service")
+    #     raise
+
+    print("Data migration complete !!!")
+    LOG.info("Data migration complete !!!")
+
+
+def migrate():
+    # this is the entry point to start data migration
+    parser = argparse.ArgumentParser(
+        add_help=False,
+    )
+
+    parser.add_argument("from_release",
+                        help="From release")
+
+    parser.add_argument("to_release",
+                        help="To release")
+
+    parser.add_argument('port',
+                        help="PostgreSQL service port to access target database.")
+
+    parser.add_argument('-v', '--verbose',
+                        default=False, action="store_true",
+                        help="Print more verbose output")
+
+    args = parser.parse_args()
+
+    # `result` is the module-level file object used as the stdout/stderr
+    # sink by the subprocess calls in this module; open it here, before
+    # any of them run.
+    global result
+    result = open("/var/log/upgrade.log", "w")
+
+    upgrade_controller(args.from_release, args.to_release, args.port)
+
diff --git a/software/software/utilities/utils.py b/software/software/utilities/utils.py
new file mode 100644
index 00000000..3765f460
--- /dev/null
+++ b/software/software/utilities/utils.py
@@ -0,0 +1,483 @@
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+# This file contains common upgrade functions that can be used by both sysinv
+# and during the upgrade of controller-1.
+#
+
+import keyring
+import os
+import psycopg2
+from psycopg2.extras import RealDictCursor
+import subprocess
+import tempfile
+import yaml
+import netaddr
+
+# WARNING: The controller-1 upgrade is done before any puppet manifests
+# have been applied, so only the static entries from tsconfig can be used
+# (the platform.conf file will not have been updated with dynamic values).
+from software.utilities.constants import PLATFORM_PATH
+from software.utilities.constants import KEYRING_PERMDIR
+
+from software.utilities import constants
+
+from oslo_log import log
+
+LOG = log.getLogger(__name__)
+
+DB_CONNECTION = "postgresql://%s:%s@127.0.0.1/%s\n"
+KUBERNETES_CONF_PATH = "/etc/kubernetes"
+KUBERNETES_ADMIN_CONF_FILE = "admin.conf"
+PLATFORM_LOG = '/var/log/platform.log'
+ERROR_FILE = '/tmp/upgrade_fail_msg'
+
+# well-known default domain name
+DEFAULT_DOMAIN_NAME = 'Default'
+
+# Migration script actions
+ACTION_START = "start"
+ACTION_MIGRATE = "migrate"
+ACTION_ACTIVATE = "activate"
+
+
+def execute_migration_scripts(from_release, to_release, action, port=None,
+                              migration_script_dir="/etc/upgrade.d"):
+    """ Execute migration scripts with an action:
+          start: Prepare for upgrade on release N side. Called during
+                 "system upgrade-start".
+          migrate: Perform data migration on release N+1 side. Called while
+                   controller-1 is performing its upgrade.
+          activate: Perform activation steps on release N+1 side. Called
+                    during "system upgrade-activate".
+ """ + + LOG.info("Executing migration scripts with from_release: %s, " + "to_release: %s, action: %s" % (from_release, to_release, action)) + + # Get a sorted list of all the migration scripts + # Exclude any files that can not be executed, including .pyc and .pyo files + files = [f for f in os.listdir(migration_script_dir) + if os.path.isfile(os.path.join(migration_script_dir, f)) and + os.access(os.path.join(migration_script_dir, f), os.X_OK)] + # From file name, get the number to sort the calling sequence, + # abort when the file name format does not follow the pattern + # "nnn-*.*", where "nnn" string shall contain only digits, corresponding + # to a valid unsigned integer (first sequence of characters before "-") + try: + files.sort(key=lambda x: int(x.split("-")[0])) + except Exception: + LOG.exception("Migration script sequence validation failed, invalid " + "file name format") + raise + + MSG_SCRIPT_FAILURE = "Migration script %s failed with returncode %d" \ + "Script output:\n%s" + # Execute each migration script + for f in files: + migration_script = os.path.join(migration_script_dir, f) + try: + LOG.info("Executing migration script %s" % migration_script) + cmdline = [migration_script, from_release, to_release, action] + if not port is None: + cmdline.append(port) + ret = subprocess.run(cmdline, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + text=True, check=True) + if ret.returncode != 0: + script_output = ret.stdout.splitlines() + output_list = [] + for item in script_output: + if item not in output_list: + output_list.append(item) + output_script = "\n".join(output_list) + msg = MSG_SCRIPT_FAILURE % (migration_script, + ret.returncode, + output_script) + LOG.error(msg) + msg_temp = search_script_output(PLATFORM_LOG, f) + save_temp_file(msg, msg_temp) + raise Exception(msg) + + except subprocess.CalledProcessError as e: + # log script output if script executed but failed. + LOG.error(MSG_SCRIPT_FAILURE % + (migration_script, e.returncode, e.output)) + # Abort when a migration script fails + raise + except Exception as e: + # log exception if script not executed. + LOG.exception(e) + raise + + +def search_script_output(file_name, script): + cmd = [ + "awk", + "/{script}/ {{last_match = $0}} " + "END {{if (last_match) print last_match}}".format(script=script), + file_name + ] + try: + process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, error = process.communicate() + last_match = output.decode().strip() + LOG.error("Failed to exec cmd. \n %s" % error) + + except Exception e: + LOG.error("Failed to exec cmd. 
\n %s" % e) + return None + return last_match + + +def save_temp_file(msg, error=None): + if os.path.isfile(ERROR_FILE): + os.remove(ERROR_FILE) + + MSG_FAILURE = '%s \n\n'\ + '%s \n\n'\ + 'Check specific service log or search for' \ + 'this app in sysinv.log for details' + msg = MSG_FAILURE % (msg, + error) + try: + with open(ERROR_FILE, 'w+') as error_file: + error_file.write(msg) + except Exception: + LOG.warning("Error opening file %s" % ERROR_FILE) + return None + + +def get_db_connection(hiera_db_records, database): + username = hiera_db_records[database]['username'] + password = hiera_db_records[database]['password'] + return "postgresql://%s:%s@%s/%s" % ( + username, password, 'localhost', database) + + +def get_password_from_keyring(service, username): + """Retrieve password from keyring""" + password = "" + os.environ["XDG_DATA_HOME"] = KEYRING_PERMDIR + try: + password = keyring.get_password(service, username) + except Exception as e: + LOG.exception("Received exception when attempting to get password " + "for service %s, username %s: %s" % + (service, username, e)) + raise + finally: + del os.environ["XDG_DATA_HOME"] + return password + + +def get_upgrade_token(from_release, + config, + secure_config): + + # Get the system hiera data from the from release + from_hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release, + "hieradata") + system_file = os.path.join(from_hiera_path, "system.yaml") + with open(system_file, 'r') as s_file: + system_config = yaml.load(s_file, Loader=yaml.Loader) + + # during a controller-1 upgrade, keystone is running + # on the controller UNIT IP, however the service catalog + # that was migrated from controller-0 since lists the + # floating controller IP. Keystone operations that use + # the AUTH URL will hit this service URL and fail, + # therefore we have to issue an Upgrade token for + # all Keystone operations during an Upgrade. This token + # will allow us to circumvent the service catalog entry, by + # providing a bypass endpoint. + keystone_upgrade_url = "http://{}:5000/{}".format( + '127.0.0.1', + system_config['openstack::keystone::params::api_version']) + + admin_user_domain = system_config.get( + 'platform::client::params::admin_user_domain') + if admin_user_domain is None: + # This value wasn't present in R2. So may be missing in upgrades from + # that release + LOG.info("platform::client::params::admin_user_domain key not found. " + "Using Default.") + admin_user_domain = DEFAULT_DOMAIN_NAME + + admin_project_domain = system_config.get( + 'platform::client::params::admin_project_domain') + if admin_project_domain is None: + # This value wasn't present in R2. So may be missing in upgrades from + # that release + LOG.info("platform::client::params::admin_project_domain key not " + "found. 
Using Default.") + admin_project_domain = DEFAULT_DOMAIN_NAME + + admin_password = get_password_from_keyring("CGCS", "admin") + admin_username = system_config.get( + 'platform::client::params::admin_username') + + # the upgrade token command + keystone_upgrade_token = ( + "openstack " + "--os-username {} " + "--os-password '{}' " + "--os-auth-url {} " + "--os-project-name admin " + "--os-user-domain-name {} " + "--os-project-domain-name {} " + "--os-interface internal " + "--os-identity-api-version 3 " + "token issue -c id -f value".format( + admin_username, + admin_password, + keystone_upgrade_url, + admin_user_domain, + admin_project_domain + )) + + config.update({ + 'openstack::keystone::upgrade::upgrade_token_file': + '/etc/keystone/upgrade_token', + 'openstack::keystone::upgrade::url': keystone_upgrade_url + }) + + secure_config.update({ + 'openstack::keystone::upgrade::upgrade_token_cmd': + keystone_upgrade_token, + }) + + +def get_upgrade_data(from_release, + system_config, + secure_config): + """ Retrieve required data from the from-release, update system_config + and secure_config with them. + This function is needed for adding new service account and endpoints + during upgrade. + """ + # Get the system hiera data from the from release + from_hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release, + "hieradata") + system_file = os.path.join(from_hiera_path, "system.yaml") + with open(system_file, 'r') as s_file: + system_config_from_release = yaml.load(s_file, Loader=yaml.Loader) + + # Get keystone region + keystone_region = system_config_from_release.get( + 'keystone::endpoint::region') + + system_config.update({ + 'platform::client::params::identity_region': keystone_region, + # Retrieve keystone::auth::region from the from-release for the new + # service. + # 'newservice::keystone::auth::region': keystone_region, + }) + + # Generate password for the new service + # password = sysinv_utils.generate_random_password(16) + + secure_config.update({ + # Generate and set the keystone::auth::password for the new service. + # 'newservice::keystone::auth::password': password, + }) + + +def add_upgrade_entries_to_hiera_data(from_release): + """ Adds upgrade entries to the hiera data """ + + filename = 'static.yaml' + secure_filename = 'secure_static.yaml' + path = constants.HIERADATA_PERMDIR + + # Get the hiera data for this release + filepath = os.path.join(path, filename) + with open(filepath, 'r') as c_file: + config = yaml.load(c_file, Loader=yaml.Loader) + secure_filepath = os.path.join(path, secure_filename) + with open(secure_filepath, 'r') as s_file: + secure_config = yaml.load(s_file, Loader=yaml.Loader) + + # File for system.yaml + # TODO: bqian This is needed for adding new service account and endpoints + # during upgrade. + system_filename = 'system.yaml' + system_filepath = os.path.join(path, system_filename) + + # Get a token and update the config + # Below should be removed. Need to ensure during data migration + get_upgrade_token(from_release, config, secure_config) + + # Get required data from the from-release and add them in system.yaml. + # We don't carry system.yaml from the from-release. + # This is needed for adding new service account and endpoints + # during upgrade. 
+    # TODO: bqian Below should be replaced with generating hieradata from
+    # the migrated to-release database
+    system_config = {}
+    get_upgrade_data(from_release, system_config, secure_config)
+
+    # Update the hiera data on disk
+    try:
+        fd, tmppath = tempfile.mkstemp(dir=path, prefix=filename,
+                                       text=True)
+        with open(tmppath, 'w') as f:
+            yaml.dump(config, f, default_flow_style=False)
+        os.close(fd)
+        os.rename(tmppath, filepath)
+    except Exception:
+        LOG.exception("failed to write config file: %s" % filepath)
+        raise
+
+    try:
+        fd, tmppath = tempfile.mkstemp(dir=path, prefix=secure_filename,
+                                       text=True)
+        with open(tmppath, 'w') as f:
+            yaml.dump(secure_config, f, default_flow_style=False)
+        os.close(fd)
+        os.rename(tmppath, secure_filepath)
+    except Exception:
+        LOG.exception("failed to write secure config: %s" % secure_filepath)
+        raise
+
+    # Add required hiera data into system.yaml.
+    # This is needed for adding new service account and endpoints
+    # during upgrade.
+    try:
+        fd, tmppath = tempfile.mkstemp(dir=path, prefix=system_filename,
+                                       text=True)
+        with open(tmppath, 'w') as f:
+            yaml.dump(system_config, f, default_flow_style=False)
+        os.close(fd)
+        os.rename(tmppath, system_filepath)
+    except Exception:
+        LOG.exception("failed to write system config: %s" % system_filepath)
+        raise
+
+
+def create_simplex_runtime_config(filename):
+    """ Create any runtime parameters needed for simplex upgrades"""
+    config = {}
+    # Here is an example from a previous release...
+    # config.update({'nova::db::sync_api::cellv2_setup': False})
+    create_manifest_runtime_config(filename, config)
+
+
+def apply_upgrade_manifest(controller_address):
+    """Apply puppet upgrade manifest files."""
+
+    cmd = [
+        "/usr/local/bin/puppet-manifest-apply.sh",
+        constants.HIERADATA_PERMDIR,
+        str(controller_address),
+        constants.CONTROLLER,
+        'upgrade'
+    ]
+
+    logfile = "/tmp/apply_manifest.log"
+    try:
+        with open(logfile, "w") as flog:
+            subprocess.check_call(cmd, stdout=flog, stderr=flog)
+    except subprocess.CalledProcessError:
+        msg = "Failed to execute upgrade manifest"
+        print(msg)
+        raise Exception(msg)
+
+
+def format_url_address(address):
+    """Format the URL address according to RFC 2732"""
+    try:
+        addr = netaddr.IPAddress(address)
+        if addr.version == constants.IPV6_FAMILY:
+            return "[%s]" % address
+        else:
+            return str(address)
+    except netaddr.AddrFormatError:
+        return address
+
+
+def get_keystone_user_id(user_name):
+    """ Get a keystone user id by name"""
+
+    conn = psycopg2.connect("dbname='keystone' user='postgres'")
+    with conn:
+        with conn.cursor(cursor_factory=RealDictCursor) as cur:
+            # Use a bound parameter rather than string interpolation so the
+            # value is quoted/escaped by the driver.
+            cur.execute("SELECT user_id FROM local_user WHERE name=%s",
+                        (user_name,))
+            user_id = cur.fetchone()
+            if user_id is not None:
+                return user_id['user_id']
+            else:
+                return user_id
+
+
+def get_keystone_project_id(project_name):
+    """ Get a keystone project id by name"""
+
+    conn = psycopg2.connect("dbname='keystone' user='postgres'")
+    with conn:
+        with conn.cursor(cursor_factory=RealDictCursor) as cur:
+            cur.execute("SELECT id FROM project WHERE name=%s",
+                        (project_name,))
+            project_id = cur.fetchone()
+            if project_id is not None:
+                return project_id['id']
+            else:
+                return project_id
+
+
+def get_postgres_bin():
+    """ Get the path to the postgres binaries"""
+
+    try:
+        return subprocess.check_output(
+            ['pg_config', '--bindir']).decode().rstrip('\n')
+    except subprocess.CalledProcessError:
+        LOG.exception("Failed to get postgres bin directory.")
+        raise
+
+
+def create_manifest_runtime_config(filename,
config): + """Write the runtime Puppet configuration to a runtime file.""" + if not config: + return + try: + with open(filename, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + except Exception: + LOG.exception("failed to write config file: %s" % filename) + raise + + +def create_system_config(): + cmd = ["/usr/bin/sysinv-puppet", + "create-system-config", + constants.HIERADATA_PERMDIR] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + msg = "Failed to update puppet hiera system config" + print(msg) + raise Exception(msg) + + +def create_host_config(hostname=None): + cmd = ["/usr/bin/sysinv-puppet", + "create-host-config", + constants.HIERADATA_PERMDIR] + if hostname: + cmd.append(hostname) + + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + msg = "Failed to update puppet hiera host config" + print(msg) + raise Exception(msg) +
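
---
Note: execute_migration_scripts() above defines an implicit contract for
the drop-in scripts it discovers under /etc/upgrade.d: each file must be
executable, be named "nnn-*" (the numeric prefix is the sort key), and
accept from_release, to_release, action and, with this change, an
optional PostgreSQL port. A minimal sketch of a conforming script is
shown below; the file name, number and body are hypothetical, for
illustration only:

    #!/usr/bin/python3
    # Hypothetical /etc/upgrade.d/100-example-migrate, invoked by
    # execute_migration_scripts() as:
    #     100-example-migrate <from_release> <to_release> <action> [port]
    # A non-numeric prefix in the file name aborts the whole script walk.
    import sys


    def main():
        from_release = sys.argv[1]
        to_release = sys.argv[2]
        action = sys.argv[3]  # "start", "migrate" or "activate"
        # Optional 4th argument added by this change: the port of the
        # second PostgreSQL instance running the to-release software.
        port = sys.argv[4] if len(sys.argv) > 4 else None

        if action == "migrate":
            # Convert from_release data to the to_release format here,
            # using `port` to reach the target database when provided.
            pass
        # A non-zero exit status makes execute_migration_scripts() log the
        # script output and abort the data migration.
        return 0


    if __name__ == "__main__":
        sys.exit(main())

With the console_scripts entry added in setup.cfg, the worker itself is
started inside the chroot as "software-migrate <from_release>
<to_release> <port>", matching the positional arguments parsed in
migrate().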