Implement USM software deploy start for upgrade

When a major/minor release update is to be deployed, start the
software-deploy-start script and execute the deploy start
operations in a background process. Once the background subprocess
starts, notify the user that the deployment has started.
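
A minimal sketch of the launch flow described above (illustrative only; the
helper name is assumed and is not part of this change):

    import subprocess

    def launch_deploy_start(cmd, env):
        """Launch the deploy-start script detached from the current process."""
        try:
            # start_new_session=True detaches the child so the deploy keeps
            # running independently of the request that triggered it
            subprocess.Popen(cmd, start_new_session=True, env=env)
            return True  # the caller can now notify the user it has started
        except subprocess.SubprocessError:
            return False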

Story: 2010676
Task: 48958

TCs:
    passed: complete deploy start on controller-0 on a DX system
    passed: complete deploy start on controller-0 on a SX system
    passed: upload and deploy start a patch

Depends-on: https://review.opendev.org/c/starlingx/update/+/898850

Change-Id: I5c0317c584348a9c1473719ffd01f810c3121212
Signed-off-by: Bin Qian <bin.qian@windriver.com>
Bin Qian 2023-10-18 19:44:27 +00:00 committed by Luis Eduardo Bonatti
parent a8c5688264
commit 38d6646a9e
7 changed files with 200 additions and 67 deletions

View File

@@ -5,3 +5,6 @@ api_port = 5493
controller_port = 5494
agent_port = 5495
# alternate PostgreSQL server port used to bring up the
# to-release database
alt_postgresql_port = 6666

View File

@@ -21,6 +21,7 @@ agent_mcast_group = None
controller_port = 0
agent_port = 0
api_port = 0
alt_postgresql_port = 0
mgmt_if = None
nodetype = None
platform_conf_mtime = 0
@@ -78,6 +79,7 @@ def read_config():
'api_port': "5493",
'controller_port': "5494",
'agent_port': "5495",
'alt_postgresql_port': "6666",
}
global controller_mcast_group
@@ -85,6 +87,7 @@ def read_config():
global api_port
global controller_port
global agent_port
global alt_postgresql_port
config = configparser.ConfigParser(defaults)
@@ -98,6 +101,7 @@ def read_config():
api_port = config.getint('runtime', 'api_port')
controller_port = config.getint('runtime', 'controller_port')
agent_port = config.getint('runtime', 'agent_port')
alt_postgresql_port = config.getint('runtime', 'alt_postgresql_port')
# The platform.conf file has no section headers, which causes problems
# for ConfigParser. So we'll fake it out.
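# A minimal sketch (not part of this change; the helper name and the
# "[platform]" section label are assumed) of the "fake it out" approach for
# parsing a section-less file with ConfigParser:
def _read_sectionless_conf(path):
    import configparser
    with open(path) as f:
        # prepend a dummy section header so ConfigParser accepts the file
        content = "[platform]\n" + f.read()
    cp = configparser.ConfigParser()
    cp.read_string(content)
    return dict(cp["platform"])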

View File

@@ -91,7 +91,8 @@ DEPLOYMENT_STATE_INACTIVE = "Inactive"
DEPLOYMENT_STATE_PRESTAGING = "Prestaging"
DEPLOYMENT_STATE_PRESTAGED = "Prestaged"
UPGRADE_FEED_DIR = "/var/www/pages/feed/"
FEED_DIR = "/var/www/pages/feed/"
UPGRADE_FEED_DIR = FEED_DIR
TMP_DIR = "/tmp"
OSTREE_REPO = 'ostree_repo'

View File

@@ -59,6 +59,7 @@ from software.software_functions import ReleaseData
from software.release_verify import verify_files
import software.config as cfg
import software.utils as utils
from software.sysinv_utils import get_k8s_ver
from software.db.api import get_instance
@@ -1849,34 +1850,30 @@ class PatchController(PatchService):
msg = "Failed to delete the restart script for %s" % patch_id
LOG.exception(msg)
def software_deploy_precheck_api(self, deployment: str, force: bool, **kwargs) -> dict:
def _deploy_precheck(self, release_version: str, force: bool, region_name: str = "RegionOne") -> dict:
"""
Verify whether the system is capable of upgrading to the specified deployment
return: dict of info, warning and error messages
"""
msg_info = ""
msg_warning = ""
msg_error = ""
# We need to verify that the software release exists
release = self.release_data.metadata.get(deployment, None)
if not release:
msg = "Software release version corresponding to the specified deployment " \
"%s does not exist." % deployment
LOG.error(msg)
msg_error += msg + "\n"
return dict(info=msg_info, warning=msg_warning, error=msg_error)
# Check if software release directory location exists
release_version = utils.get_major_release_version(release["sw_version"])
deployment_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, "rel-%s" % release_version)
# TODO(bqian) when the deploy-precheck script is moved to /opt/software/rel-<ver>/,
# change the code below to call the right script with patch number in <ver>
rel_ver = utils.get_major_release_version(release_version)
rel_path = "rel-%s" % rel_ver
deployment_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, rel_path)
precheck_script = os.path.join(deployment_dir, "upgrades",
constants.SOFTWARE_DEPLOY_FOLDER, "deploy-precheck")
if not os.path.isdir(deployment_dir) or not os.path.isfile(precheck_script):
msg = "Upgrade files for deployment %s are not present on the system, " \
"cannot proceed with the precheck." % deployment
"cannot proceed with the precheck." % rel_ver
LOG.error(msg)
msg_error += msg + "\n"
msg_error = "Fail to perform deploy precheck. " \
"Uploaded release may have been damaged." \
"Try delete and re-upload the release.\n"
return dict(info=msg_info, warning=msg_warning, error=msg_error)
# parse local config file to pass parameters to precheck script
@@ -1890,10 +1887,11 @@ class PatchController(PatchService):
project_name = ks_section["project_name"]
user_domain_name = ks_section["user_domain_name"]
project_domain_name = ks_section["project_domain_name"]
region_name = kwargs["region_name"]
except Exception as e:
msg = "Error parsing config file: %s." % str(e)
msg_error += msg + "\n"
LOG.error(msg)
msg_error = "Fail to perform deploy precheck. Internal error has occured." \
"Try lock and unlock the controller for recovery.\n"
return dict(info=msg_info, warning=msg_warning, error=msg_error)
# TODO(heitormatsui) if different region was passed as parameter then
@@ -1927,6 +1925,69 @@ class PatchController(PatchService):
return dict(info=msg_info, warning=msg_warning, error=msg_error)
def software_deploy_precheck_api(self, deployment: str, force: bool, **kwargs) -> dict:
"""
Verify whether the system is capable of upgrading to the specified deployment
return: dict of info, warning and error messages
"""
msg_info = ""
msg_warning = ""
msg_error = ""
# We need to verify that the software release exists
release = self.release_data.metadata.get(deployment, None)
if not release:
msg = "Software release version corresponding to the specified release " \
"%s does not exist. " % deployment
LOG.error(msg)
msg_error += "Software release version corresponding to the specified " \
"release %s does not exist. " \
"Try deleting and re-uploading the software for " \
"recovery." % deployment
return dict(info=msg_info, warning=msg_warning, error=msg_error)
# Check if software release directory location exists
region_name = kwargs["region_name"]
release_version = release["sw_version"]
return self._deploy_precheck(release_version, force, region_name)
def _deploy_upgrade_start(self, to_release):
LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
cmd_path = "/usr/sbin/software-deploy/software-deploy-start"
major_to_release = utils.get_major_release_version(to_release)
k8s_ver = get_k8s_ver()
postgresql_port = str(cfg.alt_postgresql_port)
feed = os.path.join(constants.FEED_DIR,
"rel-%s/ostree_repo" % major_to_release)
commit_id = None
LOG.info("k8s version %s" % k8s_ver)
upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port,
feed]
if commit_id is not None:
upgrade_start_cmd.append(commit_id)
# pass keystone auth to the subprocess through environment variables:
# OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME,
# OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME
keystone_auth = CONF.get('keystone_authtoken')
env = {}
env["OS_AUTH_URL"] = keystone_auth["auth_url"] + '/v3'
env["OS_USERNAME"] = keystone_auth["username"]
env["OS_PASSWORD"] = keystone_auth["password"]
env["OS_PROJECT_NAME"] = keystone_auth["project_name"]
env["OS_USER_DOMAIN_NAME"] = keystone_auth["user_domain_name"]
env["OS_PROJECT_DOMAIN_NAME"] = keystone_auth["project_domain_name"]
env["OS_REGION_NAME"] = keystone_auth["region_name"]
try:
LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd))
subprocess.Popen(' '.join(upgrade_start_cmd), start_new_session=True, shell=True, env=env)
LOG.info("subprocess started")
return True
except subprocess.SubprocessError as e:
LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e))
return False
def software_deploy_start_api(self, deployment: str, **kwargs) -> dict:
"""
Start deployment by applying the changes to the feed ostree
@@ -1936,14 +1997,37 @@ class PatchController(PatchService):
msg_warning = ""
msg_error = ""
# TODO(bqian) create a separate function to check that a release is uploaded
# and all materials exist; raise a proper exception if not.
# We need to verify that the software release exists
if deployment not in self.release_data.metadata:
msg = "Software release version corresponding to the specified deployment " \
"%s does not exist" % deployment
release = self.release_data.metadata.get(deployment, None)
if not release:
msg = "Software release version corresponding to the specified release " \
"%s does not exist. " % deployment
LOG.error(msg)
msg_error += msg + "\n"
msg_error += "Software release version corresponding to the specified " \
"release %s does not exist. " \
"Try delete and re-upload the software for " \
"recovery." % deployment
return dict(info=msg_info, warning=msg_warning, error=msg_error)
if utils.is_upgrade_deploy(SW_VERSION, release["sw_version"]):
to_release = release["sw_version"]
ret = self._deploy_precheck(to_release, False)
if ret["error"]:
ret["error"] = "The following issues have been detected which prevent " \
"deploying %s\n" % deployment + \
ret["error"]
ret["error"] += "Please fix above issues then retry the deploy.\n"
return ret
collect_current_load_for_hosts()
if self._deploy_upgrade_start(to_release):
msg_info = "Deployment for %s started" % deployment
else:
msg_error = "Deployment for %s failed to start" % deployment
return dict(info=msg_info, warning=msg_warning, error=msg_error)
# Identify if this is apply or remove operation
# todo(jcasteli) Remove once the logic to include major release version
# in release list is implemented

View File

@@ -29,10 +29,10 @@ from software.exceptions import ReleaseUploadFailure
from software.exceptions import ReleaseValidationFailure
from software.exceptions import ReleaseMismatchFailure
from software.exceptions import SoftwareFail
from software.exceptions import SysinvClientNotInitialized
import software.constants as constants
import software.utils as utils
from software.sysinv_utils import get_sysinv_client
try:
@@ -1049,52 +1049,12 @@ def read_upgrade_support_versions(mounted_dir):
return to_release, supported_from_releases
def get_endpoints_token(config=None, service_type="platform"):
try:
if not config:
keystone_conf = CONF.get('keystone_authtoken')
else:
keystone_conf = config
user = {
'auth_url': keystone_conf["auth_url"] + '/v3',
'username': keystone_conf["username"],
'password': keystone_conf["password"],
'project_name': keystone_conf["project_name"],
'user_domain_name': keystone_conf["user_domain_name"],
'project_domain_name': keystone_conf["project_domain_name"],
}
region_name = keystone_conf["region_name"]
token, endpoint = utils.get_auth_token_and_endpoint(user=user,
service_type=service_type,
region_name=region_name,
interface='public')
return token, endpoint
except Exception as e:
LOG.error("Failed to get '%s' endpoint. Error: %s", service_type, str(e))
return None, None
def get_sysinv_client(token, endpoint):
try:
from cgtsclient import client
sysinv_client = client.Client(version='1', endpoint=endpoint, token=token, timeout=600)
return sysinv_client
except ImportError:
msg = "Failed to import cgtsclient"
LOG.exception(msg)
raise ImportError(msg)
except Exception as e:
msg = "Failed to get sysinv client. Error: %s" % str(e)
LOG.exception(msg)
raise SysinvClientNotInitialized(msg)
def collect_current_load_for_hosts():
load_data = {
"current_loads": []
}
try:
token, endpoint = get_endpoints_token()
token, endpoint = utils.get_endpoints_token()
sysinv_client = get_sysinv_client(token=token, endpoint=endpoint)
host_list = sysinv_client.ihost.list()
for ihost in host_list:

View File

@@ -0,0 +1,42 @@
"""
Copyright (c) 2023 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
"""
import logging
import software.utils as utils
from software.exceptions import SysinvClientNotInitialized
LOG = logging.getLogger('main_logger')
def get_sysinv_client(token, endpoint):
try:
from cgtsclient import client
sysinv_client = client.Client(version='1', endpoint=endpoint, token=token, timeout=600)
return sysinv_client
except ImportError:
msg = "Failed to import cgtsclient"
LOG.exception(msg)
raise ImportError(msg)
except Exception as e:
msg = "Failed to get sysinv client. Error: %s" % str(e)
LOG.exception(msg)
raise SysinvClientNotInitialized(msg)
def get_k8s_ver():
try:
token, endpoint = utils.get_endpoints_token()
sysinv_client = get_sysinv_client(token=token, endpoint=endpoint)
k8s_vers = sysinv_client.kube_version.list()
except Exception as err:
LOG.error("Error getting k8s version: %s", err)
raise
for k8s_ver in k8s_vers:
if k8s_ver.state == "active":
return k8s_ver.version
raise Exception("Failed to get current k8s version")

View File

@@ -10,6 +10,8 @@ import re
import shutil
from netaddr import IPAddress
import os
from oslo_config import cfg as oslo_cfg
from packaging import version
import socket
from socket import if_nametoindex as if_nametoindex_func
@@ -17,7 +19,9 @@ import software.constants as constants
from software.exceptions import StateValidationFailure
LOG = logging.getLogger('main_logger')
CONF = oslo_cfg.CONF
def if_nametoindex(name):
@@ -306,6 +310,41 @@ def check_instances(params: list, instance):
raise ValueError(msg)
def get_endpoints_token(config=None, service_type="platform"):
try:
if not config:
keystone_conf = CONF.get('keystone_authtoken')
else:
keystone_conf = config
user = {
'auth_url': keystone_conf["auth_url"] + '/v3',
'username': keystone_conf["username"],
'password': keystone_conf["password"],
'project_name': keystone_conf["project_name"],
'user_domain_name': keystone_conf["user_domain_name"],
'project_domain_name': keystone_conf["project_domain_name"],
}
region_name = keystone_conf["region_name"]
token, endpoint = get_auth_token_and_endpoint(user=user,
service_type=service_type,
region_name=region_name,
interface='public')
return token, endpoint
except Exception as e:
LOG.error("Failed to get '%s' endpoint. Error: %s", service_type, str(e))
return None, None
def is_upgrade_deploy(from_release, to_release):
from_ver = version.Version(from_release)
to_ver = version.Version(to_release)
if from_ver.major == to_ver.major and from_ver.minor == to_ver.minor:
return False
else:
return True
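# Illustrative behaviour (example values only, not part of this change):
#   is_upgrade_deploy("22.12", "24.09")      -> True   (major/minor differs)
#   is_upgrade_deploy("24.09.1", "24.09.2")  -> False  (patch within a release)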
def get_software_filesystem_data():
if os.path.exists(constants.SOFTWARE_JSON_FILE):
return load_from_json_file(constants.SOFTWARE_JSON_FILE)
@@ -319,8 +358,8 @@ def validate_versions(versions):
:param versions: list of versions
:raise: ValueError if version is invalid
"""
for version in versions:
if not re.match(r'[0-9]+\.[0-9]+(\.[0-9]+)?$', version):
msg = "Invalid version: %s" % version
for ver in versions:
if not re.match(r'[0-9]+\.[0-9]+(\.[0-9]+)?$', ver):
msg = "Invalid version: %s" % ver
LOG.exception(msg)
raise ValueError(msg)