Get migration scripts output for subcloud
With this change, the orchestrator retrieves the failed migration
script and its output from the platform log when the starting or
activating upgrade step fails.
This makes the orchestrator more descriptive in these
failure cases.
Test plan:
PASS: Modify migration script involving action = 'start' in
order to make the starting upgrade step fail.
Run subcloud upgrade strategy.
Check strategy details to verify msg.
PASS: Modify migration script involving action = 'activate' in order
to make the activating upgrade step fail.
Run subcloud upgrade strategy.
Check strategy details to verify msg.
PASS: Run the strategy with unmodified (working) migration scripts and
check that the strategy completes the steps successfully.
Story: 2010768
Task: 48079
Depends-On: https://review.opendev.org/c/starlingx/config/+/883831
Signed-off-by: fperez <fabrizio.perez@windriver.com>
Change-Id: I05d857b2e98d1fe71eac7348991df6353058611c
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# Copyright 2016 Ericsson AB
|
||||
# Copyright (c) 2017-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2017-2023 Wind River Systems, Inc.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
@@ -391,6 +391,10 @@ class SysinvClient(base.DriverBase):
|
||||
"""Get a list of upgrades."""
|
||||
return self.sysinv_client.upgrade.list()
|
||||
|
||||
def get_error_msg(self):
    """Get the upgrade message.

    Thin wrapper that forwards to the sysinv client's
    ``upgrade.get_upgrade_msg()`` and returns its result unchanged.
    """
    return self.sysinv_client.upgrade.get_upgrade_msg()
|
||||
|
||||
def upgrade_activate(self):
|
||||
"""Invoke the API for 'system upgrade-activate', which is an update """
|
||||
patch = [{'op': 'replace',
|
||||
|
||||
@@ -187,6 +187,7 @@ DEPLOY_STATE_RECONFIGURING_NETWORK = 'reconfiguring-network'
|
||||
DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED = 'network-reconfiguration-failed'
|
||||
# Subcloud errors
|
||||
ERROR_DESC_EMPTY = 'No errors present'
|
||||
ERROR_DESC_FAILED = 'Failed to get error message. Please check sysinv log'
|
||||
ERROR_DESC_CMD = 'dcmanager subcloud errors <subcloud-name>'
|
||||
|
||||
# Static content for error messages
|
||||
|
||||
@@ -945,3 +945,19 @@ def decode_and_normalize_passwd(input_passwd):
|
||||
passwd = "'" + passwd + "'"
|
||||
|
||||
return passwd
|
||||
|
||||
|
||||
def get_failure_msg(subcloud_name):
    """Fetch the upgrade error message reported by a subcloud's sysinv.

    Builds an OpenStackDriver for the subcloud region, resolves the
    'sysinv' endpoint from the keystone client's endpoint cache and asks
    a SysinvClient for its error message.

    :param subcloud_name: name of the subcloud, used as the region name
    :returns: the message returned by ``SysinvClient.get_error_msg()``,
        or ``consts.ERROR_DESC_FAILED`` if any step raises
    """
    try:
        os_client = OpenStackDriver(region_name=subcloud_name,
                                    region_clients=None)
        keystone_client = os_client.keystone_client
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud_name,
                                     keystone_client.session,
                                     endpoint=endpoint)
        return sysinv_client.get_error_msg()
    except Exception as e:
        # Best effort: the caller is already handling a failure, so never
        # let the message lookup raise; log and fall back to a fixed text.
        LOG.exception("%s: %s", subcloud_name, e)
        return consts.ERROR_DESC_FAILED
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2020 Wind River Systems, Inc.
|
||||
# Copyright (c) 2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -7,6 +7,8 @@ import time
|
||||
|
||||
from dcmanager.common import consts
|
||||
from dcmanager.common.exceptions import StrategyStoppedException
|
||||
from dcmanager.common import utils
|
||||
from dcmanager.db import api as db_api
|
||||
from dcmanager.orchestrator.states.base import BaseState
|
||||
|
||||
ACTIVATING_COMPLETED_STATES = ['activation-complete',
|
||||
@@ -84,8 +86,15 @@ class ActivatingUpgradeState(BaseState):
|
||||
|
||||
# if max retries have occurred, fail the state
|
||||
if activate_retry_counter >= self.max_failed_retries:
|
||||
raise Exception("Failed to activate upgrade. Please check "
|
||||
"sysinv.log on the subcloud for details.")
|
||||
error_msg = utils.get_failure_msg(strategy_step.subcloud.name)
|
||||
db_api.subcloud_update(
|
||||
self.context, strategy_step.subcloud_id,
|
||||
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
|
||||
details = ("Failed to activate upgrade. Please check "
|
||||
"sysinv.log on the subcloud or "
|
||||
"%s on central for details." %
|
||||
(consts.ERROR_DESC_CMD))
|
||||
raise Exception(details)
|
||||
|
||||
# We may need multiple attempts to issue the first activate
|
||||
# if keystone is down, impacting the ability to send the activate
|
||||
@@ -137,9 +146,15 @@ class ActivatingUpgradeState(BaseState):
|
||||
break
|
||||
audit_counter += 1
|
||||
if audit_counter >= self.max_queries:
|
||||
raise Exception("Timeout waiting for activation to complete. "
|
||||
"Please check sysinv.log on the subcloud for "
|
||||
"details.")
|
||||
error_msg = utils.get_failure_msg(strategy_step.subcloud.name)
|
||||
db_api.subcloud_update(
|
||||
self.context, strategy_step.subcloud_id,
|
||||
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
|
||||
details = ("Timeout waiting for activation to complete. "
|
||||
"Please check sysinv.log on the subcloud or "
|
||||
"%s on central for details." %
|
||||
(consts.ERROR_DESC_CMD))
|
||||
raise Exception(details)
|
||||
time.sleep(self.sleep_duration)
|
||||
|
||||
# When we return from this method without throwing an exception, the
|
||||
|
||||
@@ -158,10 +158,13 @@ class MigratingDataState(BaseState):
|
||||
# one for orchestrator strategy_step detail (shorter than the previous).
|
||||
msg_subcloud = utils.find_ansible_error_msg(
|
||||
strategy_step.subcloud.name, log_file, consts.DEPLOY_STATE_MIGRATING_DATA)
|
||||
# Get script output in case it is available
|
||||
error_msg = utils.get_failure_msg(strategy_step.subcloud.name)
|
||||
failure = ('%s \n%s' % (error_msg, msg_subcloud))
|
||||
db_api.subcloud_update(
|
||||
self.context, strategy_step.subcloud_id,
|
||||
deploy_status=consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
|
||||
error_description=msg_subcloud[0:consts.ERROR_DESCRIPTION_LENGTH])
|
||||
error_description=failure[0:consts.ERROR_DESCRIPTION_LENGTH])
|
||||
self.error_log(strategy_step, msg_subcloud)
|
||||
self.error_log(strategy_step, str(e))
|
||||
raise
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2020-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2020-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@@ -9,6 +9,7 @@ from dccommon.drivers.openstack.vim import ALARM_RESTRICTIONS_RELAXED
|
||||
from dcmanager.common import consts
|
||||
from dcmanager.common.exceptions import StrategyStoppedException
|
||||
from dcmanager.common import utils
|
||||
from dcmanager.db import api as db_api
|
||||
from dcmanager.orchestrator.states.base import BaseState
|
||||
|
||||
DEFAULT_FORCE_FLAG = False
|
||||
@@ -95,9 +96,15 @@ class StartingUpgradeState(BaseState):
|
||||
if upgrade_state in UPGRADE_RETRY_STATES:
|
||||
retry_counter += 1
|
||||
if retry_counter >= self.max_failed_retries:
|
||||
raise Exception("Failed to start upgrade. Please "
|
||||
"check sysinv.log on the subcloud for "
|
||||
"details.")
|
||||
error_msg = utils.get_failure_msg(strategy_step.subcloud.name)
|
||||
db_api.subcloud_update(
|
||||
self.context, strategy_step.subcloud_id,
|
||||
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
|
||||
details = ("Failed to start upgrade. Please "
|
||||
"check sysinv.log on the subcloud or "
|
||||
"%s on central for details." %
|
||||
(consts.ERROR_DESC_CMD))
|
||||
raise Exception(details)
|
||||
self.warn_log(strategy_step,
|
||||
"Upgrade start failed, retrying... State=%s"
|
||||
% upgrade_state)
|
||||
|
||||
Reference in New Issue
Block a user