From bd3096bde8bdaf6dc8130149ecdb45aee49245f0 Mon Sep 17 00:00:00 2001 From: Ken Fujimoto Date: Fri, 25 Feb 2022 01:32:36 +0000 Subject: [PATCH] Clean up at conductor start up Previously when the conductor goes down during operation of some vnf instances, operationState of VnfLcmOpOcc is left as STARTING, PROCESSING or ROLLING_BACK. As a result, no further operation of the vnf instance can be processed. This patch fixes this situation. When the conductor (re)starts, any VnfLcmOpOcc whose operationState is STARTING, PROCESSING or ROLLING_BACK is changed to ROLLED_BACK or FAILED_TEMP. Implements: blueprint support-nfv-solv3-error-handling Change-Id: Ib50869b8b32bfb9caaa4b0b95236b33d04dfb2fe --- tacker/sol_refactored/common/exceptions.py | 5 ++ .../sol_refactored/conductor/conductor_v2.py | 55 +++++++++++--- tacker/sol_refactored/objects/base.py | 2 + .../conductor/test_conductor_v2.py | 76 ++++++++++++++++++- 4 files changed, 125 insertions(+), 13 deletions(-) diff --git a/tacker/sol_refactored/common/exceptions.py b/tacker/sol_refactored/common/exceptions.py index 2c362ca38..e5beafa21 100644 --- a/tacker/sol_refactored/common/exceptions.py +++ b/tacker/sol_refactored/common/exceptions.py @@ -240,3 +240,8 @@ class InvalidScaleNumberOfSteps(SolHttpError400): class DeltaMissingInVnfd(SolHttpError400): message = _("Delta '%(delta)s' is not defined in " "VduScalingAspectDeltas.") + + +class ConductorProcessingError(SolException): + title = 'Internal Server Error' + message = _("Failure due to conductor processing error.") diff --git a/tacker/sol_refactored/conductor/conductor_v2.py b/tacker/sol_refactored/conductor/conductor_v2.py index b3d04cd3b..07aec8512 100644 --- a/tacker/sol_refactored/conductor/conductor_v2.py +++ b/tacker/sol_refactored/conductor/conductor_v2.py @@ -16,6 +16,7 @@ from oslo_log import log as logging from tacker.common import log +from tacker import context as tacker_context from tacker.sol_refactored.common import config from
tacker.sol_refactored.common import coordinate from tacker.sol_refactored.common import exceptions as sol_ex @@ -39,6 +40,33 @@ class ConductorV2(object): self.endpoint = CONF.v2_vnfm.endpoint self.nfvo_client = nfvo_client.NfvoClient() + self._change_lcm_op_state() + + def _change_lcm_op_state(self): + # NOTE: If the conductor down during processing and + # the LcmOperationState STARTING/PROCESSING/ROLLING_BACK remain, + # change it at the next startup. + context = tacker_context.get_admin_context() + ex = sol_ex.ConductorProcessingError() + + state_list = [(fields.LcmOperationStateType.STARTING, + fields.LcmOperationStateType.ROLLED_BACK), + (fields.LcmOperationStateType.PROCESSING, + fields.LcmOperationStateType.FAILED_TEMP), + (fields.LcmOperationStateType.ROLLING_BACK, + fields.LcmOperationStateType.FAILED_TEMP)] + for before_state, after_state in state_list: + lcmoccs = objects.VnfLcmOpOccV2.get_by_filter(context, + operationState=before_state) + for lcmocc in lcmoccs: + lcmocc.operationState = after_state + self._set_lcmocc_error(lcmocc, ex) + inst = inst_utils.get_inst(context, lcmocc.vnfInstanceId) + lcmocc.update(context) + # send notification + self.nfvo_client.send_lcmocc_notification(context, lcmocc, + inst, self.endpoint) + def _set_lcmocc_error(self, lcmocc, ex): if isinstance(ex, sol_ex.SolException): problem_details = ex.make_problem_details() @@ -87,7 +115,17 @@ class ConductorV2(object): lcmocc.operationState = fields.LcmOperationStateType.PROCESSING lcmocc.grantId = grant.id - lcmocc.update(context) + with context.session.begin(subtransactions=True): + # save grant_req and grant to be used when retry + # NOTE: grant_req is saved because it is necessary to interpret + # the contents of grant. Though grant can be gotten from NFVO, + # it is saved here with grant_req so that it is not necessary + # to communicate with NFVO when retry. 
They are saved temporary + # and will be deleted when operationState becomes an end state + # (COMPLETED/FAILED/ROLLED_BACK). + grant_req.create(context) + grant.create(context) + lcmocc.update(context) except Exception as ex: LOG.exception("STARTING %s failed", lcmocc.operation) lcmocc.operationState = fields.LcmOperationStateType.ROLLED_BACK @@ -110,21 +148,14 @@ class ConductorV2(object): with context.session.begin(subtransactions=True): inst.update(context) lcmocc.update(context) + # grant_req and grant are not necessary any more. + grant_req.delete(context) + grant.delete(context) except Exception as ex: LOG.exception("PROCESSING %s failed", lcmocc.operation) lcmocc.operationState = fields.LcmOperationStateType.FAILED_TEMP self._set_lcmocc_error(lcmocc, ex) - with context.session.begin(subtransactions=True): - # save grant_req and grant to be used when retry - # NOTE: grant_req is saved because it is necessary to interpret - # the contents of grant. Though grant can be gotten from NFVO, - # it is saved here with grant_req so that it is not necessary - # to communicate with NFVO when retry. They are saved temporary - # and will be deleted when operationState becomes an end state - # (COMPLETED/FAILED/ROLLED_BACK). - grant_req.create(context) - grant.create(context) - lcmocc.update(context) + lcmocc.update(context) # send notification COMPLETED or FAILED_TEMP self.nfvo_client.send_lcmocc_notification(context, lcmocc, inst, diff --git a/tacker/sol_refactored/objects/base.py b/tacker/sol_refactored/objects/base.py index df6f80023..ad10f0a68 100644 --- a/tacker/sol_refactored/objects/base.py +++ b/tacker/sol_refactored/objects/base.py @@ -346,6 +346,8 @@ class TackerPersistentObject(TackerObject): context.session.add(inst) context.session.flush() # 'flush' must have succeeded because we are here. 
+ if self._db_obj is None: + self._db_obj = inst self.obj_reset_changes() @db_api.context_manager.writer diff --git a/tacker/tests/unit/sol_refactored/conductor/test_conductor_v2.py b/tacker/tests/unit/sol_refactored/conductor/test_conductor_v2.py index c5f1648bd..6ac44f47f 100644 --- a/tacker/tests/unit/sol_refactored/conductor/test_conductor_v2.py +++ b/tacker/tests/unit/sol_refactored/conductor/test_conductor_v2.py @@ -16,6 +16,7 @@ from datetime import datetime from unittest import mock +import ddt from oslo_utils import uuidutils from tacker import context @@ -29,6 +30,7 @@ from tacker.sol_refactored.objects.v2 import fields from tacker.tests.unit.db import base as db_base +@ddt.ddt class TestConductorV2(db_base.SqlTestCase): def setUp(self): @@ -103,7 +105,9 @@ class TestConductorV2(db_base.SqlTestCase): # prepare lcmocc = self._create_inst_and_lcmocc() mocked_get_vnfd.return_value = mock.Mock() - mocked_grant.return_value = self._make_grant_req_and_grant(lcmocc) + grant_req, grant = self._make_grant_req_and_grant(lcmocc) + lcmocc.grantId = grant.id + mocked_grant.return_value = grant_req, grant op_state = [] @@ -121,6 +125,10 @@ class TestConductorV2(db_base.SqlTestCase): self.assertEqual(fields.LcmOperationStateType.PROCESSING, op_state[1]) self.assertEqual(fields.LcmOperationStateType.COMPLETED, op_state[2]) + # check grant_req and grant are deleted + self.assertRaises(sol_ex.GrantRequestOrGrantNotFound, + lcmocc_utils.get_grant_req_and_grant, self.context, lcmocc) + @mock.patch.object(nfvo_client.NfvoClient, 'send_lcmocc_notification') @mock.patch.object(nfvo_client.NfvoClient, 'get_vnfd') @mock.patch.object(vnflcm_driver_v2.VnfLcmDriverV2, 'grant') @@ -398,3 +406,69 @@ class TestConductorV2(db_base.SqlTestCase): lcmocc = lcmocc_utils.get_lcmocc(self.context, lcmocc.id) expected = ex.make_problem_details() self.assertEqual(expected, lcmocc.error.to_dict()) + + def _prepare_change_lcm_op_state(self, op_state): + inst = objects.VnfInstanceV2( + # required 
fields + id=uuidutils.generate_uuid(), + vnfdId=uuidutils.generate_uuid(), + vnfProvider='provider', + vnfProductName='product name', + vnfSoftwareVersion='software version', + vnfdVersion='vnfd version', + instantiationState='INSTANTIATED' + ) + + req = {"flavourId": "simple"} # instantiate request + lcmocc = objects.VnfLcmOpOccV2( + # required fields + id=uuidutils.generate_uuid(), + operationState=op_state, + stateEnteredTime=datetime.utcnow(), + startTime=datetime.utcnow(), + vnfInstanceId=inst.id, + operation=fields.LcmOperationType.SCALE, + isAutomaticInvocation=False, + isCancelPending=False, + operationParams=req) + + inst.create(self.context) + lcmocc.create(self.context) + + return lcmocc + + @ddt.data({'before_state': fields.LcmOperationStateType.STARTING, + 'after_state': fields.LcmOperationStateType.ROLLED_BACK}, + {'before_state': fields.LcmOperationStateType.PROCESSING, + 'after_state': fields.LcmOperationStateType.FAILED_TEMP}, + {'before_state': fields.LcmOperationStateType.ROLLING_BACK, + 'after_state': fields.LcmOperationStateType.FAILED_TEMP}) + @ddt.unpack + @mock.patch.object(nfvo_client.NfvoClient, 'send_lcmocc_notification') + @mock.patch.object(nfvo_client.NfvoClient, 'get_vnfd') + def test_change_lcm_op_state(self, mocked_get_vnfd, + mocked_send_lcmocc_notification, before_state, after_state): + # prepare + lcmocc = self._prepare_change_lcm_op_state(before_state) + mocked_get_vnfd.return_value = mock.Mock() + ex = sol_ex.ConductorProcessingError() + + op_state = [] + + def _store_state(context, lcmocc, inst, endpoint): + op_state.append(lcmocc.operationState) + + mocked_send_lcmocc_notification.side_effect = _store_state + + # run _change_lcm_op_state + self.conductor._change_lcm_op_state() + + # check operationState transition + self.assertEqual(1, mocked_send_lcmocc_notification.call_count) + self.assertEqual(after_state, op_state[0]) + + # check lcmocc.error + # get lcmocc from DB to be sure lcmocc saved to DB + lcmocc = 
lcmocc_utils.get_lcmocc(self.context, lcmocc.id) + expected = ex.make_problem_details() + self.assertEqual(expected, lcmocc.error.to_dict())