Fix rollback error caused by DB sync

Before DB synchronization, the OperationState of Opocc
needs to be checked. When FAILED_TEMP exists in the
operationState or the latest operationState is FAILED,
Tacker skips this DB synchronization for the VNF.

Closes-Bug: #1999632
Change-Id: Ie47b5a9092a938db9834c68fe5375cc1bf42c791
This commit is contained in:
Yi Feng
2022-12-15 21:17:08 +09:00
parent 46a28fc8a2
commit c5b0806c5e
6 changed files with 128 additions and 11 deletions

View File

@@ -29,8 +29,9 @@ and some error-handling operations.
* The maximum or minimum number of pods is out of range
* Error compute scale_level
* Conflict with LCM operation
* LCM operation
* Conflict with LCM operation
* Abnormal LCM operation status
The maximum or minimum number of pods is out of range
-----------------------------------------------------
@@ -221,7 +222,7 @@ When tacker-conductor.log contains the following error log,
it means compute scale_level error.
.. note:: If you don't have tacker-conductor.log,
you can execute the following CLI command to create tacker-conductor.log.
you can execute the following CLI command to show tacker-conductor.log.
.. code-block:: console
@@ -333,22 +334,26 @@ the initial increment is a multiple of the scale level.
for details.
LCM operation
-------------
Conflict with LCM operation
---------------------------
^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are two kinds of conflicts:
* Database synchronization occurs while LCM operation is in progress.
* LCM operation occurs during DB synchronization.
Database synchronization occurs while a LCM operation is in progress
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
When tacker-conductor.log contains the following info log,
it means database synchronization conflicts with an LCM operation,
and database synchronization will be skipped.
.. note:: If you don't have tacker-conductor.log,
you can execute the following CLI command to create tacker-conductor.log.
you can execute the following CLI command to show tacker-conductor.log.
.. code-block:: console
@@ -365,7 +370,7 @@ Waiting for LCM operation completes
and database synchronization will be repeated at a default time.
LCM operation occurs during DB synchronization
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
''''''''''''''''''''''''''''''''''''''''''''''
When LCM operation responds 409, it conflicts with Database synchronization.
@@ -380,3 +385,37 @@ Debug log:
.. code-block:: console
Ended sync_db
Abnormal LCM operation status
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
During synchronization, Tacker checks the operationState of VnfLcmOpOcc.
For the same vnf instance, if ``FAILED_TEMP`` exists in the operationState,
or the latest operationState is ``FAILED``,
Tacker will output an error log and will not update the database.
.. note:: If you don't have tacker-conductor.log,
you can execute the following CLI command to show tacker-conductor.log.
.. code-block:: console
journalctl -u devstack@tacker-conductor
Error log:
.. code-block:: console
The LCM operation status of the vnf: 81c4be9d-25ad-4726-8640-f2c4c326de2e is abnormal, so skip this DB synchronization.
Error-handling operations:
To resolve this error, use one of the following approaches.
* For the operation state of ``FAILED_TEMP``, please refer to
`VNF LCM error-handling`_.
* For the operation state of ``FAILED``, please perform other LCM operations
on this vnf instance until the result is ``COMPLETED``.
.. _VNF LCM error-handling: https://docs.openstack.org/tacker/latest/user/etsi_vnf_error_handling.html

View File

@@ -555,6 +555,24 @@ def get_grant_req_and_grant(context, lcmocc):
return grant_reqs[0], grant
def is_lcmocc_failure_status(context, inst_id):
    """Tell whether the LCM operation history blocks DB synchronization.

    The history is considered abnormal when any VnfLcmOpOcc of the VNF
    instance is in ``FAILED_TEMP``, or when the most recently started
    VnfLcmOpOcc is in ``FAILED``.

    :param context: request context passed through to the object layer.
    :param inst_id: ID of the VNF instance whose op-occs are inspected.
    :returns: True if DB synchronization should be skipped, else False.
    """
    inst_lcmoccs = objects.VnfLcmOpOccV2.get_by_filter(
        context, vnfInstanceId=inst_id)
    if not inst_lcmoccs:
        # No LCM operation is recorded for this instance, so there is no
        # failure state; indexing the "latest" op-occ would raise here.
        return False

    if any(lcmocc.operationState ==
           fields.LcmOperationStateType.FAILED_TEMP
           for lcmocc in inst_lcmoccs):
        return True

    # max() returns the first op-occ carrying the greatest startTime, and
    # evaluates the key once per element instead of once per comparison.
    latest_lcmocc = max(inst_lcmoccs, key=lambda lcmocc: lcmocc.startTime)
    return (latest_lcmocc.operationState ==
            fields.LcmOperationStateType.FAILED)
def check_lcmocc_in_progress(context, inst_id):
# if the controller or conductor executes an operation for the vnf
# instance (i.e. operationState is ...ING), other operation for

View File

@@ -372,6 +372,14 @@ class ConductorV2(object):
@coordinate.lock_vnf_instance('{inst.id}')
def _sync_inst(self, context, inst, vim_info):
# NOTE(fengyi): The operation_state in the opocc of vnf_instance
# has FAILED_TEMP or FAILED, then Tacker cannot perform DB sync for
# the vnf_instance.
if lcmocc_utils.is_lcmocc_failure_status(context, inst.id):
raise sol_ex.DbSyncFailed(
f"The LCM operation status of the vnf: {inst.id} is abnormal, "
"so skip this DB synchronization.")
vnf_inst = inst_utils.get_inst(context, inst.id)
self.vnflcm_driver.sync_db(
context, vnf_inst, vim_info)

View File

@@ -3858,6 +3858,7 @@ class TestKubernetes(base.TestCase):
heal_vnf_request=heal_request_data_obj)
self.assertEqual(mock_list_namespaced_pod.call_count, 0)
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
@mock.patch.object(kubernetes_driver.Kubernetes,
"_sync_vnfc_resource_and_pod_resource")
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
@@ -3869,11 +3870,15 @@ class TestKubernetes(base.TestCase):
def test_sync_db(
self, mock_list_namespaced_pod, mock_check_pod_information,
mock_get_by_id, mock_save, mock_get_vim, mock_vim,
mock_sync_vnfc):
mock_sync_vnfc, mock_op_occs):
mock_list_namespaced_pod.return_value = client.V1PodList(
items=[fakes.get_fake_pod_info(kind='Deployment')])
mock_check_pod_information.return_value = True
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
mock_op_occs.return_value = vnf_lcm_op_occs
vnf_instance_obj = vnflcm_fakes.return_vnf_instance(
fields.VnfInstanceState.INSTANTIATED)
vnf_instance_obj.vnf_metadata['namespace'] = "default"
@@ -3966,6 +3971,7 @@ class TestKubernetes(base.TestCase):
f"Failed to synchronize database vnf: "
f"{vnf_instance_obj.id}", cm.output[0])
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
@mock.patch.object(vnflcm_utils, "get_vim")
@mock.patch.object(VnfInstance, "save")
@@ -3974,7 +3980,7 @@ class TestKubernetes(base.TestCase):
@mock.patch.object(client.CoreV1Api, 'list_namespaced_pod')
def test_sync_db_check_pod_false(
self, mock_list_namespaced_pod, mock_check_pod_information,
mock_get_by_id, mock_save, mock_get_vim, mock_vim):
mock_get_by_id, mock_save, mock_get_vim, mock_vim, mock_op_occs):
mock_list_namespaced_pod.return_value = client.V1PodList(
items=[fakes.get_fake_pod_info(kind='Pod')])
mock_check_pod_information.side_effect = [True, False]
@@ -3992,6 +3998,10 @@ class TestKubernetes(base.TestCase):
mock_get_by_id.return_value = vnf_instance_obj
mock_vim.return_value = vim_connection_object
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
mock_op_occs.return_value = vnf_lcm_op_occs
self.kubernetes.sync_db(
context=self.context, vnf_instance=vnf_instance_obj,
vim_info=vim_connection_object)
@@ -3999,6 +4009,7 @@ class TestKubernetes(base.TestCase):
self.assertEqual(2, mock_check_pod_information.call_count)
self.assertEqual(2, mock_save.call_count)
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
@mock.patch.object(kubernetes_driver.Kubernetes,
"_sync_vnfc_resource_and_pod_resource")
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
@@ -4010,7 +4021,7 @@ class TestKubernetes(base.TestCase):
def test_sync_db_not_succeeded(
self, mock_list_namespaced_pod, mock_check_pod_information,
mock_get_by_id, mock_save, mock_get_vim, mock_vim,
mock_sync_vnfc):
mock_sync_vnfc, mock_op_occs):
mock_list_namespaced_pod.return_value = client.V1PodList(
items=[fakes.get_fake_pod_info(kind='Pod')])
mock_check_pod_information.return_value = True
@@ -4029,11 +4040,16 @@ class TestKubernetes(base.TestCase):
mock_vim.return_value = vim_connection_object
mock_sync_vnfc.return_value = False
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
mock_op_occs.return_value = vnf_lcm_op_occs
self.kubernetes.sync_db(
context=self.context, vnf_instance=vnf_instance_obj,
vim_info=vim_connection_object)
self.assertEqual(1, mock_sync_vnfc.call_count)
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
@mock.patch.object(vnflcm_utils, "get_vim")
@mock.patch.object(VnfInstance, "save")
@@ -4042,7 +4058,7 @@ class TestKubernetes(base.TestCase):
@mock.patch.object(client.CoreV1Api, 'list_namespaced_pod')
def test_sync_db_failed_update_db(
self, mock_list_namespaced_pod, mock_check_pod_information,
mock_get_by_id, mock_save, mock_get_vim, mock_vim):
mock_get_by_id, mock_save, mock_get_vim, mock_vim, mock_op_occs):
mock_list_namespaced_pod.return_value = client.V1PodList(
items=[fakes.get_fake_pod_info(kind='Deployment')])
mock_check_pod_information.return_value = True
@@ -4060,6 +4076,10 @@ class TestKubernetes(base.TestCase):
mock_get_by_id.return_value = vnf_instance_obj
mock_vim.return_value = vim_connection_object
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
mock_op_occs.return_value = vnf_lcm_op_occs
log_name = "tacker.vnfm.infra_drivers.kubernetes.kubernetes_driver"
with self.assertLogs(logger=log_name, level=logging.ERROR) as cm:
self.kubernetes.sync_db(

View File

@@ -2734,6 +2734,15 @@ class Kubernetes(abstract_driver.VnfAbstractDriver,
context, vnf_inst.id)
if vnf_instance.instantiation_state != 'INSTANTIATED':
return False
# NOTE(fengyi): The operation_state in the opocc of vnf_instance
# has FAILED_TEMP or FAILED, then Tacker cannot perform DB sync for
# the vnf_instance.
if k8s_utils.is_lcmocc_failure_status(context, vnf_inst.id):
LOG.error(f"The LCM operation status of the vnf: {vnf_inst.id} "
f"is abnormal, so skip this DB synchronization.")
return False
# change task_state
vnf_instance.task_state = fields.VnfInstanceTaskState.DB_SYNCHRONIZING
vnf_instance.save()

View File

@@ -18,6 +18,8 @@
from oslo_log import log as logging
from tacker.common import exceptions
from tacker import objects
from tacker.objects import fields
LOG = logging.getLogger(__name__)
@@ -81,3 +83,24 @@ def get_namespace_from_manifests(chk_namespaces):
if namespaces:
return namespaces.pop()
return None
def is_lcmocc_failure_status(context, inst_id):
    """Tell whether the LCM operation history blocks DB synchronization.

    The history is considered abnormal when any VnfLcmOpOcc of the VNF
    instance is in ``FAILED_TEMP``, or when the most recently started
    VnfLcmOpOcc is in ``FAILED``.

    :param context: request context passed through to the object layer.
    :param inst_id: ID of the VNF instance whose op-occs are inspected.
    :returns: True if DB synchronization should be skipped, else False.
    """
    filters = {'field': 'vnf_instance_id', 'model': 'VnfLcmOpOccs',
               'value': inst_id, 'op': '=='}
    vnf_lcm_op_occs = objects.VnfLcmOpOccList.get_by_filters(
        context, read_deleted='no', filters=filters)
    lcmoccs = vnf_lcm_op_occs.objects
    if not lcmoccs:
        # No LCM operation is recorded for this instance, so there is no
        # failure state; indexing the "latest" op-occ would raise here.
        return False

    if any(lcmocc.operation_state ==
           fields.LcmOccsOperationState.FAILED_TEMP
           for lcmocc in lcmoccs):
        return True

    # max() returns the first op-occ carrying the greatest start_time, and
    # evaluates the key once per element instead of once per comparison.
    latest_lcmocc = max(lcmoccs, key=lambda lcmocc: lcmocc.start_time)
    return (latest_lcmocc.operation_state ==
            fields.LcmOccsOperationState.FAILED)