Fix rollback error caused by DB sync
Before DB synchronization, the OperationState of Opocc needs to be checked. When FAILED_TEMP exists in the operationState or the latest operationState is FAILED, Tacker skips this DB synchronization for the VNF. Closes-Bug: #1999632 Change-Id: Ie47b5a9092a938db9834c68fe5375cc1bf42c791
This commit is contained in:
@@ -29,8 +29,9 @@ and some error-handling operations.
|
||||
|
||||
* The maximum or minimum number of pods is out of range
|
||||
* Error compute scale_level
|
||||
* Conflict with LCM operation
|
||||
|
||||
* LCM operation
|
||||
* Conflict with LCM operation
|
||||
* Abnormal LCM operation status
|
||||
|
||||
The maximum or minimum number of pods is out of range
|
||||
-----------------------------------------------------
|
||||
@@ -221,7 +222,7 @@ When tacker-conductor.log contains the following error log,
|
||||
it means compute scale_level error.
|
||||
|
||||
.. note:: If you don't have tacker-conductor.log,
|
||||
you can execute the following CLI command to create tacker-conductor.log.
|
||||
you can execute the following CLI command to show tacker-conductor.log.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@@ -333,22 +334,26 @@ the initial increment is a multiple of the scale level.
|
||||
for details.
|
||||
|
||||
|
||||
LCM operation
|
||||
-------------
|
||||
|
||||
Conflict with LCM operation
|
||||
---------------------------
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
There are two kinds of conflicts:
|
||||
|
||||
* Database synchronization occurs while LCM operation is in progress.
|
||||
* LCM operation occurs during DB synchronization.
|
||||
|
||||
Database synchronization occurs while a LCM operation is in progress
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
||||
|
||||
When tacker-conductor.log contains the following info log,
|
||||
it means database synchronization conflicts with an LCM operation,
|
||||
and database synchronization will be skipped.
|
||||
|
||||
.. note:: If you don't have tacker-conductor.log,
|
||||
you can execute the following CLI command to create tacker-conductor.log.
|
||||
you can execute the following CLI command to show tacker-conductor.log.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@@ -365,7 +370,7 @@ Waiting for LCM operation completes
|
||||
and database synchronization will be repeated at a default time.
|
||||
|
||||
LCM operation occurs during DB synchronization
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
''''''''''''''''''''''''''''''''''''''''''''''
|
||||
|
||||
When LCM operation responds 409, it conflicts with Database synchronization.
|
||||
|
||||
@@ -380,3 +385,37 @@ Debug log:
|
||||
.. code-block:: console
|
||||
|
||||
Ended sync_db
|
||||
|
||||
|
||||
Abnormal LCM operation status
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
During synchronization, Tacker checks the operationState of VnfLcmOpOcc.
|
||||
For the same vnf instance, if ``FAILED_TEMP`` exists in the operationState,
|
||||
or the latest operationState is ``FAILED``,
|
||||
Tacker will output an error log and will not update the database.
|
||||
|
||||
.. note:: If you don't have tacker-conductor.log,
|
||||
you can execute the following CLI command to show tacker-conductor.log.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
journalctl -u devstack@tacker-conductor
|
||||
|
||||
Error log:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
The LCM operation status of the vnf: 81c4be9d-25ad-4726-8640-f2c4c326de2e is abnormal, so skip this DB synchronization.
|
||||
|
||||
Error-handling operations:
|
||||
|
||||
To resolve this error, use one of the following approaches.
|
||||
|
||||
* For the operation state of ``FAILED_TEMP``, please refer to
|
||||
`VNF LCM error-handling`_.
|
||||
|
||||
* For the operation state of ``FAILED``, please perform other LCM operations
|
||||
on this vnf instance until the result is ``COMPLETED``.
|
||||
|
||||
.. _VNF LCM error-handling: https://docs.openstack.org/tacker/latest/user/etsi_vnf_error_handling.html
|
||||
|
@@ -555,6 +555,24 @@ def get_grant_req_and_grant(context, lcmocc):
|
||||
return grant_reqs[0], grant
|
||||
|
||||
|
||||
def is_lcmocc_failure_status(context, inst_id):
    """Tell whether a VNF instance's LCM operation history is abnormal.

    Fetches every VnfLcmOpOccV2 whose ``vnfInstanceId`` equals ``inst_id``
    and reports a failure status when either:

    * any of them has operationState ``FAILED_TEMP``, or
    * the one with the greatest ``startTime`` has operationState ``FAILED``.

    :param context: context handed through to the object lookup.
    :param inst_id: ID of the VNF instance to check.
    :returns: True if the instance is in a failure status, otherwise False
        (also False when the instance has no operation occurrences).
    """
    inst_lcmoccs = objects.VnfLcmOpOccV2.get_by_filter(
        context, vnfInstanceId=inst_id)

    if any(lcmocc.operationState == fields.LcmOperationStateType.FAILED_TEMP
           for lcmocc in inst_lcmoccs):
        return True

    # Compute the latest occurrence once. ``default=None`` covers an instance
    # without any occurrence, which used to raise IndexError. On equal start
    # times max() keeps the first match, like the previous list lookup did.
    latest_lcmocc = max(
        inst_lcmoccs, key=lambda lcmocc: lcmocc.startTime, default=None)
    return (latest_lcmocc is not None and
            latest_lcmocc.operationState ==
            fields.LcmOperationStateType.FAILED)
|
||||
|
||||
|
||||
def check_lcmocc_in_progress(context, inst_id):
|
||||
# if the controller or conductor executes an operation for the vnf
|
||||
# instance (i.e. operationState is ...ING), other operation for
|
||||
|
@@ -372,6 +372,14 @@ class ConductorV2(object):
|
||||
|
||||
@coordinate.lock_vnf_instance('{inst.id}')
|
||||
def _sync_inst(self, context, inst, vim_info):
|
||||
# NOTE(fengyi): The operation_state in the opocc of vnf_instance
|
||||
# has FAILED_TEMP or FAILED, then Tacker cannot perform DB sync for
|
||||
# the vnf_instance.
|
||||
if lcmocc_utils.is_lcmocc_failure_status(context, inst.id):
|
||||
raise sol_ex.DbSyncFailed(
|
||||
f"The LCM operation status of the vnf: {inst.id} is abnormal, "
|
||||
"so skip this DB synchronization.")
|
||||
|
||||
vnf_inst = inst_utils.get_inst(context, inst.id)
|
||||
self.vnflcm_driver.sync_db(
|
||||
context, vnf_inst, vim_info)
|
||||
|
@@ -3858,6 +3858,7 @@ class TestKubernetes(base.TestCase):
|
||||
heal_vnf_request=heal_request_data_obj)
|
||||
self.assertEqual(mock_list_namespaced_pod.call_count, 0)
|
||||
|
||||
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
|
||||
@mock.patch.object(kubernetes_driver.Kubernetes,
|
||||
"_sync_vnfc_resource_and_pod_resource")
|
||||
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
|
||||
@@ -3869,11 +3870,15 @@ class TestKubernetes(base.TestCase):
|
||||
def test_sync_db(
|
||||
self, mock_list_namespaced_pod, mock_check_pod_information,
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim,
|
||||
mock_sync_vnfc):
|
||||
mock_sync_vnfc, mock_op_occs):
|
||||
mock_list_namespaced_pod.return_value = client.V1PodList(
|
||||
items=[fakes.get_fake_pod_info(kind='Deployment')])
|
||||
mock_check_pod_information.return_value = True
|
||||
|
||||
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
|
||||
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
|
||||
mock_op_occs.return_value = vnf_lcm_op_occs
|
||||
|
||||
vnf_instance_obj = vnflcm_fakes.return_vnf_instance(
|
||||
fields.VnfInstanceState.INSTANTIATED)
|
||||
vnf_instance_obj.vnf_metadata['namespace'] = "default"
|
||||
@@ -3966,6 +3971,7 @@ class TestKubernetes(base.TestCase):
|
||||
f"Failed to synchronize database vnf: "
|
||||
f"{vnf_instance_obj.id}", cm.output[0])
|
||||
|
||||
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
|
||||
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
|
||||
@mock.patch.object(vnflcm_utils, "get_vim")
|
||||
@mock.patch.object(VnfInstance, "save")
|
||||
@@ -3974,7 +3980,7 @@ class TestKubernetes(base.TestCase):
|
||||
@mock.patch.object(client.CoreV1Api, 'list_namespaced_pod')
|
||||
def test_sync_db_check_pod_false(
|
||||
self, mock_list_namespaced_pod, mock_check_pod_information,
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim):
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim, mock_op_occs):
|
||||
mock_list_namespaced_pod.return_value = client.V1PodList(
|
||||
items=[fakes.get_fake_pod_info(kind='Pod')])
|
||||
mock_check_pod_information.side_effect = [True, False]
|
||||
@@ -3992,6 +3998,10 @@ class TestKubernetes(base.TestCase):
|
||||
mock_get_by_id.return_value = vnf_instance_obj
|
||||
mock_vim.return_value = vim_connection_object
|
||||
|
||||
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
|
||||
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
|
||||
mock_op_occs.return_value = vnf_lcm_op_occs
|
||||
|
||||
self.kubernetes.sync_db(
|
||||
context=self.context, vnf_instance=vnf_instance_obj,
|
||||
vim_info=vim_connection_object)
|
||||
@@ -3999,6 +4009,7 @@ class TestKubernetes(base.TestCase):
|
||||
self.assertEqual(2, mock_check_pod_information.call_count)
|
||||
self.assertEqual(2, mock_save.call_count)
|
||||
|
||||
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
|
||||
@mock.patch.object(kubernetes_driver.Kubernetes,
|
||||
"_sync_vnfc_resource_and_pod_resource")
|
||||
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
|
||||
@@ -4010,7 +4021,7 @@ class TestKubernetes(base.TestCase):
|
||||
def test_sync_db_not_succeeded(
|
||||
self, mock_list_namespaced_pod, mock_check_pod_information,
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim,
|
||||
mock_sync_vnfc):
|
||||
mock_sync_vnfc, mock_op_occs):
|
||||
mock_list_namespaced_pod.return_value = client.V1PodList(
|
||||
items=[fakes.get_fake_pod_info(kind='Pod')])
|
||||
mock_check_pod_information.return_value = True
|
||||
@@ -4029,11 +4040,16 @@ class TestKubernetes(base.TestCase):
|
||||
mock_vim.return_value = vim_connection_object
|
||||
mock_sync_vnfc.return_value = False
|
||||
|
||||
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
|
||||
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
|
||||
mock_op_occs.return_value = vnf_lcm_op_occs
|
||||
|
||||
self.kubernetes.sync_db(
|
||||
context=self.context, vnf_instance=vnf_instance_obj,
|
||||
vim_info=vim_connection_object)
|
||||
self.assertEqual(1, mock_sync_vnfc.call_count)
|
||||
|
||||
@mock.patch.object(objects.VnfLcmOpOccList, "get_by_filters")
|
||||
@mock.patch.object(objects.VimConnectionInfo, "obj_from_primitive")
|
||||
@mock.patch.object(vnflcm_utils, "get_vim")
|
||||
@mock.patch.object(VnfInstance, "save")
|
||||
@@ -4042,7 +4058,7 @@ class TestKubernetes(base.TestCase):
|
||||
@mock.patch.object(client.CoreV1Api, 'list_namespaced_pod')
|
||||
def test_sync_db_failed_update_db(
|
||||
self, mock_list_namespaced_pod, mock_check_pod_information,
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim):
|
||||
mock_get_by_id, mock_save, mock_get_vim, mock_vim, mock_op_occs):
|
||||
mock_list_namespaced_pod.return_value = client.V1PodList(
|
||||
items=[fakes.get_fake_pod_info(kind='Deployment')])
|
||||
mock_check_pod_information.return_value = True
|
||||
@@ -4060,6 +4076,10 @@ class TestKubernetes(base.TestCase):
|
||||
mock_get_by_id.return_value = vnf_instance_obj
|
||||
mock_vim.return_value = vim_connection_object
|
||||
|
||||
vnf_lcm_op_occ = vnflcm_fakes.vnflcm_scale_out_cnf()
|
||||
vnf_lcm_op_occs = objects.VnfLcmOpOccList(objects=[vnf_lcm_op_occ])
|
||||
mock_op_occs.return_value = vnf_lcm_op_occs
|
||||
|
||||
log_name = "tacker.vnfm.infra_drivers.kubernetes.kubernetes_driver"
|
||||
with self.assertLogs(logger=log_name, level=logging.ERROR) as cm:
|
||||
self.kubernetes.sync_db(
|
||||
|
@@ -2734,6 +2734,15 @@ class Kubernetes(abstract_driver.VnfAbstractDriver,
|
||||
context, vnf_inst.id)
|
||||
if vnf_instance.instantiation_state != 'INSTANTIATED':
|
||||
return False
|
||||
|
||||
# NOTE(fengyi): The operation_state in the opocc of vnf_instance
|
||||
# has FAILED_TEMP or FAILED, then Tacker cannot perform DB sync for
|
||||
# the vnf_instance.
|
||||
if k8s_utils.is_lcmocc_failure_status(context, vnf_inst.id):
|
||||
LOG.error(f"The LCM operation status of the vnf: {vnf_inst.id} "
|
||||
f"is abnormal, so skip this DB synchronization.")
|
||||
return False
|
||||
|
||||
# change task_state
|
||||
vnf_instance.task_state = fields.VnfInstanceTaskState.DB_SYNCHRONIZING
|
||||
vnf_instance.save()
|
||||
|
@@ -18,6 +18,8 @@
|
||||
from oslo_log import log as logging
|
||||
|
||||
from tacker.common import exceptions
|
||||
from tacker import objects
|
||||
from tacker.objects import fields
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -81,3 +83,24 @@ def get_namespace_from_manifests(chk_namespaces):
|
||||
if namespaces:
|
||||
return namespaces.pop()
|
||||
return None
|
||||
|
||||
|
||||
def is_lcmocc_failure_status(context, inst_id):
    """Tell whether a VNF instance's LCM operation history is abnormal.

    Queries the VnfLcmOpOccs whose ``vnf_instance_id`` equals ``inst_id``
    (``read_deleted='no'``) and reports a failure status when either:

    * any of them has operation_state ``FAILED_TEMP``, or
    * the one with the greatest ``start_time`` has operation_state
      ``FAILED``.

    :param context: context handed through to the object lookup.
    :param inst_id: ID of the VNF instance to check.
    :returns: True if the instance is in a failure status, otherwise False
        (also False when the instance has no operation occurrences).
    """
    filters = {'field': 'vnf_instance_id', 'model': 'VnfLcmOpOccs',
               'value': inst_id, 'op': '=='}
    vnf_lcm_op_occs = objects.VnfLcmOpOccList.get_by_filters(
        context, read_deleted='no', filters=filters)
    lcmoccs = vnf_lcm_op_occs.objects

    if any(lcmocc.operation_state == fields.LcmOccsOperationState.FAILED_TEMP
           for lcmocc in lcmoccs):
        return True

    # Compute the latest occurrence once. ``default=None`` covers an instance
    # without any occurrence, which used to raise IndexError. On equal start
    # times max() keeps the first match, like the previous list lookup did.
    latest_lcmocc = max(
        lcmoccs, key=lambda lcmocc: lcmocc.start_time, default=None)
    return (latest_lcmocc is not None and
            latest_lcmocc.operation_state ==
            fields.LcmOccsOperationState.FAILED)
|
||||
|
Reference in New Issue
Block a user