From 7a8b59624c87b6b74f28e98a1cff1d6a40edc65f Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Thu, 12 Dec 2019 12:28:22 +0100 Subject: [PATCH] Add retries to update trunk port In [1] a retry of the trunk update was added to prevent StaleDataError exceptions from causing failures to set the trunk port or subports to ACTIVE state. But it was only a partial fix for the issue described in the related bug, and from [2] we know that it can still happen on high-load systems from time to time. I checked this issue and the reported bug again and found that the retry was added only in the _process_trunk_subport_bindings() method. But StaleDataError can also be raised in other cases where the same trunk is updated, e.g. in the update_trunk_status() method. So this commit adds the same retry mechanism to all trunk.update() actions in the services.trunk.rpc.server module. [1] https://review.opendev.org/#/c/662236/ [2] https://bugzilla.redhat.com/show_bug.cgi?id=1733197 Conflicts: neutron/services/trunk/rpc/server.py Change-Id: I10e3619d5f3600ea97ed695321bb691dece3181f Partial-Bug: #1828375 (cherry picked from commit ade35a233edb5c9489cc3a68ae00672fb328f63d) --- neutron/services/trunk/rpc/server.py | 50 ++++++++++++++++------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/neutron/services/trunk/rpc/server.py b/neutron/services/trunk/rpc/server.py index 229e99045fc..d3ea9f98943 100644 --- a/neutron/services/trunk/rpc/server.py +++ b/neutron/services/trunk/rpc/server.py @@ -102,28 +102,10 @@ class TrunkSkeleton(object): return updated_ports - def update_trunk_status(self, context, trunk_id, status): - """Update the trunk status to reflect outcome of data plane wiring.""" - with db_api.autonested_transaction(context.session): - trunk = trunk_objects.Trunk.get_object(context, id=trunk_id) - if trunk: - trunk.update(status=status) - - def _process_trunk_subport_bindings(self, context, trunk, port_ids): - """Process port bindings for subports on the given trunk.""" - updated_ports = [] - 
trunk_port_id = trunk.port_id - trunk_port = self.core_plugin.get_port(context, trunk_port_id) - trunk_host = trunk_port.get(portbindings.HOST_ID) - + def _safe_update_trunk(self, trunk, **kwargs): for try_cnt in range(db_api.MAX_RETRIES): try: - # NOTE(status_police) Set the trunk in BUILD state before - # processing subport bindings. The trunk will stay in BUILD - # state until an attempt has been made to bind all subports - # passed here and the agent acknowledges the operation was - # successful. - trunk.update(status=trunk_consts.BUILD_STATUS) + trunk.update(**kwargs) break except exc.StaleDataError as e: if try_cnt < db_api.MAX_RETRIES - 1: @@ -133,6 +115,28 @@ class TrunkSkeleton(object): # re-raise when all tries failed raise + def update_trunk_status(self, context, trunk_id, status): + """Update the trunk status to reflect outcome of data plane wiring.""" + with db_api.autonested_transaction(context.session): + trunk = trunk_objects.Trunk.get_object(context, id=trunk_id) + if trunk: + self._safe_update_trunk(trunk, status=status) + + def _process_trunk_subport_bindings(self, context, trunk, port_ids): + """Process port bindings for subports on the given trunk.""" + updated_ports = [] + trunk_port_id = trunk.port_id + trunk_port = self.core_plugin.get_port(context, trunk_port_id) + trunk_host = trunk_port.get(portbindings.HOST_ID) + + # NOTE(status_police) Set the trunk in BUILD state before + # processing subport bindings. The trunk will stay in BUILD + # state until an attempt has been made to bind all subports + # passed here and the agent acknowledges the operation was + # successful. 
+ self._safe_update_trunk( + trunk, status=trunk_consts.BUILD_STATUS) + for port_id in port_ids: try: updated_port = self._handle_port_binding(context, port_id, @@ -146,7 +150,8 @@ class TrunkSkeleton(object): # NOTE(status_police) The subport binding has failed in a # manner in which we cannot proceed and the user must take # action to bring the trunk back to a sane state. - trunk.update(status=trunk_consts.ERROR_STATUS) + self._safe_update_trunk( + trunk, status=trunk_consts.ERROR_STATUS) return [] except Exception as e: msg = ("Failed to bind subport port %(port)s on trunk " @@ -154,7 +159,8 @@ class TrunkSkeleton(object): LOG.error(msg, {'port': port_id, 'trunk': trunk.id, 'exc': e}) if len(port_ids) != len(updated_ports): - trunk.update(status=trunk_consts.DEGRADED_STATUS) + self._safe_update_trunk( + trunk, status=trunk_consts.DEGRADED_STATUS) return updated_ports