From 4e77442d529d9803ff90de905b846af940eaf382 Mon Sep 17 00:00:00 2001 From: Kevin Benton Date: Sat, 13 Jun 2015 18:45:19 -0700 Subject: [PATCH] Add deadlock retry to API and ML2 RPC port update With the switch to the pymysql SQL driver, eventlet will now yield during database transactions. This greatly increased our probability of multiple coroutines running transactions on the same table that result in deadlocks. These deadlocks could result from many things, including the following: * a coroutine holding a pessimistic "SELECT for UPDATE" lock when another tries to update the locked records * two coroutines both issue update statements using a WHERE clause invalidated by the other update (e.g. from a compare and swap approach) * two coroutines insert records that, when combined, violate a unique constraint on the table in a master-master Galera deployment * any two workers using "SELECT for UPDATE" in a master-master Galera deployment (write-set certification failure translates to deadlock) This problem is exacerbated by the switch to multiple API and RPC workers, each of which can lead to most of the errors above even without the switch to pymysql. This patch adds a deadlock retry decorator to the delete, create, and update methods at the HTTP API layer. Additionally, it adds a decorator to the update_port_status AMQP API in ML2 since it updates the port table, which is heavily locked by ML2, making it a prime candidate for deadlocks. Nova has relied on the deadlock retry mechanism for quite some time now. We were limping along by not using additional workers and by relying on the unyielding nature of the MySQL C driver to serialize everything. 
Closes-Bug: #1464612 Change-Id: I635cc49ca69f589f99ab145d4d51e511b24194d2 --- neutron/api/v2/base.py | 8 ++++++++ neutron/plugins/ml2/plugin.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/neutron/api/v2/base.py b/neutron/api/v2/base.py index 8237905d26b..d0f2aa8f156 100644 --- a/neutron/api/v2/base.py +++ b/neutron/api/v2/base.py @@ -17,6 +17,7 @@ import copy import netaddr from oslo_config import cfg +from oslo_db import api as oslo_db_api from oslo_log import log as logging from oslo_policy import policy as oslo_policy from oslo_utils import excutils @@ -30,6 +31,7 @@ from neutron.api.v2 import resource as wsgi_resource from neutron.common import constants as const from neutron.common import exceptions from neutron.common import rpc as n_rpc +from neutron.db import api as db_api from neutron.i18n import _LE, _LI from neutron import policy from neutron import quota @@ -381,6 +383,8 @@ class Controller(object): # We need a way for ensuring that if it has been created, # it is then deleted + @oslo_db_api.wrap_db_retry(max_retries=db_api.MAX_RETRIES, + retry_on_deadlock=True) def create(self, request, body=None, **kwargs): """Creates a new instance of the requested entity.""" parent_id = kwargs.get(self._parent_id_name) @@ -465,6 +469,8 @@ class Controller(object): return notify({self._resource: self._view(request.context, obj)}) + @oslo_db_api.wrap_db_retry(max_retries=db_api.MAX_RETRIES, + retry_on_deadlock=True) def delete(self, request, id, **kwargs): """Deletes the specified entity.""" self._notifier.info(request.context, @@ -499,6 +505,8 @@ class Controller(object): result, notifier_method) + @oslo_db_api.wrap_db_retry(max_retries=db_api.MAX_RETRIES, + retry_on_deadlock=True) def update(self, request, id, body=None, **kwargs): """Updates the specified entity's attributes.""" parent_id = kwargs.get(self._parent_id_name) diff --git a/neutron/plugins/ml2/plugin.py b/neutron/plugins/ml2/plugin.py index ba8054b9989..2cf2ec80f45 100644 --- 
a/neutron/plugins/ml2/plugin.py +++ b/neutron/plugins/ml2/plugin.py @@ -1375,6 +1375,8 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, return self._bind_port_if_needed(port_context) + @oslo_db_api.wrap_db_retry(max_retries=db_api.MAX_RETRIES, + retry_on_deadlock=True) def update_port_status(self, context, port_id, status, host=None, network=None): """