Catch DBReferenceError in HA router race conditions

If a router is auto-scheduled while it is being deleted concurrently,
the neutron server may treat the HA router as 'not scheduled'. It then
creates a new HA port binding that references the id of the router
which has just been deleted, and a foreign key constraint error
(IntegrityError) is raised.

Change-Id: I81fdd2c971ee4ae5133126b6887ba6ad855ef138
Closes-Bug: #1533460
Related-Bug: #1523780
This commit is contained in:
LIU Yulong 2015-12-22 11:02:08 +08:00
parent 3fb153b15b
commit d1accc3668
3 changed files with 61 additions and 5 deletions

View File

@ -21,6 +21,7 @@ from oslo_db import exception as db_exc
from oslo_log import log as logging
from oslo_utils import excutils
import sqlalchemy as sa
from sqlalchemy import exc as sql_exc
from sqlalchemy import orm
from neutron._i18n import _, _LI
@ -36,6 +37,7 @@ from neutron.db import l3_db
from neutron.db import l3_dvr_db
from neutron.db import model_base
from neutron.db import models_v2
from neutron.extensions import l3
from neutron.extensions import l3_ext_ha_mode as l3_ha
from neutron.extensions import portbindings
from neutron.extensions import providernet
@ -318,12 +320,22 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
return num_agents
def _create_ha_port_binding(self, context, router_id, port_id):
    """Create and return the HA port binding for a router.

    The binding is added to ``context.session`` inside a nested
    transaction. If the insert fails with a DBReferenceError that wraps
    an IntegrityError, the router is taken to have been removed by a
    concurrent operation and the failure is reported as RouterNotFound.

    :param context: request context whose ``session`` performs the insert
    :param router_id: id of the HA router the port is bound to
    :param port_id: id of the HA port being bound
    :returns: the newly added L3HARouterAgentPortBinding
    :raises l3.RouterNotFound: on a DBReferenceError whose inner
        exception is an IntegrityError
    :raises db_exc.DBReferenceError: re-raised for any other inner cause
    """
    try:
        with context.session.begin(nested=True):
            portbinding = L3HARouterAgentPortBinding(port_id=port_id,
                                                     router_id=router_id)
            context.session.add(portbinding)

        return portbinding
    except db_exc.DBReferenceError as e:
        # The original error is re-raised when this context exits unless
        # ``reraise`` is switched off below.
        with excutils.save_and_reraise_exception() as ctxt:
            if isinstance(e.inner_exception, sql_exc.IntegrityError):
                ctxt.reraise = False
                LOG.debug(
                    'Failed to create HA router agent PortBinding, '
                    'Router %s has already been removed '
                    'by concurrent operation', router_id)
                raise l3.RouterNotFound(router_id=router_id)
def add_ha_port(self, context, router_id, network_id, tenant_id):
# NOTE(kevinbenton): we have to block any ongoing transactions because

View File

@ -32,6 +32,7 @@ from neutron.db import l3_agentschedulers_db
from neutron.db import l3_db
from neutron.db import l3_hamode_db
from neutron.extensions import availability_zone as az_ext
from neutron.extensions import l3
LOG = logging.getLogger(__name__)
@ -266,6 +267,11 @@ class L3Scheduler(object):
except db_exc.DBDuplicateEntry:
LOG.debug("Router %(router)s already scheduled for agent "
"%(agent)s", {'router': router_id, 'agent': agent['id']})
except l3.RouterNotFound:
LOG.debug('Router %s has already been removed '
'by concurrent operation', router_id)
return
self.bind_router(context, router_id, agent)
def get_ha_routers_l3_agents_counts(self, context, plugin, filters=None):

View File

@ -22,6 +22,7 @@ import mock
from oslo_config import cfg
from oslo_utils import importutils
from oslo_utils import timeutils
from sqlalchemy import orm
import testscenarios
from neutron.common import constants
@ -35,6 +36,7 @@ from neutron.db import l3_dvr_ha_scheduler_db
from neutron.db import l3_dvrscheduler_db
from neutron.db import l3_hamode_db
from neutron.db import l3_hascheduler_db
from neutron.extensions import l3
from neutron.extensions import l3_ext_ha_mode as l3_ha
from neutron.extensions import l3agentscheduler as l3agent
from neutron.extensions import portbindings
@ -1280,6 +1282,42 @@ class L3HATestCaseMixin(testlib_api.SqlTestCase,
return self.plugin.create_router(self.adminContext,
{'router': router})
def test_create_ha_port_and_bind_catch_integrity_error(self):
    """bind_router must not be called when the binding insert fails.

    Session.add is wrapped so that any L3HARouterAgentPortBinding gets a
    router id that does not exist before it is handed to the real add.
    """
    ha_router = self._create_ha_router(tenant_id='foo_tenant')
    l3_agent = {'id': 'foo_agent'}

    real_add = orm.Session.add

    def add_with_bogus_router(session, instance):
        # Only the HA port binding is tampered with; every other object
        # goes through to the real Session.add untouched.
        if instance.__class__.__name__ == 'L3HARouterAgentPortBinding':
            instance.router_id = 'nonexistent_router'
        return real_add(session, instance)

    with mock.patch.object(self.plugin.router_scheduler,
                           'bind_router') as bind_router:
        with mock.patch.object(
                orm.Session, 'add',
                side_effect=add_with_bogus_router,
                autospec=True):
            self.plugin.router_scheduler.create_ha_port_and_bind(
                self.plugin, self.adminContext,
                ha_router['id'], ha_router['tenant_id'], l3_agent)
            self.assertFalse(bind_router.called)
def test_create_ha_port_and_bind_catch_router_not_found(self):
    """bind_router must not be called when add_ha_port raises
    RouterNotFound.
    """
    ha_router = self._create_ha_router(tenant_id='foo_tenant')
    l3_agent = {'id': 'foo_agent'}
    not_found = l3.RouterNotFound(router_id='foo_router')

    with mock.patch.object(self.plugin.router_scheduler,
                           'bind_router') as bind_router:
        with mock.patch.object(self.plugin, 'add_ha_port',
                               side_effect=not_found):
            self.plugin.router_scheduler.create_ha_port_and_bind(
                self.plugin, self.adminContext,
                ha_router['id'], ha_router['tenant_id'], l3_agent)
            self.assertFalse(bind_router.called)
class L3_HA_scheduler_db_mixinTestCase(L3HATestCaseMixin):