Merge "Add support for DBDeadlock handling"
commit 96683ea61d
@@ -1251,7 +1251,7 @@
 # Maximum retries in case of connection error or deadlock
 # error before error is raised. Set to -1 to specify an
 # infinite retry count. (integer value)
-#db_max_retries = 20
+#db_max_retries = 5
 
 
 [deploy]
@@ -26,3 +26,6 @@ opts = [
 
 def register_opts(conf):
     conf.register_opts(opts, group='database')
+    # Change the oslo_db side default to 5
+    conf.import_opt('db_max_retries', 'ironic.db.api', group='database')
+    conf.set_default('db_max_retries', 5, group='database')
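The three added lines change only the default value of an option that oslo_db already defines, rather than hard-coding it. A standalone sketch of this oslo.config pattern (the option is registered directly here instead of imported, purely for illustration):

    from oslo_config import cfg

    CONF = cfg.CONF
    # Stand-in registration for the option that oslo_db normally defines.
    CONF.register_opts([cfg.IntOpt('db_max_retries', default=20)],
                       group='database')
    # Lower only the default; a value set explicitly in ironic.conf still wins.
    CONF.set_default('db_max_retries', 5, group='database')

    CONF(args=[])  # no config files or CLI args in this sketch
    print(CONF.database.db_max_retries)  # -> 5 unless overridden by the operator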
@@ -20,6 +20,7 @@ import collections
 import datetime
 import threading
 
+from oslo_db import api as oslo_db_api
 from oslo_db import exception as db_exc
 from oslo_db.sqlalchemy import enginefacade
 from oslo_db.sqlalchemy import utils as db_utils
@@ -55,6 +56,9 @@ def _session_for_read():
     return enginefacade.reader.using(_CONTEXT)
 
 
+# Please add @oslo_db_api.retry_on_deadlock decorator to all methods using
+# _session_for_write (as deadlocks happen on write), so that oslo_db is able
+# to retry in case of deadlocks.
 def _session_for_write():
     return enginefacade.writer.using(_CONTEXT)
 
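The comment added above states the convention this change enforces: any method that opens a write session must carry the retry decorator. A minimal sketch of the pattern, reusing the module-level helpers shown in this file (the method itself is illustrative, not part of the change):

    @oslo_db_api.retry_on_deadlock
    def _example_write(values):
        # If the body raises DBDeadlock, oslo_db transparently re-runs the
        # whole method instead of letting the deadlock propagate to the caller.
        with _session_for_write() as session:
            session.add(models.Node(**values))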
@@ -258,6 +262,7 @@ class Connection(api.Connection):
         return _paginate_query(models.Node, limit, marker,
                                sort_key, sort_dir, query)
 
+    @oslo_db_api.retry_on_deadlock
     def reserve_node(self, tag, node_id):
         with _session_for_write():
             query = _get_node_query_with_tags()
@@ -276,6 +281,7 @@ class Connection(api.Connection):
             except NoResultFound:
                 raise exception.NodeNotFound(node_id)
 
+    @oslo_db_api.retry_on_deadlock
     def release_node(self, tag, node_id):
         with _session_for_write():
             query = model_query(models.Node)
@@ -294,6 +300,7 @@ class Connection(api.Connection):
             except NoResultFound:
                 raise exception.NodeNotFound(node_id)
 
+    @oslo_db_api.retry_on_deadlock
     def create_node(self, values):
         # ensure defaults are present for new nodes
         if 'uuid' not in values:
@@ -364,6 +371,7 @@ class Connection(api.Connection):
 
         return result
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_node(self, node_id):
         with _session_for_write():
             query = model_query(models.Node)
@@ -422,6 +430,7 @@ class Connection(api.Connection):
             else:
                 raise
 
+    @oslo_db_api.retry_on_deadlock
     def _do_update_node(self, node_id, values):
         with _session_for_write():
             query = model_query(models.Node)
@@ -487,6 +496,7 @@ class Connection(api.Connection):
         return _paginate_query(models.Port, limit, marker,
                                sort_key, sort_dir, query)
 
+    @oslo_db_api.retry_on_deadlock
     def create_port(self, values):
         if not values.get('uuid'):
             values['uuid'] = uuidutils.generate_uuid()
@@ -503,6 +513,7 @@ class Connection(api.Connection):
                 raise exception.PortAlreadyExists(uuid=values['uuid'])
             return port
 
+    @oslo_db_api.retry_on_deadlock
     def update_port(self, port_id, values):
         # NOTE(dtantsur): this can lead to very strange errors
         if 'uuid' in values:
@@ -522,6 +533,7 @@ class Connection(api.Connection):
             raise exception.MACAlreadyExists(mac=values['address'])
         return ref
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_port(self, port_id):
         with _session_for_write():
             query = model_query(models.Port)
@@ -570,6 +582,7 @@ class Connection(api.Connection):
         return _paginate_query(models.Portgroup, limit, marker,
                                sort_key, sort_dir, query)
 
+    @oslo_db_api.retry_on_deadlock
     def create_portgroup(self, values):
         if not values.get('uuid'):
             values['uuid'] = uuidutils.generate_uuid()
@@ -591,6 +604,7 @@ class Connection(api.Connection):
                 raise exception.PortgroupAlreadyExists(uuid=values['uuid'])
             return portgroup
 
+    @oslo_db_api.retry_on_deadlock
     def update_portgroup(self, portgroup_id, values):
         if 'uuid' in values:
             msg = _("Cannot overwrite UUID for an existing portgroup.")
@@ -615,6 +629,7 @@ class Connection(api.Connection):
                 raise
         return ref
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_portgroup(self, portgroup_id):
         def portgroup_not_empty(session):
             """Checks whether the portgroup does not have ports."""
@@ -654,6 +669,7 @@ class Connection(api.Connection):
         return _paginate_query(models.Chassis, limit, marker,
                                sort_key, sort_dir)
 
+    @oslo_db_api.retry_on_deadlock
     def create_chassis(self, values):
         if not values.get('uuid'):
             values['uuid'] = uuidutils.generate_uuid()
@@ -668,6 +684,7 @@ class Connection(api.Connection):
                 raise exception.ChassisAlreadyExists(uuid=values['uuid'])
             return chassis
 
+    @oslo_db_api.retry_on_deadlock
     def update_chassis(self, chassis_id, values):
         # NOTE(dtantsur): this can lead to very strange errors
         if 'uuid' in values:
@@ -684,6 +701,7 @@ class Connection(api.Connection):
             ref = query.one()
         return ref
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_chassis(self, chassis_id):
         def chassis_not_empty():
             """Checks whether the chassis does not have nodes."""
@@ -704,6 +722,7 @@ class Connection(api.Connection):
             if count != 1:
                 raise exception.ChassisNotFound(chassis=chassis_id)
 
+    @oslo_db_api.retry_on_deadlock
     def register_conductor(self, values, update_existing=False):
         with _session_for_write() as session:
             query = (model_query(models.Conductor)
@@ -731,6 +750,7 @@ class Connection(api.Connection):
         except NoResultFound:
             raise exception.ConductorNotFound(conductor=hostname)
 
+    @oslo_db_api.retry_on_deadlock
     def unregister_conductor(self, hostname):
         with _session_for_write():
             query = (model_query(models.Conductor)
@@ -739,6 +759,7 @@ class Connection(api.Connection):
             if count == 0:
                 raise exception.ConductorNotFound(conductor=hostname)
 
+    @oslo_db_api.retry_on_deadlock
     def touch_conductor(self, hostname):
         with _session_for_write():
             query = (model_query(models.Conductor)
@@ -750,6 +771,7 @@ class Connection(api.Connection):
             if count == 0:
                 raise exception.ConductorNotFound(conductor=hostname)
 
+    @oslo_db_api.retry_on_deadlock
     def clear_node_reservations_for_conductor(self, hostname):
         nodes = []
         with _session_for_write():
@@ -764,6 +786,7 @@ class Connection(api.Connection):
                 _LW('Cleared reservations held by %(hostname)s: '
                     '%(nodes)s'), {'hostname': hostname, 'nodes': nodes})
 
+    @oslo_db_api.retry_on_deadlock
     def clear_node_target_power_state(self, hostname):
         nodes = []
         with _session_for_write():
@@ -826,6 +849,7 @@ class Connection(api.Connection):
         query = _filter_active_conductors(query)
         return query.all()
 
+    @oslo_db_api.retry_on_deadlock
     def register_conductor_hardware_interfaces(self, conductor_id,
                                                hardware_type, interface_type,
                                                interfaces, default_interface):
@@ -847,12 +871,14 @@ class Connection(api.Connection):
                     interface_type=interface_type,
                     interfaces=interfaces)
 
+    @oslo_db_api.retry_on_deadlock
     def unregister_conductor_hardware_interfaces(self, conductor_id):
         with _session_for_write():
             query = (model_query(models.ConductorHardwareInterfaces)
                      .filter_by(conductor_id=conductor_id))
             query.delete()
 
+    @oslo_db_api.retry_on_deadlock
     def touch_node_provisioning(self, node_id):
         with _session_for_write():
             query = model_query(models.Node)
@@ -865,6 +891,7 @@ class Connection(api.Connection):
         if not model_query(models.Node).filter_by(id=node_id).scalar():
             raise exception.NodeNotFound(node=node_id)
 
+    @oslo_db_api.retry_on_deadlock
     def set_node_tags(self, node_id, tags):
         # remove duplicate tags
         tags = set(tags)
@@ -878,6 +905,7 @@ class Connection(api.Connection):
 
         return node_tags
 
+    @oslo_db_api.retry_on_deadlock
     def unset_node_tags(self, node_id):
         self._check_node_exists(node_id)
         with _session_for_write():
@@ -890,6 +918,7 @@ class Connection(api.Connection):
                   .all())
         return result
 
+    @oslo_db_api.retry_on_deadlock
     def add_node_tag(self, node_id, tag):
         node_tag = models.NodeTag(tag=tag, node_id=node_id)
 
@@ -904,6 +933,7 @@ class Connection(api.Connection):
 
         return node_tag
 
+    @oslo_db_api.retry_on_deadlock
     def delete_node_tag(self, node_id, tag):
         self._check_node_exists(node_id)
         with _session_for_write():
@@ -959,6 +989,7 @@ class Connection(api.Connection):
         return _paginate_query(models.VolumeConnector, limit, marker,
                                sort_key, sort_dir, query)
 
+    @oslo_db_api.retry_on_deadlock
     def create_volume_connector(self, connector_info):
         if 'uuid' not in connector_info:
             connector_info['uuid'] = uuidutils.generate_uuid()
@@ -978,6 +1009,7 @@ class Connection(api.Connection):
                     uuid=connector_info['uuid'])
             return connector
 
+    @oslo_db_api.retry_on_deadlock
     def update_volume_connector(self, ident, connector_info):
         if 'uuid' in connector_info:
             msg = _("Cannot overwrite UUID for an existing Volume Connector.")
@@ -1001,6 +1033,7 @@ class Connection(api.Connection):
             raise exception.VolumeConnectorNotFound(connector=ident)
         return ref
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_volume_connector(self, ident):
         with _session_for_write():
             query = model_query(models.VolumeConnector)
@@ -1034,6 +1067,7 @@ class Connection(api.Connection):
         return _paginate_query(models.VolumeTarget, limit, marker, sort_key,
                                sort_dir, query)
 
+    @oslo_db_api.retry_on_deadlock
     def create_volume_target(self, target_info):
         if 'uuid' not in target_info:
             target_info['uuid'] = uuidutils.generate_uuid()
@@ -1052,6 +1086,7 @@ class Connection(api.Connection):
                     uuid=target_info['uuid'])
             return target
 
+    @oslo_db_api.retry_on_deadlock
     def update_volume_target(self, ident, target_info):
         if 'uuid' in target_info:
             msg = _("Cannot overwrite UUID for an existing Volume Target.")
@@ -1072,6 +1107,7 @@ class Connection(api.Connection):
             raise exception.VolumeTargetNotFound(target=ident)
         return ref
 
+    @oslo_db_api.retry_on_deadlock
     def destroy_volume_target(self, ident):
         with _session_for_write():
             query = model_query(models.VolumeTarget)
ironic/tests/unit/db/sqlalchemy/test_api.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import inspect
+
+from ironic.db.sqlalchemy import api as sqlalchemy_api
+from ironic.tests import base as test_base
+
+
+class TestDBWriteMethodsRetryOnDeadlock(test_base.TestCase):
+
+    def test_retry_on_deadlock(self):
+        # This test ensures that every dbapi method doing database write is
+        # wrapped with retry_on_deadlock decorator
+        for name, method in inspect.getmembers(sqlalchemy_api.Connection,
+                                               predicate=inspect.ismethod):
+            src = inspect.getsource(method)
+            if 'with _session_for_write()' in src:
+                self.assertIn(
+                    '@oslo_db_api.retry_on_deadlock', src,
+                    'oslo_db\'s retry_on_deadlock decorator not '
+                    'applied to method ironic.db.sqlalchemy.api.Connection.%s '
+                    'doing database write' % name)
@@ -18,6 +18,8 @@
 import datetime
 
 import mock
+from oslo_db import exception as db_exc
+from oslo_db import sqlalchemy
 from oslo_utils import timeutils
 
 from ironic.common import exception
@@ -129,6 +131,13 @@ class DbConductorTestCase(base.DbTestCase):
         c = self.dbapi.get_conductor(c.hostname)
         self.assertEqual(test_time, timeutils.normalize_time(c.updated_at))
 
+    @mock.patch.object(sqlalchemy.orm.Query, 'update', autospec=True)
+    def test_touch_conductor_deadlock(self, mock_update):
+        mock_update.side_effect = [db_exc.DBDeadlock(), None]
+        c = self._create_test_cdr()
+        self.dbapi.touch_conductor(c.hostname)
+        self.assertEqual(2, mock_update.call_count)
+
     def test_touch_conductor_not_found(self):
         # A conductor's heartbeat will not create a new record,
         # it will only update existing ones
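The mocked test above exercises the new behaviour end to end: the first Query.update call raises DBDeadlock, oslo_db retries, and the second call succeeds. The same behaviour can be sketched standalone, without Ironic or a database (the function and counter names are made up for illustration; the decorator sleeps briefly between attempts):

    from oslo_db import api as oslo_db_api
    from oslo_db import exception as db_exc

    attempts = []

    @oslo_db_api.retry_on_deadlock
    def flaky_write():
        attempts.append(1)
        if len(attempts) == 1:
            # First attempt simulates a deadlock reported by the database.
            raise db_exc.DBDeadlock()
        return 'ok'

    assert flaky_write() == 'ok'
    assert len(attempts) == 2  # one deadlock, then one successful retry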
@@ -0,0 +1,12 @@
+---
+fixes:
+  - Fixes an issue which caused conductor's periodic tasks to stop executing.
+    See https://bugs.launchpad.net/ironic/+bug/1637210
+features:
+  - Adds DBDeadlock handling which may improve stability when using Galera.
+    See https://bugs.launchpad.net/ironic/+bug/1639338
+upgrade:
+  - All DB API methods doing database writes now retry on deadlock. The
+    ``[database]db_max_retries`` configuration option specifies the maximum
+    number of times to retry, and can be customised if necessary. It is 5 by
+    default.
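For operators this means the retry count is tunable through ironic.conf; a sketch of an override (the value 10 is only an example):

    [database]
    # Maximum retries in case of connection error or deadlock error before an
    # error is raised. Set to -1 for an infinite retry count. Ironic's default
    # is now 5.
    db_max_retries = 10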