From 00d0cb71e63e870fdf09833eec02f29b95542b18 Mon Sep 17 00:00:00 2001
From: Takahito Hirose
Date: Tue, 26 Mar 2019 19:52:33 +0900
Subject: [PATCH] Fix DBDeadLock error resulting into 500

When a user sends record registration requests continuously, designate
sometimes hits a DBDeadLock, resulting in a 500 InternalServerError.

We get the error below:

2019-02-21 21:30:39.925 49752 ERROR designate.api.middleware RemoteError: Remote error: DBDeadlock (pymysql.err.InternalError) (1213, u'Deadlock found when trying to get lock; try restarting transaction')
[SQL: u'UPDATE records SET version=(records.version + %(version_1)s), updated_at=%(updated_at)s, data=%(data)s, hash=%(hash)s, status=%(status)s, action=%(action)s, serial=%(serial)s WHERE records.id = %(id_1)s']
[parameters: {'status': 'PENDING', 'hash': '39795ee18c6e3c9ad1c0190c6a3d8d4f', 'updated_at': datetime.datetime(2019, 2, 21, 12, 30, 39, 909846), u'version_1': 1, u'id_1': '7a655eeda4d446cdaa81caf19ab55fcc', 'action': 'UPDATE', 'serial': 1550752338, 'data': u'ns2.example.jp. domain.example.com. 1550752338 3552 600 86400 3600'}]

In the process of record registration, designate first tried to update
the record and then update the zone status.

Process [1] (updating the zone status, then registering the record) and
process [2] (updating the record status, then the zone status, after a
sync) touch the two tables in reverse order. So if a user requests
record registration many times at the same time, Designate will hit the
DBDeadLock when these processes run at the same time.

We observed that changing the order of the operations solves this issue.

[1] https://github.com/openstack/designate/blob/master/designate/central/service.py#L1292-L1320
[2] https://github.com/openstack/designate/blob/master/designate/central/service.py#L2310-L2322

1. transaction [1]-1 updating zone status process <- run ---> table_name-zone
2. transaction [2]-1 updating record status process <- run ---> table_name-record
3.
transaction [1]-2 registering record process <- run and wait ---> table_name-record
4. transaction [2]-2 updating zone process <-deadlock! ---> table_name-zone

Change-Id: Icd6e690ac84a2fe0db0f4a8a513de47f7916f5ea
Related-Bug: #1785459
(cherry picked from commit f828654a3d40476cac7eb24a09a36e9978c2d708)
---
 designate/central/service.py                  |  2 +-
 .../tests/unit/test_central/test_basic.py     | 26 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/designate/central/service.py b/designate/central/service.py
index a51f3c0cb..5d0b92d86 100644
--- a/designate/central/service.py
+++ b/designate/central/service.py
@@ -2313,8 +2313,8 @@ class Service(service.RPCService, service.Service):
         """
         # TODO(kiall): If the status is SUCCESS and the zone is already ACTIVE,
         # we likely don't need to do anything.
-        self._update_record_status(context, zone_id, status, serial)
         zone = self._update_zone_status(context, zone_id, status, serial)
+        self._update_record_status(context, zone_id, status, serial)
         return zone
 
     def _update_zone_status(self, context, zone_id, status, serial):
diff --git a/designate/tests/unit/test_central/test_basic.py b/designate/tests/unit/test_central/test_basic.py
index a1e2e0bde..3e9f87d9d 100644
--- a/designate/tests/unit/test_central/test_basic.py
+++ b/designate/tests/unit/test_central/test_basic.py
@@ -433,6 +433,32 @@ class CentralServiceTestCase(CentralBasic):
         assert self.service._enforce_record_quota.called
         assert self.service._update_zone_in_storage.called
 
+    def test_create_recordset_checking_DBDeadLock(self):
+        self.service._enforce_recordset_quota = mock.Mock()
+        self.service._enforce_record_quota = mock.Mock()
+        self.service._is_valid_recordset_name = mock.Mock()
+        self.service._is_valid_recordset_placement = mock.Mock()
+        self.service._is_valid_recordset_placement_subzone = mock.Mock()
+        self.service._is_valid_ttl = mock.Mock()
+
+        self.service.storage.create_recordset = mock.Mock(return_value='rs')
+        self.service._update_zone_in_storage = mock.Mock()
+
+        # NOTE(thirose): Since this is a race condition we assume that
+        # we will hit it if we try to do the operations in a loop 100 times.
+        for num in range(100):
+            recordset = Mock()
+            recordset.name = "b{}".format(num)
+            recordset.obj_attr_is_set.return_value = True
+            recordset.records = [MockRecord()]
+
+            rs, zone = self.service._create_recordset_in_storage(
+                self.context, Mockzone(), recordset
+            )
+            assert not self.service.storage._retry_on_deadlock.called
+            assert self.service._update_zone_in_storage.called
+            assert self.service.storage.create_recordset.called
+
     def test__create_soa(self):
         self.service._create_recordset_in_storage = Mock(
             return_value=(None, None)