From defe3a8f29c70dcdb4d271613a3a32dbd3739a5a Mon Sep 17 00:00:00 2001 From: Tim Simmons Date: Fri, 29 Jan 2016 17:21:06 -0600 Subject: [PATCH] Actually poll for zone deletes - This implements a few `TODO`s in the code around actually polling nameservers for zone deletes. I found this necessary for changes that were truing up missed DELETEs, and wanted to make a poll/ update_status call to verify deletion across a fleet of nameservers when one was down. - This originally failed in PowerDNS, because in older versions that are still in all the distro repos, PDNS responded with a NOERROR and empty answer section when it got a query for a zone it knew nothing about. NEAT. http://blog.powerdns.com/2015/03/02/from-noerror-to-refused/ - NOTE: This increases timeout for functional tests and we maybe shouldn't do that. Change-Id: Ied1e5daf4798127e00a06265164759d98fc9ba72 --- designate/central/service.py | 14 ++--- designate/mdns/notify.py | 18 ++++-- designate/pool_manager/service.py | 56 +++++++++++------- .../tests/test_pool_manager/test_service.py | 59 ------------------- designate/tests/unit/__init__.py | 35 +++++++++++ designate/tests/unit/test_mdns/test_notify.py | 4 +- .../unit/test_pool_manager/test_service.py | 28 +++++++++ devstack/gate/run_tempest_tests.sh | 2 +- functionaltests/common/utils.py | 2 +- 9 files changed, 121 insertions(+), 97 deletions(-) diff --git a/designate/central/service.py b/designate/central/service.py index d79d7f1b..fb840f73 100644 --- a/designate/central/service.py +++ b/designate/central/service.py @@ -2292,17 +2292,13 @@ class Service(service.RPCService, service.Service): zone, deleted = self._update_zone_or_record_status( zone, status, serial) - LOG.debug('Setting zone %s, serial %s: action %s, status %s' - % (zone.id, zone.serial, zone.action, zone.status)) - self.storage.update_zone(context, zone) + if zone.status != 'DELETED': + LOG.debug('Setting zone %s, serial %s: action %s, status %s' + % (zone.id, zone.serial, zone.action, zone.status)) + self.storage.update_zone(context, zone) - # TODO(Ron): Including this to retain the current logic. - # We should NOT be deleting zones. The zone status should be - # used to indicate the zone has been deleted and not the deleted - # column. The deleted column is needed for unique constraints. if deleted: - # TODO(vinod): Pass a zone to delete_zone rather than id so - # that the action, status and serial are updated correctly. + LOG.debug('update_status: deleting %s' % zone.name) self.storage.delete_zone(context, zone.id) return zone diff --git a/designate/mdns/notify.py b/designate/mdns/notify.py index beb89319..540b7a38 100644 --- a/designate/mdns/notify.py +++ b/designate/mdns/notify.py @@ -118,9 +118,14 @@ class NotifyEndpoint(base.BaseEndpoint): while (True): (response, retry) = self._make_and_send_dns_message( zone, host, port, timeout, retry_interval, retries) - if response and response.rcode() in ( - dns.rcode.NXDOMAIN, dns.rcode.REFUSED, dns.rcode.SERVFAIL): + + if response and (response.rcode() in ( + dns.rcode.NXDOMAIN, dns.rcode.REFUSED, dns.rcode.SERVFAIL) + or not bool(response.answer)): status = 'NO_ZONE' + if zone.serial == 0 and zone.action in ['DELETE', 'NONE']: + return (status, 0, retries) + elif response and len(response.answer) == 1 \ and str(response.answer[0].name) == str(zone.name) \ and response.answer[0].rdclass == dns.rdataclass.IN \ @@ -138,12 +143,14 @@ class NotifyEndpoint(base.BaseEndpoint): {'zone': zone.name, 'host': host, 'port': port, 'es': zone.serial, 'as': actual_serial, 'retries': retries}) + if retries > 0: # retry again time.sleep(retry_interval) continue else: break + else: # Everything looks good at this point. Return SUCCESS. status = 'SUCCESS' @@ -211,8 +218,11 @@ class NotifyEndpoint(base.BaseEndpoint): break # Check that we actually got a NOERROR in the rcode and and an # authoritative answer - elif response.rcode() in (dns.rcode.NXDOMAIN, dns.rcode.REFUSED, - dns.rcode.SERVFAIL): + elif (response.rcode() in + (dns.rcode.NXDOMAIN, dns.rcode.REFUSED, + dns.rcode.SERVFAIL)) or \ + (response.rcode() == dns.rcode.NOERROR and + not bool(response.answer)): LOG.info(_LI("%(zone)s not found on %(server)s:%(port)d"), {'zone': zone.name, 'server': host, 'port': port}) diff --git a/designate/pool_manager/service.py b/designate/pool_manager/service.py index 7f9e86c0..33c656fc 100644 --- a/designate/pool_manager/service.py +++ b/designate/pool_manager/service.py @@ -197,7 +197,7 @@ class Service(service.RPCService, coordination.CoordinationMixin, def _get_admin_context_all_tenants(self): return DesignateContext.get_admin_context(all_tenants=True) - # Periodioc Tasks + # Periodic Tasks def periodic_recovery(self): """ Runs only on the pool leader @@ -317,7 +317,7 @@ class Service(service.RPCService, coordination.CoordinationMixin, for also_notify in self.pool.also_notifies: self._update_zone_on_also_notify(context, also_notify, zone) - # Send a NOTIFY to each nameserver + # Ensure the change has propagated to each nameserver for nameserver in self.pool.nameservers: create_status = self._build_status_object( nameserver, zone, CREATE_ACTION) @@ -391,7 +391,7 @@ class Service(service.RPCService, coordination.CoordinationMixin, for also_notify in self.pool.also_notifies: self._update_zone_on_also_notify(context, also_notify, zone) - # Ensure the change has propogated to each nameserver + # Ensure the change has propagated to each nameserver for nameserver in self.pool.nameservers: # See if there is already another update in progress try: @@ -453,25 +453,29 @@ class Service(service.RPCService, coordination.CoordinationMixin, results.append( self._delete_zone_on_target(context, target, zone)) - # TODO(kiall): We should monitor that the Zone is actually deleted - # correctly on each of the nameservers, rather than - # assuming a successful delete-on-target is OK as we have - # in the past. - if self._exceed_or_meet_threshold( + if not self._exceed_or_meet_threshold( results.count(True), MAXIMUM_THRESHOLD): - LOG.debug('Consensus reached for deleting zone %(zone)s ' - 'on pool targets' % {'zone': zone.name}) - - self.central_api.update_status( - context, zone.id, SUCCESS_STATUS, zone.serial) - - else: LOG.warning(_LW('Consensus not reached for deleting zone %(zone)s' - ' on pool targets') % {'zone': zone.name}) - + ' on pool targets') % {'zone': zone.name}) self.central_api.update_status( context, zone.id, ERROR_STATUS, zone.serial) + zone.serial = 0 + # Ensure the change has propagated to each nameserver + for nameserver in self.pool.nameservers: + # See if there is already another update in progress + try: + self.cache.retrieve(context, nameserver.id, zone.id, + DELETE_ACTION) + except exceptions.PoolManagerStatusNotFound: + update_status = self._build_status_object( + nameserver, zone, DELETE_ACTION) + self.cache.store(context, update_status) + + self.mdns_api.poll_for_serial_number( + context, zone, nameserver, self.timeout, + self.retry_interval, self.max_retries, self.delay) + def _delete_zone_on_target(self, context, target, zone): """ :param context: Security context information. @@ -542,7 +546,11 @@ class Service(service.RPCService, coordination.CoordinationMixin, current_status.serial_number = actual_serial self.cache.store(context, current_status) + LOG.debug('Attempting to get consensus serial for %s' % + zone.name) consensus_serial = self._get_consensus_serial(context, zone) + LOG.debug('Consensus serial for %s is %s' % + (zone.name, consensus_serial)) # If there is a valid consensus serial we can still send a success # for that serial. @@ -568,11 +576,15 @@ class Service(service.RPCService, coordination.CoordinationMixin, self.central_api.update_status( context, zone.id, ERROR_STATUS, error_serial) - if status == NO_ZONE_STATUS and action != DELETE_ACTION: - LOG.warning(_LW('Zone %(zone)s is not present in some ' - 'targets') % {'zone': zone.name}) - self.central_api.update_status( - context, zone.id, NO_ZONE_STATUS, 0) + if status == NO_ZONE_STATUS: + if action == DELETE_ACTION: + self.central_api.update_status( + context, zone.id, NO_ZONE_STATUS, 0) + else: + LOG.warning(_LW('Zone %(zone)s is not present in some ' + 'targets') % {'zone': zone.name}) + self.central_api.update_status( + context, zone.id, NO_ZONE_STATUS, 0) if consensus_serial == zone.serial and self._is_consensus( context, zone, action, SUCCESS_STATUS, diff --git a/designate/tests/test_pool_manager/test_service.py b/designate/tests/test_pool_manager/test_service.py index a3f7dc59..e81055a2 100644 --- a/designate/tests/test_pool_manager/test_service.py +++ b/designate/tests/test_pool_manager/test_service.py @@ -252,65 +252,6 @@ class PoolManagerServiceNoopTest(PoolManagerTestCase): self.assertFalse(mock_update_status.called) - @patch.object(impl_fake.FakeBackend, 'delete_zone', - side_effect=exceptions.Backend) - @patch.object(central_rpcapi.CentralAPI, 'update_status') - def test_delete_zone(self, mock_update_status, _): - zone = self._build_zone('example.org.', 'DELETE', 'PENDING') - - self.service.delete_zone(self.admin_context, zone) - - mock_update_status.assert_called_once_with( - self.admin_context, zone.id, 'ERROR', zone.serial) - - @patch.object(impl_fake.FakeBackend, 'delete_zone') - @patch.object(central_rpcapi.CentralAPI, 'update_status') - def test_delete_zone_target_both_failure( - self, mock_update_status, mock_delete_zone): - - zone = self._build_zone('example.org.', 'DELETE', 'PENDING') - - mock_delete_zone.side_effect = exceptions.Backend - - self.service.delete_zone(self.admin_context, zone) - - mock_update_status.assert_called_once_with( - self.admin_context, zone.id, 'ERROR', zone.serial) - - @patch.object(impl_fake.FakeBackend, 'delete_zone') - @patch.object(central_rpcapi.CentralAPI, 'update_status') - def test_delete_zone_target_one_failure( - self, mock_update_status, mock_delete_zone): - - zone = self._build_zone('example.org.', 'DELETE', 'PENDING') - - mock_delete_zone.side_effect = [None, exceptions.Backend] - - self.service.delete_zone(self.admin_context, zone) - - mock_update_status.assert_called_once_with( - self.admin_context, zone.id, 'ERROR', zone.serial) - - @patch.object(impl_fake.FakeBackend, 'delete_zone') - @patch.object(central_rpcapi.CentralAPI, 'update_status') - def test_delete_zone_target_one_failure_consensus( - self, mock_update_status, mock_delete_zone): - - self.service.stop() - self.config( - threshold_percentage=50, - group='service:pool_manager') - self.service = self.start_service('pool_manager') - - zone = self._build_zone('example.org.', 'DELETE', 'PENDING') - - mock_delete_zone.side_effect = [None, exceptions.Backend] - - self.service.delete_zone(self.admin_context, zone) - - mock_update_status.assert_called_once_with( - self.admin_context, zone.id, 'ERROR', zone.serial) - @patch.object(mdns_rpcapi.MdnsAPI, 'get_serial_number', side_effect=messaging.MessagingException) @patch.object(central_rpcapi.CentralAPI, 'update_status') diff --git a/designate/tests/unit/__init__.py b/designate/tests/unit/__init__.py index 285801d2..d435e25b 100644 --- a/designate/tests/unit/__init__.py +++ b/designate/tests/unit/__init__.py @@ -55,3 +55,38 @@ class RoObject(object): def to_dict(self): return self.__dict__ + + +class RwObject(object): + """Object mock: raise exception on __setitem__ or __setattr__ + on any item/attr created after initialization. + Allows updating existing items/attrs + """ + def __init__(self, d=None, **kw): + if d: + kw.update(d) + self.__dict__.update(kw) + + def __getitem__(self, k): + try: + return self.__dict__[k] + except KeyError: + cn = self.__class__.__name__ + raise NotImplementedError( + "Attempt to perform __getitem__" + " %r on %s %r" % (cn, k, self.__dict__) + ) + + def __setitem__(self, k, v): + if k in self.__dict__: + self.__dict__.update({k: v}) + return + + cn = self.__class__.__name__ + raise NotImplementedError( + "Attempt to perform __setitem__ or __setattr__" + " %r on %s %r" % (cn, k, self.__dict__) + ) + + def __setattr__(self, k, v): + self.__setitem__(k, v) diff --git a/designate/tests/unit/test_mdns/test_notify.py b/designate/tests/unit/test_mdns/test_notify.py index 1cc0cdcf..183e833d 100644 --- a/designate/tests/unit/test_mdns/test_notify.py +++ b/designate/tests/unit/test_mdns/test_notify.py @@ -161,6 +161,7 @@ class MdnsNotifyTest(base.BaseTestCase): rcode=Mock(return_value=dns.rcode.NOERROR), # rcode is NOERROR but (flags & dns.flags.AA) gives 0 flags=0, + answer=['answer'], ) self.notify._send_dns_message = Mock(return_value=response) @@ -176,7 +177,8 @@ class MdnsNotifyTest(base.BaseTestCase): rcode=Mock(return_value=dns.rcode.NOERROR), # rcode is NOERROR but flags are not NOERROR flags=123, - ednsflags=321 + ednsflags=321, + answer=['answer'], ) self.notify._send_dns_message = Mock(return_value=response) diff --git a/designate/tests/unit/test_pool_manager/test_service.py b/designate/tests/unit/test_pool_manager/test_service.py index fdf7c0d2..875c732d 100644 --- a/designate/tests/unit/test_pool_manager/test_service.py +++ b/designate/tests/unit/test_pool_manager/test_service.py @@ -29,6 +29,7 @@ from designate import exceptions from designate import objects from designate.pool_manager.service import Service from designate.tests.unit import RoObject +from designate.tests.unit import RwObject import designate.pool_manager.service as pm_module @@ -180,3 +181,30 @@ class PoolManagerTest(test.BaseTestCase): self.pm.periodic_sync() self.assertEqual(3, self.pm.update_zone.call_count) + + @patch.object(pm_module.DesignateContext, 'get_admin_context') + def test_create_zone(self, mock_get_ctx, *mocks): + z = RwObject(name='a_zone', serial=1) + + mock_ctx = mock_get_ctx.return_value + self.pm._exceed_or_meet_threshold = Mock(return_value=True) + + self.pm.create_zone(mock_ctx, z) + + @patch.object(pm_module.DesignateContext, 'get_admin_context') + def test_update_zone(self, mock_get_ctx, *mocks): + z = RwObject(name='a_zone', serial=1) + + mock_ctx = mock_get_ctx.return_value + self.pm._exceed_or_meet_threshold = Mock(return_value=True) + + self.pm.update_zone(mock_ctx, z) + + @patch.object(pm_module.DesignateContext, 'get_admin_context') + def test_delete_zone(self, mock_get_ctx, *mocks): + z = RwObject(name='a_zone', serial=1) + + mock_ctx = mock_get_ctx.return_value + self.pm._exceed_or_meet_threshold = Mock(return_value=True) + + self.pm.delete_zone(mock_ctx, z) diff --git a/devstack/gate/run_tempest_tests.sh b/devstack/gate/run_tempest_tests.sh index c6cd1e6c..78755792 100755 --- a/devstack/gate/run_tempest_tests.sh +++ b/devstack/gate/run_tempest_tests.sh @@ -29,4 +29,4 @@ TEMPEST_DIR=${TEMPEST_DIR:-/opt/stack/new/tempest} pushd $DESIGNATE_DIR export TEMPEST_CONFIG=$TEMPEST_DIR/etc/tempest.conf -tox -e functional +tox -e functional -- --concurrency 4 diff --git a/functionaltests/common/utils.py b/functionaltests/common/utils.py index c02c7202..244c2fff 100644 --- a/functionaltests/common/utils.py +++ b/functionaltests/common/utils.py @@ -87,7 +87,7 @@ def parameterized(data): return wrapped -def wait_for_condition(condition, interval=1, timeout=40): +def wait_for_condition(condition, interval=5, timeout=45): end_time = time.time() + timeout while time.time() < end_time: result = condition()