Browse Source

Handle retry logic for timeouts with multiple LDAP servers

It is currently possible to specify multiple LDAP server URIs
for failover purposes when using LDAP connection pooling, as this
functionality is provided in the underlying python-ldap module.
Unfortunately, failover does not work properly if LDAP timeout
issue are encountered due to the way python-LDAP works.  If multiple
URLs are provided, the first URL that results in a successful TCP
connection is considered to be a successful LDAP connection.  If the
initial bind operation fails due to a timeout waiting for an LDAP
response from the server, it will never failover to additional
URIs.  It is easy to demonstrate this behavior by forcing an LDAP
server to hang (attach with gdb to halt the process), then using
that server as the first URI when creating a connection pool.

This patch adds proper failover logic to ldappool.  If multiple URIs
are provided, we split them and attempt to connect to them one-by-one
until we have either had a successful LDAP bind operation, or we have
exhausted the list of URIs.  The connection retry logic is processed
per-URI as well, meaning we will attempt to reconnect to the first
URI up to the requested retry limit, then we will failover to the
next URI and reset the retry count.

The ldap.TIMEOUT exception was not raised to the caller like some
of the other common LDAP exceptions we might encounter.  We should
raise the TIMEOUT exception instead of the more generic BackendError
exception to provide more detail to the calling code.

Change-Id: Iabc13363d2425e70a53163249e5389d336274533
Nathan Kinder 5 months ago
parent
commit
3f0ea8533a
3 changed files with 199 additions and 36 deletions
  1. 4
    0
      README.rst
  2. 54
    34
      ldappool/__init__.py
  3. 141
    2
      ldappool/tests/test_ldapconnection.py

+ 4
- 0
README.rst View File

@@ -51,6 +51,10 @@ Here are the options you can use when instanciating the pool:
51 51
 - **use_pool**: activates the pool. If False, will recreate a connector
52 52
   each time. **default: True**
53 53
 
54
+The **uri** option will accept a comma or whitespace separated list of LDAP
55
+server URIs to allow for failover behavior when connection errors are
56
+encountered.  Connections will be attempted against the servers in order,
57
+with **retry_max** attempts per URI before failing over to the next server.
54 58
 
55 59
 The **connection** method takes two options:
56 60
 

+ 54
- 34
ldappool/__init__.py View File

@@ -43,6 +43,7 @@ import time
43 43
 
44 44
 import ldap
45 45
 from ldap.ldapobject import ReconnectLDAPObject
46
+import re
46 47
 import six
47 48
 from six import PY2
48 49
 
@@ -239,50 +240,69 @@ class ConnectionManager(object):
239 240
         :returns: StateConnector
240 241
         :raises BackendError: If unable to connect to LDAP
241 242
         """
242
-        tries = 0
243 243
         connected = False
244 244
         if passwd is not None:
245 245
             if PY2:
246 246
                 passwd = utf8_encode(passwd)
247
-        exc = None
248
-        conn = None
249 247
 
250
-        # trying retry_max times in a row with a fresh connector
251
-        while tries < self.retry_max and not connected:
252
-            try:
253
-                log.debug('Attempting to create a new connector '
254
-                          'to %s (attempt %d)', self.uri, tries + 1)
255
-                conn = self.connector_cls(self.uri, retry_max=self.retry_max,
256
-                                          retry_delay=self.retry_delay)
257
-                conn.timeout = self.timeout
258
-                self._bind(conn, bind, passwd)
259
-                connected = True
260
-            except ldap.INVALID_CREDENTIALS as error:
261
-                exc = error
262
-                log.error('Invalid credentials. Cancelling retry',
263
-                          exc_info=True)
264
-                break
265
-            except ldap.LDAPError as error:
266
-                exc = error
267
-                tries += 1
268
-                if tries < self.retry_max:
269
-                    log.info('Failure attempting to create and bind '
270
-                             'connector; will retry after %r seconds',
271
-                             self.retry_delay, exc_info=True)
272
-                    time.sleep(self.retry_delay)
273
-                else:
274
-                    log.error('Failure attempting to create and bind '
275
-                              'connector', exc_info=True)
248
+        # If multiple server URIs have been provided, loop through
249
+        # each one in turn in case of connection failures (server down,
250
+        # timeout, etc.).  URIs can be delimited by either commas or
251
+        # whitespace.
252
+        for server in re.split('[\s,]+', self.uri):
253
+            tries = 0
254
+            exc = None
255
+            conn = None
256
+
257
+            # trying retry_max times in a row with a fresh connector
258
+            while tries < self.retry_max and not connected:
259
+                try:
260
+                    log.debug('Attempting to create a new connector '
261
+                              'to %s (attempt %d)', server, tries + 1)
262
+                    conn = self.connector_cls(server, retry_max=self.retry_max,
263
+                                              retry_delay=self.retry_delay)
264
+                    conn.timeout = self.timeout
265
+                    self._bind(conn, bind, passwd)
266
+                    connected = True
267
+                except ldap.INVALID_CREDENTIALS as error:
268
+                    # Treat this as a hard failure instead of retrying to
269
+                    # avoid locking out the LDAP account due to successive
270
+                    # failed bind attempts.  We also don't want to try
271
+                    # connecting to additional servers if multiple URIs were
272
+                    # provide, as failed bind attempts may be replicated
273
+                    # across multiple LDAP servers.
274
+                    exc = error
275
+                    log.error('Invalid credentials. Cancelling retry',
276
+                              exc_info=True)
277
+                    raise exc
278
+                except ldap.LDAPError as error:
279
+                    exc = error
280
+                    tries += 1
281
+                    if tries < self.retry_max:
282
+                        log.info('Failure attempting to create and bind '
283
+                                 'connector; will retry after %r seconds',
284
+                                 self.retry_delay, exc_info=True)
285
+                        time.sleep(self.retry_delay)
286
+                    else:
287
+                        log.error('Failure attempting to create and bind '
288
+                                  'connector', exc_info=True)
289
+
290
+            # We successfully connected to one of the servers, so
291
+            # we can just return the connection and stop processing
292
+            # any additional URIs.
293
+            if connected:
294
+                return conn
276 295
 
296
+        # We failed to connect to any of the servers,
297
+        # so raise an appropriate exception.
277 298
         if not connected:
278 299
             if isinstance(exc, (ldap.NO_SUCH_OBJECT,
279
-                                ldap.INVALID_CREDENTIALS,
280
-                                ldap.SERVER_DOWN)):
300
+                                ldap.SERVER_DOWN,
301
+                                ldap.TIMEOUT)):
281 302
                 raise exc
282 303
 
283
-            # that's something else
284
-            raise BackendError(str(exc), backend=conn)
285
-        return conn
304
+        # that's something else
305
+        raise BackendError(str(exc), backend=conn)
286 306
 
287 307
     def _get_connection(self, bind=None, passwd=None):
288 308
         if bind is None:

+ 141
- 2
ldappool/tests/test_ldapconnection.py View File

@@ -51,14 +51,51 @@ def _bind_fails(self, who='', cred='', **kw):
51 51
     raise ldap.LDAPError('LDAP connection invalid')
52 52
 
53 53
 
54
-def _bind_fails2(self, who='', cred='', **kw):
54
+def _bind_fails_server_down(self, who='', cred='', **kw):
55 55
     raise ldap.SERVER_DOWN('LDAP connection invalid')
56 56
 
57 57
 
58
+def _bind_fails_server_down_failover(self, who='', cred='', **kw):
59
+    # Raise a server down error unless the URI is 'ldap://GOOD'
60
+    if self._uri == 'ldap://GOOD':
61
+        self.connected = True
62
+        self.who = who
63
+        self.cred = cred
64
+        return 1
65
+    else:
66
+        raise ldap.SERVER_DOWN('LDAP connection invalid')
67
+
68
+
69
+def _bind_fails_timeout(self, who='', cred='', **kw):
70
+    raise ldap.TIMEOUT('LDAP connection timeout')
71
+
72
+
73
+def _bind_fails_timeout_failover(self, who='', cred='', **kw):
74
+    # Raise a timeout error unless the URI is 'ldap://GOOD'
75
+    if self._uri == 'ldap://GOOD':
76
+        self.connected = True
77
+        self.who = who
78
+        self.cred = cred
79
+        return 1
80
+    else:
81
+        raise ldap.TIMEOUT('LDAP connection timeout')
82
+
83
+
58 84
 def _bind_fails_invalid_credentials(self, who='', cred='', **kw):
59 85
     raise ldap.INVALID_CREDENTIALS('LDAP connection invalid')
60 86
 
61 87
 
88
+def _bind_fails_invalid_credentials_failover(self, who='', cred='', **kw):
89
+    # Raise invalid credentials erorr unless the URI is 'ldap://GOOD'
90
+    if self._uri == 'ldap://GOOD':
91
+        self.connected = True
92
+        self.who = who
93
+        self.cred = cred
94
+        return 1
95
+    else:
96
+        raise ldap.INVALID_CREDENTIALS('LDAP connection invalid')
97
+
98
+
62 99
 def _start_tls_s(self):
63 100
     if self.start_tls_already_called_flag:
64 101
         raise ldap.LOCAL_ERROR
@@ -146,7 +183,7 @@ class TestLDAPConnection(unittest.TestCase):
146 183
             unbinds.append(1)
147 184
 
148 185
         # the binding fails with an LDAPError
149
-        ldappool.StateConnector.simple_bind_s = _bind_fails2
186
+        ldappool.StateConnector.simple_bind_s = _bind_fails_server_down
150 187
         ldappool.StateConnector.unbind_s = _unbind
151 188
         uri = ''
152 189
         dn = 'uid=adminuser,ou=logins,dc=mozilla'
@@ -162,6 +199,80 @@ class TestLDAPConnection(unittest.TestCase):
162 199
         else:
163 200
             raise AssertionError()
164 201
 
202
+    def test_simple_bind_fails_failover(self):
203
+        unbinds = []
204
+
205
+        def _unbind(self):
206
+            unbinds.append(1)
207
+
208
+        # the binding to any server other than 'ldap://GOOD' fails
209
+        # with ldap.SERVER_DOWN
210
+        ldappool.StateConnector.simple_bind_s = \
211
+            _bind_fails_server_down_failover
212
+        ldappool.StateConnector.unbind_s = _unbind
213
+        uri = 'ldap://BAD,ldap://GOOD'
214
+        dn = 'uid=adminuser,ou=logins,dc=mozilla'
215
+        passwd = 'adminuser'
216
+        cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
217
+        self.assertEqual(len(cm), 0)
218
+
219
+        try:
220
+            with cm.connection('dn', 'pass') as conn:
221
+                # Ensure we failed over to the second URI
222
+                self.assertTrue(conn.active)
223
+                self.assertEqual(conn._uri, 'ldap://GOOD')
224
+                pass
225
+        except Exception:
226
+            raise AssertionError()
227
+
228
+    def test_simple_bind_fails_timeout(self):
229
+        unbinds = []
230
+
231
+        def _unbind(self):
232
+            unbinds.append(1)
233
+
234
+        # the binding fails with ldap.TIMEOUT
235
+        ldappool.StateConnector.simple_bind_s = _bind_fails_timeout
236
+        ldappool.StateConnector.unbind_s = _unbind
237
+        uri = ''
238
+        dn = 'uid=adminuser,ou=logins,dc=mozilla'
239
+        passwd = 'adminuser'
240
+        cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
241
+        self.assertEqual(len(cm), 0)
242
+
243
+        try:
244
+            with cm.connection('dn', 'pass'):
245
+                pass
246
+        except ldap.TIMEOUT:
247
+            pass
248
+        else:
249
+            raise AssertionError()
250
+
251
+    def test_simple_bind_fails_timeout_failover(self):
252
+        unbinds = []
253
+
254
+        def _unbind(self):
255
+            unbinds.append(1)
256
+
257
+        # the binding to any server other than 'ldap://GOOD' fails
258
+        # with ldap.TIMEOUT
259
+        ldappool.StateConnector.simple_bind_s = _bind_fails_timeout_failover
260
+        ldappool.StateConnector.unbind_s = _unbind
261
+        uri = 'ldap://BAD,ldap://GOOD'
262
+        dn = 'uid=adminuser,ou=logins,dc=mozilla'
263
+        passwd = 'adminuser'
264
+        cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
265
+        self.assertEqual(len(cm), 0)
266
+
267
+        try:
268
+            with cm.connection('dn', 'pass') as conn:
269
+                # Ensure we failed over to the second URI
270
+                self.assertTrue(conn.active)
271
+                self.assertEqual(conn._uri, 'ldap://GOOD')
272
+                pass
273
+        except Exception:
274
+            raise AssertionError()
275
+
165 276
     def test_simple_bind_fails_invalid_credentials(self):
166 277
         unbinds = []
167 278
 
@@ -184,3 +295,31 @@ class TestLDAPConnection(unittest.TestCase):
184 295
             pass
185 296
         else:
186 297
             raise AssertionError()
298
+
299
+    def test_simple_bind_fails_invalid_credentials_failover(self):
300
+        unbinds = []
301
+
302
+        def _unbind(self):
303
+            unbinds.append(1)
304
+
305
+        # the binding to any server other than 'ldap://GOOD' fails
306
+        # with ldap.INVALID_CREDENTIALS
307
+        ldappool.StateConnector.simple_bind_s = \
308
+            _bind_fails_invalid_credentials_failover
309
+        ldappool.StateConnector.unbind_s = _unbind
310
+        uri = 'ldap://BAD,ldap://GOOD'
311
+        dn = 'uid=adminuser,ou=logins,dc=mozilla'
312
+        passwd = 'adminuser'
313
+        cm = ldappool.ConnectionManager(uri, dn, passwd, use_pool=True, size=2)
314
+        self.assertEqual(len(cm), 0)
315
+
316
+        try:
317
+            # We expect this to throw an INVALID_CREDENTIALS exception for the
318
+            # first URI, as this is a hard-failure where we don't want failover
319
+            # to occur to subsequent URIs.
320
+            with cm.connection('dn', 'pass'):
321
+                pass
322
+        except ldap.INVALID_CREDENTIALS:
323
+            pass
324
+        else:
325
+            raise AssertionError()

Loading…
Cancel
Save