Handle a failure on communicate()

After much debugging of the subject bug, we've concluded that a failure and an exception of OSError with EAGAIN from communicate() indicates some monkey business. In the specific case that we're seeing in Trove, it is because monkey patching didn't quite get done the way it should. In any event, retrying isn't the answer. This change reverts an earlier change that retried the communicate() call and instead hurls the OSError exception back to the caller, if we are out of retries. A test case to fake the communicate() failure has been amended to ensure that we handle this in what we believe now to be the right way. Originally-Submitted-In: I2450d2dc425a3d29eaba4d5ff9badc4a992a0ec8 Change-Id: Ie36d77eebb2c20124a61ff6029c3b97bfffb9427 Closes-Bug: #1347337
2014-08-14 01:21:47 -04:00 · 2014-08-14 01:21:47 -04:00 · 978e781a9c
parent 37b90eda3e
commit 978e781a9c
2 changed files with 31 additions and 30 deletions
--- a/oslo/concurrency/processutils.py
+++ b/oslo/concurrency/processutils.py
@ -17,7 +17,6 @@
 System-level utilities and helper functions.
 """

-import errno
 import logging
 import multiprocessing
 import os
@ -120,6 +119,7 @@ def execute(*cmd, **kwargs):
    :raises:                :class:`UnknownArgumentError` on
                            receiving unknown arguments
    :raises:                :class:`ProcessExecutionError`
+    :raises:                :class:`OSError`
    """

    process_input = kwargs.pop('process_input', None)
@ -173,20 +173,9 @@ def execute(*cmd, **kwargs):
                                   preexec_fn=preexec_fn,
                                   shell=shell,
                                   env=env_variables)
-            result = None
-            for _i in six.moves.range(20):
-                # NOTE(russellb) 20 is an arbitrary number of retries to
-                # prevent any chance of looping forever here.
-                try:
-                    if process_input is not None:
-                        result = obj.communicate(process_input)
-                    else:
-                        result = obj.communicate()
-                except OSError as e:
-                    if e.errno in (errno.EAGAIN, errno.EINTR):
-                        continue
-                    raise
-                break
+
+            result = obj.communicate(process_input)
+
            obj.stdin.close()  # pylint: disable=E1101
            _returncode = obj.returncode  # pylint: disable=E1101
            LOG.log(loglevel, 'Result was %s' % _returncode)
@ -197,7 +186,7 @@ def execute(*cmd, **kwargs):
                                            stderr=stderr,
                                            cmd=' '.join(cmd))
            return result
-        except ProcessExecutionError:
+        except (ProcessExecutionError, OSError):
            if not attempts:
                raise
            else:
--- a/tests/unit/test_processutils.py
+++ b/tests/unit/test_processutils.py
@ -25,6 +25,7 @@ import mock
 from oslotest import base as test_base
 import six

+from oslo.concurrency.openstack.common.fixture import mockpatch
 from oslo.concurrency import processutils


@ -192,23 +193,34 @@ grep foo
            os.unlink(tmpfilename)
            os.unlink(tmpfilename2)

+    # This test and the one below ensures that when communicate raises
+    # an OSError, we do the right thing(s)
+    def test_exception_on_communicate_error(self):
+        mock = self.useFixture(mockpatch.Patch(
+            'subprocess.Popen.communicate',
+            side_effect=OSError(errno.EAGAIN, 'fake-test')))
+
+        self.assertRaises(OSError,
+                          processutils.execute,
+                          '/usr/bin/env',
+                          'false',
+                          check_exit_code=False)
+
+        self.assertEqual(1, mock.mock.call_count)
+
    def test_retry_on_communicate_error(self):
-        self.called = False
+        mock = self.useFixture(mockpatch.Patch(
+            'subprocess.Popen.communicate',
+            side_effect=OSError(errno.EAGAIN, 'fake-test')))

-        def fake_communicate(*args, **kwargs):
-            if self.called:
-                return ('', '')
-            self.called = True
-            e = OSError('foo')
-            e.errno = errno.EAGAIN
-            raise e
+        self.assertRaises(OSError,
+                          processutils.execute,
+                          '/usr/bin/env',
+                          'false',
+                          check_exit_code=False,
+                          attempts=5)

-        self.useFixture(fixtures.MonkeyPatch(
-            'subprocess.Popen.communicate', fake_communicate))
-
-        processutils.execute('/usr/bin/env', 'true', check_exit_code=False)
-
-        self.assertTrue(self.called)
+        self.assertEqual(5, mock.mock.call_count)

    def test_with_env_variables(self):
        env_vars = {'SUPER_UNIQUE_VAR': 'The answer is 42'}