Handle a failure on communicate()

After much debugging of the subject bug, we've concluded that a
failure and an exception of OSError with EAGAIN from communicate()
indicates some monkey business. In the specific case that we're seeing
in Trove, it is because monkey patching didn't quite get done the way
it should.

In any event, retrying isn't the answer.  This change reverts an
earlier change that retried the communicate() call and instead hurls
the OSError exception back to the caller, if we are out of retries. A
test case to fake the communicate() failure has been amended to ensure
that we handle this in what we believe now to be the right way.

Originally-Submitted-In: I2450d2dc425a3d29eaba4d5ff9badc4a992a0ec8

Change-Id: Ie36d77eebb2c20124a61ff6029c3b97bfffb9427
Closes-Bug: #1347337
This commit is contained in:
Amrith Kumar 2014-08-14 01:21:47 -04:00
parent 37b90eda3e
commit 978e781a9c
2 changed files with 31 additions and 30 deletions

View File

@ -17,7 +17,6 @@
System-level utilities and helper functions.
"""
import errno
import logging
import multiprocessing
import os
@ -120,6 +119,7 @@ def execute(*cmd, **kwargs):
:raises: :class:`UnknownArgumentError` on
receiving unknown arguments
:raises: :class:`ProcessExecutionError`
:raises: :class:`OSError`
"""
process_input = kwargs.pop('process_input', None)
@ -173,20 +173,9 @@ def execute(*cmd, **kwargs):
preexec_fn=preexec_fn,
shell=shell,
env=env_variables)
result = None
for _i in six.moves.range(20):
# NOTE(russellb) 20 is an arbitrary number of retries to
# prevent any chance of looping forever here.
try:
if process_input is not None:
result = obj.communicate(process_input)
else:
result = obj.communicate()
except OSError as e:
if e.errno in (errno.EAGAIN, errno.EINTR):
continue
raise
break
result = obj.communicate(process_input)
obj.stdin.close() # pylint: disable=E1101
_returncode = obj.returncode # pylint: disable=E1101
LOG.log(loglevel, 'Result was %s' % _returncode)
@ -197,7 +186,7 @@ def execute(*cmd, **kwargs):
stderr=stderr,
cmd=' '.join(cmd))
return result
except ProcessExecutionError:
except (ProcessExecutionError, OSError):
if not attempts:
raise
else:

View File

@ -25,6 +25,7 @@ import mock
from oslotest import base as test_base
import six
from oslo.concurrency.openstack.common.fixture import mockpatch
from oslo.concurrency import processutils
@ -192,23 +193,34 @@ grep foo
os.unlink(tmpfilename)
os.unlink(tmpfilename2)
# This test and the one below ensures that when communicate raises
# an OSError, we do the right thing(s)
def test_exception_on_communicate_error(self):
mock = self.useFixture(mockpatch.Patch(
'subprocess.Popen.communicate',
side_effect=OSError(errno.EAGAIN, 'fake-test')))
self.assertRaises(OSError,
processutils.execute,
'/usr/bin/env',
'false',
check_exit_code=False)
self.assertEqual(1, mock.mock.call_count)
def test_retry_on_communicate_error(self):
self.called = False
mock = self.useFixture(mockpatch.Patch(
'subprocess.Popen.communicate',
side_effect=OSError(errno.EAGAIN, 'fake-test')))
def fake_communicate(*args, **kwargs):
if self.called:
return ('', '')
self.called = True
e = OSError('foo')
e.errno = errno.EAGAIN
raise e
self.assertRaises(OSError,
processutils.execute,
'/usr/bin/env',
'false',
check_exit_code=False,
attempts=5)
self.useFixture(fixtures.MonkeyPatch(
'subprocess.Popen.communicate', fake_communicate))
processutils.execute('/usr/bin/env', 'true', check_exit_code=False)
self.assertTrue(self.called)
self.assertEqual(5, mock.mock.call_count)
def test_with_env_variables(self):
env_vars = {'SUPER_UNIQUE_VAR': 'The answer is 42'}