Browse Source

Do not wait for console being started on timeout

Current logic tries to call communicate on Popen object when
starting shellinabox console even if console.subprocess_timeout
is hit. This blocks until the command is finished, with timeout
having no effect on it. It means that node lock is not released
until communicate call finishes.

Story: 2006429
Task: 36333
Change-Id: Ic7b878cd432990218a3d87047905ce0828b8da2a
(cherry picked from commit 1ad73338b8)
changes/25/679025/1
Vladyslav Drok 4 weeks ago
parent
commit
2f3b5e929b

+ 14
- 9
ironic/drivers/modules/console_utils.py View File

@@ -209,6 +209,12 @@ def start_shellinabox_console(node_uuid, port, console_cmd):
209 209
         LOG.warning(error)
210 210
         raise exception.ConsoleSubprocessFailed(error=error)
211 211
 
212
+    error_message = _(
213
+        "Timeout or error while waiting for console subprocess to start for "
214
+        "node: %(node)s.\nCommand: %(command)s.\n") % {
215
+            'node': node_uuid,
216
+            'command': ' '.join(args)}
217
+
212 218
     def _wait(node_uuid, popen_obj):
213 219
         locals['returncode'] = popen_obj.poll()
214 220
 
@@ -216,24 +222,22 @@ def start_shellinabox_console(node_uuid, port, console_cmd):
216 222
         # if it is, then the shellinaboxd is invoked successfully as a daemon.
217 223
         # otherwise check the error.
218 224
         if (locals['returncode'] == 0 and os.path.exists(pid_file)
219
-            and psutil.pid_exists(_get_console_pid(node_uuid))):
225
+                and psutil.pid_exists(_get_console_pid(node_uuid))):
220 226
             raise loopingcall.LoopingCallDone()
221 227
 
222
-        if (time.time() > expiration or locals['returncode'] is not None):
228
+        if locals['returncode'] is not None:
223 229
             (stdout, stderr) = popen_obj.communicate()
224
-            locals['errstr'] = _(
225
-                "Timeout or error while waiting for console "
226
-                "subprocess to start for node: %(node)s.\n"
227
-                "Command: %(command)s.\n"
230
+            locals['errstr'] = error_message + _(
228 231
                 "Exit code: %(return_code)s.\n"
229 232
                 "Stdout: %(stdout)r\n"
230 233
                 "Stderr: %(stderr)r") % {
231
-                    'node': node_uuid,
232
-                    'command': ' '.join(args),
233 234
                     'return_code': locals['returncode'],
234 235
                     'stdout': stdout,
235 236
                     'stderr': stderr}
236
-            LOG.warning(locals['errstr'])
237
+            raise loopingcall.LoopingCallDone()
238
+
239
+        if time.time() > expiration:
240
+            locals['errstr'] = error_message
237 241
             raise loopingcall.LoopingCallDone()
238 242
 
239 243
     locals = {'returncode': None, 'errstr': ''}
@@ -242,6 +246,7 @@ def start_shellinabox_console(node_uuid, port, console_cmd):
242 246
     timer.start(interval=CONF.console.subprocess_checking_interval).wait()
243 247
 
244 248
     if locals['errstr']:
249
+        LOG.warning(locals['errstr'])
245 250
         raise exception.ConsoleSubprocessFailed(error=locals['errstr'])
246 251
 
247 252
 

+ 27
- 5
ironic/tests/unit/drivers/modules/test_console_utils.py View File

@@ -331,11 +331,32 @@ class ConsoleUtilsTestCase(db_base.DbTestCase):
331 331
         mock_popen.return_value.poll.return_value = 1
332 332
         mock_popen.return_value.communicate.return_value = ('output', 'error')
333 333
 
334
-        self.assertRaises(exception.ConsoleSubprocessFailed,
335
-                          console_utils.start_shellinabox_console,
336
-                          self.info['uuid'],
337
-                          self.info['port'],
338
-                          'ls&')
334
+        self.assertRaisesRegex(
335
+            exception.ConsoleSubprocessFailed, "Stdout: 'output'",
336
+            console_utils.start_shellinabox_console, self.info['uuid'],
337
+            self.info['port'], 'ls&')
338
+
339
+        mock_stop.assert_called_once_with(self.info['uuid'])
340
+        mock_dir_exists.assert_called_once_with()
341
+        mock_popen.assert_called_once_with(mock.ANY,
342
+                                           stdout=subprocess.PIPE,
343
+                                           stderr=subprocess.PIPE)
344
+        mock_popen.return_value.poll.assert_called_with()
345
+
346
+    @mock.patch.object(subprocess, 'Popen', autospec=True)
347
+    @mock.patch.object(console_utils, '_ensure_console_pid_dir_exists',
348
+                       autospec=True)
349
+    @mock.patch.object(console_utils, '_stop_console', autospec=True)
350
+    def test_start_shellinabox_console_timeout(
351
+            self, mock_stop, mock_dir_exists, mock_popen):
352
+        self.config(subprocess_timeout=0, group='console')
353
+        self.config(subprocess_checking_interval=0, group='console')
354
+        mock_popen.return_value.poll.return_value = None
355
+
356
+        self.assertRaisesRegex(
357
+            exception.ConsoleSubprocessFailed, 'Timeout or error',
358
+            console_utils.start_shellinabox_console, self.info['uuid'],
359
+            self.info['port'], 'ls&')
339 360
 
340 361
         mock_stop.assert_called_once_with(self.info['uuid'])
341 362
         mock_dir_exists.assert_called_once_with()
@@ -343,6 +364,7 @@ class ConsoleUtilsTestCase(db_base.DbTestCase):
343 364
                                            stdout=subprocess.PIPE,
344 365
                                            stderr=subprocess.PIPE)
345 366
         mock_popen.return_value.poll.assert_called_with()
367
+        self.assertEqual(0, mock_popen.return_value.communicate.call_count)
346 368
 
347 369
     @mock.patch.object(console_utils, 'open',
348 370
                        mock.mock_open(read_data='12345\n'))

+ 7
- 0
releasenotes/notes/fix-shellinabox-console-subprocess-timeout-d3eccfe0440013d7.yaml View File

@@ -0,0 +1,7 @@
1
+---
2
+fixes:
3
+  - |
4
+    Fixed the `issue <https://storyboard.openstack.org/#!/story/2006429>`_
5
+    with node being locked for longer than ``[console]subprocess_timeout``
6
+    seconds when shellinabox process fails to start before the specifed timeout
7
+    elapses.

Loading…
Cancel
Save