Browse Source

Fix race in execution finishing

An execution state can go from RUNNING to SUCCESS between fetching the
last message from the websocket and polling the execution state. This
means the SUCCESS payload is never returned and the overcloud
deployment fails at the end with no indication as to why.

This change yields the output of the finished execution as the last
payload, allowing the caller's SUCCESS logic to run.

Change-Id: Ic22021ba9a2717de199629e361c656e2f562fb38
Closes-Bug: #1842987
(cherry picked from commit b5b5cab61d)
changes/43/681243/1
Steve Baker 2 weeks ago
parent
commit
b0c714e322
2 changed files with 37 additions and 2 deletions
  1. 31
    0
      tripleoclient/tests/workflows/test_base.py
  2. 6
    2
      tripleoclient/workflows/base.py

+ 31
- 0
tripleoclient/tests/workflows/test_base.py View File

@@ -12,6 +12,7 @@
12 12
 # License for the specific language governing permissions and limitations
13 13
 # under the License.
14 14
 
15
+import json
15 16
 import mock
16 17
 
17 18
 from osc_lib.tests import utils
@@ -89,6 +90,7 @@ class TestBaseWorkflows(utils.TestCommand):
89 90
         }
90 91
 
91 92
         mistral = mock.Mock()
93
+        mistral.executions.get.return_value.state = 'RUNNING'
92 94
         websocket = mock.Mock()
93 95
         websocket.wait_for_messages.return_value = iter([payload_a, payload_b])
94 96
         execution = mock.Mock()
@@ -139,6 +141,7 @@ class TestBaseWorkflows(utils.TestCommand):
139 141
         }
140 142
 
141 143
         mistral = mock.Mock()
144
+        mistral.executions.get.return_value.state = 'RUNNING'
142 145
         websocket = mock.Mock()
143 146
         websocket.wait_for_messages.return_value = iter([payload_a, payload_b])
144 147
         execution = mock.Mock()
@@ -172,3 +175,31 @@ class TestBaseWorkflows(utils.TestCommand):
172 175
 
173 176
         self.assertRaises(ex.WorkflowActionError,
174 177
                           base.call_action, mistral, action)
178
+
179
+    def test_wait_for_messages_execution_complete(self):
180
+        payload_a = {
181
+            'status': 'RUNNING',
182
+            'execution_id': 'aaaa',
183
+            'root_execution_id': 'aaaa'
184
+        }
185
+        payload_b = {
186
+            'status': 'SUCCESS',
187
+            'execution_id': 'aaaa',
188
+            'root_execution_id': 'aaaa'
189
+        }
190
+
191
+        mistral = mock.Mock()
192
+        mistral.executions.get.return_value.state = 'SUCCESS'
193
+        mistral.executions.get.return_value.output = json.dumps(payload_b)
194
+        websocket = mock.Mock()
195
+        websocket.wait_for_messages.return_value = iter([payload_a])
196
+        execution = mock.Mock()
197
+        execution.id = 'aaaa'
198
+
199
+        messages = list(base.wait_for_messages(mistral, websocket, execution))
200
+
201
+        # Assert payload_a and the recovered payload_b were both returned
202
+        self.assertEqual([payload_a, payload_b], messages)
203
+        mistral.executions.get.assert_called_with('aaaa')
204
+
205
+        websocket.wait_for_messages.assert_called_with(timeout=None)

+ 6
- 2
tripleoclient/workflows/base.py View File

@@ -91,8 +91,12 @@ def wait_for_messages(mistral, websocket, execution, timeout=None):
91 91
             # default to running and assume it is just an "in progress"
92 92
             # message from the workflow.
93 93
             # Workflows should end with SUCCESS or ERROR statuses.
94
-            if payload.get('status', 'RUNNING') != "RUNNING" or \
95
-                    mistral.executions.get(execution.id).state != "RUNNING":
94
+            if payload.get('status', 'RUNNING') != "RUNNING":
95
+                return
96
+            execution = mistral.executions.get(execution.id)
97
+            if execution.state != "RUNNING":
98
+                # yield the output as the last payload which was missed
99
+                yield json.loads(execution.output)
96 100
                 return
97 101
     except (exceptions.WebSocketTimeout, exceptions.WebSocketConnectionClosed):
98 102
         check_execution_status(mistral, execution.id)

Loading…
Cancel
Save