Add populate_retry to schedule_and_build_instances

When booting an instance fails on the compute node, nova does not retry the boot on another host. This is because https://review.openstack.org/#/c/319379/ changed the create-instance workflow to call schedule_and_build_instances, which does not populate the retry information into the filter properties, so nova does not retry when the boot fails on a compute node. This patch populates the retry information in the filter properties when schedule_and_build_instances is called. Conflicts: nova/tests/functional/regressions/test_bug_1671648.py NOTE(mriedem): The conflict is due to putting the functional test fix before this bug fix in the backport series. Change-Id: Ifdaddcd265a7fe8282499e27043936f8212610ad Closes-Bug: #1671648 (cherry picked from commit cb4ce72f5f)
2017-03-10 14:05:57 +08:00 · 2017-03-10 14:05:57 +08:00 · 4e3be434bd
commit 4e3be434bd
parent 6304edf608
3 changed files with 11 additions and 12 deletions
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -875,9 +875,10 @@ class ComputeTaskManager(base.Base):
        for (build_request, request_spec, host) in six.moves.zip(
                build_requests, request_specs, hosts):
            filter_props = request_spec.to_legacy_filter_properties_dict()
+            instance = build_request.get_new_instance(context)
+            scheduler_utils.populate_retry(filter_props, instance.uuid)
            scheduler_utils.populate_filter_properties(filter_props,
                                                       host)
-            instance = build_request.get_new_instance(context)

            # Convert host from the scheduler into a cell record
            if host['host'] not in host_mapping_cache:
--- a/nova/tests/functional/regressions/test_bug_1671648.py
+++ b/nova/tests/functional/regressions/test_bug_1671648.py
@@ -136,22 +136,19 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase):
        scheduler picks the first host which we mock out to fail the claim.
        This should then trigger a retry to the second host.
        """
-        # Create the server which we expect to go to ERROR state because
-        # of the regression bug. Once the bug is fixed, we should assert
-        # that the server goes to ACTIVE status and is on the second host
-        # after the retry operation.
+        # Now that the bug is fixed, we should assert that the server goes to
+        # ACTIVE status and is on the second host after the retry operation.
        server = dict(
            name='retry-test',
            imageRef=self.image_id,
            flavorRef=self.flavor_id)
        server = self.admin_api.post_server({'server': server})
        self.addCleanup(self.admin_api.delete_server, server['id'])
-        server = self._wait_for_instance_status(server['id'], 'ERROR')
+        server = self._wait_for_instance_status(server['id'], 'ACTIVE')

-        # Assert that there is no host for the failed server. This should
-        # assert that the host is not the failed host once the bug is fixed.
-        self.assertIsNone(server['OS-EXT-SRV-ATTR:host'])
+        # Assert that the host is not the failed host.
+        self.assertNotEqual(self.failed_host,
+                            server['OS-EXT-SRV-ATTR:host'])

-        # Assert that we did not retry. Once the bug is fixed, this should
-        # be equal to 2.
-        self.assertEqual(1, self.attempts)
+        # Assert that we retried.
+        self.assertEqual(2, self.attempts)
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -1442,6 +1442,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
        def _build_and_run_instance(ctxt, *args, **kwargs):
            details['instance'] = kwargs['instance']
            self.assertTrue(kwargs['instance'].id)
+            self.assertTrue(kwargs['filter_properties'].get('retry'))
            self.assertEqual(1, len(kwargs['block_device_mapping']))
            # FIXME(danms): How to validate the db connection here?