def _create_instance_action_for_cell0(self, context, instance, exc):
    """Record a failed "create" action, with one failed event, in cell0.

    Three records are written, in this order: the "create" action is
    started, an event named ``conductor_schedule_and_build_instances`` is
    started for that action with ``host=self.host``, and the event is then
    finished as a failure carrying ``exc`` and the current traceback.

    :param context: nova auth RequestContext targeted at cell0
    :param instance: Instance object being buried in cell0
    :param exc: Exception that occurred which resulted in burial
    """
    instance_uuid = instance.uuid
    failed_event = 'conductor_schedule_and_build_instances'

    # Step 1: the action record itself.
    objects.InstanceAction.action_start(
        context, instance_uuid, instance_actions.CREATE,
        want_result=False)

    # Step 2: an event attached to the action record created above.
    objects.InstanceActionEvent.event_start(
        context, instance_uuid, failed_event, want_result=False,
        host=self.host)

    # Step 3: close the event as failed. The caller (_bury_in_cell0) is
    # expected to be running inside an exception handler, in which case
    # sys.exc_info() supplies the traceback. Should that not hold, the
    # traceback is simply None; recording it is best effort only.
    traceback = sys.exc_info()[2]
    objects.InstanceActionEvent.event_finish_with_failure(
        context, instance_uuid, failed_event, exc_val=exc,
        exc_tb=traceback, want_result=False)
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from nova.compute import instance_actions
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture
from nova import utils


class TestInstanceActionBuryInCell0(test.TestCase,
                                    integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1852458.

    Starting in Ocata, an instance buried in cell0 did not get a "create"
    instance action event. This test asserts that the action and its
    failed event are recorded.
    """

    def setUp(self):
        """Install the fixtures and start the controller services.

        No compute service is started, so scheduling in the test fails.
        """
        super(TestInstanceActionBuryInCell0, self).setUp()

        # Common fixtures: image service stub, placement and neutron.
        fake_image.stub_out_image_service(self)
        self.addCleanup(fake_image.FakeImageService_reset)
        self.useFixture(func_fixtures.PlacementFixture())
        self.useFixture(nova_fixtures.NeutronFixture(self))

        # Relax policy so that non-admins can see instance action events;
        # the test relies on this to inspect the event traceback.
        event_rules = {
            'os_compute_api:os-instance-actions:events':
                'rule:admin_or_owner',
        }
        real_policy = self.useFixture(policy_fixture.RealPolicyFixture())
        real_policy.set_rules(event_rules, overwrite=False)

        # Controller services only.
        for service_name in ('conductor', 'scheduler'):
            self.start_service(service_name)

        api_fixture = self.useFixture(
            nova_fixtures.OSAPIFixture(api_version='v2.1'))
        self.api = api_fixture.api

    def test_bury_in_cell0_instance_create_action(self):
        """Create a server that fails scheduling and check its action.

        There is no compute service, so scheduling fails and the instance
        is created (buried) in cell0. A single failed "create" action with
        a single failed event is expected.
        """
        create_request = self._build_minimal_create_server_request(
            self.api, 'test_bury_in_cell0_instance_create_action',
            image_uuid=fake_image.get_valid_image_id(),
            networks='none')

        # Microversion 2.37 is used so that the server can be created
        # without any networking.
        with utils.temporary_mutation(self.api, microversion='2.37'):
            created = self.api.post_server({'server': create_request})

        # Expect ERROR status together with a NoValidHost fault.
        server = self._wait_for_state_change(self.api, created, 'ERROR')
        self.assertIn('fault', server)
        fault_message = server['fault']['message']
        self.assertIn('No valid host', fault_message)
        self.assertEqual('', server['hostId'])

        # Exactly one action: the failed "create".
        server_id = server['id']
        actions = self.api.get_instance_actions(server_id)
        self.assertEqual(1, len(actions), actions)
        create_action = actions[0]
        self.assertEqual(instance_actions.CREATE, create_action['action'])
        self.assertEqual('Error', create_action['message'])

        # Fetch the action details; exactly one event with an Error result
        # is expected.
        detail_url = ('/servers/%s/os-instance-actions/%s' %
                      (server_id, create_action['request_id']))
        action_detail = self.api.api_get(detail_url).body['instanceAction']
        events = action_detail['events']
        self.assertEqual(1, len(events), events)
        failed_event = events[0]
        self.assertEqual('conductor_schedule_and_build_instances',
                         failed_event['event'])
        self.assertEqual('Error', failed_event['result'])

        # Non-admins normally cannot see the event traceback; the policy
        # override in setUp exposes it, so check that one was recorded.
        self.assertIn('select_destinations', failed_event['traceback'])