Create instance action when burying in cell0

Change I8742071b55f018f864f5a382de20075a5b444a79 in Ocata
moved the creation of the instance record from the API to
conductor. As a result, the "create" instance action was
only being created in conductor when the instance is created
in a non-cell0 database. This is a regression: before that
change, when a server create failed during scheduling, you
could still list instance actions for the server and see the
"create" action, but that was lost once we started burying
those instances in cell0.

This fixes the bug by creating the "create" action in the cell0
database when burying an instance there. It goes a step further
and also creates and finishes an event so the overall action
message shows up as "Error" with the details about where the
failure occurred in the event traceback.

A short release note is added since a new action event is
added here (conductor_schedule_and_build_instances) rather than
re-use some kind of event that we could generate from the
compute service, e.g. compute__do_build_and_run_instance.

Change-Id: I1e9431e739adfbcfc1ca34b87e826a516a4b18e2
Closes-Bug: #1852458
(cherry picked from commit f2608c9117)
(cherry picked from commit 6484d9ff5b)
This commit is contained in:
Matt Riedemann 2019-11-13 15:03:27 -05:00 committed by Stephen Finucane
parent 836da35b2b
commit 7ff71e6b87
3 changed files with 120 additions and 0 deletions

View File

@ -1241,6 +1241,29 @@ class ComputeTaskManager(base.Base):
else:
return tags
def _create_instance_action_for_cell0(self, context, instance, exc):
    """Record a failed "create" action for an instance buried in cell0.

    Three records are written in order: the "create" instance action, a
    ``conductor_schedule_and_build_instances`` event started on this
    conductor host, and the failure that finishes that event.

    :param context: nova auth RequestContext targeted at cell0
    :param instance: Instance object being buried in cell0
    :param exc: Exception that occurred which resulted in burial
    """
    instance_uuid = instance.uuid
    failed_event = 'conductor_schedule_and_build_instances'

    # The action record comes first; the event below hangs off of it.
    objects.InstanceAction.action_start(
        context, instance_uuid, instance_actions.CREATE,
        want_result=False)

    # Open the event for that action, attributed to this host.
    objects.InstanceActionEvent.event_start(
        context, instance_uuid, failed_event, want_result=False,
        host=self.host)

    # Close the event as a failure. The caller (_bury_in_cell0) is expected
    # to be running inside an exception handler, so sys.exc_info() should
    # supply a traceback; if it does not, that is acceptable because this
    # recording is best effort.
    current_tb = sys.exc_info()[2]
    objects.InstanceActionEvent.event_finish_with_failure(
        context, instance_uuid, failed_event, exc_val=exc,
        exc_tb=current_tb, want_result=False)
def _bury_in_cell0(self, context, request_spec, exc,
build_requests=None, instances=None,
block_device_mapping=None,
@ -1281,6 +1304,10 @@ class ComputeTaskManager(base.Base):
with obj_target_cell(instance, cell0) as cctxt:
instance.create()
# Record an instance action with a failed event.
self._create_instance_action_for_cell0(
cctxt, instance, exc)
# NOTE(mnaser): In order to properly clean-up volumes after
# being buried in cell0, we need to store BDMs.
if block_device_mapping:

View File

@ -0,0 +1,82 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nova.compute import instance_actions
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture
from nova import utils
class TestInstanceActionBuryInCell0(test.TestCase,
                                    integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1852458.

    Starting in Ocata the "create" instance action event was not being
    created for instances buried in cell0.
    """

    def setUp(self):
        super(TestInstanceActionBuryInCell0, self).setUp()

        # Common fixtures: fake image service, placement and neutron.
        fake_image.stub_out_image_service(self)
        self.addCleanup(fake_image.FakeImageService_reset)
        self.useFixture(func_fixtures.PlacementFixture())
        self.useFixture(nova_fixtures.NeutronFixture(self))

        # Loosen policy so non-admins can see instance action events.
        events_rule = {
            'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner'
        }
        real_policy = self.useFixture(policy_fixture.RealPolicyFixture())
        real_policy.set_rules(events_rule, overwrite=False)

        # Controller services only; no compute service is started, which is
        # what makes scheduling fail in the test below.
        for service_name in ('conductor', 'scheduler'):
            self.start_service(service_name)

        api_fixture = self.useFixture(
            nova_fixtures.OSAPIFixture(api_version='v2.1'))
        self.api = api_fixture.api

    def test_bury_in_cell0_instance_create_action(self):
        """Create a server that fails scheduling and check its action.

        With no compute service running the server cannot be scheduled, so
        the instance is created (buried) in cell0. A failed "create" action
        with a single failed event must be recorded for it.
        """
        request_body = self._build_minimal_create_server_request(
            self.api, 'test_bury_in_cell0_instance_create_action',
            image_uuid=fake_image.get_valid_image_id(),
            networks='none')

        # Microversion 2.37 allows creating a server without any networking.
        with utils.temporary_mutation(self.api, microversion='2.37'):
            created = self.api.post_server({'server': request_body})

        # Expect ERROR status with a NoValidHost fault and no host assigned.
        server = self._wait_for_state_change(self.api, created, 'ERROR')
        self.assertIn('fault', server)
        self.assertIn('No valid host', server['fault']['message'])
        self.assertEqual('', server['hostId'])

        # Exactly one action should exist: a failed "create".
        actions = self.api.get_instance_actions(server['id'])
        self.assertEqual(1, len(actions), actions)
        create_action = actions[0]
        self.assertEqual(instance_actions.CREATE, create_action['action'])
        self.assertEqual('Error', create_action['message'])

        # Fetch the action details to inspect its events; there should be
        # exactly one event and it should carry an Error result.
        details_url = ('/servers/%s/os-instance-actions/%s' %
                       (server['id'], create_action['request_id']))
        action_details = self.api.api_get(details_url).body['instanceAction']
        events = action_details['events']
        self.assertEqual(1, len(events), events)
        failed_event = events[0]
        self.assertEqual('conductor_schedule_and_build_instances',
                         failed_event['event'])
        self.assertEqual('Error', failed_event['result'])

        # Non-admins normally cannot see the event traceback, but setUp
        # enabled that via policy, so assert something was recorded.
        self.assertIn('select_destinations', failed_event['traceback'])

View File

@ -0,0 +1,11 @@
---
fixes:
- |
This release contains a fix for a `regression`__ introduced in 15.0.0
(Ocata) where a server create that failed during scheduling would not
result in an instance action record being created in the cell0 database.
Now when a server create fails during scheduling and the instance is
"buried" in cell0, a ``create`` action will be created with an event named
``conductor_schedule_and_build_instances``.
.. __: https://bugs.launchpad.net/nova/+bug/1852458