Add a way to exit early from a wait_for_instance_event()

In some situations, we may enter into a race-safe wait context for an
external event and then determine within the context that the event
would have already fired or for some other reason waiting is not required.
In that case, this introduces a VirtAPI method to abort the wait for
specific events in a clean way.

This should be used sparingly and only in certain circumstances where
it is clear that the event has already occurred. For example, if the
action triggering the event was initiated outside of the wait context,
we can enter the wait context (establishing a waiting event to capture),
and then poll once for completion of that thing. If incomplete, we will
block and wait on context exit as usual. If the thing we are waiting for
has already completed, then we can avoid the long wait by calling the
early-exit helper and avoid what would ultimately be a timeout and
failure.

We are specifically adding this to address the cyborg scenario as follows:

Once we select a host in the conductor for an instance build, we can call
to cyborg to start the preparation (i.e. programming) of the device on the
destination host. This takes time, and by kicking it off early, we can
overlap that process with other work we have to do. Because that will send
an event upon completion, which will be routed to the compute, we will race
to start the build process on the compute and begin the race-safe event
wait in the compute manager. Instead of collapsing the window and waiting
synchronously, we can trigger it early, then start the wait in the compute
later, check for early completion *after* the wait has been set up, and then
exit the wait early if we determine that the event had already been sent.
This allows us to be async with cyborg across two services while still being
safe in the critical region where we need to setup the wait before we rely
on the event being sent.

Change-Id: I8e5a0683069b2f322cb059d6eb501d732d1bd851
This commit is contained in:
Dan Smith 2019-11-25 13:35:44 -08:00
parent ee25a82c2c
commit 56d3cd7aa7
4 changed files with 77 additions and 3 deletions

View File

@ -418,6 +418,22 @@ class ComputeVirtAPI(virtapi.VirtAPI):
self._compute = compute
self.reportclient = compute.reportclient
class ExitEarly(Exception):
def __init__(self, events):
super(Exception, self).__init__()
self.events = events
self._exit_early_exc = ExitEarly
def exit_wait_early(self, events):
"""Exit a wait_for_instance_event() immediately and avoid
waiting for some events.
:param: events: A list of (name, tag) tuples for events that we should
skip waiting for during a wait_for_instance_event().
"""
raise self._exit_early_exc(events=events)
def _default_error_callback(self, event_name, instance):
raise exception.NovaException(_('Instance event failed'))
@ -445,6 +461,17 @@ class ComputeVirtAPI(virtapi.VirtAPI):
waiting for the rest of the events, False to stop processing,
or raise an exception which will bubble up to the waiter.
If the inner code wishes to abort waiting for one or more
events because it knows some state to be finished or condition
to be satisfied, it can use VirtAPI.exit_wait_early() with a
list of event (name,tag) items to avoid waiting for those
events upon context exit. Note that exit_wait_early() exits
the context immediately and should be used to signal that all
work has been completed and provide the unified list of events
that need not be waited for. Waiting for the remaining events
will begin immediately upon early exit as if the context was
exited normally.
:param instance: The instance for which an event is expected
:param event_names: A list of event names. Each element is a
tuple of strings to indicate (name, tag),
@ -471,12 +498,25 @@ class ComputeVirtAPI(virtapi.VirtAPI):
# should all be canceled and fired immediately below,
# but don't stick around if not.
deadline = 0
yield
try:
yield
except self._exit_early_exc as e:
early_events = set([objects.InstanceExternalEvent.make_key(n, t)
for n, t in e.events])
else:
early_events = []
with eventlet.timeout.Timeout(deadline):
for event_name, event in events.items():
actual_event = event.wait()
if actual_event.status == 'completed':
if event_name in early_events:
continue
else:
actual_event = event.wait()
if actual_event.status == 'completed':
continue
# If we get here, we have an event that was not completed,
# nor skipped via exit_wait_early(). Decide whether to
# keep waiting by calling the error_callback() hook.
decision = error_callback(event_name, instance)
if decision is False:
break

View File

@ -48,6 +48,9 @@ class VirtAPIBaseTest(test.NoDBTestCase, test.APICoverage):
self.assertExpected('wait_for_instance_event',
'instance', ['event'])
def test_exit_wait_early(self):
self.assertExpected('exit_wait_early', [])
def test_update_compute_provider_status(self):
self.assertExpected('update_compute_provider_status',
nova_context.get_admin_context(), uuids.rp_uuid,
@ -67,6 +70,8 @@ class FakeVirtAPITest(VirtAPIBaseTest):
with self.virtapi.wait_for_instance_event(*args, **kwargs):
run = True
self.assertTrue(run)
elif method == 'exit_wait_early':
self.virtapi.exit_wait_early(*args, **kwargs)
elif method == 'update_compute_provider_status':
self.virtapi.update_compute_provider_status(*args, **kwargs)
else:
@ -120,6 +125,12 @@ class ComputeVirtAPITest(VirtAPIBaseTest):
self.compute = FakeCompute()
self.virtapi = compute_manager.ComputeVirtAPI(self.compute)
def test_exit_wait_early(self):
self.assertRaises(self.virtapi._exit_early_exc,
self.virtapi.exit_wait_early,
[('foo', 'bar'),
('foo', 'baz')])
def test_wait_for_instance_event(self):
and_i_ran = ''
event_1_tag = objects.InstanceExternalEvent.make_key(
@ -182,6 +193,22 @@ class ComputeVirtAPITest(VirtAPIBaseTest):
self.assertRaises(test.TestingException, do_test)
def test_wait_for_instance_event_exit_early(self):
# Wait for two events, exit early skipping one.
# Make sure we waited for one and did not wait for the other
with self.virtapi.wait_for_instance_event('instance',
[('foo', 'bar'),
('foo', 'baz')]):
self.virtapi.exit_wait_early([('foo', 'baz')])
self.fail('never gonna happen')
self.assertEqual(2, len(self.compute._events))
for event in self.compute._events:
if event.tag == 'bar':
event.wait.assert_called_once_with()
else:
event.wait.assert_not_called()
def test_update_compute_provider_status(self):
"""Tests scenarios for adding/removing the COMPUTE_STATUS_DISABLED
trait on a given compute node resource provider.

View File

@ -666,6 +666,10 @@ class FakeVirtAPI(virtapi.VirtAPI):
# fall through
yield
def exit_wait_early(self, events):
# We never wait, so there is nothing to exit early
pass
def update_compute_provider_status(self, context, rp_uuid, enabled):
pass

View File

@ -21,6 +21,9 @@ class VirtAPI(object):
error_callback=None):
raise NotImplementedError()
def exit_wait_early(self, events):
raise NotImplementedError()
def update_compute_provider_status(self, context, rp_uuid, enabled):
"""Used to add/remove the COMPUTE_STATUS_DISABLED trait on the provider