Use fair locks in resource tracker

When the resource tracker has to lock a compute host for updates or
inspection, it uses a single semaphore. In most cases this is fine, as
a compute process is only tracking one hypervisor. With Ironic, however,
it is possible for one compute process to track many hypervisors. In
that case, wait queues for instance claims can get "stuck" briefly
behind longer processing loops such as the update_available_resource
periodic task. This is possible because the oslo.lockutils synchronized
decorator does not use fair locks by default: when a lock is released,
one of the waiting threads is woken in arbitrary order and takes the
lock. A fair lock instead hands the lock to waiters in the order they
requested it, so the thread that has been waiting longest goes next.
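
As an illustrative sketch only (assuming oslo.concurrency >= 3.29.0; the
lock name and function below are invented for the example, not taken from
the change), a fair lock is requested by passing fair=True to the
synchronized decorator:

    from oslo_concurrency import lockutils

    # With fair=True, waiting threads acquire the lock in the order they
    # requested it, so a short instance claim cannot be starved behind a
    # long-running loop that keeps re-acquiring the same lock.
    @lockutils.synchronized('compute_resources', fair=True)
    def claim(instance_uuid):
        ...  # perform the claim while holding the fair lock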

This should ensure that instance claim requests no longer risk losing
the lock contest, which in turn should keep instance build requests from
queueing unnecessarily behind long-running tasks.

This includes bumping the oslo.concurrency dependency; fair locks were
added in 3.29.0 (I37577becff4978bf643c65fa9bc2d78d342ea35a).
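
For operators checking an existing environment, one way to confirm the
installed library is new enough (purely illustrative, not part of this
change):

    import pkg_resources

    # fair=True support was added to lockutils.synchronized in 3.29.0.
    version = pkg_resources.get_distribution('oslo.concurrency').version
    print(version)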

Change-Id: Ia5e521e0f0c7a78b5ace5de9f343e84d872553f9
Related-Bug: #1864122
Author: Jason Anderson
Date:   2020-02-27 10:37:34 -06:00
Parent: f2d088b04e
Commit: 1ed9f9dac5
3 changed files with 14 additions and 14 deletions

lower-constraints.txt

@@ -72,7 +72,7 @@ os-win==3.0.0
os-xenapi==0.3.3
osc-lib==1.10.0
oslo.cache==1.26.0
-oslo.concurrency==3.26.0
+oslo.concurrency==3.29.0
oslo.config==6.1.0
oslo.context==2.21.0
oslo.db==4.44.0

nova/compute/resource_tracker.py

@@ -112,7 +112,7 @@ class ResourceTracker(object):
self.assigned_resources = collections.defaultdict(
lambda: collections.defaultdict(set))
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def instance_claim(self, context, instance, nodename, allocations,
limits=None):
"""Indicate that some resources are needed for an upcoming compute
@@ -186,7 +186,7 @@ class ResourceTracker(object):
return claim
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def rebuild_claim(self, context, instance, nodename, allocations,
limits=None, image_meta=None, migration=None):
"""Create a claim for a rebuild operation."""
@@ -195,7 +195,7 @@ class ResourceTracker(object):
migration, allocations, move_type='evacuation',
limits=limits, image_meta=image_meta)
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def resize_claim(self, context, instance, instance_type, nodename,
migration, allocations, image_meta=None, limits=None):
"""Create a claim for a resize or cold-migration move.
@@ -207,7 +207,7 @@ class ResourceTracker(object):
migration, allocations, image_meta=image_meta,
limits=limits)
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def live_migration_claim(self, context, instance, nodename, migration,
limits):
"""Builds a MoveClaim for a live migration.
@@ -515,7 +515,7 @@ class ResourceTracker(object):
instance.node = None
instance.save()
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def abort_instance_claim(self, context, instance, nodename):
"""Remove usage from the given instance."""
self._update_usage_from_instance(context, instance, nodename,
@@ -538,7 +538,7 @@ class ResourceTracker(object):
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
self.compute_nodes[nodename].pci_device_pools = dev_pools_obj
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def drop_move_claim(self, context, instance, nodename,
instance_type=None, prefix='new_'):
"""Remove usage for an incoming/outgoing migration.
@@ -587,7 +587,7 @@ class ResourceTracker(object):
ctxt = context.elevated()
self._update(ctxt, self.compute_nodes[nodename])
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def update_usage(self, context, instance, nodename):
"""Update the resource usage and stats after a change in an
instance
@@ -858,7 +858,7 @@ class ResourceTracker(object):
'another host\'s instance!',
{'uuid': migration.instance_uuid})
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def _update_available_resource(self, context, resources, startup=False):
# initialize the compute node object, creating it
@@ -1705,7 +1705,7 @@ class ResourceTracker(object):
"""Resets the failed_builds stats for the given node."""
self.stats[nodename].build_succeeded()
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def claim_pci_devices(self, context, pci_requests):
"""Claim instance PCI resources
@@ -1718,7 +1718,7 @@ class ResourceTracker(object):
self.pci_tracker.save(context)
return result
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def allocate_pci_devices_for_instance(self, context, instance):
"""Allocate instance claimed PCI resources
@@ -1728,7 +1728,7 @@ class ResourceTracker(object):
self.pci_tracker.allocate_instance(instance)
self.pci_tracker.save(context)
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def free_pci_device_allocations_for_instance(self, context, instance):
"""Free instance allocated PCI resources
@@ -1738,7 +1738,7 @@ class ResourceTracker(object):
self.pci_tracker.free_instance_allocations(context, instance)
self.pci_tracker.save(context)
-@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
+@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def free_pci_device_claims_for_instance(self, context, instance):
"""Free instance claimed PCI resources

requirements.txt

@@ -34,7 +34,7 @@ six>=1.10.0 # MIT
stevedore>=1.20.0 # Apache-2.0
websockify>=0.9.0 # LGPLv3
oslo.cache>=1.26.0 # Apache-2.0
-oslo.concurrency>=3.26.0 # Apache-2.0
+oslo.concurrency>=3.29.0 # Apache-2.0
oslo.config>=6.1.0 # Apache-2.0
oslo.context>=2.21.0 # Apache-2.0
oslo.log>=3.36.0 # Apache-2.0