Use fair locks in resource tracker
When the resource tracker has to lock a compute host for updates or inspection, it uses a single semaphore. In most cases, this is fine, as a compute process is only tracking one hypervisor. However, in Ironic, it's possible for one compute process to track many hypervisors. In this case, wait queues for instance claims can get "stuck" briefly behind longer processing loops such as the update_resources periodic job. The reason this is possible is that the oslo.lockutils synchronized library does not use fair locks by default. When a lock is released, one of the threads waiting for the lock is randomly allowed to take the lock next. A fair lock ensures that the thread that next requested the lock will be allowed to take it. This should ensure that instance claim requests do not have a chance of losing the lock contest, which in turn should ensure that instance build requests do not queue unnecessarily behind long-running tasks. This includes bumping the oslo.concurrency dependency; fair locks were added in 3.29.0 (I37577becff4978bf643c65fa9bc2d78d342ea35a). Change-Id: Ia5e521e0f0c7a78b5ace5de9f343e84d872553f9 Related-Bug: #1864122
This commit is contained in:
parent
f2d088b04e
commit
1ed9f9dac5
@ -72,7 +72,7 @@ os-win==3.0.0
|
||||
os-xenapi==0.3.3
|
||||
osc-lib==1.10.0
|
||||
oslo.cache==1.26.0
|
||||
oslo.concurrency==3.26.0
|
||||
oslo.concurrency==3.29.0
|
||||
oslo.config==6.1.0
|
||||
oslo.context==2.21.0
|
||||
oslo.db==4.44.0
|
||||
|
@ -112,7 +112,7 @@ class ResourceTracker(object):
|
||||
self.assigned_resources = collections.defaultdict(
|
||||
lambda: collections.defaultdict(set))
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def instance_claim(self, context, instance, nodename, allocations,
|
||||
limits=None):
|
||||
"""Indicate that some resources are needed for an upcoming compute
|
||||
@ -186,7 +186,7 @@ class ResourceTracker(object):
|
||||
|
||||
return claim
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def rebuild_claim(self, context, instance, nodename, allocations,
|
||||
limits=None, image_meta=None, migration=None):
|
||||
"""Create a claim for a rebuild operation."""
|
||||
@ -195,7 +195,7 @@ class ResourceTracker(object):
|
||||
migration, allocations, move_type='evacuation',
|
||||
limits=limits, image_meta=image_meta)
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def resize_claim(self, context, instance, instance_type, nodename,
|
||||
migration, allocations, image_meta=None, limits=None):
|
||||
"""Create a claim for a resize or cold-migration move.
|
||||
@ -207,7 +207,7 @@ class ResourceTracker(object):
|
||||
migration, allocations, image_meta=image_meta,
|
||||
limits=limits)
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def live_migration_claim(self, context, instance, nodename, migration,
|
||||
limits):
|
||||
"""Builds a MoveClaim for a live migration.
|
||||
@ -515,7 +515,7 @@ class ResourceTracker(object):
|
||||
instance.node = None
|
||||
instance.save()
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def abort_instance_claim(self, context, instance, nodename):
|
||||
"""Remove usage from the given instance."""
|
||||
self._update_usage_from_instance(context, instance, nodename,
|
||||
@ -538,7 +538,7 @@ class ResourceTracker(object):
|
||||
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
|
||||
self.compute_nodes[nodename].pci_device_pools = dev_pools_obj
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def drop_move_claim(self, context, instance, nodename,
|
||||
instance_type=None, prefix='new_'):
|
||||
"""Remove usage for an incoming/outgoing migration.
|
||||
@ -587,7 +587,7 @@ class ResourceTracker(object):
|
||||
ctxt = context.elevated()
|
||||
self._update(ctxt, self.compute_nodes[nodename])
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def update_usage(self, context, instance, nodename):
|
||||
"""Update the resource usage and stats after a change in an
|
||||
instance
|
||||
@ -858,7 +858,7 @@ class ResourceTracker(object):
|
||||
'another host\'s instance!',
|
||||
{'uuid': migration.instance_uuid})
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def _update_available_resource(self, context, resources, startup=False):
|
||||
|
||||
# initialize the compute node object, creating it
|
||||
@ -1705,7 +1705,7 @@ class ResourceTracker(object):
|
||||
"""Resets the failed_builds stats for the given node."""
|
||||
self.stats[nodename].build_succeeded()
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def claim_pci_devices(self, context, pci_requests):
|
||||
"""Claim instance PCI resources
|
||||
|
||||
@ -1718,7 +1718,7 @@ class ResourceTracker(object):
|
||||
self.pci_tracker.save(context)
|
||||
return result
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def allocate_pci_devices_for_instance(self, context, instance):
|
||||
"""Allocate instance claimed PCI resources
|
||||
|
||||
@ -1728,7 +1728,7 @@ class ResourceTracker(object):
|
||||
self.pci_tracker.allocate_instance(instance)
|
||||
self.pci_tracker.save(context)
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def free_pci_device_allocations_for_instance(self, context, instance):
|
||||
"""Free instance allocated PCI resources
|
||||
|
||||
@ -1738,7 +1738,7 @@ class ResourceTracker(object):
|
||||
self.pci_tracker.free_instance_allocations(context, instance)
|
||||
self.pci_tracker.save(context)
|
||||
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
|
||||
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
|
||||
def free_pci_device_claims_for_instance(self, context, instance):
|
||||
"""Free instance claimed PCI resources
|
||||
|
||||
|
@ -34,7 +34,7 @@ six>=1.10.0 # MIT
|
||||
stevedore>=1.20.0 # Apache-2.0
|
||||
websockify>=0.9.0 # LGPLv3
|
||||
oslo.cache>=1.26.0 # Apache-2.0
|
||||
oslo.concurrency>=3.26.0 # Apache-2.0
|
||||
oslo.concurrency>=3.29.0 # Apache-2.0
|
||||
oslo.config>=6.1.0 # Apache-2.0
|
||||
oslo.context>=2.21.0 # Apache-2.0
|
||||
oslo.log>=3.36.0 # Apache-2.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user