nodepool/nodepool/allocation.py

419 lines
16 KiB
Python

#!/usr/bin/env python
# Copyright (C) 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module holds classes that represent concepts in nodepool's
allocation algorithm.
The algorithm is:
Setup:
* Establish the node providers with their current available
capacity.
* Establish requests that are to be made of each provider for a
certain label.
* Indicate which providers can supply nodes of that label.
* Indicate to which targets nodes of a certain label from a certain
provider may be distributed (and the weight that should be
given to each target when distributing).
Run:
* For each label, set the requested number of nodes from each
provider to be proportional to that providers overall capacity.
* Define the 'priority' of a request as the number of requests for
the same label from other providers.
* For each provider, sort the requests by the priority. This puts
requests that can be serviced by the fewest providers first.
* Grant each such request in proportion to that requests portion of
the total amount requested by requests of the same priority.
* The nodes allocated by a grant are then distributed to the targets
which are associated with the provider and label, in proportion to
that target's portion of the sum of the weights of each target for
that label.
"""
import functools
# History allocation tracking
# The goal of the history allocation tracking is to ensure forward
# progress by not starving any particular label when in over-quota
# situations. For example, if you have two labels, say 'fedora' and
# 'ubuntu', and 'ubuntu' is requesting many more nodes than 'fedora',
# it is quite possible that 'fedora' never gets any allocations. If
# 'fedora' is required for a gate-check job, older changes may wait
# in Zuul's pipelines longer than expected while jobs for newer
# changes continue to receive 'ubuntu' nodes and overall merge
# throughput decreases during such contention.
#
# We track the history of allocations by label. A persistent
# AllocationHistory object should be kept and passed along with each
# AllocationRequest, which records its initial request in the history
# via recordRequest().
#
# When a sub-allocation gets a grant, it records this via a call to
# AllocationHistory.recordGrant(). All the sub-allocations
# contribute to tracking the total grants for the parent
# AllocationRequest.
#
# When finished requesting grants from all providers,
# AllocationHistory.grantsDone() should be called to store the
# allocation state in the history.
#
# This history is used AllocationProvider.makeGrants() to prioritize
# requests that have not been granted in prior iterations.
# AllocationHistory.getWaitTime will return how many iterations
# each label has been waiting for an allocation.
class AllocationHistory(object):
'''A history of allocation requests and grants'''
def __init__(self, history=100):
# current allocations for this iteration
# keeps elements of type
# label -> (request, granted)
self.current_allocations = {}
self.history = history
# list of up to <history> previous current_allocation
# dictionaries
self.past_allocations = []
def recordRequest(self, label, amount):
try:
a = self.current_allocations[label]
a['requested'] += amount
except KeyError:
self.current_allocations[label] = dict(requested=amount,
allocated=0)
def recordGrant(self, label, amount):
try:
a = self.current_allocations[label]
a['allocated'] += amount
except KeyError:
# granted but not requested? shouldn't happen
raise
def grantsDone(self):
# save this round of allocations/grants up to our history
self.past_allocations.insert(0, self.current_allocations)
self.past_allocations = self.past_allocations[:self.history]
self.current_allocations = {}
def getWaitTime(self, label):
# go through the history of allocations and calculate how many
# previous iterations this label has received none of its
# requested allocations.
wait = 0
# We don't look at the current_alloctions here; only
# historical. With multiple providers, possibly the first
# provider has given nodes to the waiting label (which would
# be recorded in current_allocations), and a second provider
# should fall back to using the usual ratio-based mechanism?
for i, a in enumerate(self.past_allocations):
if (label in a) and (a[label]['allocated'] == 0):
wait = i + 1
continue
# only interested in consecutive failures to allocate.
break
return wait
class AllocationProvider(object):
"""A node provider and its capacity."""
def __init__(self, name, available):
self.name = name
# if this is negative, many of the calcuations turn around and
# we start handing out nodes that don't exist.
self.available = available if available >= 0 else 0
self.sub_requests = []
self.grants = []
def __repr__(self):
return '<AllocationProvider %s>' % self.name
def makeGrants(self):
# build a list of (request,wait-time) tuples
all_reqs = [(x, x.getWaitTime()) for x in self.sub_requests]
# reqs with no wait time get processed via ratio mechanism
reqs = [x[0] for x in all_reqs if x[1] == 0]
# we prioritize whoever has been waiting the longest and give
# them whatever is available. If we run out, put them back in
# the ratio queue
waiters = [x for x in all_reqs if x[1] != 0]
waiters.sort(key=lambda x: x[1], reverse=True)
for w in waiters:
w = w[0]
if self.available > 0:
w.grant(min(int(w.amount), self.available))
else:
reqs.append(w)
# Sort the remaining requests by priority so we fill the most
# specific requests first (e.g., if this provider is the only
# one that can supply foo nodes, then it should focus on
# supplying them and leave bar nodes to other providers).
reqs.sort(lambda a, b: cmp(a.getPriority(), b.getPriority()))
for req in reqs:
total_requested = 0.0
# Within a specific priority, limit the number of
# available nodes to a value proportionate to the request.
reqs_at_this_level = [r for r in reqs
if r.getPriority() == req.getPriority()]
for r in reqs_at_this_level:
total_requested += r.amount
if total_requested:
ratio = float(req.amount) / total_requested
else:
ratio = 0.0
grant = int(round(req.amount))
grant = min(grant, int(round(self.available * ratio)))
# This adjusts our availability as well as the values of
# other requests, so values will be correct the next time
# through the loop.
req.grant(grant)
class AllocationRequest(object):
"""A request for a number of labels."""
def __init__(self, name, amount, history=None):
self.name = name
self.amount = float(amount)
# Sub-requests of individual providers that make up this
# request. AllocationProvider -> AllocationSubRequest
self.sub_requests = {}
# Targets to which nodes from this request may be assigned.
# AllocationTarget -> AllocationRequestTarget
self.request_targets = {}
if history is not None:
self.history = history
else:
self.history = AllocationHistory()
self.history.recordRequest(name, amount)
# subrequests use these
self.recordGrant = functools.partial(self.history.recordGrant, name)
self.getWaitTime = functools.partial(self.history.getWaitTime, name)
def __repr__(self):
return '<AllocationRequest for %s of %s>' % (self.amount, self.name)
def addTarget(self, target, current):
art = AllocationRequestTarget(self, target, current)
self.request_targets[target] = art
def addProvider(self, provider, target, subnodes):
# Handle being called multiple times with different targets.
s = self.sub_requests.get(provider)
if not s:
s = AllocationSubRequest(self, provider, subnodes)
agt = s.addTarget(self.request_targets[target])
self.sub_requests[provider] = s
if s not in provider.sub_requests:
provider.sub_requests.append(s)
self.makeRequests()
return s, agt
def makeRequests(self):
# (Re-)distribute this request across all of its providers.
total_available = 0.0
for sub_request in self.sub_requests.values():
total_available += sub_request.provider.available
for sub_request in self.sub_requests.values():
if total_available:
ratio = float(sub_request.provider.available) / total_available
else:
ratio = 0.0
sub_request.setAmount(ratio * self.amount)
class AllocationSubRequest(object):
"""A request for a number of images from a specific provider."""
def __init__(self, request, provider, subnodes):
self.request = request
self.provider = provider
self.amount = 0.0
self.subnodes = subnodes
self.targets = []
def __repr__(self):
return '<AllocationSubRequest for %s (out of %s) of %s from %s>' % (
self.amount, self.request.amount, self.request.name,
self.provider.name)
def addTarget(self, request_target):
agt = AllocationGrantTarget(self, request_target)
self.targets.append(agt)
return agt
def setAmount(self, amount):
self.amount = amount
def getPriority(self):
return len(self.request.sub_requests)
def getWaitTime(self):
return self.request.getWaitTime()
def grant(self, amount):
# Grant this request (with the supplied amount). Adjust this
# sub-request's value to the actual, as well as the values of
# any remaining sub-requests.
# fractional amounts don't make sense
assert int(amount) == amount
# Remove from the set of sub-requests so that this is not
# included in future calculations.
self.provider.sub_requests.remove(self)
del self.request.sub_requests[self.provider]
if amount > 0:
grant = AllocationGrant(self.request, self.provider,
amount, self.targets)
self.request.recordGrant(amount)
# This is now a grant instead of a request.
self.provider.grants.append(grant)
else:
grant = None
amount = 0
self.amount = amount
# Adjust provider and request values accordingly.
self.request.amount -= amount
subnode_factor = 1 + self.subnodes
self.provider.available -= (amount * subnode_factor)
# Adjust the requested values for related sub-requests.
self.request.makeRequests()
# Allocate these granted nodes to targets.
if grant:
grant.makeAllocations()
class AllocationGrant(object):
"""A grant of a certain number of nodes of an image from a
specific provider."""
def __init__(self, request, provider, amount, targets):
self.request = request
self.provider = provider
self.amount = amount
self.targets = targets
def __repr__(self):
return '<AllocationGrant of %s of %s from %s>' % (
self.amount, self.request.name, self.provider.name)
def makeAllocations(self):
# Allocate this grant to the linked targets.
total_current = 0
for agt in self.targets:
total_current += agt.request_target.current
amount = self.amount
# Add the nodes in this allocation to the total number of
# nodes for this image so that we're setting our target
# allocations based on a portion of the total future nodes.
total_current += amount
remaining_targets = len(self.targets)
for agt in self.targets:
# Evenly distribute the grants across all targets
ratio = 1.0 / remaining_targets
# Take the weight and apply it to the total number of
# nodes to this image to figure out how many of the total
# nodes should ideally be on this target.
desired_count = int(round(ratio * total_current))
# The number of nodes off from our calculated target.
delta = desired_count - agt.request_target.current
# Use the delta as the allocation for this target, but
# make sure it's bounded by 0 and the number of nodes we
# have available to allocate.
allocation = min(delta, amount)
allocation = max(allocation, 0)
# The next time through the loop, we have reduced our
# grant by this amount.
amount -= allocation
# Don't consider this target's count in the total number
# of nodes in the next iteration, nor the nodes we have
# just allocated.
total_current -= agt.request_target.current
total_current -= allocation
# Since we aren't considering this target's count, also
# don't consider this target itself when calculating the
# ratio.
remaining_targets -= 1
# Set the amount of this allocation.
agt.allocate(allocation)
class AllocationTarget(object):
"""A target to which nodes may be assigned."""
def __init__(self, name):
self.name = name
def __repr__(self):
return '<AllocationTarget %s>' % (self.name)
class AllocationRequestTarget(object):
"""A request associated with a target to which nodes may be assigned."""
def __init__(self, request, target, current):
self.target = target
self.request = request
self.current = current
class AllocationGrantTarget(object):
"""A target for a specific grant to which nodes may be assigned."""
def __init__(self, sub_request, request_target):
self.sub_request = sub_request
self.request_target = request_target
self.amount = 0
def __repr__(self):
return '<AllocationGrantTarget for %s of %s to %s>' % (
self.amount, self.sub_request.request.name,
self.request_target.target.name)
def allocate(self, amount):
# This is essentially the output of this system. This
# represents the number of nodes of a specific image from a
# specific provider that should be assigned to a specific
# target.
self.amount = amount
# Update the number of nodes of this image that are assigned
# to this target to assist in other allocation calculations
self.request_target.current += amount