#!/usr/bin/env python # Copyright (C) 2013 OpenStack Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # # See the License for the specific language governing permissions and # limitations under the License. """ This module holds classes that represent concepts in nodepool's allocation algorithm. The algorithm is: Setup: * Establish the node providers with their current available capacity. * Establish requests that are to be made of each provider for a certain label. * Indicate which providers can supply nodes of that label. * Indicate to which targets nodes of a certain label from a certain provider may be distributed (and the weight that should be given to each target when distributing). Run: * For each label, set the requested number of nodes from each provider to be proportional to that providers overall capacity. * Define the 'priority' of a request as the number of requests for the same label from other providers. * For each provider, sort the requests by the priority. This puts requests that can be serviced by the fewest providers first. * Grant each such request in proportion to that requests portion of the total amount requested by requests of the same priority. * The nodes allocated by a grant are then distributed to the targets which are associated with the provider and label, in proportion to that target's portion of the sum of the weights of each target for that label. """ import functools # History allocation tracking # The goal of the history allocation tracking is to ensure forward # progress by not starving any particular label when in over-quota # situations. For example, if you have two labels, say 'fedora' and # 'ubuntu', and 'ubuntu' is requesting many more nodes than 'fedora', # it is quite possible that 'fedora' never gets any allocations. If # 'fedora' is required for a gate-check job, older changes may wait # in Zuul's pipelines longer than expected while jobs for newer # changes continue to receive 'ubuntu' nodes and overall merge # throughput decreases during such contention. # # We track the history of allocations by label. A persistent # AllocationHistory object should be kept and passed along with each # AllocationRequest, which records its initial request in the history # via recordRequest(). # # When a sub-allocation gets a grant, it records this via a call to # AllocationHistory.recordGrant(). All the sub-allocations # contribute to tracking the total grants for the parent # AllocationRequest. # # When finished requesting grants from all providers, # AllocationHistory.grantsDone() should be called to store the # allocation state in the history. # # This history is used AllocationProvider.makeGrants() to prioritize # requests that have not been granted in prior iterations. # AllocationHistory.getWaitTime will return how many iterations # each label has been waiting for an allocation. class AllocationHistory(object): '''A history of allocation requests and grants''' def __init__(self, history=100): # current allocations for this iteration # keeps elements of type # label -> (request, granted) self.current_allocations = {} self.history = history # list of up to previous current_allocation # dictionaries self.past_allocations = [] def recordRequest(self, label, amount): try: a = self.current_allocations[label] a['requested'] += amount except KeyError: self.current_allocations[label] = dict(requested=amount, allocated=0) def recordGrant(self, label, amount): try: a = self.current_allocations[label] a['allocated'] += amount except KeyError: # granted but not requested? shouldn't happen raise def grantsDone(self): # save this round of allocations/grants up to our history self.past_allocations.insert(0, self.current_allocations) self.past_allocations = self.past_allocations[:self.history] self.current_allocations = {} def getWaitTime(self, label): # go through the history of allocations and calculate how many # previous iterations this label has received none of its # requested allocations. wait = 0 # We don't look at the current_alloctions here; only # historical. With multiple providers, possibly the first # provider has given nodes to the waiting label (which would # be recorded in current_allocations), and a second provider # should fall back to using the usual ratio-based mechanism? for i, a in enumerate(self.past_allocations): if (label in a) and (a[label]['allocated'] == 0): wait = i + 1 continue # only interested in consecutive failures to allocate. break return wait class AllocationProvider(object): """A node provider and its capacity.""" def __init__(self, name, available): self.name = name # if this is negative, many of the calcuations turn around and # we start handing out nodes that don't exist. self.available = available if available >= 0 else 0 self.sub_requests = [] self.grants = [] def __repr__(self): return '' % self.name def makeGrants(self): # build a list of (request,wait-time) tuples all_reqs = [(x, x.getWaitTime()) for x in self.sub_requests] # reqs with no wait time get processed via ratio mechanism reqs = [x[0] for x in all_reqs if x[1] == 0] # we prioritize whoever has been waiting the longest and give # them whatever is available. If we run out, put them back in # the ratio queue waiters = [x for x in all_reqs if x[1] != 0] waiters.sort(key=lambda x: x[1], reverse=True) for w in waiters: w = w[0] if self.available > 0: w.grant(min(int(w.amount), self.available)) else: reqs.append(w) # Sort the remaining requests by priority so we fill the most # specific requests first (e.g., if this provider is the only # one that can supply foo nodes, then it should focus on # supplying them and leave bar nodes to other providers). reqs.sort(lambda a, b: cmp(a.getPriority(), b.getPriority())) for req in reqs: total_requested = 0.0 # Within a specific priority, limit the number of # available nodes to a value proportionate to the request. reqs_at_this_level = [r for r in reqs if r.getPriority() == req.getPriority()] for r in reqs_at_this_level: total_requested += r.amount if total_requested: ratio = float(req.amount) / total_requested else: ratio = 0.0 grant = int(round(req.amount)) grant = min(grant, int(round(self.available * ratio))) # This adjusts our availability as well as the values of # other requests, so values will be correct the next time # through the loop. req.grant(grant) class AllocationRequest(object): """A request for a number of labels.""" def __init__(self, name, amount, history=None): self.name = name self.amount = float(amount) # Sub-requests of individual providers that make up this # request. AllocationProvider -> AllocationSubRequest self.sub_requests = {} # Targets to which nodes from this request may be assigned. # AllocationTarget -> AllocationRequestTarget self.request_targets = {} if history is not None: self.history = history else: self.history = AllocationHistory() self.history.recordRequest(name, amount) # subrequests use these self.recordGrant = functools.partial(self.history.recordGrant, name) self.getWaitTime = functools.partial(self.history.getWaitTime, name) def __repr__(self): return '' % (self.amount, self.name) def addTarget(self, target, current): art = AllocationRequestTarget(self, target, current) self.request_targets[target] = art def addProvider(self, provider, target, subnodes): # Handle being called multiple times with different targets. s = self.sub_requests.get(provider) if not s: s = AllocationSubRequest(self, provider, subnodes) agt = s.addTarget(self.request_targets[target]) self.sub_requests[provider] = s if s not in provider.sub_requests: provider.sub_requests.append(s) self.makeRequests() return s, agt def makeRequests(self): # (Re-)distribute this request across all of its providers. total_available = 0.0 for sub_request in self.sub_requests.values(): total_available += sub_request.provider.available for sub_request in self.sub_requests.values(): if total_available: ratio = float(sub_request.provider.available) / total_available else: ratio = 0.0 sub_request.setAmount(ratio * self.amount) class AllocationSubRequest(object): """A request for a number of images from a specific provider.""" def __init__(self, request, provider, subnodes): self.request = request self.provider = provider self.amount = 0.0 self.subnodes = subnodes self.targets = [] def __repr__(self): return '' % ( self.amount, self.request.amount, self.request.name, self.provider.name) def addTarget(self, request_target): agt = AllocationGrantTarget(self, request_target) self.targets.append(agt) return agt def setAmount(self, amount): self.amount = amount def getPriority(self): return len(self.request.sub_requests) def getWaitTime(self): return self.request.getWaitTime() def grant(self, amount): # Grant this request (with the supplied amount). Adjust this # sub-request's value to the actual, as well as the values of # any remaining sub-requests. # fractional amounts don't make sense assert int(amount) == amount # Remove from the set of sub-requests so that this is not # included in future calculations. self.provider.sub_requests.remove(self) del self.request.sub_requests[self.provider] if amount > 0: grant = AllocationGrant(self.request, self.provider, amount, self.targets) self.request.recordGrant(amount) # This is now a grant instead of a request. self.provider.grants.append(grant) else: grant = None amount = 0 self.amount = amount # Adjust provider and request values accordingly. self.request.amount -= amount subnode_factor = 1 + self.subnodes self.provider.available -= (amount * subnode_factor) # Adjust the requested values for related sub-requests. self.request.makeRequests() # Allocate these granted nodes to targets. if grant: grant.makeAllocations() class AllocationGrant(object): """A grant of a certain number of nodes of an image from a specific provider.""" def __init__(self, request, provider, amount, targets): self.request = request self.provider = provider self.amount = amount self.targets = targets def __repr__(self): return '' % ( self.amount, self.request.name, self.provider.name) def makeAllocations(self): # Allocate this grant to the linked targets. total_current = 0 for agt in self.targets: total_current += agt.request_target.current amount = self.amount # Add the nodes in this allocation to the total number of # nodes for this image so that we're setting our target # allocations based on a portion of the total future nodes. total_current += amount remaining_targets = len(self.targets) for agt in self.targets: # Evenly distribute the grants across all targets ratio = 1.0 / remaining_targets # Take the weight and apply it to the total number of # nodes to this image to figure out how many of the total # nodes should ideally be on this target. desired_count = int(round(ratio * total_current)) # The number of nodes off from our calculated target. delta = desired_count - agt.request_target.current # Use the delta as the allocation for this target, but # make sure it's bounded by 0 and the number of nodes we # have available to allocate. allocation = min(delta, amount) allocation = max(allocation, 0) # The next time through the loop, we have reduced our # grant by this amount. amount -= allocation # Don't consider this target's count in the total number # of nodes in the next iteration, nor the nodes we have # just allocated. total_current -= agt.request_target.current total_current -= allocation # Since we aren't considering this target's count, also # don't consider this target itself when calculating the # ratio. remaining_targets -= 1 # Set the amount of this allocation. agt.allocate(allocation) class AllocationTarget(object): """A target to which nodes may be assigned.""" def __init__(self, name): self.name = name def __repr__(self): return '' % (self.name) class AllocationRequestTarget(object): """A request associated with a target to which nodes may be assigned.""" def __init__(self, request, target, current): self.target = target self.request = request self.current = current class AllocationGrantTarget(object): """A target for a specific grant to which nodes may be assigned.""" def __init__(self, sub_request, request_target): self.sub_request = sub_request self.request_target = request_target self.amount = 0 def __repr__(self): return '' % ( self.amount, self.sub_request.request.name, self.request_target.target.name) def allocate(self, amount): # This is essentially the output of this system. This # represents the number of nodes of a specific image from a # specific provider that should be assigned to a specific # target. self.amount = amount # Update the number of nodes of this image that are assigned # to this target to assist in other allocation calculations self.request_target.current += amount