congress/congress/policy_engines/vm_placement.py

676 lines
26 KiB
Python

# Copyright (c) 2015 VMware, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import subprocess
import time
from oslo_log import log as logging
import pulp
from congress.datalog import arithmetic_solvers
from congress.datalog import base
from congress.datalog import compile
from congress.datalog import nonrecursive
from congress import exception
from congress.policy_engines import base_driver
LOG = logging.getLogger(__name__)
def d6service(name, keys, inbox, datapath, args):
"""This method is called by d6cage to create a dataservice instance."""
return ComputePlacementEngine(name, keys, inbox, datapath, args)
# TODO(thinrichs): Figure out what to move to the base class PolicyEngineDriver
# Could also pull out the Datalog-to-LP conversion, potentially.
class ComputePlacementEngine(base_driver.PolicyEngineDriver):
def __init__(self, name='', keys='', inbox=None, datapath=None, args=None):
super(ComputePlacementEngine, self).__init__(
name, keys, inbox, datapath)
self.policy = nonrecursive.MultiModuleNonrecursiveRuleTheory(name=name)
self.initialized = True
self.guest_host_assignment = {}
self.lplang = arithmetic_solvers.PulpLpLang()
self.vm_migrator = VmMigrator()
###########################
# Policy engine interface
def insert(self, formula):
return self.policy.insert(self.parse1(formula))
def delete(self, formula):
return self.policy.delete(self.parse1(formula))
def select(self, query):
ans = self.policy.select(self.parse1(query))
return " ".join(str(x) for x in ans)
def set_policy(self, policy):
LOG.info("%s:: setting policy to %s", str(self.name), str(policy))
# empty out current policy
external = [compile.Tablename.build_service_table(service, name)
for service, name in self._current_external_tables()]
self.policy.empty(tablenames=external, invert=True)
# insert new policy and subscribe to the tablenames referencing a
# datasource driver
for rule in self.parse(policy):
self.policy.insert(rule)
LOG.info("new policy: %s", self.policy.content_string())
# initialize table subscriptions
self.initialize_table_subscriptions()
# enforce policy
self.enforce_policy()
def initialize_table_subscriptions(self):
"""Initialize table subscription.
Once policies have all been loaded, this function subscribes to
all the necessary tables. See UPDATE_TABLE_SUBSCRIPTIONS as well.
"""
tablenames = self.policy.tablenames()
tablenames = [compile.Tablename.parse_service_table(table)
for table in tablenames]
tablenames = [(service, name) for (service, name) in tablenames
if service is not None]
self._set_subscriptions(tablenames)
def _set_subscriptions(self, tablenames):
"""Update subscriptions on DSE to be exactly @tablenames."""
subscriptions = set(self._current_external_tables())
tablenames = set(tablenames)
toadd = tablenames - subscriptions
torem = subscriptions - tablenames
for service, tablename in toadd:
if service is not None:
LOG.info("%s:: subscribing to (%s, %s)",
self.name, service, tablename)
self.subscribe(service, tablename,
callback=self.receive_data)
for service, tablename in torem:
if service is not None:
LOG.info("%s:: unsubscribing from (%s, %s)",
self.name, service, tablename)
self.unsubscribe(service, tablename)
relevant_tables = [compile.Tablename.build_service_table(
service, tablename)]
self.policy.empty(relevant_tables)
def _current_external_tables(self):
"""Return list of tables engine is currently subscribed to."""
return [(value.key, value.dataindex)
for value in self.subdata.values()]
################################################################
# Receiving data published on the DSE by other services
# For PoC, assuming all data already present and no pubs.
# So we're ignoring this for now.
def receive_data(self, msg):
"""Event handler for when a dataservice publishes data.
That data can either be the full table (as a list of tuples)
or a delta (a list of Events).
"""
LOG.info("%s:: received data msg %s", self.name, msg)
# if empty data, assume it is an init msg, since noop otherwise
if len(msg.body.data) == 0:
self.receive_data_full(msg)
else:
# grab an item from any iterable
dataelem = next(iter(msg.body.data))
if isinstance(dataelem, compile.Event):
self.receive_data_update(msg)
else:
self.receive_data_full(msg)
self.enforce_policy()
def receive_data_full(self, msg):
"""Handler for when dataservice publishes full table."""
LOG.info("%s:: received full data msg for %s: %s",
self.name, msg.header['dataindex'],
";".join(str(x) for x in msg.body.data))
tablename = compile.Tablename.build_service_table(
msg.replyTo, msg.header['dataindex'])
# Use a generator to avoid instantiating all these Facts at once.
# Don't print out 'literals' since that will eat the generator
literals = (compile.Fact(tablename, row) for row in msg.body.data)
LOG.info("%s:: begin initialize_tables %s", self.name, tablename)
self.policy.initialize_tables([tablename], literals)
LOG.info("%s:: end initialize data msg for %s", self.name, tablename)
select = [str(x) for x in self.select('p(x)')]
LOG.info("%s:: select('p(x)'): %s ENDED", self.name, " ".join(select))
def receive_data_update(self, msg):
"""Handler for when dataservice publishes a delta."""
LOG.info("%s:: received update data msg for %s: %s",
self.name, msg.header['dataindex'],
";".join(str(x) for x in msg.body.data))
new_events = []
for event in msg.body.data:
assert compile.is_atom(event.formula), (
"receive_data_update received non-atom: " +
str(event.formula))
# prefix tablename with data source
actual_table = compile.Tablename.build_service_table(
msg.replyTo, event.formula.table.table)
values = [term.name for term in event.formula.arguments]
newevent = compile.Event(compile.Fact(actual_table, values),
insert=event.insert)
new_events.append(newevent)
(permitted, changes) = self.policy.update(new_events)
if not permitted:
raise exception.CongressException(
"Update not permitted." + '\n'.join(str(x) for x in changes))
else:
tablename = msg.header['dataindex']
service = msg.replyTo
LOG.debug("update data msg for %s from %s caused %d "
"changes: %s", tablename, service, len(changes),
";".join(str(x) for x in changes))
#######################################
# Policy enforcement
def enforce_policy(self):
"""Enforce policy by migrating VMs to minimize warnings.
Raises LpProblemUnsolvable if the LP cannot solve the
given problem.
Raises LpConversionFailure if self.policy cannot be converted
into an LP problem.
"""
LOG.info("Enforcing policy")
ans = self.policy.select(self.parse1('warning(x)'), True)
if len(ans) == 0:
return
# grab assignment
g_h_assignment = self.calculate_vm_assignment()
self.guest_host_assignment = dict(g_h_assignment)
# migrate
for guest in g_h_assignment:
g_h_assignment[guest] = [g_h_assignment[guest], 0]
self.vm_migrator.do_migrations(g_h_assignment)
def calculate_vm_assignment(self):
"""Calculate where VMs should be located in order to minimize warnings.
Returns a dictionary from guest ID to host ID where that guest should
be located.
Raises LpProblemUnsolvable if the LP cannot solve the
given problem.
Raises LpConversionFailure if self.policy cannot be converted
into an LP problem.
"""
g_h_assignment = {}
LOG.info("* Calculating VM assignment for Datalog policy: *")
LOG.info(self.policy.content_string())
migproblem, value_mapping = self.policy_to_lp_problem()
LOG.info("* Converted to PuLP program: *")
LOG.info("problem: %s", migproblem)
migproblem.solve()
LOG.info("problem status: %s", migproblem.status)
if pulp.LpStatus[migproblem.status] == 'Optimal':
LOG.info("value-mapping: %s", value_mapping)
for var in migproblem.variables():
LOG.info("var: %s = %s", var.name, var.varValue)
if var.name.startswith('assign'):
g, h = var.name.lstrip('assign').lstrip('_').split('_')
g = value_mapping.get(int(g), g)
h = value_mapping.get(int(h), h)
LOG.info("guest %s, host %s has value %s",
g, h, var.varValue)
if var.varValue == 1.0:
# add correct old host
g_h_assignment[g] = h
return g_h_assignment
raise LpProblemUnsolvable(str(migproblem))
#######################################
# Toplevel conversion of Datalog to LP
# mapping Datalog tables to LP decision variables
def policy_to_lp_problem(self):
"""Return an LP problem representing the state of this engine.
Returns an instance of self.lplang.problem representing the policy
and the current data of this engine.
"""
opt, hard = self.policy_to_lp()
LOG.info("* Converted Datalog policy to DatalogLP *")
LOG.info("optimization:\n%s", opt)
LOG.info("constraints:\n%s", "\n".join(str(x) for x in hard))
bounds = {}
for exp in hard:
self.set_bounds(exp, bounds)
return self.lplang.problem(opt, hard, bounds)
def policy_to_lp(self):
"""Transform self.policy into a (non-)linear programming problem.
Returns (<optimization criteria>, <hard constraints>) where
each are represented using expressions constructed by self.lplang.
"""
# soft constraints. optimization criteria: minimize number of warnings
# LOG.info("* Converting warning(x) to DatalogLP *")
wquery = self.parse1('warning(x)')
warnings, wvars = self.datalog_to_lp(wquery, [])
opt = self.lplang.makeOr(*wvars)
# hard constraints. all must be false
# LOG.info("* Converting error(x) to DatalogLP *")
equery = self.parse1('error(x)')
errors, evars = self.datalog_to_lp(equery, [])
hard = [self.lplang.makeNotEqual(var, 1) for var in evars]
# domain-specific axioms, e.g. sum of guest memory util = host mem util
# LOG.info("* Constructing domain-specific axioms *")
axioms = self.domain_axioms()
return opt, warnings + errors + hard + axioms
def set_bounds(self, expr, bounds):
"""Find upper bounds on all variables occurring in expr.
:param: expr is a LpLang.Expression
:param: bounds: is a dictionary mapping an Expression's tuple() to a
number.
Modifies bounds to include values for all variables occurring inside
expr.
"""
# LOG.info("set_bounds(%s)", expr)
variables = self.lplang.variables(expr)
for var in variables:
tup = var.tuple()
if tup not in bounds:
bounds[tup] = 10
##########################
# Domain-specific axioms
def domain_axioms(self):
"""Return a list of all the domain-specific axioms as strings.
Axioms define relationships between LP decision variables that we
would not expect the user to write.
"""
# TODO(thinrichs): just defining relationship between mem-usage for
# guests and hosts. Add rest of axioms.
hosts = self.get_hosts()
guests = self.get_guests()
memusage = self.get_memusage()
memusage_ax = self._domain_axiom_memusage(hosts, guests, memusage)
assign_ax = self._domain_axiom_assignment(hosts, guests)
return memusage_ax + assign_ax
def _domain_axiom_assignment(self, hosts, guests):
"""Return axioms for assignment variables.
:param: hosts is the list of host IDs
:param: guests is the list of guest IDs
assign[h1,g] + ... + assign[hn, g] = 1
"""
axioms = []
for g in guests:
hostvars = [self._construct_assign(h, g) for h in hosts]
axioms.append(self.lplang.makeEqual(
1, self.lplang.makeArith('plus', *hostvars)))
return axioms
def _construct_assign(self, host, guest):
return self.lplang.makeBoolVariable('assign', guest, host)
def _domain_axiom_memusage(self, hosts, guests, memusage):
"""Return a list of LP axioms defining guest/host mem-usage.
:param: hosts is the list of host IDs
:param: guests is the list of guest IDs
Axiom: sum of all guest mem-usage for those guests deployed on a host
gives the mem-usage for that host:
hMemUse[h] = assign[1][h]*gMemUse[1] + ... + assign[G][h]*gMemUse[G].
Returns a list of LpLang expressions.
Raises NotEnoughData if it does not have guest memory usage.
"""
axioms = []
for h in hosts:
guest_terms = []
for guest in guests:
if guest not in memusage:
raise NotEnoughData(
"could not find guest mem usage: %s" % guest)
guest_terms.append(
self.lplang.makeArith(
'times',
self._construct_assign(h, guest),
memusage[guest]))
axioms.append(
self.lplang.makeEqual(
self.lplang.makeIntVariable('hMemUse', h),
self.lplang.makeArith('plus', *guest_terms)))
return axioms
def get_hosts(self):
query = self.parse1('nova:host(id, zone, memory_capacity)')
host_rows = self.policy.select(query)
return set([lit.arguments[0].name for lit in host_rows])
def get_guests(self):
query = self.parse1('nova:server(id, name, host)')
guest_rows = self.policy.select(query)
return set([lit.arguments[0].name for lit in guest_rows])
def get_memusage(self):
query = self.parse1('ceilometer:mem_consumption(id, mem)')
rows = self.policy.select(query)
return {lit.arguments[0].name: lit.arguments[1].name
for lit in rows}
#########################
# Convert datalog to LP
unknowns = ['ceilometer:mem_consumption']
rewrites = ['ceilometer:mem_consumption(x, y) :- '
'var("hMemUse", x), output(y)']
def datalog_to_lp(self, query, unknown_table_possibilities):
"""Convert rules defining QUERY in self.policy into a linear program.
@unknowns is the list of tablenames that should become
decision variables. @unknown_table_possibilities is the list
of all possible instances of the decision variable tables.
"""
# TODO(thinrichs): figure out if/when negation is handled properly
# a list of rules, each of which has an instance of QUERY in the head
# and whose bodies are drawn from unknowns.
rules = self.policy.abduce(query, self.unknowns)
# LOG.info("interpolates:\n%s", "\n".join(str(x) for x in rules))
if len(unknown_table_possibilities):
rules = self.policy.instances(query, unknown_table_possibilities)
# LOG.info("instances:\n%s", "\n".join(str(x) for x in rules))
equalities, variables = self._to_lp(rules)
# LOG.info("LP rules: \n%s", "\n".join(str(x) for x in equalities))
# LOG.info("LP variables: %s", ", ".join(str(x) for x in variables))
return equalities, variables
def _to_lp(self, rules):
"""Compute an LP program equivalent to the given Datalog rules.
:param: rules: a list of Rule instances, all of which are ground
except for variables representing LP variables
"""
# TODO(thinrichs): need type analysis to ensure we differentiate
# hosts from guests within ceilometer:mem_consumption
act = nonrecursive.MultiModuleNonrecursiveRuleTheory()
for var_rewrite_rule in self.rewrites:
changes = act.insert(self.parse1(var_rewrite_rule))
assert(changes)
LOG.debug("action theory: %s", act.content_string())
act.set_tracer(self.policy.tracer)
definitions = {}
for rule in rules:
equalities, newrule = self._extract_lp_variable_equalities(
rule, act)
LOG.debug("equalities: %s", equalities)
LOG.debug("newrule: %s", newrule)
LOG.debug("newrule.body: %s", str(newrule.body))
head = self._lit_to_lp_variable(newrule.head)
LOG.debug("head: %s", str(head))
LOG.debug("newrule.body: %s", newrule.body)
body = []
for lit in newrule.body:
LOG.debug("processing %s", lit)
body.append(self._lit_to_lp_arithmetic(lit, equalities))
LOG.debug("new body: %s", ";".join(str(x) for x in body))
conjunction = self.lplang.makeAnd(*body)
LOG.debug("conjunct: %s", conjunction)
if head not in definitions:
definitions[head] = set([conjunction])
else:
definitions[head].add(conjunction)
equalities = [self.lplang.makeEqual(h, self.lplang.makeOr(*bodies))
for h, bodies in definitions.items()]
return equalities, definitions.keys()
def _extract_lp_variable_equalities(self, rule, rewrite_theory):
"""Extract values for LP variables and slightly modify rule.
:param: rule: an instance of Rule
:param: rewrite_theory: reference to a theory that contains rules
describing how tables correspond to LP variable inputs and
outputs.
Returns (i) dictionary mapping Datalog variable name (a string) to
the set of LP variables to which it is equal and (ii) a rewriting
of the rule that is the same as the original except some
elements have been removed from the body.
"""
newbody = []
varnames = {}
for lit in rule.body:
result = self._extract_lp_variable_equality_lit(
lit, rewrite_theory)
if result is None:
newbody.append(lit)
else:
datalogvar, lpvar = result
if datalogvar not in varnames:
varnames[datalogvar] = set([lpvar])
else:
varnames[datalogvar].add(lpvar)
return varnames, compile.Rule(rule.head, newbody)
def _extract_lp_variable_equality_lit(self, lit, rewrite_theory):
"""Identify datalog variable representing an LP-variable.
:param: lit: an instance of Literal
:param: rewrite_theory: reference to a theory that contains rules
describing how tables correspond to LP variable inputs and
outputs.
Returns None, signifying literal does not include any datalog
variable that maps to an LP variable, or (datalogvar, lpvar).
"""
if lit.is_builtin():
return
# LOG.info("_extract_lp_var_eq_lit %s", lit)
rewrites = rewrite_theory.abduce(lit, ['var', 'output'])
# LOG.info("lit rewriting: %s", ";".join(str(x) for x in rewrites))
if not rewrites:
return
assert(len(rewrites) == 1)
varlit = next(lit for lit in rewrites[0].body
if lit.table.table == 'var')
# LOG.info("varlit: %s", varlit)
lpvar = self._varlit_to_lp_variable(varlit)
outlit = next(lit for lit in rewrites[0].body
if lit.table.table == 'output')
outvar = outlit.arguments[0].name
# LOG.info("lpvar: %s; outvar: %s", lpvar, outvar)
return outvar, lpvar
def _lit_to_lp_arithmetic(self, lit, varnames):
"""Translates Datalog literal into an LP arithmetic statement.
:param: lit is a Literal instance and may include Datalog variables
:param: varnames is a dictionary from datalog variables to a set of
LP variables
Returns an LP arithmetic statement.
Raises LpConversion if one of the Datalog variables appearing in
lit has other than 1 value in varnames.
Raises LpException if the arithmetic operator is not supported.
"""
# TODO(thinrichs) translate to infix and use standard operators
newargs = [self._term_to_lp_term(arg, varnames)
for arg in lit.arguments]
return self.lplang.makeArith(lit.tablename(), *newargs)
def _lit_to_lp_variable(self, lit):
"""Translates ground Datalog literal into an LP variable.
:param: lit is a Literal instance without variables
Returns an LP variable.
Raises LpConversionFailure if lit includes any Datalog variables.
"""
if any(arg.is_variable() for arg in lit.arguments):
raise self.lplang.LpConversionFailure(
"Tried to convert literal %s into LP variable but "
"found a Datalog variable" % lit)
args = [arg.name for arg in lit.arguments]
return self.lplang.makeVariable(lit.table.table, *args, type='bool')
def _term_to_lp_term(self, term, varnames):
"""Translates Datalog term into an LP variable or a constant.
:param: term is an instance of Term
:param: varnames is a dictionary from varname to a set of LP variables
Returns an LP variable, a number, or a string.
Raises LpConversionFailure if Datalog variable appears without a
corresponding LP variable or if multiple LP variables for a given
Datalog variable. (The latter condition could probably be handled
without raising an error, but this is good for now.)
"""
if term.is_variable():
if term.name not in varnames:
raise self.lplang.LpConversionFailure(
"Residual variable not assigned a value: %s" % term.name)
if len(varnames[term.name]) > 1:
raise self.lplang.LpConversionFailure(
"Variable name assigned to 2 different values: "
"%s assigned %s" % (term.name, varnames[term.name]))
return next(iter(varnames[term.name]))
return term.name
def _varlit_to_lp_variable(self, lit):
args = [x.name for x in lit.arguments[1:]]
return self.lplang.makeVariable(lit.arguments[0].name, *args)
#################
# Miscellaneous
def debug_mode(self):
tracer = base.Tracer()
tracer.trace('*')
self.policy.set_tracer(tracer)
def production_mode(self):
tracer = base.Tracer()
self.policy.set_tracer(tracer)
def parse(self, policy):
return compile.parse(policy, use_modules=False)
def parse1(self, policy):
return compile.parse1(policy, use_modules=False)
class NotEnoughData(exception.CongressException):
pass
class LpProblemUnsolvable(exception.CongressException):
pass
class VmMigrator(object):
"""Code for migrating VMs once we have a LP problem solution."""
@classmethod
def migrate(cls, guest, host):
try:
call = ["nova", "live-migration", str(guest), str(host)]
LOG.info("migrating: %s", call)
ret = subprocess.check_output(call, stderr=subprocess.STDOUT)
if ret == 0:
return True
except Exception:
pass
@classmethod
def check_status(cls, guest, host, status):
g = subprocess.check_output(["nova", "list"])
g = g.replace("-", "").replace("+", "").lstrip("[").rstrip("]")
elems = g.split('\n')
for elem in elems:
e = elem.split("|")
el = [x.strip() for x in e]
try:
if status == el[2]:
return True
except Exception:
pass
@classmethod
def do_migration(cls, guest, newh, oldh):
if (newh == oldh):
return True
try:
done = cls.migrate(guest, newh)
if done:
for i in range(3):
if cls.check_migrate(guest, newh, "ACTIVE"):
return True
else:
time.sleep(2)
except Exception:
pass
return False
# status: -1 if migration done
@classmethod
def getnext(cls, mapping, status):
hi = max(status.values())
if hi > 0:
i = list(status.values()).index(hi)
return list(status.keys())[i]
@classmethod
def do_migrations(cls, g_h_mapping):
max_attempts = 10
guest_mig_status = dict.fromkeys(g_h_mapping.keys(), max_attempts)
g = cls.getnext(g_h_mapping, guest_mig_status)
while g:
newh, oldh = g_h_mapping[g]
if cls.do_migration(g, newh, oldh):
guest_mig_status[g] = -1
else:
guest_mig_status[g] -= 1
g = cls.getnext(g_h_mapping, guest_mig_status)
return guest_mig_status