nova/nova/scheduler/host_manager.py

344 lines
13 KiB
Python

# Copyright (c) 2011 OpenStack, LLC.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Manage hosts in the current zone.
"""
import datetime
import UserDict
from nova import db
from nova import exception
from nova import flags
from nova import log as logging
from nova.openstack.common import cfg
from nova.scheduler import filters
from nova import utils
# Configuration options contributed by the host manager.  They are
# registered on the global FLAGS object below and read elsewhere in this
# module as FLAGS.<option name>.
host_manager_opts = [
    # Disk held back from every host before free space is computed.
    cfg.IntOpt('reserved_host_disk_mb',
               default=0,
               help='Amount of disk in MB to reserve for host/dom0'),
    # Memory held back from every host before free RAM is computed.
    cfg.IntOpt('reserved_host_memory_mb',
               default=512,
               help='Amount of memory in MB to reserve for host/dom0'),
    # Filter classes the scheduler is allowed to load.
    cfg.MultiStrOpt('scheduler_available_filters',
                    default=['nova.scheduler.filters.standard_filters'],
                    help='Filter classes available to the scheduler which may '
                         'be specified more than once. An entry of '
                         '"nova.scheduler.filters.standard_filters" '
                         'maps to all filters included with nova.'),
    # Filter class names applied when a request does not name its own.
    cfg.ListOpt('scheduler_default_filters',
                default=[
                    'AvailabilityZoneFilter',
                    'RamFilter',
                    'ComputeFilter'
                ],
                help='Which filter class names to use for filtering hosts '
                     'when not specified in the request.'),
]

FLAGS = flags.FLAGS
FLAGS.register_opts(host_manager_opts)

LOG = logging.getLogger(__name__)
class ReadOnlyDict(UserDict.IterableUserDict):
    """A dict-like container that rejects item-level modification.

    The mapping is filled from ``source`` (a plain ``dict`` or a
    ``UserDict`` instance).  Item assignment, item deletion, ``clear()``,
    ``pop()`` and ``popitem()`` all raise ``TypeError``.

    The contents are a shallow copy of ``source``: keys added to or removed
    from the original mapping afterwards do not show through here.  The
    values themselves are not copied.
    """

    def __init__(self, source=None):
        """Fill the mapping from ``source``; ``None`` yields an empty one."""
        self.data = {}
        self.update(source)

    def __setitem__(self, key, item):
        raise TypeError("ReadOnlyDict does not support item assignment")

    def __delitem__(self, key):
        raise TypeError("ReadOnlyDict does not support item deletion")

    def clear(self):
        raise TypeError("ReadOnlyDict does not support clear()")

    def pop(self, key, *args):
        raise TypeError("ReadOnlyDict does not support pop()")

    def popitem(self):
        raise TypeError("ReadOnlyDict does not support popitem()")

    def update(self, source=None):
        """Replace the whole contents with a shallow copy of ``source``.

        :param source: ``None`` (leaves the contents untouched), a ``dict``
                       or a ``UserDict`` instance.
        :raises TypeError: if ``source`` is any other kind of object.
        """
        if source is None:
            return
        if isinstance(source, UserDict.UserDict):
            source = source.data
        elif not isinstance(source, dict):
            raise TypeError("ReadOnlyDict source must be a dict or UserDict, "
                            "not %s" % type(source).__name__)
        # Copy rather than alias: holding the caller's dict would let any
        # later change to it alter this supposedly read-only mapping.
        self.data = source.copy()
class HostState(object):
    """Scheduler-side record of a single host.

    Combines read-only views of the host's reported capabilities and its
    service record with mutable counters (free RAM, free disk, vCPUs) that
    are adjusted as resources are virtually "consumed".
    """

    def __init__(self, host, topic, capabilities=None, service=None):
        """Create the record for ``host``.

        :param host: host name.
        :param topic: service topic; only the capabilities stored under
                      this key of ``capabilities`` are retained.
        :param capabilities: mapping of topic -> capability dict, or None.
        :param service: dict describing the service, or None.
        """
        self.host = host
        self.topic = topic

        # Read-only capability dicts
        caps_by_topic = {} if capabilities is None else capabilities
        self.capabilities = ReadOnlyDict(caps_by_topic.get(topic, None))
        service_info = {} if service is None else service
        self.service = ReadOnlyDict(service_info)

        # Mutable available resources.
        # These will change as resources are virtually "consumed".
        self.free_ram_mb = 0
        self.free_disk_mb = 0
        self.vcpus_total = 0
        self.vcpus_used = 0

    def update_from_compute_node(self, compute):
        """Reset the free resource counters from a compute_node record.

        Reads ``local_gb``, ``memory_mb`` and ``vcpus`` from ``compute``
        and subtracts the reserved_host_* amounts when they are positive.
        """
        disk_mb = compute['local_gb'] * 1024
        ram_mb = compute['memory_mb']
        total_vcpus = compute['vcpus']

        reserved_disk_mb = FLAGS.reserved_host_disk_mb
        if reserved_disk_mb > 0:
            disk_mb -= reserved_disk_mb
        reserved_ram_mb = FLAGS.reserved_host_memory_mb
        if reserved_ram_mb > 0:
            ram_mb -= reserved_ram_mb

        self.free_ram_mb = ram_mb
        self.free_disk_mb = disk_mb
        self.vcpus_total = total_vcpus

    def consume_from_instance(self, instance):
        """Subtract the resources used by ``instance`` from this host.

        Reads ``root_gb``, ``ephemeral_gb``, ``memory_mb`` and ``vcpus``
        from ``instance``.
        """
        # Look up every value first so a missing key leaves the counters
        # untouched.
        used_disk_mb = (instance['root_gb'] + instance['ephemeral_gb']) * 1024
        used_ram_mb = instance['memory_mb']
        used_vcpus = instance['vcpus']

        self.free_ram_mb -= used_ram_mb
        self.free_disk_mb -= used_disk_mb
        self.vcpus_used += used_vcpus

    def passes_filters(self, filter_fns, filter_properties):
        """Return True if this host is acceptable for the request.

        Hosts named in ``ignore_hosts`` always fail.  When ``force_hosts``
        is non-empty, membership in it alone decides the result and the
        filter functions are not consulted.  Otherwise every function in
        ``filter_fns`` must return a true value.
        """
        host_info = {'host': self.host}

        if self.host in filter_properties.get('ignore_hosts', []):
            LOG.debug(_('Host filter fails for ignored host %(host)s'),
                      host_info)
            return False

        forced = filter_properties.get('force_hosts', [])
        if forced:
            is_forced = self.host in forced
            if not is_forced:
                LOG.debug(_('Host filter fails for non-forced host %(host)s'),
                          host_info)
            return is_forced

        for check in filter_fns:
            if check(self, filter_properties):
                continue
            LOG.debug(_('Host filter function %(func)s failed for '
                        '%(host)s'),
                      {'func': repr(check),
                       'host': self.host})
            return False

        LOG.debug(_('Host filter passes for %(host)s'), host_info)
        return True

    def __repr__(self):
        summary = (self.host, self.free_ram_mb, self.free_disk_mb)
        return "host '%s': free_ram_mb:%s free_disk_mb:%s" % summary
class HostManager(object):
    """Base HostManager class.

    Keeps the capabilities most recently reported by each service on each
    host, selects host filter functions, and builds ``HostState`` objects
    for the scheduler.
    """

    # Can be overridden in a subclass
    host_state_cls = HostState

    def __init__(self):
        # { <host> : { <service> : { cap k : v }}}
        self.service_states = {}
        self.filter_classes = filters.get_filter_classes(
                FLAGS.scheduler_available_filters)

    def _choose_host_filters(self, filters):
        """Resolve filter class names to their ``host_passes`` functions.

        Since the caller may specify which filters to use we need
        to have an authoritative list of what is permissible. This
        function checks the filter names against a predefined set
        of acceptable filters.

        :param filters: a filter class name, a list/tuple of names, or
                        None to use FLAGS.scheduler_default_filters.
        :returns: list of bound ``host_passes`` callables.
        :raises SchedulerHostFilterNotFound: if any name matches none of
                the available filter classes.
        """
        if filters is None:
            filters = FLAGS.scheduler_default_filters
        if not isinstance(filters, (list, tuple)):
            filters = [filters]
        good_filters = []
        bad_filters = []
        for filter_name in filters:
            for cls in self.filter_classes:
                if cls.__name__ != filter_name:
                    continue
                # Get the filter function.  A matching class without a
                # host_passes attribute is accepted but contributes nothing.
                filter_func = getattr(cls(), 'host_passes', None)
                if filter_func:
                    good_filters.append(filter_func)
                break
            else:
                # No available class carries this name.
                bad_filters.append(filter_name)
        if bad_filters:
            msg = ", ".join(bad_filters)
            raise exception.SchedulerHostFilterNotFound(filter_name=msg)
        return good_filters

    def filter_hosts(self, hosts, filter_properties, filters=None):
        """Filter hosts and return only ones passing all filters"""
        filter_fns = self._choose_host_filters(filters)
        return [host for host in hosts
                if host.passes_filters(filter_fns, filter_properties)]

    def get_host_list(self):
        """Returns a list of dicts for each host that the Zone Manager
        knows about. Each dict contains the host_name and the service
        for that host.
        """
        ret = []
        for host in self.service_states:
            for svc in self.service_states[host]:
                ret.append({"service": svc, "host_name": host})
        return ret

    def get_service_capabilities(self):
        """Roll up all the individual host info to generic 'service'
        capabilities. Each capability is aggregated into
        <cap>_min and <cap>_max values.

        Disabled services are skipped.  Services whose capabilities are
        stale are skipped too, and are removed from ``service_states``
        before returning.

        :returns: dict of { "<service>_<cap>" : (min, max) }
        """
        hosts_dict = self.service_states

        # TODO(sandy) - be smarter about fabricating this structure.
        # But it's likely to change once we understand what the Best-Match
        # code will need better.
        combined = {}  # { <service>_<cap> : (min, max), ... }
        stale_host_services = {}  # { host1 : [svc1, svc2], host2 :[svc1]}
        for host, host_dict in hosts_dict.iteritems():
            for service_name, service_dict in host_dict.iteritems():
                if not service_dict.get("enabled", True):
                    # Service is disabled; do not include it
                    continue

                # Check if the service capabilities became stale
                if self.host_service_caps_stale(host, service_name):
                    stale_host_services.setdefault(host, []).append(
                            service_name)
                    continue
                for cap, value in service_dict.iteritems():
                    if cap == "timestamp":  # Timestamp is not needed
                        continue
                    key = "%s_%s" % (service_name, cap)
                    min_value, max_value = combined.get(key, (value, value))
                    min_value = min(min_value, value)
                    max_value = max(max_value, value)
                    combined[key] = (min_value, max_value)

        # Delete the expired host services.  Done after the loops so that
        # service_states is not modified while it is being iterated.
        self.delete_expired_host_services(stale_host_services)
        return combined

    def update_service_capabilities(self, service_name, host, capabilities):
        """Update the per-service capabilities based on this notification."""
        # NOTE: the message is formatted from locals(), so the parameter
        # names service_name and host must stay as they are.
        LOG.debug(_("Received %(service_name)s service update from "
                    "%(host)s.") % locals())
        service_caps = self.service_states.get(host, {})
        # Copy the capabilities, so we don't modify the original dict
        capab_copy = dict(capabilities)
        capab_copy["timestamp"] = utils.utcnow()  # Reported time
        service_caps[service_name] = capab_copy
        self.service_states[host] = service_caps

    def host_service_caps_stale(self, host, service):
        """Check if host service capabilities are not recent enough.

        Capabilities count as stale once their timestamp is more than
        three times FLAGS.periodic_interval seconds old.
        """
        allowed_time_diff = FLAGS.periodic_interval * 3
        caps = self.service_states[host][service]
        age = utils.utcnow() - caps["timestamp"]
        return age > datetime.timedelta(seconds=allowed_time_diff)

    def delete_expired_host_services(self, host_services_dict):
        """Delete all the inactive host services information.

        :param host_services_dict: { host : [service name, ...] } naming
                                   the entries to drop from service_states.
        """
        for host, services in host_services_dict.iteritems():
            service_caps = self.service_states[host]
            for service in services:
                del service_caps[service]
            if not service_caps:  # Delete host if no services
                del self.service_states[host]

    def get_all_host_states(self, context, topic):
        """Returns a dict of all the hosts the HostManager
        knows about. Also, each of the consumable resources in HostState
        are pre-populated and adjusted based on data in the db.

        For example:
        {'192.168.1.100': HostState(), ...}

        Note: this can be very slow with a lot of instances.
        InstanceType table isn't required since a copy is stored
        with the instance (in case the InstanceType changed since the
        instance was created).

        :raises NotImplementedError: if ``topic`` is not 'compute'.
        """
        if topic != 'compute':
            raise NotImplementedError(_(
                "host_manager only implemented for 'compute'"))

        host_state_map = {}

        # Make a compute node dict with the bare essential metrics.
        compute_nodes = db.compute_node_get_all(context)
        for compute in compute_nodes:
            service = compute['service']
            if not service:
                LOG.warn(_("No service for compute ID %s") % compute['id'])
                continue
            host = service['host']
            capabilities = self.service_states.get(host, None)
            host_state = self.host_state_cls(host, topic,
                    capabilities=capabilities,
                    service=dict(service.iteritems()))
            host_state.update_from_compute_node(compute)
            host_state_map[host] = host_state

        # "Consume" resources from the host the instance resides on.
        instances = db.instance_get_all(context)
        for instance in instances:
            host = instance['host']
            if not host:
                # Instance is not placed on any host yet.
                continue
            host_state = host_state_map.get(host, None)
            if not host_state:
                # Instance sits on a host without a compute node record.
                continue
            host_state.consume_from_instance(instance)
        return host_state_map