Don't poll nova with compute agent

This change introduces a new method to get instance
metadata on the ceilometer-compute-agent.

This switches devstack/gate to libvirt_metadata for gnocchi

Change-Id: Ice1918659be49589a45d7a406044adc0a187aa27
This commit is contained in:
Mehdi Abaakouk 2016-11-15 11:24:46 +01:00 committed by Julien Danjou
parent 75539b2725
commit b692d3a6a6
9 changed files with 413 additions and 54 deletions

View File

@ -13,17 +13,44 @@
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
from lxml import etree
import operator
import cachetools
from novaclient import exceptions
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils
try:
import libvirt
except ImportError:
libvirt = None
from ceilometer.agent import plugin_base
from ceilometer.compute.virt.libvirt import utils as libvirt_utils
from ceilometer import nova_client
OPTS = [
cfg.BoolOpt('workload_partitioning',
default=False,
deprecated_for_removal=True,
help='Enable work-load partitioning, allowing multiple '
'compute agents to be run simultaneously.'),
'compute agents to be run simultaneously. '
'(replaced by instance_discovery_method)'),
cfg.StrOpt('instance_discovery_method',
default='naive',
choices=['naive', 'workload_partitioning', 'libvirt_metadata'],
help="Ceilometer offers many methods to discover the instance"
"running on a compute node: \n"
"* naive: poll nova to get all instances\n"
"* workload_partitioning: poll nova to get instances of "
"the compute\n"
"* libvirt_metadata: get instances from libvirt metadata "
" but without instance metadata (recommended for Gnocchi "
" backend"),
cfg.IntOpt('resource_update_interval',
default=0,
min=0,
@ -34,20 +61,154 @@ OPTS = [
"the instance list to poll will be updated based "
"on this option's interval. Measurements relating "
"to the instances will match intervals "
"defined in pipeline.")
"defined in pipeline. "),
]
LOG = log.getLogger(__name__)
class NovaLikeServer(object):
    """Duck-typed stand-in for a novaclient ``Server`` object.

    Every keyword argument passed to the constructor becomes an instance
    attribute, so code written against novaclient server objects keeps
    working with libvirt-sourced data.
    """

    def __init__(self, **kwargs):
        # Bulk-assign all keyword arguments as attributes.
        self.__dict__.update(kwargs)

    def __repr__(self):
        return '<NovaLikeServer: %s>' % getattr(self, 'name', 'unknown-name')
class InstanceDiscovery(plugin_base.DiscoveryBase):
method = None
def __init__(self, conf):
    """Set up instance discovery for the configured method.

    :param conf: ceilometer configuration object; reads
        ``compute.instance_discovery_method``,
        ``compute.workload_partitioning`` and
        ``compute.resource_update_interval``.
    """
    super(InstanceDiscovery, self).__init__(conf)
    if not self.method:
        self.method = conf.compute.instance_discovery_method

        # For backward compatibility: the deprecated boolean option maps
        # onto the new method name.
        if self.method == "naive" and conf.compute.workload_partitioning:
            self.method = "workload_partitioning"

    self.nova_cli = nova_client.Client(conf)
    # NOTE(review): the next two attributes are set again in the else
    # branch below — looks like leftover from an earlier revision of this
    # method; harmless but redundant. TODO confirm.
    self.last_run = None
    self.instances = {}
    self.expiration_time = conf.compute.resource_update_interval

    if self.method == "libvirt_metadata":
        # Lazy libvirt connection plus a bounded flavor-name -> id cache.
        self._connection = None
        # 4096 instances on a compute should be enough :)
        self._flavor_cache = cachetools.LRUCache(4096)
    else:
        # Nova-polling state: instance cache and last poll timestamp.
        self.instances = {}
        self.last_run = None
@property
def connection(self):
    """Read-only libvirt connection, opened lazily and cached."""
    if self._connection is None:
        self._connection = libvirt_utils.get_libvirt_connection(self.conf)
    return self._connection
def discover(self, manager, param=None):
    """Discover resources to monitor.

    Dispatches to libvirt-metadata polling or Nova polling depending on
    the configured ``instance_discovery_method``.

    :param manager: the agent manager driving the discovery.
    :param param: optional discovery parameter, forwarded to the
        selected polling implementation (previously it was silently
        dropped and ``None`` was always passed).
    """
    if self.method == "libvirt_metadata":
        return self.discover_libvirt_polling(manager, param=param)
    return self.discover_nova_polling(manager, param=param)
@staticmethod
def _safe_find_int(xml, path):
elem = xml.find("./%s" % path)
if elem is not None:
return int(elem.text)
return 0
@cachetools.cachedmethod(operator.attrgetter('_flavor_cache'))
def get_flavor_id(self, name):
    """Look up the Nova flavor id for *name* (LRU-cached per instance).

    Returns ``None`` when Nova knows no flavor by that name.
    """
    try:
        flavor = self.nova_cli.nova_client.flavors.find(name=name)
    except exceptions.NotFound:
        return None
    return flavor.id
@libvirt_utils.retry_on_disconnect
def discover_libvirt_polling(self, manager, param=None):
    """Discover instances from the local libvirt daemon.

    Builds :class:`NovaLikeServer` objects out of the Nova-provided
    metadata element of every libvirt domain on this host, so the agent
    does not have to poll the Nova API (except for the cached flavor-id
    lookup in :meth:`get_flavor_id`).
    """
    instances = []
    for domain in self.connection.listAllDomains():
        full_xml = etree.fromstring(domain.XMLDesc())
        os_type_xml = full_xml.find("./os/type")
        # Nova writes the instance details under its own metadata
        # namespace in the domain description.
        xml_string = domain.metadata(
            libvirt.VIR_DOMAIN_METADATA_ELEMENT,
            "http://openstack.org/xmlns/libvirt/nova/1.0")
        metadata_xml = etree.fromstring(xml_string)

        # TODO(sileht): We don't have the flavor ID here, so the Gnocchi
        # resource update will fail for compute samples (or put None?).
        # We currently poll nova to get the flavor ID, but storing the
        # flavor_id doesn't make sense because the flavor description
        # can change over time; we should store the detail of the
        # flavor. This is why nova doesn't put the id in the libvirt
        # metadata.
        flavor_xml = metadata_xml.find("./flavor")
        flavor = {
            "id": self.get_flavor_id(flavor_xml.attrib["name"]),
            "name": flavor_xml.attrib["name"],
            "vcpus": self._safe_find_int(flavor_xml, "vcpus"),
            "ram": self._safe_find_int(flavor_xml, "memory"),
            "disk": self._safe_find_int(flavor_xml, "disk"),
            "ephemeral": self._safe_find_int(flavor_xml, "ephemeral"),
            "swap": self._safe_find_int(flavor_xml, "swap"),
        }

        # Map the raw libvirt domain state to Nova-like vm_state/status.
        dom_state = domain.state()[0]
        vm_state = libvirt_utils.LIBVIRT_POWER_STATE.get(dom_state)
        status = libvirt_utils.LIBVIRT_STATUS.get(dom_state)

        user_id = metadata_xml.find("./owner/user").attrib["uuid"]
        project_id = metadata_xml.find("./owner/project").attrib["uuid"]

        # hostId is computed the same way Nova does it; see:
        # https://github.com/openstack/nova/blob/852f40fd0c6e9d8878212ff3120556668023f1c4/nova/api/openstack/compute/views/servers.py#L214-L220
        host_id = hashlib.sha224(
            (project_id + self.conf.host).encode('utf-8')).hexdigest()

        # The image description is partial, but Gnocchi only cares about
        # the id, so we are fine.
        image_xml = metadata_xml.find("./root[@type='image']")
        image = ({'id': image_xml.attrib['uuid']}
                 if image_xml is not None else None)

        instance_data = {
            "id": domain.UUIDString(),
            "name": metadata_xml.find("./name").text,
            "flavor": flavor,
            "image": image,
            "os_type": os_type_xml.text,
            "architecture": os_type_xml.attrib["arch"],
            "OS-EXT-SRV-ATTR:instance_name": domain.name(),
            "OS-EXT-SRV-ATTR:host": self.conf.host,
            "OS-EXT-STS:vm_state": vm_state,
            "tenant_id": project_id,
            "user_id": user_id,
            "hostId": host_id,
            "status": status,
            # NOTE(sileht): Other fields that Ceilometer tracks cannot
            # be read here; they are retrieved via notifications instead:
            "metadata": {},
            # "OS-EXT-STS:task_state"
            # 'reservation_id',
            # 'OS-EXT-AZ:availability_zone',
            # 'kernel_id',
            # 'ramdisk_id',
            # some image detail
        }
        LOG.debug("instance data: %s", instance_data)
        instances.append(NovaLikeServer(**instance_data))
    return instances
def discover_nova_polling(self, manager, param=None):
secs_from_last_update = 0
if self.last_run:
secs_from_last_update = timeutils.delta_seconds(
@ -80,7 +241,7 @@ class InstanceDiscovery(plugin_base.DiscoveryBase):
@property
def group_id(self):
    """Workload-partitioning group id.

    Returns the host name when the discovery method is
    ``workload_partitioning`` (so agents partition work per host),
    otherwise ``None``. The rendered source carried both the old
    config-based condition and this method-based one (diff residue);
    only the method-based condition is kept, matching the dispatch used
    elsewhere in this class.
    """
    if self.method == "workload_partitioning":
        return self.conf.host
    else:
        return None

View File

@ -15,55 +15,25 @@
"""Implementation of Inspector abstraction for libvirt."""
from lxml import etree
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import units
import six
try:
import libvirt
except ImportError:
libvirt = None
from ceilometer.compute.pollsters import util
from ceilometer.compute.virt import inspector as virt_inspector
from ceilometer.compute.virt.libvirt import utils as libvirt_utils
from ceilometer.i18n import _LW, _LE, _
libvirt = None
LOG = logging.getLogger(__name__)
OPTS = [
cfg.StrOpt('libvirt_type',
default='kvm',
choices=['kvm', 'lxc', 'qemu', 'uml', 'xen'],
help='Libvirt domain type.'),
cfg.StrOpt('libvirt_uri',
default='',
help='Override the default libvirt URI '
'(which is dependent on libvirt_type).'),
]
def retry_on_disconnect(function):
    """Decorator retrying a method once when the libvirt RPC link breaks.

    When the wrapped call fails with a libvirt SYSTEM/INTERNAL error
    raised by the remote/RPC layer, the connection cache is reset and
    the call is retried once; any other libvirt error propagates.
    """
    def decorator(self, *args, **kwargs):
        try:
            return function(self, *args, **kwargs)
        except ImportError:
            # NOTE(sileht): in case of libvirt failed to be imported
            raise
        except libvirt.libvirtError as e:
            if (e.get_error_code() in (libvirt.VIR_ERR_SYSTEM_ERROR,
                                       libvirt.VIR_ERR_INTERNAL_ERROR) and
                    e.get_error_domain() in (libvirt.VIR_FROM_REMOTE,
                                             libvirt.VIR_FROM_RPC)):
                LOG.debug('Connection to libvirt broken')
                # NOTE(review): `connection` is a read-only property on
                # the visible users of this decorator, so this assignment
                # likely raises AttributeError; `self._connection = None`
                # looks intended — TODO confirm.
                self.connection = None
                return function(self, *args, **kwargs)
            else:
                raise
    return decorator
class LibvirtInspector(virt_inspector.Inspector):
per_type_uris = dict(uml='uml:///system', xen='xen:///', lxc='lxc:///')
def __init__(self, conf):
super(LibvirtInspector, self).__init__(conf)
self._connection = None
@ -71,19 +41,10 @@ class LibvirtInspector(virt_inspector.Inspector):
@property
def connection(self):
if not self._connection:
global libvirt
if libvirt is None:
libvirt = __import__('libvirt')
uri = (self.conf.libvirt_uri or
self.per_type_uris.get(self.conf.libvirt_type,
'qemu:///system'))
LOG.debug('Connecting to libvirt: %s', uri)
self._connection = libvirt.openReadOnly(uri)
self._connection = libvirt_utils.get_libvirt_connection(self.conf)
return self._connection
@retry_on_disconnect
@libvirt_utils.retry_on_disconnect
def _lookup_by_uuid(self, instance):
instance_name = util.instance_name(instance)
try:

View File

@ -0,0 +1,104 @@
#
# Copyright 2016 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import functools

from oslo_config import cfg
from oslo_log import log as logging

try:
    import libvirt
except ImportError:
    libvirt = None
LOG = logging.getLogger(__name__)
OPTS = [
    cfg.StrOpt('libvirt_type',
               default='kvm',
               choices=['kvm', 'lxc', 'qemu', 'uml', 'xen'],
               help='Libvirt domain type.'),
    cfg.StrOpt('libvirt_uri',
               default='',
               help='Override the default libvirt URI '
                    '(which is dependent on libvirt_type).'),
]

# Default connection URI per libvirt_type when libvirt_uri is not set;
# any other type falls back to 'qemu:///system'.
LIBVIRT_PER_TYPE_URIS = dict(uml='uml:///system', xen='xen:///', lxc='lxc:///')

# Local copies of the libvirt domain-state constants, so this module can
# still be imported when the libvirt binding is not available.
VIR_DOMAIN_NOSTATE = 0
VIR_DOMAIN_RUNNING = 1
VIR_DOMAIN_BLOCKED = 2
VIR_DOMAIN_PAUSED = 3
VIR_DOMAIN_SHUTDOWN = 4
VIR_DOMAIN_SHUTOFF = 5
VIR_DOMAIN_CRASHED = 6
VIR_DOMAIN_PMSUSPENDED = 7

# Mapping from libvirt domain state to Nova power-state names
# (borrowed from Nova's own mapping).
LIBVIRT_POWER_STATE = {
    VIR_DOMAIN_NOSTATE: 'pending',
    VIR_DOMAIN_RUNNING: 'running',
    VIR_DOMAIN_BLOCKED: 'running',
    VIR_DOMAIN_PAUSED: 'paused',
    VIR_DOMAIN_SHUTDOWN: 'shutdown',
    VIR_DOMAIN_SHUTOFF: 'shutdown',
    VIR_DOMAIN_CRASHED: 'crashed',
    VIR_DOMAIN_PMSUSPENDED: 'suspended',
}

# NOTE(sileht): This is a best-effort guess of the Nova status; it should
# be right nearly all of the time, but can be wrong during transition
# states such as shelving or rescuing.
LIBVIRT_STATUS = {
    VIR_DOMAIN_NOSTATE: 'building',
    VIR_DOMAIN_RUNNING: 'active',
    VIR_DOMAIN_BLOCKED: 'active',
    VIR_DOMAIN_PAUSED: 'paused',
    VIR_DOMAIN_SHUTDOWN: 'stopped',
    VIR_DOMAIN_SHUTOFF: 'stopped',
    VIR_DOMAIN_CRASHED: 'error',
    VIR_DOMAIN_PMSUSPENDED: 'suspended',
}
def get_libvirt_connection(conf):
    """Open and return a new read-only libvirt connection.

    :param conf: configuration object providing ``libvirt_uri`` and
        ``libvirt_type``.
    :raises ImportError: when the python-libvirt binding is missing.
    """
    if libvirt is None:
        raise ImportError("python-libvirt module is missing")
    uri = conf.libvirt_uri
    if not uri:
        uri = LIBVIRT_PER_TYPE_URIS.get(conf.libvirt_type, 'qemu:///system')
    LOG.debug('Connecting to libvirt: %s', uri)
    return libvirt.openReadOnly(uri)
def retry_on_disconnect(function):
    """Decorator retrying a method once when the libvirt RPC link breaks.

    On a libvirt SYSTEM/INTERNAL error coming from the remote/RPC layer,
    the cached connection is invalidated and the wrapped method is called
    a second time (reconnecting lazily); any other libvirt error is
    re-raised unchanged.
    """
    @functools.wraps(function)  # preserve name/docstring of the wrapped method
    def decorator(self, *args, **kwargs):
        try:
            return function(self, *args, **kwargs)
        except ImportError:
            # NOTE(sileht): in case libvirt failed to be imported
            raise
        except libvirt.libvirtError as e:
            recoverable = (
                e.get_error_code() in (libvirt.VIR_ERR_SYSTEM_ERROR,
                                       libvirt.VIR_ERR_INTERNAL_ERROR)
                and e.get_error_domain() in (libvirt.VIR_FROM_REMOTE,
                                             libvirt.VIR_FROM_RPC))
            if not recoverable:
                raise
            LOG.debug('Connection to libvirt broken')
            # Reset the cached handle so the retry reconnects. The users
            # of this decorator expose `connection` as a read-only
            # property caching in `_connection`, so assigning
            # `self.connection` (as before) would raise AttributeError.
            self._connection = None
            return function(self, *args, **kwargs)
    return decorator

View File

@ -23,7 +23,7 @@ import ceilometer.api.controllers.v2.root
import ceilometer.collector
import ceilometer.compute.discovery
import ceilometer.compute.virt.inspector
import ceilometer.compute.virt.libvirt.inspector
import ceilometer.compute.virt.libvirt.utils
import ceilometer.compute.virt.vmware.inspector
import ceilometer.compute.virt.xenapi.inspector
import ceilometer.coordination
@ -79,7 +79,7 @@ def list_opts():
itertools.chain(ceilometer.agent.manager.OPTS,
ceilometer.api.app.OPTS,
ceilometer.compute.virt.inspector.OPTS,
ceilometer.compute.virt.libvirt.inspector.OPTS,
ceilometer.compute.virt.libvirt.utils.OPTS,
ceilometer.dispatcher.OPTS,
ceilometer.ipmi.notifications.ironic.OPTS,
ceilometer.middleware.OPTS,

View File

@ -18,9 +18,69 @@ from oslo_config import fixture as fixture_config
from oslotest import mockpatch
from ceilometer.compute import discovery
from ceilometer.compute.pollsters import util
from ceilometer.compute.virt.libvirt import utils
import ceilometer.tests.base as base
LIBVIRT_METADATA_XML = """
<instance>
<package version="14.0.0"/>
<name>test.dom.com</name>
<creationTime>2016-11-16 07:35:06</creationTime>
<flavor name="m1.tiny">
<memory>512</memory>
<disk>1</disk>
<swap>0</swap>
<ephemeral>0</ephemeral>
<vcpus>1</vcpus>
</flavor>
<owner>
<user uuid="a1f4684e58bd4c88aefd2ecb0783b497">admin</user>
<project uuid="d99c829753f64057bc0f2030da309943">admin</project>
</owner>
<root type="image" uuid="bdaf114a-35e9-4163-accd-226d5944bf11"/>
</instance>
"""
LIBVIRT_DESC_XML = """
<domain type='kvm' id='1'>
<name>instance-00000001</name>
<uuid>a75c2fa5-6c03-45a8-bbf7-b993cfcdec27</uuid>
<os>
<type arch='x86_64' machine='pc-i440fx-xenial'>hvm</type>
<kernel>/opt/stack/data/nova/instances/a75c2fa5-6c03-45a8-bbf7-b993cfcdec27/kernel</kernel>
<initrd>/opt/stack/data/nova/instances/a75c2fa5-6c03-45a8-bbf7-b993cfcdec27/ramdisk</initrd>
<cmdline>root=/dev/vda console=tty0 console=ttyS0</cmdline>
<boot dev='hd'/>
<smbios mode='sysinfo'/>
</os>
</domain>
"""
class FakeDomain(object):
    """Stub libvirt domain backed by the canned XML fixtures above."""

    _NAME = "instance-00000001"
    _UUID = "a75c2fa5-6c03-45a8-bbf7-b993cfcdec27"

    def state(self):
        # [state, reason]: VIR_DOMAIN_RUNNING (1) with an arbitrary reason.
        return [1, 2]

    def name(self):
        return self._NAME

    def UUIDString(self):
        return self._UUID

    def XMLDesc(self):
        return LIBVIRT_DESC_XML

    def metadata(self, flags, url):
        return LIBVIRT_METADATA_XML
class FakeConn(object):
    """Stub libvirt connection that always reports exactly one domain."""

    def listAllDomains(self):
        domains = [FakeDomain()]
        return domains
class TestDiscovery(base.BaseTestCase):
def setUp(self):
@ -32,6 +92,8 @@ class TestDiscovery(base.BaseTestCase):
self.instance.name)
setattr(self.instance, 'OS-EXT-STS:vm_state',
'active')
# FIXME(sileht): This is wrong, this should be a uuid
# The internal id of nova can't be retrieved via API or notification
self.instance.id = 1
self.instance.flavor = {'name': 'm1.small', 'id': 2, 'vcpus': 1,
'ram': 512, 'disk': 20, 'ephemeral': 0}
@ -97,3 +159,47 @@ class TestDiscovery(base.BaseTestCase):
self.assertEqual(1, list(resources)[0].id)
self.client.instance_get_all_by_host.assert_called_once_with(
self.CONF.host, "2016-01-01T00:00:00+00:00")
@mock.patch.object(utils, "libvirt")
@mock.patch.object(discovery, "libvirt")
def test_discovery_with_libvirt(self, libvirt, libvirt2):
self.CONF.set_override("instance_discovery_method",
"libvirt_metadata",
group="compute")
libvirt.VIR_DOMAIN_METADATA_ELEMENT = 2
libvirt2.openReadOnly.return_value = FakeConn()
dsc = discovery.InstanceDiscovery(self.CONF)
resources = dsc.discover(mock.MagicMock())
self.assertEqual(1, len(resources))
r = list(resources)[0]
s = util.make_sample_from_instance(self.CONF, r, "metric", "delta",
"carrot", 1)
self.assertEqual("a75c2fa5-6c03-45a8-bbf7-b993cfcdec27",
s.resource_id)
self.assertEqual("d99c829753f64057bc0f2030da309943",
s.project_id)
self.assertEqual("a1f4684e58bd4c88aefd2ecb0783b497",
s.user_id)
metadata = s.resource_metadata
self.assertEqual(1, metadata["vcpus"])
self.assertEqual(512, metadata["memory_mb"])
self.assertEqual(1, metadata["disk_gb"])
self.assertEqual(0, metadata["ephemeral_gb"])
self.assertEqual(1, metadata["root_gb"])
self.assertEqual("bdaf114a-35e9-4163-accd-226d5944bf11",
metadata["image_ref"])
self.assertEqual("test.dom.com", metadata["display_name"])
self.assertEqual("instance-00000001", metadata["name"])
self.assertEqual("a75c2fa5-6c03-45a8-bbf7-b993cfcdec27",
metadata["instance_id"])
self.assertEqual("m1.tiny", metadata["instance_type"])
self.assertEqual(
"4d0bc931ea7f0513da2efd9acb4cf3a273c64b7bcc544e15c070e662",
metadata["host"])
self.assertEqual(self.CONF.host, metadata["instance_host"])
self.assertEqual("active", metadata["status"])
self.assertEqual("running", metadata["state"])
self.assertEqual("hvm", metadata["os_type"])
self.assertEqual("x86_64", metadata["architecture"])

View File

@ -27,6 +27,7 @@ from oslotest import base
from ceilometer.compute.virt import inspector as virt_inspector
from ceilometer.compute.virt.libvirt import inspector as libvirt_inspector
from ceilometer.compute.virt.libvirt import utils
class TestLibvirtInspection(base.BaseTestCase):
@ -46,6 +47,7 @@ class TestLibvirtInspection(base.BaseTestCase):
libvirt_inspector.libvirt = mock.Mock()
libvirt_inspector.libvirt.VIR_DOMAIN_SHUTOFF = 5
libvirt_inspector.libvirt.libvirtError = self.fakeLibvirtError
utils.libvirt = libvirt_inspector.libvirt
self.domain = mock.Mock()
self.addCleanup(mock.patch.stopall)
@ -469,6 +471,7 @@ class TestLibvirtInspectionWithError(base.BaseTestCase):
mock.MagicMock(side_effect=Exception('dummy'))))
libvirt_inspector.libvirt = mock.Mock()
libvirt_inspector.libvirt.libvirtError = self.fakeLibvirtError
utils.libvirt = libvirt_inspector.libvirt
def test_inspect_unknown_error(self):
self.assertRaises(virt_inspector.InspectorException,

View File

@ -257,6 +257,7 @@ function _ceilometer_configure_storage_backend {
iniset $CEILOMETER_CONF database event_connection mongodb://localhost:27017/ceilometer
iniset $CEILOMETER_CONF database metering_connection mongodb://localhost:27017/ceilometer
elif [ "$CEILOMETER_BACKEND" = 'gnocchi' ] ; then
iniset $CEILOMETER_CONF compute instance_discovery_method libvirt_metadata
iniset $CEILOMETER_CONF DEFAULT meter_dispatchers gnocchi
iniset $CEILOMETER_CONF DEFAULT event_dispatchers gnocchi
# NOTE(gordc): set higher retry in case gnocchi is started after ceilometer on a slow machine

View File

@ -0,0 +1,22 @@
---
features:
- The Ceilometer compute agent can now retrieve some instance metadata from
the libvirt metadata API instead of polling the Nova API. Since Mitaka,
Nova fills this metadata with some information about the instance.
To enable this feature you should set [compute]/instance_discovery_method =
libvirt_metadata in the configuration file.
The only downside of this method is that user_metadata (and some other
instance attributes) are no longer part of the samples created by the
agent. But when Gnocchi is used as the backend, this is not an issue since
Gnocchi doesn't store resource metadata alongside the measurements. The
missing information is still retrieved through the Nova notifications
and will fully update the resource information in Gnocchi.
upgrade:
- If you are using Gnocchi as backend it's strongly
recommended to switch [compute]/instance_discovery_method to
libvirt_metadata. This will reduce the load on the Nova API
especially if you have many compute nodes.
deprecations:
- The [compute]/workload_partitioning = True is deprecated in favor
of [compute]/instance_discovery_method = workload_partitioning

View File

@ -2,6 +2,7 @@
# of appearance. Changing the order has an impact on the overall integration
# process, which may cause wedges in the gate later.
cachetools>=1.1.0 # MIT License
cotyledon>=1.3.0 # Apache-2.0
futures>=3.0;python_version=='2.7' or python_version=='2.6' # BSD
futurist>=0.11.0 # Apache-2.0