Add an API for loading a data source

The patch brings a couple of new concepts into cloudinit.
We have a general miniframework for defining plugins and discovering them,
with an actual implementation which uses the *pkgutil* builtin module.

Built atop of this framework, we have a new data source loader, found
in cloudinit.bases.DataSourceLoader. The loader operates on three concepts:

  - the *module iterator*, which is used to list modules from a specific
    location (in our case, from cloudinit.sources.__path__). The data
    source loader takes care to use only the modules that exports a
    given API.

  - the data source discovery API assumes that each data source module exports
    a function called `data_sources`, which should return a tuple of data source
    classes that the said module exports. The loader filters the modules
    that provides this API.

  - the data source loader uses a new concept called *search strategy*
    for discovering a potential data source. The search strategies
    are classes whose purpose is to select one or more data sources
    from a data source stream (any iterable).
    There are multiple ways to implement a strategy, the search
    can be either serial or parallel, there's no additional requirement
    as long as they return an iterable.
    Also, the strategies can be stacked together, for instance, having
    two strategies, one for selecting only the network data sources
    and another for selecting the available data sources from a list
    of potential data sources.

This patch also adds a new API that uses the DataSourceLoader
with a given module iterator and strategies for selecting one
data source that cloudinit can use.

Change-Id: I30f312191ce40e45445ed9e3fc8a3d4651903280
This commit is contained in:
Claudiu Popa 2015-08-05 16:39:12 +03:00
parent 735f0ac4ad
commit 42559c7f19
7 changed files with 478 additions and 0 deletions

View File

@ -0,0 +1,54 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
"""Various base classes and implementations for finding *plugins*."""
import abc
import pkgutil
import six
from cloudinit import logging
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class BaseModuleIterator(object):
"""Base class for describing a *module iterator*
A module iterator is a class that's capable of listing
modules or packages from a specific location, which are
already loaded.
"""
def __init__(self, search_paths):
self._search_paths = search_paths
@abc.abstractmethod
def list_modules(self):
"""List all the modules that this finder knows about."""
class PkgutilModuleIterator(BaseModuleIterator):
"""A class based on the *pkgutil* module for discovering modules."""
@staticmethod
def _find_module(finder, module):
"""Delegate to the *finder* for finding the given module."""
return finder.find_module(module).load_module(module)
def list_modules(self):
"""List all modules that this class knows about."""
for finder, name, _ in pkgutil.walk_packages(self._search_paths):
try:
module = self._find_module(finder, name)
except ImportError:
LOG.debug('Could not import the module %r using the '
'search path %r', name, finder.path)
continue
yield module

View File

@ -4,9 +4,18 @@
# vi: ts=4 expandtab
import abc
import itertools
import six
from cloudinit import exceptions
from cloudinit import logging
from cloudinit import sources
from cloudinit.sources import strategy
LOG = logging.getLogger(__name__)
class APIResponse(object):
"""Holds API response content
@ -35,6 +44,64 @@ class APIResponse(object):
return self.decoded_buffer
class DataSourceLoader(object):
"""Class for retrieving an available data source instance
:param names:
A list of possible data source names, from which the loader
should pick. This can be used to filter the data sources
that can be found from outside of cloudinit control.
:param module_iterator:
An instance of :class:`cloudinit.plugin_finder.BaseModuleIterator`,
which is used to find possible modules where the data sources
can be found.
:param strategies:
An iterator of search strategy classes, where each strategy is capable
of filtering the data sources that can be used by cloudinit.
Possible strategies includes serial data source search or
parallel data source or filtering data sources according to
some criteria (only network data sources)
"""
def __init__(self, names, module_iterator, strategies):
self._names = names
self._module_iterator = module_iterator
self._strategies = strategies
@staticmethod
def _implements_source_api(module):
"""Check if the given module implements the data source API."""
return hasattr(module, 'data_sources')
def _valid_modules(self):
"""Return all the modules that are *valid*
Valid modules are those that implements a particular API
for declaring the data sources it exports.
"""
modules = self._module_iterator.list_modules()
return filter(self._implements_source_api, modules)
def all_data_sources(self):
"""Get all the data source classes that this finder knows about."""
return itertools.chain.from_iterable(
module.data_sources()
for module in self._valid_modules())
def valid_data_sources(self):
"""Get the data sources that are valid for this run."""
data_sources = self.all_data_sources()
# Instantiate them before passing to the strategies.
data_sources = (data_source() for data_source in data_sources)
for strategy_instance in self._strategies:
data_sources = strategy_instance.search_data_sources(data_sources)
return data_sources
@six.add_metaclass(abc.ABCMeta)
class BaseDataSource(object):
"""Base class for the data sources."""
@ -106,3 +173,41 @@ class BaseDataSource(object):
def is_password_set(self):
"""Check if the password was already posted to the metadata service."""
def get_data_source(names, module_iterator, strategies=None):
"""Get an instance of any data source available.
:param names:
A list of possible data source names, from which the loader
should pick. This can be used to filter the data sources
that can be found from outside of cloudinit control.
:param module_iterator:
A subclass of :class:`cloudinit.plugin_finder.BaseModuleIterator`,
which is used to find possible modules where the data sources
can be found.
:param strategies:
An iterator of search strategy classes, where each strategy is capable
of filtering the data sources that can be used by cloudinit.
"""
if names:
default_strategies = [strategy.FilterNameStrategy(names)]
else:
default_strategies = []
if strategies is None:
strategies = []
strategy_instances = [strategy_cls() for strategy_cls in strategies]
strategies = default_strategies + strategy_instances
iterator = module_iterator(sources.__path__)
loader = DataSourceLoader(names, iterator, strategies)
valid_sources = loader.valid_data_sources()
data_source = next(valid_sources, None)
if not data_source:
raise exceptions.CloudInitError('No available data source found')
return data_source

View File

@ -125,3 +125,8 @@ class HttpOpenStackSource(baseopenstack.BaseOpenStackSource):
return False
else:
raise
def data_sources():
"""Get the data sources exported in this module."""
return (HttpOpenStackSource,)

View File

@ -0,0 +1,79 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import abc
import six
from cloudinit import logging
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class BaseSearchStrategy(object):
"""Declare search strategies for data sources
A *search strategy* represents a decoupled way of choosing
one or more data sources from a list of data sources.
Each strategy can be used interchangeably and they can
be composed. For instance, once can apply a filtering strategy
over a parallel search strategy, which looks for the available
data sources.
"""
@abc.abstractmethod
def search_data_sources(self, data_sources):
"""Search the possible data sources for this strategy
The method should filter the data sources that can be
considered *valid* for the given strategy.
:param data_sources:
An iterator of data source instances, where the lookup
will be done.
"""
@staticmethod
def is_datasource_available(data_source):
"""Check if the given *data_source* is considered *available*
A data source is considered available if it can be loaded,
but other strategies could implement their own behaviour.
"""
try:
if data_source.load():
return True
except Exception:
LOG.error("Failed to load data source %r", data_source)
return False
class FilterNameStrategy(BaseSearchStrategy):
"""A strategy for filtering data sources by name
:param names:
A list of strings, where each string is a name for a possible
data source. Only the data sources that are in this list will
be loaded and filtered.
"""
def __init__(self, names=None):
self._names = names
super(FilterNameStrategy, self).__init__()
def search_data_sources(self, data_sources):
return (source for source in data_sources
if source.__class__.__name__ in self._names)
class SerialSearchStrategy(BaseSearchStrategy):
"""A strategy that chooses a data source in serial."""
def search_data_sources(self, data_sources):
for data_source in data_sources:
if self.is_datasource_available(data_source):
yield data_source

View File

@ -0,0 +1,115 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import functools
import string
import types
from cloudinit import exceptions
from cloudinit import plugin_finder
from cloudinit.sources import base
from cloudinit.sources import strategy
from cloudinit import tests
class TestDataSourceDiscovery(tests.TestCase):
def setUp(self):
super(TestDataSourceDiscovery, self).setUp()
self._modules = None
@property
def modules(self):
if self._modules:
return self._modules
class Module(types.ModuleType):
def data_sources(self):
return (self, )
def __call__(self):
return self
@property
def __class__(self):
return self
modules = self._modules = list(map(Module, string.ascii_letters))
return modules
@property
def module_iterator(self):
modules = self.modules
class ModuleIterator(plugin_finder.BaseModuleIterator):
def list_modules(self):
return modules + [None, "", 42]
return ModuleIterator(None)
def test_loader_api(self):
# Test that the API of DataSourceLoader is sane
loader = base.DataSourceLoader(
names=[], module_iterator=self.module_iterator,
strategies=[])
all_data_sources = list(loader.all_data_sources())
valid_data_sources = list(loader.valid_data_sources())
self.assertEqual(all_data_sources, self.modules)
self.assertEqual(valid_data_sources, self.modules)
def test_loader_strategies(self):
class OrdStrategy(strategy.BaseSearchStrategy):
def search_data_sources(self, data_sources):
return filter(lambda source: ord(source.__name__) < 100,
data_sources)
class NameStrategy(strategy.BaseSearchStrategy):
def search_data_sources(self, data_sources):
return (source for source in data_sources
if source.__name__ in ('a', 'b', 'c'))
loader = base.DataSourceLoader(
names=[], module_iterator=self.module_iterator,
strategies=(OrdStrategy(), NameStrategy(), ))
valid_data_sources = list(loader.valid_data_sources())
self.assertEqual(len(valid_data_sources), 3)
self.assertEqual([source.__name__ for source in valid_data_sources],
['a', 'b', 'c'])
def test_get_data_source_filtered_by_name(self):
source = base.get_data_source(
names=['a', 'c'],
module_iterator=self.module_iterator.__class__)
self.assertEqual(source.__name__, 'a')
def test_get_data_source_multiple_strategies(self):
class ReversedStrategy(strategy.BaseSearchStrategy):
def search_data_sources(self, data_sources):
return reversed(list(data_sources))
source = base.get_data_source(
names=['a', 'b', 'c'],
module_iterator=self.module_iterator.__class__,
strategies=(ReversedStrategy, ))
self.assertEqual(source.__name__, 'c')
def test_get_data_source_no_data_source(self):
get_data_source = functools.partial(
base.get_data_source,
names=['totallymissing'],
module_iterator=self.module_iterator.__class__)
exc = self.assertRaises(exceptions.CloudInitError,
get_data_source)
self.assertEqual(str(exc), 'No available data source found')
def test_get_data_source_no_name_filtering(self):
source = base.get_data_source(
names=[], module_iterator=self.module_iterator.__class__)
self.assertEqual(source.__name__, 'a')

View File

@ -0,0 +1,65 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
from cloudinit.sources import strategy
from cloudinit import tests
from cloudinit.tests.util import mock
class TestStrategy(tests.TestCase):
def test_custom_strategy(self):
class CustomStrategy(strategy.BaseSearchStrategy):
def search_data_sources(self, data_sources):
# Return them in reverse order
return list(reversed(data_sources))
data_sources = [mock.sentinel.first, mock.sentinel.second]
instance = CustomStrategy()
sources = instance.search_data_sources(data_sources)
self.assertEqual(sources, [mock.sentinel.second, mock.sentinel.first])
def test_is_datasource_available(self):
class CustomStrategy(strategy.BaseSearchStrategy):
def search_data_sources(self, _):
pass
instance = CustomStrategy()
good_source = mock.Mock()
good_source.load.return_value = True
bad_source = mock.Mock()
bad_source.load.return_value = False
self.assertTrue(instance.is_datasource_available(good_source))
self.assertFalse(instance.is_datasource_available(bad_source))
def test_filter_name_strategy(self):
names = ['first', 'second', 'third']
full_names = names + ['fourth', 'fifth']
sources = [type(name, (object, ), {})() for name in full_names]
instance = strategy.FilterNameStrategy(names)
sources = list(instance.search_data_sources(sources))
self.assertEqual(len(sources), 3)
self.assertEqual([source.__class__.__name__ for source in sources],
names)
def test_serial_search_strategy(self):
def is_available(self, data_source):
return data_source in available_sources
sources = [mock.sentinel.first, mock.sentinel.second,
mock.sentinel.third, mock.sentinel.fourth]
available_sources = [mock.sentinel.second, mock.sentinel.fourth]
with mock.patch('cloudinit.sources.strategy.BaseSearchStrategy.'
'is_datasource_available', new=is_available):
instance = strategy.SerialSearchStrategy()
valid_sources = list(instance.search_data_sources(sources))
self.assertEqual(available_sources, valid_sources)

View File

@ -0,0 +1,55 @@
# Copyright 2015 Canonical Ltd.
# This file is part of cloud-init. See LICENCE file for license information.
#
# vi: ts=4 expandtab
import contextlib
import os
import shutil
import tempfile
from cloudinit import plugin_finder
from cloudinit.tests import TestCase
from cloudinit.tests import util
class TestPkgutilModuleIterator(TestCase):
@staticmethod
@contextlib.contextmanager
def _create_tmpdir():
tmpdir = tempfile.mkdtemp()
try:
yield tmpdir
finally:
shutil.rmtree(tmpdir)
@contextlib.contextmanager
def _create_package(self):
with self._create_tmpdir() as tmpdir:
path = os.path.join(tmpdir, 'good.py')
with open(path, 'w') as stream:
stream.write('name = 42')
# Make sure this fails.
bad = os.path.join(tmpdir, 'bad.py')
with open(bad, 'w') as stream:
stream.write('import missingmodule')
yield tmpdir
def test_pkgutil_module_iterator(self):
logging_format = ("Could not import the module 'bad' "
"using the search path %r")
with util.LogSnatcher('cloudinit.plugin_finder') as snatcher:
with self._create_package() as tmpdir:
expected_logging = logging_format % tmpdir
iterator = plugin_finder.PkgutilModuleIterator([tmpdir])
modules = list(iterator.list_modules())
self.assertEqual(len(modules), 1)
module = modules[0]
self.assertEqual(module.name, 42)
self.assertEqual(len(snatcher.output), 1)
self.assertEqual(snatcher.output[0], expected_logging)