Significant re-working of the userdata handling and introduction of

vendordata.

Vendordata is a datasource provided userdata-like blob that is parsed
similarly to userdata, except at the user's pleasure.


cloudinit/config/cc_scripts_vendor.py: added vendor script cloud config

cloudinit/config/cc_vendor_scripts_per_boot.py: added vendor per boot
    cloud config

cloudinit/config/cc_vendor_scripts_per_instance.py: added vendor per
    instance vendor cloud config

cloudinit/config/cc_vendor_scripts_per_once.py: added per once vendor
    cloud config script

doc/examples/cloud-config-vendor-data.txt: documentation of vendor-data
    examples

doc/vendordata.txt: documentation of vendordata for vendors

(RENAMED) tests/unittests/test_userdata.py => tests/unittests/test_userdata.py
      TO: tests/unittests/test_userdata.py => tests/unittests/test_data.py:
    userdata test cases are now expanded to confirm userdata's precedence
    over vendor data.

bin/cloud-init: change instances of 'consume_userdata' to 'consume_data'

cloudinit/handlers/cloud_config.py: Added vendor script handling to default
    cloud-config modules

cloudinit/handlers/shell_script.py: Added ability to change the path key to
    support vendor provided 'vendor-scripts'. Defaults to 'script'.

cloudinit/helpers.py:
    - Changed ConfigMerger to include handling of vendordata.
    - Changed helpers to include paths for vendordata.

cloudinit/sources/__init__.py: Added functions for helping vendordata
    - get_vendordata_raw(): returns vendordata unprocessed
    - get_vendordata(): returns vendordata through userdata processor
    - has_vendordata(): indicator if vendordata is present
    - consume_vendordata(): datasource directive for indicating explicit
        user approval of vendordata consumption. Defaults to 'false'

cloudinit/stages.py: Re-jiggered for handling of vendordata
    - _initial_subdirs(): added vendor script definition
    - update(): added self._store_vendordata()
    - [ADDED] _store_vendordata(): store vendordata
    - _get_default_handlers(): modified to allow for filtering
        which handlers will run against vendordata
    - [ADDED] _do_handlers(): moved logic from consume_userdata
        to _do_handlers(). This allows _consume_vendordata() and
        _consume_userdata() to use the same code path.
    - [RENAMED] consume_userdata() to _consume_userdata()
    - [ADDED] _consume_vendordata() for handling vendordata
        - run after userdata to get user cloud-config
        - uses ConfigMerger to get the configuration from the
            instance perspective about whether or not to use
            vendordata
    - [ADDED] consume_data() to call _consume_{user,vendor}data

cloudinit/util.py:
    - [ADDED] get_nested_option_as_list() used by cc_vendor* for
        getting a nested value from a dict and returned as a list
    - runparts(): added 'exe_prefix' for running exe with a prefix,
        used by cc_vendor*

config/cloud.cfg: Added vendor script execution as default

tests/unittests/test_runs/test_merge_run.py: changed consume_userdata() to
    consume_data()

tests/unittests/test_runs/test_simple_run.py: changed consume_userdata() to
    consume_data()
This commit is contained in:
Ben Howard 2014-01-08 17:16:24 -07:00
parent 5eb522eee7
commit 161d6ab3eb
18 changed files with 725 additions and 45 deletions

View File

@ -261,8 +261,8 @@ def main_init(name, args):
# Attempt to consume the data per instance.
# This may run user-data handlers and/or perform
# url downloads and such as needed.
(ran, _results) = init.cloudify().run('consume_userdata',
init.consume_userdata,
(ran, _results) = init.cloudify().run('consume_data',
init.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
if not ran:
@ -271,7 +271,7 @@ def main_init(name, args):
#
# See: https://bugs.launchpad.net/bugs/819507 for a little
# reason behind this...
init.consume_userdata(PER_ALWAYS)
init.consume_data(PER_ALWAYS)
except Exception:
util.logexc(LOG, "Consuming user data failed!")
return 1

View File

@ -0,0 +1,44 @@
# vi: ts=4 expandtab
#
# Copyright (C) 2011-2014 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Ben Howard <ben.howard@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
from cloudinit import util
from cloudinit.settings import PER_INSTANCE
frequency = PER_INSTANCE
SCRIPT_SUBDIR = 'vendor'
def handle(name, _cfg, cloud, log, _args):
    """Run vendor-provided scripts dropped by the part handlers.

    Executes every executable file under the instance's
    scripts/vendor directory via util.runparts. Failures are logged
    and re-raised so the module is reported as failed.
    """
    # This is written to by the user data handlers
    # Ie, any custom shell scripts that come down
    # go here...
    runparts_path = os.path.join(cloud.get_ipath_cur(), 'scripts',
                                 SCRIPT_SUBDIR)
    try:
        util.runparts(runparts_path)
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # pass through untouched; the failure is still logged and re-raised.
        log.warn("Failed to run module %s (%s in %s)",
                 name, SCRIPT_SUBDIR, runparts_path)
        raise

View File

@ -0,0 +1,43 @@
# vi: ts=4 expandtab
#
# Copyright (C) 2011-2014 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Ben Howard <ben.howard@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
from cloudinit import util
from cloudinit.settings import PER_ALWAYS
frequency = PER_ALWAYS
SCRIPT_SUBDIR = 'per-boot'
def handle(name, cfg, cloud, log, _args):
    """Run vendor per-boot scripts (scripts/vendor/per-boot).

    An optional command prefix (e.g. a tracer such as ltrace) is read
    from cfg['vendor_data']['prefix'] and prepended to each script.
    """
    runparts_path = os.path.join(cloud.get_cpath(), 'scripts', 'vendor',
                                 SCRIPT_SUBDIR)
    vendor_prefix = util.get_nested_option_as_list(cfg, 'vendor_data',
                                                   'prefix')
    try:
        util.runparts(runparts_path, exe_prefix=vendor_prefix)
    except Exception:
        # Narrowed from a bare 'except:'; failures are logged and re-raised.
        log.warn("Failed to run module %s (%s in %s)",
                 name, SCRIPT_SUBDIR, runparts_path)
        raise

View File

@ -0,0 +1,43 @@
# vi: ts=4 expandtab
#
# Copyright (C) 2011-2014 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Ben Howard <ben.howard@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
from cloudinit import util
from cloudinit.settings import PER_INSTANCE
frequency = PER_INSTANCE
SCRIPT_SUBDIR = 'per-instance'
def handle(name, cfg, cloud, log, _args):
    """Run vendor per-instance scripts (scripts/vendor/per-instance).

    An optional command prefix is read from cfg['vendor_data']['prefix']
    and prepended to each script.
    """
    runparts_path = os.path.join(cloud.get_cpath(), 'scripts', 'vendor',
                                 SCRIPT_SUBDIR)
    vendor_prefix = util.get_nested_option_as_list(cfg, 'vendor_data',
                                                   'prefix')
    try:
        util.runparts(runparts_path, exe_prefix=vendor_prefix)
    except Exception:
        # Narrowed from a bare 'except:'; failures are logged and re-raised.
        log.warn("Failed to run module %s (%s in %s)",
                 name, SCRIPT_SUBDIR, runparts_path)
        raise

View File

@ -0,0 +1,43 @@
# vi: ts=4 expandtab
#
# Copyright (C) 2011-2014 Canonical Ltd.
# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
#
# Author: Scott Moser <scott.moser@canonical.com>
# Author: Ben Howard <ben.howard@canonical.com>
# Author: Juerg Haefliger <juerg.haefliger@hp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
from cloudinit import util
from cloudinit.settings import PER_ONCE
frequency = PER_ONCE
SCRIPT_SUBDIR = 'per-once'
def handle(name, cfg, cloud, log, _args):
    """Run vendor per-once scripts (scripts/vendor/per-once).

    An optional command prefix is read from cfg['vendor_data']['prefix']
    and prepended to each script.
    """
    runparts_path = os.path.join(cloud.get_cpath(), 'scripts', 'vendor',
                                 SCRIPT_SUBDIR)
    vendor_prefix = util.get_nested_option_as_list(cfg, 'vendor_data',
                                                   'prefix')
    try:
        util.runparts(runparts_path, exe_prefix=vendor_prefix)
    except Exception:
        # Narrowed from a bare 'except:'; failures are logged and re-raised.
        log.warn("Failed to run module %s (%s in %s)",
                 name, SCRIPT_SUBDIR, runparts_path)
        raise

View File

@ -66,6 +66,8 @@ class CloudConfigPartHandler(handlers.Handler):
handlers.Handler.__init__(self, PER_ALWAYS, version=3)
self.cloud_buf = None
self.cloud_fn = paths.get_ipath("cloud_config")
if 'cloud_config_path' in _kwargs:
self.cloud_fn = paths.get_ipath(_kwargs["cloud_config_path"])
self.file_names = []
def list_types(self):

View File

@ -36,6 +36,8 @@ class ShellScriptPartHandler(handlers.Handler):
def __init__(self, paths, **_kwargs):
handlers.Handler.__init__(self, PER_ALWAYS)
self.script_dir = paths.get_ipath_cur('scripts')
if 'script_path' in _kwargs:
self.script_dir = paths.get_ipath_cur(_kwargs['script_path'])
def list_types(self):
return [

View File

@ -200,11 +200,13 @@ class Runners(object):
class ConfigMerger(object):
def __init__(self, paths=None, datasource=None,
additional_fns=None, base_cfg=None):
additional_fns=None, base_cfg=None,
include_vendor=True):
self._paths = paths
self._ds = datasource
self._fns = additional_fns
self._base_cfg = base_cfg
self._include_vendor = include_vendor
# Created on first use
self._cfg = None
@ -237,13 +239,19 @@ class ConfigMerger(object):
# a configuration file to use when running...
if not self._paths:
return i_cfgs
cc_fn = self._paths.get_ipath_cur('cloud_config')
if cc_fn and os.path.isfile(cc_fn):
try:
i_cfgs.append(util.read_conf(cc_fn))
except:
util.logexc(LOG, 'Failed loading of cloud-config from %s',
cc_fn)
cc_paths = ['cloud_config']
if self._include_vendor:
cc_paths.append('vendor_cloud_config')
for cc_p in cc_paths:
cc_fn = self._paths.get_ipath_cur(cc_p)
if cc_fn and os.path.isfile(cc_fn):
try:
i_cfgs.append(util.read_conf(cc_fn))
except:
util.logexc(LOG, 'Failed loading of cloud-config from %s',
cc_fn)
return i_cfgs
def _read_cfg(self):
@ -331,13 +339,18 @@ class Paths(object):
self.lookups = {
"handlers": "handlers",
"scripts": "scripts",
"vendor_scripts": "scripts/vendor",
"sem": "sem",
"boothooks": "boothooks",
"userdata_raw": "user-data.txt",
"userdata": "user-data.txt.i",
"obj_pkl": "obj.pkl",
"cloud_config": "cloud-config.txt",
"vendor_cloud_config": "vendor-cloud-config.txt",
"data": "data",
"vendordata_raw": "vendor-data.txt",
"vendordata": "vendor-data.txt.i",
"mergedvendoruser": "vendor-user-data.txt",
}
# Set when a datasource becomes active
self.datasource = ds

View File

@ -53,6 +53,8 @@ class DataSource(object):
self.userdata = None
self.metadata = None
self.userdata_raw = None
self.vendordata = None
self.vendordata_raw = None
# find the datasource config name.
# remove 'DataSource' from classname on front, and remove 'Net' on end.
@ -77,9 +79,28 @@ class DataSource(object):
if self.userdata is None:
self.userdata = self.ud_proc.process(self.get_userdata_raw())
if apply_filter:
return self._filter_userdata(self.userdata)
return self._filter_xdata(self.userdata)
return self.userdata
def get_vendordata(self, apply_filter=False):
    # Lazily run the raw vendordata through the shared userdata
    # processor (vendordata follows the same MIME/part rules) and
    # cache the result on the instance.
    if self.vendordata is None:
        self.vendordata = self.ud_proc.process(self.get_vendordata_raw())
    if apply_filter:
        # Same launch-index filtering that is applied to userdata.
        return self._filter_xdata(self.vendordata)
    return self.vendordata
def has_vendordata(self):
    """Return True when the datasource supplied a raw vendordata blob."""
    # Collapsed from an explicit if/return-True/return-False chain.
    return self.vendordata_raw is not None
def consume_vendordata(self):
    """
    Whether vendordata may be consumed without an in-band user opt-in.

    Datasources override this to signal that the user has already
    approved vendordata consumption through some other channel.
    The default is False: consumption then requires an explicit
    'vendor_data' opt-in in the user's cloud-config.
    """
    return False
@property
def launch_index(self):
if not self.metadata:
@ -88,7 +109,7 @@ class DataSource(object):
return self.metadata['launch-index']
return None
def _filter_userdata(self, processed_ud):
def _filter_xdata(self, processed_ud):
filters = [
launch_index.Filter(util.safe_int(self.launch_index)),
]
@ -104,6 +125,9 @@ class DataSource(object):
def get_userdata_raw(self):
return self.userdata_raw
def get_vendordata_raw(self):
    # The unprocessed vendordata blob exactly as the datasource
    # provided it (may be None when no vendordata exists).
    return self.vendordata_raw
# the data sources' config_obj is a cloud-config formated
# object that came to it from ways other than cloud-config
# because cloud-config content would be handled elsewhere

View File

@ -26,7 +26,8 @@ import copy
import os
import sys
from cloudinit.settings import (PER_INSTANCE, FREQUENCIES, CLOUD_CONFIG)
from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES,
CLOUD_CONFIG)
from cloudinit import handlers
@ -123,6 +124,10 @@ class Init(object):
os.path.join(c_dir, 'scripts', 'per-instance'),
os.path.join(c_dir, 'scripts', 'per-once'),
os.path.join(c_dir, 'scripts', 'per-boot'),
os.path.join(c_dir, 'scripts', 'vendor'),
os.path.join(c_dir, 'scripts', 'vendor', 'per-boot'),
os.path.join(c_dir, 'scripts', 'vendor', 'per-instance'),
os.path.join(c_dir, 'scripts', 'vendor', 'per-once'),
os.path.join(c_dir, 'seed'),
os.path.join(c_dir, 'instances'),
os.path.join(c_dir, 'handlers'),
@ -319,6 +324,7 @@ class Init(object):
if not self._write_to_cache():
return
self._store_userdata()
self._store_vendordata()
def _store_userdata(self):
raw_ud = "%s" % (self.datasource.get_userdata_raw())
@ -326,21 +332,62 @@ class Init(object):
processed_ud = "%s" % (self.datasource.get_userdata())
util.write_file(self._get_ipath('userdata'), processed_ud, 0600)
def _default_userdata_handlers(self):
def _store_vendordata(self):
    # Mirrors _store_userdata(): persist both the raw and the processed
    # vendordata under the instance path, owner read/write only (0600).
    # NOTE(review): "%s" % ... stringifies an absent blob to the literal
    # "None" — same as _store_userdata; confirm this is intentional.
    raw_vd = "%s" % (self.datasource.get_vendordata_raw())
    util.write_file(self._get_ipath('vendordata_raw'), raw_vd, 0600)
    processed_vd = "%s" % (self.datasource.get_vendordata())
    util.write_file(self._get_ipath('vendordata'), processed_vd, 0600)
def _get_default_handlers(self, user_data=False, vendor_data=False,
                          excluded=None):
    """Build the default list of part-handler instances.

    Boothook and upstart handlers are always candidates; cloud-config
    and shell-script handlers are added for userdata and/or vendordata.
    For vendordata the cloud-config/script output paths are redirected
    to the vendor-specific locations. Handlers whose module name is in
    'excluded' are skipped.

    NOTE: the rendered diff interleaved the pre-change handler list and
    the old 'return def_handlers'/'consume_userdata' lines here; this is
    the reconstructed post-change body.
    """
    opts = {
        'paths': self.paths,
        'datasource': self.datasource,
    }

    def conditional_get(cls, mod):
        # Instantiate handler class 'mod' from module 'cls' unless the
        # module's basename is listed in 'excluded'; returns None when
        # the handler is filtered out.
        cls_name = cls.__name__.split('.')[-1]
        _mod = getattr(cls, mod)
        if not excluded:
            return _mod(**opts)

        if cls_name not in excluded:
            _mod = getattr(cls, mod)
            return _mod(**opts)

    # TODO(harlowja) Hmmm, should we dynamically import these??
    def_handlers = [
        conditional_get(bh_part, 'BootHookPartHandler'),
        conditional_get(up_part, 'UpstartJobPartHandler'),
    ]

    # Add in the shell script part handler
    if user_data:
        def_handlers.extend([
            conditional_get(cc_part, 'CloudConfigPartHandler'),
            conditional_get(ss_part, 'ShellScriptPartHandler')])

    # This changes the path for the vendor script execution
    if vendor_data:
        opts['script_path'] = "vendor_scripts"
        opts['cloud_config_path'] = "vendor_cloud_config"
        def_handlers.extend([
            conditional_get(cc_part, 'CloudConfigPartHandler'),
            conditional_get(ss_part, 'ShellScriptPartHandler')])

    # Excluded handlers come back as None; drop them.
    return [x for x in def_handlers if x is not None]
def _default_userdata_handlers(self):
    # Default handler set for walking userdata parts: boothook,
    # upstart, cloud-config and shell-script handlers, with output
    # directed at the user (non-vendor) paths.
    return self._get_default_handlers(user_data=True)
def _default_vendordata_handlers(self, excluded=None):
    # Default handler set for walking vendordata parts; output is
    # redirected to the vendor paths and any handler named in
    # 'excluded' (from the user's vendor_data 'no_run' list) is skipped.
    return self._get_default_handlers(vendor_data=True, excluded=excluded)
def _do_handlers(self, data_msg, c_handlers_list, frequency):
"""
Generalized handlers suitable for use with either vendordata
or userdata
"""
cdir = self.paths.get_cpath("handlers")
idir = self._get_ipath("handlers")
@ -352,12 +399,6 @@ class Init(object):
if d and d not in sys.path:
sys.path.insert(0, d)
# Ensure datasource fetched before activation (just incase)
user_data_msg = self.datasource.get_userdata(True)
# This keeps track of all the active handlers
c_handlers = helpers.ContentHandlers()
def register_handlers_in_dir(path):
# Attempts to register any handler modules under the given path.
if not path or not os.path.isdir(path):
@ -382,13 +423,16 @@ class Init(object):
util.logexc(LOG, "Failed to register handler from %s",
fname)
# This keeps track of all the active handlers
c_handlers = helpers.ContentHandlers()
# Add any handlers in the cloud-dir
register_handlers_in_dir(cdir)
# Register any other handlers that come from the default set. This
# is done after the cloud-dir handlers so that the cdir modules can
# take over the default user-data handler content-types.
for mod in self._default_userdata_handlers():
for mod in c_handlers_list:
types = c_handlers.register(mod, overwrite=False)
if types:
LOG.debug("Added default handler for %s from %s", types, mod)
@ -420,7 +464,7 @@ class Init(object):
# names...
'handlercount': 0,
}
handlers.walk(user_data_msg, handlers.walker_callback,
handlers.walk(data_msg, handlers.walker_callback,
data=part_data)
def finalize_handlers():
@ -442,6 +486,12 @@ class Init(object):
finally:
finalize_handlers()
def consume_data(self, frequency=PER_INSTANCE):
# Consume the userdata first, because we need want to let the part
# handlers run first (for merging stuff)
self._consume_userdata(frequency)
self._consume_vendordata(frequency)
# Perform post-consumption adjustments so that
# modules that run during the init stage reflect
# this consumed set.
@ -453,6 +503,82 @@ class Init(object):
# objects before the load of the userdata happened,
# this is expected.
def _consume_vendordata(self, frequency=PER_ALWAYS):
    """
    Consume the vendordata and run the part handlers on it.

    Vendordata only runs when either the datasource asserts the user
    opted in out-of-band (consume_vendordata()) or the user's merged
    cloud-config contains a 'vendor_data' directive with
    enabled != false. The user's 'no_run' list can exclude individual
    handlers.
    """
    if not self.datasource.has_vendordata():
        LOG.info("datasource did not provide vendor data")
        return

    # User-data should have been consumed first. If it has, then we can
    # read it and simply parse it. This means that the datasource can
    # define if the vendordata can be consumed too....i.e this method
    # gives us a lot of flexibility.
    _cc_merger = helpers.ConfigMerger(paths=self._paths,
                                      datasource=self.datasource,
                                      additional_fns=[],
                                      base_cfg=self.cfg,
                                      include_vendor=False)
    _cc = _cc_merger.cfg

    if not self.datasource.consume_vendordata():
        if not isinstance(_cc, dict):
            # Fixed log message: it previously read "does explicitly
            # allow", the inverse of what this branch means.
            LOG.info(("userdata does not explicitly allow vendordata "
                      "consumption"))
            return

        if 'vendor_data' not in _cc:
            # Fixed log message: missing space produced "theconf files".
            LOG.info(("no 'vendor_data' directive found in the "
                      "conf files. Skipping consumption of vendordata"))
            return

    # This allows for the datasource to signal explicit conditions when
    # the user has opted in via another channel.
    if self.datasource.consume_vendordata():
        LOG.info(("datasource has indicated that the user opted-in to "
                  "vendordata via another channel"))

    # NOTE(review): when consume_vendordata() is True but _cc is not a
    # dict, _cc.get below would raise; confirm ConfigMerger always
    # returns a dict in that path.
    vdc = _cc.get('vendor_data')
    no_handlers = None
    if isinstance(vdc, dict):
        enabled = vdc.get('enabled')
        no_handlers = vdc.get('no_run')

        if enabled is None:
            LOG.info("vendordata will not be consumed: user has not opted-in")
            return
        elif util.is_false(enabled):
            LOG.info("user has requested NO vendordata consumption")
            return

    LOG.info("vendor data will be consumed")

    # Ensure vendordata source fetched before activation (just incase)
    vendor_data_msg = self.datasource.get_vendordata(True)

    # This keeps track of all the active handlers, while excluding what the
    # users doesn't want run, i.e. boot_hook, cloud_config, shell_script
    c_handlers_list = self._default_vendordata_handlers(
        excluded=no_handlers)

    # Run the handlers
    self._do_handlers(vendor_data_msg, c_handlers_list, frequency)
def _consume_userdata(self, frequency=PER_INSTANCE):
    """
    Consume the userdata and run the part handlers.

    Fetches the (filtered) userdata MIME message from the datasource,
    builds the default userdata handler set and walks the message
    through _do_handlers at the given frequency.
    """
    # Ensure datasource fetched before activation (just incase)
    user_data_msg = self.datasource.get_userdata(True)

    # This keeps track of all the active handlers
    c_handlers_list = self._default_userdata_handlers()

    # Run the handlers
    self._do_handlers(user_data_msg, c_handlers_list, frequency)
class Modules(object):
def __init__(self, init, cfg_files=None):

View File

@ -88,7 +88,11 @@ class UserDataProcessor(object):
def process(self, blob):
    """Parse a blob (or a list of blobs) into one multipart MIME message.

    Each blob is converted to a message and its parts accumulated into
    a single MIMEMultipart result.

    NOTE: the rendered diff left the pre-change unconditional
    _process_msg(blob) call above the new list-aware branch, which would
    process a non-list blob twice; this is the reconstructed body.
    """
    accumulating_msg = MIMEMultipart()
    if isinstance(blob, list):
        for b in blob:
            self._process_msg(convert_string(b), accumulating_msg)
    else:
        self._process_msg(convert_string(blob), accumulating_msg)
    return accumulating_msg
def _process_msg(self, base_msg, append_msg):

View File

@ -606,7 +606,7 @@ def del_dir(path):
shutil.rmtree(path)
def runparts(dirp, skip_no_exist=True):
def runparts(dirp, skip_no_exist=True, exe_prefix=None):
if skip_no_exist and not os.path.isdir(dirp):
return
@ -617,7 +617,10 @@ def runparts(dirp, skip_no_exist=True):
if os.path.isfile(exe_path) and os.access(exe_path, os.X_OK):
attempted.append(exe_path)
try:
subp([exe_path], capture=False)
exe_cmd = exe_prefix
if isinstance(exe_prefix, list):
exe_cmd.extend(exe_path)
subp([exe_cmd], capture=False)
except ProcessExecutionError as e:
logexc(LOG, "Failed running %s [%s]", exe_path, e.exit_code)
failed.append(e)
@ -1847,3 +1850,26 @@ def expand_dotted_devname(dotted):
return toks
else:
return (dotted, None)
def get_nested_option_as_list(dct, first, second):
    """
    Return dct[first][second] coerced to a list.

    - list: returned unchanged
    - dict: returned as a list of (key, value) tuples
    - str: split on whitespace
    - anything else, or a missing key: None

    Raises TypeError when dct is not a dict.
    """
    if not isinstance(dct, dict):
        raise TypeError("get_nested_option_as_list only works with dicts")
    root = dct.get(first)
    if not isinstance(root, dict):
        return None

    token = root.get(second)
    if isinstance(token, list):
        return token
    elif isinstance(token, dict):
        # Bug fix: iterate the nested value 'token', not the outer 'dct'
        # (the original returned the top-level items instead).
        # .items() also works on both Python 2 and 3, unlike iteritems().
        return [(k, v) for (k, v) in token.items()]
    elif isinstance(token, str):
        return token.split()
    return None

View File

@ -64,6 +64,10 @@ cloud_config_modules:
# The modules that run in the 'final' stage
cloud_final_modules:
- rightscale_userdata
- vendor-scripts-per-once
- vendor-scripts-per-boot
- vendor-scripts-per-instance
- script-vendor
- scripts-per-once
- scripts-per-boot
- scripts-per-instance

View File

@ -0,0 +1,16 @@
#cloud-config
#
# This explains how to control vendordata via a cloud-config
#
# On select Datasources, vendors have a channel for the consumption
# of all supported user-data types via a special channel called
# vendordata. Users of the end system are given ultimate control.
#
vendor_data:
enabled: True
prefix: /usr/bin/ltrace
# enabled: whether it is enabled or not
# prefix: the command to run before any vendor scripts.
# Note: this is a fairly weak method of containment. It should
# be used to profile a script, not to prevent its run

93
doc/vendordata.txt Normal file
View File

@ -0,0 +1,93 @@
=== Overview ===
Vendordata is data provided by the entity that launches an instance.
The cloud provider makes this data available to the instance in one
way or another.
Vendordata follows the same rules as user-data, with the following
caveats:
1. Users have ultimate control over vendordata
2. By default it only runs on first boot
 3. Vendordata runs at the user's pleasure. If the use of
vendordata is required for the instance to run, then
vendordata should not be used.
4. Most vendor operations should be done either via script,
boot_hook or upstart job.
Vendors utilizing the vendordata channel are strongly advised to
use the #cloud-config-jsonp method, otherwise they risk that a
user can accidentally override choices.
Further, we strongly advise vendors to not 'be evil'. By evil, we
mean any action that could compromise a system. Since users trust
you, please take care to make sure that any vendordata is safe,
atomic, idempotent and does not put your users at risk.
cloud-init can read this input and act on it in different ways.
=== Input Formats ===
cloud-init will download and cache to filesystem any vendor-data that it
finds. However, certain types of vendor-data are handled specially.
* Gzip Compressed Content
content found to be gzip compressed will be uncompressed, and
these rules applied to the uncompressed data
* Mime Multi Part archive
This list of rules is applied to each part of this multi-part file
Using a mime-multi part file, the user can specify more than one
type of data. For example, both a user data script and a
cloud-config type could be specified.
* vendor-data Script
begins with: #! or Content-Type: text/x-shellscript
script will be executed at "rc.local-like" level during first boot.
rc.local-like means "very late in the boot sequence"
* Include File
begins with #include or Content-Type: text/x-include-url
This content is a "include" file. The file contains a list of
urls, one per line. Each of the URLs will be read, and their content
will be passed through this same set of rules. Ie, the content
read from the URL can be gzipped, mime-multi-part, or plain text
* Include File Once
begins with #include-once or Content-Type: text/x-include-once-url
This content is a "include" file. The file contains a list of
urls, one per line. Each of the URLs will be read, and their content
will be passed through this same set of rules. Ie, the content
read from the URL can be gzipped, mime-multi-part, or plain text
This file will just be downloaded only once per instance, and its
contents cached for subsequent boots. This allows you to pass in
one-time-use or expiring URLs.
* Cloud Config Data
begins with #cloud-config or Content-Type: text/cloud-config
This content is "cloud-config" data. See the examples for a
commented example of supported config formats.
* Upstart Job
begins with #upstart-job or Content-Type: text/upstart-job
Content is placed into a file in /etc/init, and will be consumed
by upstart as any other upstart job.
* Cloud Boothook
begins with #cloud-boothook or Content-Type: text/cloud-boothook
This content is "boothook" data. It is stored in a file under
/var/lib/cloud and then executed immediately.
This is the earliest "hook" available. Note, that there is no
mechanism provided for running only once. The boothook must take
care of this itself. It is provided with the instance id in the
environment variable "INSTANCE_ID". This could be made use of to
provide a 'once-per-instance'
=== Examples ===
There are examples in the examples subdirectory.
Additionally, the 'tools' directory contains 'write-mime-multipart',
which can be used to easily generate mime-multi-part files from a list
of input files. That data can then be given to an instance.
See 'write-mime-multipart --help' for usage.

View File

@ -13,6 +13,7 @@ from email.mime.multipart import MIMEMultipart
from cloudinit import handlers
from cloudinit import helpers as c_helpers
from cloudinit import log
from cloudinit.settings import (PER_INSTANCE)
from cloudinit import sources
from cloudinit import stages
from cloudinit import util
@ -24,10 +25,16 @@ from tests.unittests import helpers
class FakeDataSource(sources.DataSource):
def __init__(self, userdata):
def __init__(self, userdata=None, vendordata=None,
consume_vendor=False):
sources.DataSource.__init__(self, {}, None, None)
self.metadata = {'instance-id': INSTANCE_ID}
self.userdata_raw = userdata
self.vendordata_raw = vendordata
self._consume_vendor = consume_vendor
def consume_vendordata(self):
return self._consume_vendor
# FIXME: these tests shouldn't be checking log output??
@ -45,6 +52,11 @@ class TestConsumeUserData(helpers.FilesystemMockingTestCase):
if self._log_handler and self._log:
self._log.removeHandler(self._log_handler)
def _patchIn(self, root):
self.restore()
self.patchOS(root)
self.patchUtils(root)
def capture_log(self, lvl=logging.DEBUG):
log_file = StringIO.StringIO()
self._log_handler = logging.StreamHandler(log_file)
@ -68,13 +80,89 @@ class TestConsumeUserData(helpers.FilesystemMockingTestCase):
self.patchUtils(new_root)
self.patchOS(new_root)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
cc_contents = util.load_file(ci.paths.get_ipath("cloud_config"))
cc = util.load_yaml(cc_contents)
self.assertEquals(2, len(cc))
self.assertEquals('qux', cc['baz'])
self.assertEquals('qux2', cc['bar'])
def test_simple_jsonp_vendor_and_user(self):
# test that user-data wins over vendor
user_blob = '''
#cloud-config-jsonp
[
{ "op": "add", "path": "/baz", "value": "qux" },
{ "op": "add", "path": "/bar", "value": "qux2" },
{ "op": "add", "path": "/vendor_data", "value": {"enabled": "true"}}
]
'''
vendor_blob = '''
#cloud-config-jsonp
[
{ "op": "add", "path": "/baz", "value": "quxA" },
{ "op": "add", "path": "/bar", "value": "quxB" },
{ "op": "add", "path": "/foo", "value": "quxC" }
]
'''
new_root = self.makeDir()
self._patchIn(new_root)
initer = stages.Init()
initer.datasource = FakeDataSource(user_blob, vendordata=vendor_blob)
initer.read_cfg()
initer.initialize()
initer.fetch()
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mods = stages.Modules(initer)
(_which_ran, _failures) = mods.run_section('cloud_init_modules')
cfg = mods.cfg
self.assertIn('vendor_data', cfg)
self.assertEquals('qux', cfg['baz'])
self.assertEquals('qux2', cfg['bar'])
self.assertEquals('quxC', cfg['foo'])
def test_simple_jsonp_no_vendor_consumed(self):
# make sure that vendor data is not consumed
user_blob = '''
#cloud-config-jsonp
[
{ "op": "add", "path": "/baz", "value": "qux" },
{ "op": "add", "path": "/bar", "value": "qux2" }
]
'''
vendor_blob = '''
#cloud-config-jsonp
[
{ "op": "add", "path": "/baz", "value": "quxA" },
{ "op": "add", "path": "/bar", "value": "quxB" },
{ "op": "add", "path": "/foo", "value": "quxC" }
]
'''
new_root = self.makeDir()
self._patchIn(new_root)
initer = stages.Init()
initer.datasource = FakeDataSource(user_blob, vendordata=vendor_blob)
initer.read_cfg()
initer.initialize()
initer.fetch()
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mods = stages.Modules(initer)
(_which_ran, _failures) = mods.run_section('cloud_init_modules')
cfg = mods.cfg
self.assertEquals('qux', cfg['baz'])
self.assertEquals('qux2', cfg['bar'])
self.assertNotIn('foo', cfg)
def test_mixed_cloud_config(self):
blob_cc = '''
#cloud-config
@ -105,12 +193,121 @@ c: d
self.patchUtils(new_root)
self.patchOS(new_root)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
cc_contents = util.load_file(ci.paths.get_ipath("cloud_config"))
cc = util.load_yaml(cc_contents)
self.assertEquals(1, len(cc))
self.assertEquals('c', cc['a'])
def test_vendor_with_datasource_perm(self):
vendor_blob = '''
#cloud-config
a: b
name: vendor
run:
- x
- y
'''
new_root = self.makeDir()
self._patchIn(new_root)
initer = stages.Init()
initer.datasource = FakeDataSource('', vendordata=vendor_blob,
consume_vendor=True)
initer.read_cfg()
initer.initialize()
initer.fetch()
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mods = stages.Modules(initer)
(_which_ran, _failures) = mods.run_section('cloud_init_modules')
cfg = mods.cfg
self.assertEquals('b', cfg['a'])
self.assertEquals('vendor', cfg['name'])
self.assertIn('x', cfg['run'])
self.assertIn('y', cfg['run'])
def test_vendor_user_yaml_cloud_config(self):
vendor_blob = '''
#cloud-config
a: b
name: vendor
run:
- x
- y
'''
user_blob = '''
#cloud-config
a: c
vendor_data:
enabled: True
prefix: /bin/true
name: user
run:
- z
'''
new_root = self.makeDir()
self._patchIn(new_root)
initer = stages.Init()
initer.datasource = FakeDataSource(user_blob, vendordata=vendor_blob)
initer.read_cfg()
initer.initialize()
initer.fetch()
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mods = stages.Modules(initer)
(_which_ran, _failures) = mods.run_section('cloud_init_modules')
cfg = mods.cfg
self.assertIn('vendor_data', cfg)
self.assertEquals('c', cfg['a'])
self.assertEquals('user', cfg['name'])
self.assertNotIn('x', cfg['run'])
self.assertNotIn('y', cfg['run'])
self.assertIn('z', cfg['run'])
def test_vendordata_script(self):
vendor_blob = '''
#!/bin/bash
echo "test"
'''
user_blob = '''
#cloud-config
vendor_data:
enabled: True
prefix: /bin/true
'''
new_root = self.makeDir()
self._patchIn(new_root)
initer = stages.Init()
initer.datasource = FakeDataSource(user_blob, vendordata=vendor_blob)
initer.read_cfg()
initer.initialize()
initer.fetch()
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mods = stages.Modules(initer)
(_which_ran, _failures) = mods.run_section('cloud_init_modules')
cfg = mods.cfg
vendor_script = initer.paths.get_ipath_cur('vendor_scripts')
vendor_script_fns = "%s%s/part-001" % (new_root, vendor_script)
self.assertTrue(os.path.exists(vendor_script_fns))
def test_merging_cloud_config(self):
blob = '''
#cloud-config
@ -185,7 +382,7 @@ p: 1
log_file = self.capture_log(logging.WARNING)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
self.assertIn(
"Unhandled non-multipart (text/x-not-multipart) userdata:",
log_file.getvalue())
@ -221,7 +418,7 @@ c: 4
self.patchUtils(new_root)
self.patchOS(new_root)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
contents = util.load_file(ci.paths.get_ipath("cloud_config"))
contents = util.load_yaml(contents)
self.assertTrue(isinstance(contents, dict))
@ -244,7 +441,7 @@ c: 4
log_file = self.capture_log(logging.WARNING)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
self.assertIn(
"Unhandled unknown content-type (text/plain)",
log_file.getvalue())
@ -264,7 +461,7 @@ c: 4
log_file = self.capture_log(logging.WARNING)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
self.assertEqual("", log_file.getvalue())
def test_mime_text_x_shellscript(self):
@ -284,7 +481,7 @@ c: 4
log_file = self.capture_log(logging.WARNING)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
self.assertEqual("", log_file.getvalue())
def test_mime_text_plain_shell(self):
@ -304,5 +501,5 @@ c: 4
log_file = self.capture_log(logging.WARNING)
ci.fetch()
ci.consume_userdata()
ci.consume_data()
self.assertEqual("", log_file.getvalue())

View File

@ -35,8 +35,8 @@ class TestMergeRun(helpers.FilesystemMockingTestCase):
initer.datasource.userdata_raw = ud
_iid = initer.instancify()
initer.update()
initer.cloudify().run('consume_userdata',
initer.consume_userdata,
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)
mirrors = initer.distro.get_option('package_mirrors')

View File

@ -66,8 +66,8 @@ class TestSimpleRun(helpers.FilesystemMockingTestCase):
initer.update()
self.assertTrue(os.path.islink("var/lib/cloud/instance"))
initer.cloudify().run('consume_userdata',
initer.consume_userdata,
initer.cloudify().run('consume_data',
initer.consume_data,
args=[PER_INSTANCE],
freq=PER_INSTANCE)