Add Ansible-deploy driver

Requires Ironic API >= 1.22 when using heartbeats to Ironic API.

For better logging and proper deployment failure handling,
the Ironic version should be newer than '6.1.1.dev147'.

Tested with and targets Ansible >= 2.1

Experimental DIB element: I3f6c3baf0197d27f2d423f52611666ca186cd0a4
Experimental TinyCore-based bootstrap:
Ie39ce67dc93e7d53bf75937c7defacafad5fbfcf

Tested with DevStack, Bifrost and Mirantis OpenStack,
with both VMs and real IPMI hardware.
Tempest's baremetal_server_basic_ops test reliably passes on DevStack
with `pxe_ssh_ansible` driver.

More elaborate documentation will be proposed in subsequent changes.

Change-Id: Ib9317d365d7bc39aa00a9d9e1eadddd2f9b2947f
Related-bug: #1526308
Co-Authored-By: Yuriy Zveryanskyy <yzveryanskyy@mirantis.com>
changes/74/325974/28
Pavlo Shchelokovskyy 6 years ago
parent 4a6bb47529
commit ffd006e098
  1. 3
      devstack/enabled-drivers.txt
  2. 67
      ironic_staging_drivers/ansible/__init__.py
  3. 746
      ironic_staging_drivers/ansible/deploy.py
  4. 11
      ironic_staging_drivers/ansible/playbooks/add-ironic-nodes.yaml
  5. 24
      ironic_staging_drivers/ansible/playbooks/ansible.cfg
  6. 8
      ironic_staging_drivers/ansible/playbooks/callback_plugins/ironic_log.ini
  7. 122
      ironic_staging_drivers/ansible/playbooks/callback_plugins/ironic_log.py
  8. 12
      ironic_staging_drivers/ansible/playbooks/clean.yaml
  9. 19
      ironic_staging_drivers/ansible/playbooks/clean_steps.yaml
  10. 13
      ironic_staging_drivers/ansible/playbooks/deploy.yaml
  11. 1
      ironic_staging_drivers/ansible/playbooks/inventory
  12. 111
      ironic_staging_drivers/ansible/playbooks/library/parted.py
  13. 104
      ironic_staging_drivers/ansible/playbooks/library/stream_url.py
  14. 6
      ironic_staging_drivers/ansible/playbooks/roles/clean/tasks/main.yaml
  15. 6
      ironic_staging_drivers/ansible/playbooks/roles/clean/tasks/shred.yaml
  16. 4
      ironic_staging_drivers/ansible/playbooks/roles/clean/tasks/zap.yaml
  17. 54
      ironic_staging_drivers/ansible/playbooks/roles/deploy/files/install_grub.sh
  18. 115
      ironic_staging_drivers/ansible/playbooks/roles/deploy/files/partition_configdrive.sh
  19. 37
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/configdrive.yaml
  20. 11
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/download.yaml
  21. 3
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/grub.yaml
  22. 17
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/main.yaml
  23. 28
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/parted.yaml
  24. 7
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/root-device.yaml
  25. 19
      ironic_staging_drivers/ansible/playbooks/roles/deploy/tasks/write.yaml
  26. 6
      ironic_staging_drivers/ansible/playbooks/roles/shutdown/tasks/main.yaml
  27. 10
      ironic_staging_drivers/ansible/playbooks/roles/wait/tasks/main.yaml
  28. 1
      ironic_staging_drivers/ansible/python-requirements.txt
  29. 0
      ironic_staging_drivers/tests/unit/ansible/__init__.py
  30. 778
      ironic_staging_drivers/tests/unit/ansible/test_deploy.py
  31. 22
      releasenotes/notes/ansible-deploy-63d94ae3857bf7d0.yaml
  32. 4
      setup.cfg

@ -9,3 +9,6 @@ fake_libvirt_fake
fake_amt_fake
pxe_amt_iscsi
pxe_amt_agent
pxe_ssh_ansible
pxe_libvirt_ansible
pxe_ipmitool_ansible

@ -0,0 +1,67 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ironic.drivers import base
from ironic.drivers.modules import fake
from ironic.drivers.modules import ipmitool
from ironic.drivers.modules import pxe
from ironic.drivers.modules import ssh
from ironic_staging_drivers.ansible import deploy as ansible_deploy
from ironic_staging_drivers.libvirt import power as libvirt_power
class AnsibleAndSSHDriver(base.BaseDriver):
    """Ansible deploy + SSH power driver.

    NOTE: This driver is meant only for testing environments.
    """

    def __init__(self):
        # PXE boots the deploy ramdisk; Ansible performs the actual deploy.
        self.boot = pxe.PXEBoot()
        self.deploy = ansible_deploy.AnsibleDeploy()
        # SSH-based power and management, for virtualized test nodes.
        self.power = ssh.SSHPower()
        self.management = ssh.SSHManagement()
class AnsibleAndIPMIToolDriver(base.BaseDriver):
    """Ansible deploy + Ipmitool driver."""

    def __init__(self):
        # IPMI-based power, management and vendor passthru.
        self.power = ipmitool.IPMIPower()
        self.management = ipmitool.IPMIManagement()
        self.vendor = ipmitool.VendorPassthru()
        # PXE boots the deploy ramdisk; Ansible performs the actual deploy.
        self.boot = pxe.PXEBoot()
        self.deploy = ansible_deploy.AnsibleDeploy()
class FakeAnsibleDriver(base.BaseDriver):
    """Ansible deploy + Fake power driver.

    Useful for exercising the deploy interface in isolation.
    """

    def __init__(self):
        # PXE boots the deploy ramdisk; Ansible performs the actual deploy.
        self.boot = pxe.PXEBoot()
        self.deploy = ansible_deploy.AnsibleDeploy()
        # No-op power/management stubs.
        self.power = fake.FakePower()
        self.management = fake.FakeManagement()
class AnsibleAndLibvirtDriver(base.BaseDriver):
    """Ansible deploy + Libvirt power driver.

    NOTE: This driver is meant only for testing environments.
    """

    def __init__(self):
        # Libvirt-based power/management for virtualized test nodes.
        self.power = libvirt_power.LibvirtPower()
        self.management = libvirt_power.LibvirtManagement()
        # PXE boots the deploy ramdisk; Ansible performs the actual deploy.
        self.boot = pxe.PXEBoot()
        self.deploy = ansible_deploy.AnsibleDeploy()

@ -0,0 +1,746 @@
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Ansible deploy driver
"""
import json
import os
import shlex
from ironic_lib import utils as irlib_utils
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import units
import retrying
import six
import six.moves.urllib.parse as urlparse
import yaml
from ironic.common import dhcp_factory
from ironic.common import exception
from ironic.common.glance_service import service_utils
from ironic.common.i18n import _
from ironic.common.i18n import _LE
from ironic.common.i18n import _LI
from ironic.common.i18n import _LW
from ironic.common import image_service
from ironic.common import images
from ironic.common import states
from ironic.common import utils
from ironic.conductor import rpcapi
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
# Configuration options registered under the [ansible] section of the
# ironic configuration file.
ansible_opts = [
    cfg.StrOpt('ansible_extra_args',
               help=_('Extra arguments to pass on every '
                      'invocation of Ansible.')),
    cfg.IntOpt('verbosity',
               min=0,
               max=4,
               help=_('Set ansible verbosity level requested when invoking '
                      '"ansible-playbook" command. '
                      '4 includes detailed SSH session logging. '
                      'Default is 4 when global debug is enabled '
                      'and 0 otherwise.')),
    cfg.StrOpt('ansible_playbook_script',
               default='ansible-playbook',
               help=_('Path to "ansible-playbook" script. '
                      'Default will search the $PATH configured for user '
                      'running ironic-conductor process. '
                      'Provide the full path when ansible-playbook is not in '
                      '$PATH or installed in not default location.')),
    cfg.StrOpt('playbooks_path',
               default=os.path.join(os.path.dirname(__file__), 'playbooks'),
               help=_('Path to directory with playbooks, roles and '
                      'local inventory.')),
    cfg.StrOpt('config_file_path',
               default=os.path.join(
                   os.path.dirname(__file__), 'playbooks', 'ansible.cfg'),
               help=_('Path to ansible configuration file. If set to empty, '
                      'system default will be used.')),
    cfg.IntOpt('post_deploy_get_power_state_retries',
               min=0,
               default=6,
               help=_('Number of times to retry getting power state to check '
                      'if bare metal node has been powered off after a soft '
                      'power off.')),
    cfg.IntOpt('post_deploy_get_power_state_retry_interval',
               min=0,
               default=5,
               help=_('Amount of time (in seconds) to wait between polling '
                      'power state after trigger soft poweroff.')),
    cfg.IntOpt('extra_memory',
               default=10,
               help=_('Extra amount of memory in MiB expected to be consumed '
                      'by Ansible-related processes on the node. Affects '
                      'decision whether image will fit into RAM.')),
    cfg.BoolOpt('use_ramdisk_callback',
                default=True,
                help=_('Use callback request from ramdisk for start deploy or '
                       'cleaning. Disable it when using custom ramdisk '
                       'without callback script. '
                       'When callback is disabled, Neutron is mandatory.')),
]

CONF.register_opts(ansible_opts, group='ansible')

LOG = log.getLogger(__name__)

# Playbooks used when the node's driver_info does not override them.
DEFAULT_PLAYBOOKS = {
    'deploy': 'deploy.yaml',
    'clean': 'clean.yaml'
}
DEFAULT_CLEAN_STEPS = 'clean_steps.yaml'

# driver_info properties reported by get_properties(); all are optional,
# as every one of them has a usable default.
OPTIONAL_PROPERTIES = {
    'ansible_deploy_username': _('Deploy ramdisk username for Ansible. '
                                 'This user must have passwordless sudo '
                                 'permissions. Default is "ansible". '
                                 'Optional.'),
    'ansible_deploy_key_file': _('Path to private key file. If not specified, '
                                 'default keys for user running '
                                 'ironic-conductor process will be used. '
                                 'Note that for keys with password, those '
                                 'must be pre-loaded into ssh-agent. '
                                 'Optional.'),
    'ansible_deploy_playbook': _('Name of the Ansible playbook used for '
                                 'deployment. Default is %s. Optional.'
                                 ) % DEFAULT_PLAYBOOKS['deploy'],
    'ansible_clean_playbook': _('Name of the Ansible playbook used for '
                                'cleaning. Default is %s. Optional.'
                                ) % DEFAULT_PLAYBOOKS['clean'],
    'ansible_clean_steps_config': _('Name of the file with default cleaning '
                                    'steps configuration. Default is %s. '
                                    'Optional.'
                                    ) % DEFAULT_CLEAN_STEPS
}
COMMON_PROPERTIES = OPTIONAL_PROPERTIES

# instance_info keys describing the requested partition layout.
DISK_LAYOUT_PARAMS = ('root_gb', 'swap_mb', 'ephemeral_gb')

# Static local inventory file shipped alongside the playbooks.
INVENTORY_FILE = os.path.join(CONF.ansible.playbooks_path, 'inventory')
class PlaybookNotFound(exception.IronicException):
    """Raised when no playbook is configured for a deploy/clean action."""
    _msg_fmt = _('Failed to set ansible playbook for action %(action)s')
def _parse_ansible_driver_info(node, action='deploy'):
    """Fetch Ansible-related settings from the node's driver_info.

    :param node: the node being processed
    :param action: 'deploy' or 'clean', selects which playbook to look up
    :returns: (playbook, user, key) tuple
    :raises: PlaybookNotFound if no playbook is set for the action
    """
    driver_info = node.driver_info
    playbook = driver_info.get('ansible_%s_playbook' % action,
                               DEFAULT_PLAYBOOKS.get(action))
    if not playbook:
        raise PlaybookNotFound(action=action)
    user = driver_info.get('ansible_deploy_username', 'ansible')
    key = driver_info.get('ansible_deploy_key_file')
    return playbook, user, key
def _get_configdrive_path(basename):
    """Return the path of the local config drive file for this basename."""
    filename = '%s.cndrive' % basename
    return os.path.join(CONF.tempdir, filename)
# NOTE(yuriyz): this is a copy from agent driver
def build_instance_info_for_deploy(task):
    """Build instance_info necessary for deploying to a node.

    For Glance images, generates a Swift temp URL the node can download
    from, and records checksum/disk format from the Glance metadata.
    Otherwise the image_source must already be a reachable HTTP(S) URL.

    :param task: a TaskManager instance holding the node to deploy
    :returns: the updated instance_info dict (caller is responsible for
        saving it back to the node)
    :raises: ImageRefValidationFailed if a non-Glance image_source is not
        a valid or reachable HTTP(S) URL
    """
    node = task.node
    instance_info = node.instance_info

    image_source = instance_info['image_source']
    if service_utils.is_glance_image(image_source):
        glance = image_service.GlanceImageService(version=2,
                                                  context=task.context)
        image_info = glance.show(image_source)
        swift_temp_url = glance.swift_temp_url(image_info)
        LOG.debug('Got image info: %(info)s for node %(node)s.',
                  {'info': image_info, 'node': node.uuid})
        instance_info['image_url'] = swift_temp_url
        # Glance v2 reports checksum as MD5
        instance_info['image_checksum'] = image_info['checksum']
        instance_info['image_disk_format'] = image_info['disk_format']
    else:
        try:
            image_service.HttpImageService().validate_href(image_source)
        except exception.ImageRefValidationFailed:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Ansible deploy supports only HTTP(S) URLs as "
                              "instance_info['image_source']. Either %s "
                              "is not a valid HTTP(S) URL or "
                              "is not reachable."), image_source)
        instance_info['image_url'] = image_source
    return instance_info
def _get_node_ip(task):
    """Return the single IP address of the node's provisioning port.

    :param task: a TaskManager instance
    :returns: the IP address as reported by the DHCP provider
    :raises: FailedToGetIPAddressOnPort when no address is found
    :raises: InstanceDeployFailure when more than one address is found,
        since the driver cannot tell which one to use
    """
    provider = dhcp_factory.DHCPFactory().provider
    addresses = provider.get_ip_addresses(task)
    if not addresses:
        raise exception.FailedToGetIPAddressOnPort(_(
            "Failed to get IP address for any port on node %s.") %
            task.node.uuid)
    if len(addresses) > 1:
        error = _("Ansible driver does not support multiple IP addresses "
                  "during deploy or cleaning")
        raise exception.InstanceDeployFailure(reason=error)
    return addresses[0]
# some good code from agent
def _reboot_and_finish_deploy(task):
    """Power-cycle the node from provisioning to tenant network.

    Waits for the in-band soft power off (triggered by the playbook) to
    complete; if the node does not reach POWER_OFF in time, forces it off
    out-of-band. Then switches networking to the tenant network and powers
    the node back on into the instance.

    :param task: a TaskManager instance
    """
    # retrying's wait_fixed is expressed in milliseconds
    wait = CONF.ansible.post_deploy_get_power_state_retry_interval * 1000
    attempts = CONF.ansible.post_deploy_get_power_state_retries + 1

    @retrying.retry(
        stop_max_attempt_number=attempts,
        retry_on_result=lambda state: state != states.POWER_OFF,
        wait_fixed=wait
    )
    def _wait_until_powered_off(task):
        return task.driver.power.get_power_state(task)

    try:
        _wait_until_powered_off(task)
    except Exception as e:
        # soft power off did not complete in time: force it out-of-band
        LOG.warning(_LW('Failed to soft power off node %(node_uuid)s '
                        'in at least %(timeout)d seconds. Error: %(error)s'),
                    {'node_uuid': task.node.uuid,
                     'timeout': (wait * (attempts - 1)) / 1000,
                     'error': e})
        manager_utils.node_power_action(task, states.POWER_OFF)
    task.driver.network.remove_provisioning_network(task)
    task.driver.network.configure_tenant_networks(task)
    manager_utils.node_power_action(task, states.POWER_ON)
def _prepare_extra_vars(host_list, variables=None):
nodes_var = []
for node_uuid, ip, user, extra in host_list:
nodes_var.append(dict(name=node_uuid, ip=ip, user=user, extra=extra))
extra_vars = dict(ironic_nodes=nodes_var)
if variables:
extra_vars.update(variables)
return extra_vars
def _run_playbook(name, extra_vars, key, tags=None, notags=None):
    """Execute ansible-playbook.

    :param name: playbook file name, relative to the playbooks path
    :param extra_vars: dict serialized to JSON and passed with '-e'
    :param key: optional private key path for SSH
    :param tags: optional list of tags to run
    :param notags: optional list of tags to skip
    :returns: (stdout, stderr) of the ansible-playbook run
    :raises: InstanceDeployFailure when the playbook exits non-zero
    """
    playbook = os.path.join(CONF.ansible.playbooks_path, name)
    cmd = [CONF.ansible.ansible_playbook_script, playbook,
           '-i', INVENTORY_FILE,
           '-e', json.dumps(extra_vars)]
    if CONF.ansible.config_file_path:
        # point ansible at our bundled config via the environment
        cmd = ['env',
               'ANSIBLE_CONFIG=%s' % CONF.ansible.config_file_path] + cmd
    if tags:
        cmd.append('--tags=%s' % ','.join(tags))
    if notags:
        cmd.append('--skip-tags=%s' % ','.join(notags))
    if key:
        cmd.append('--private-key=%s' % key)

    verbosity = CONF.ansible.verbosity
    if verbosity is None and CONF.debug:
        # default to the most verbose output when global debug is on
        verbosity = 4
    if verbosity:
        cmd.append('-' + 'v' * verbosity)

    if CONF.ansible.ansible_extra_args:
        cmd.extend(shlex.split(CONF.ansible.ansible_extra_args))

    try:
        return utils.execute(*cmd)
    except processutils.ProcessExecutionError as e:
        raise exception.InstanceDeployFailure(reason=e)
def _calculate_memory_req(task):
    """Estimate RAM in MiB needed on the node to hold the image.

    Image size plus the configured overhead for Ansible-related processes.
    """
    source = task.node.instance_info['image_source']
    size_mib = images.download_size(task.context, source) // units.Mi
    return size_mib + CONF.ansible.extra_memory
def _parse_partitioning_info(node):
    """Validate and convert instance_info disk layout for the playbooks.

    Converts root/swap/ephemeral sizes into MiB as expected by the custom
    'parted' Ansible module and builds the ordered partition list
    (root first, then optional swap and ephemeral).

    :param node: the node being deployed (partition image case)
    :returns: dict with 'ironic_partitions', 'ephemeral_format' and
        'preserve_ephemeral' keys, suitable for use as playbook extra-vars
    :raises: MissingParameterValue if 'root_gb' is not set
    :raises: InvalidParameterValue if a size is not an integer or
        'preserve_ephemeral' is not a boolean-like value
    """
    info = node.instance_info
    i_info = {}
    i_info['root_gb'] = info.get('root_gb')
    error_msg = _("'root_gb' is missing in node's instance_info")
    deploy_utils.check_for_missing_params(i_info, error_msg)

    i_info['swap_mb'] = info.get('swap_mb', 0)
    i_info['ephemeral_gb'] = info.get('ephemeral_gb', 0)
    err_msg_invalid = _("Cannot validate parameter for deploy. Invalid "
                        "parameter %(param)s. Reason: %(reason)s")
    for param in DISK_LAYOUT_PARAMS:
        try:
            i_info[param] = int(i_info[param])
        # NOTE: int() raises TypeError, not ValueError, for non-string,
        # non-number values (e.g. lists coming from JSON instance_info);
        # catch both so the user gets InvalidParameterValue instead of an
        # unhandled error.
        except (ValueError, TypeError):
            reason = _("%s is not an integer value") % i_info[param]
            raise exception.InvalidParameterValue(err_msg_invalid %
                                                  {'param': param,
                                                   'reason': reason})

    # convert to sizes expected by 'parted' Ansible module
    root_mib = 1024 * i_info.pop('root_gb')
    swap_mib = i_info.pop('swap_mb')
    ephemeral_mib = 1024 * i_info.pop('ephemeral_gb')

    partitions = []
    root_partition = {'name': 'root',
                      'size_mib': root_mib,
                      'boot': 'yes',
                      'swap': 'no'}
    partitions.append(root_partition)

    if swap_mib:
        swap_partition = {'name': 'swap',
                          'size_mib': swap_mib,
                          'boot': 'no',
                          'swap': 'yes'}
        partitions.append(swap_partition)

    if ephemeral_mib:
        ephemeral_partition = {'name': 'ephemeral',
                               'size_mib': ephemeral_mib,
                               'boot': 'no',
                               'swap': 'no'}
        partitions.append(ephemeral_partition)

    i_info['ephemeral_format'] = info.get('ephemeral_format')
    if not i_info['ephemeral_format']:
        i_info['ephemeral_format'] = CONF.pxe.default_ephemeral_format

    preserve_ephemeral = info.get('preserve_ephemeral', False)
    try:
        i_info['preserve_ephemeral'] = (
            strutils.bool_from_string(preserve_ephemeral, strict=True))
    except ValueError as e:
        raise exception.InvalidParameterValue(
            err_msg_invalid % {'param': 'preserve_ephemeral', 'reason': e})
    # the 'parted' module expects yes/no strings instead of booleans
    i_info['preserve_ephemeral'] = (
        'yes' if i_info['preserve_ephemeral'] else 'no')
    i_info['ironic_partitions'] = partitions
    return i_info
def _prepare_variables(task):
    """Build the common playbook extra-vars: image and configdrive.

    May write an inline configdrive payload to a file under CONF.tempdir
    so the playbook can upload it to the node.

    :param task: a TaskManager instance
    :returns: dict with an 'image' key and, when a configdrive is
        requested, a 'configdrive' key describing its type and location
    """
    node = task.node
    i_info = node.instance_info
    image = {
        'url': i_info['image_url'],
        'mem_req': _calculate_memory_req(task),
        'disk_format': i_info.get('image_disk_format'),
    }
    checksum = i_info.get('image_checksum')
    if checksum:
        # NOTE(pas-ha) checksum can be in <algo>:<checksum> format
        # as supported by various Ansible modules, mostly good for
        # standalone Ironic case when instance_info is populated manually.
        # With no <algo> we take that instance_info is populated from Glance,
        # where API reports checksum as MD5 always.
        if ':' not in checksum:
            checksum = 'md5:%s' % checksum
        image['checksum'] = checksum
    variables = {'image': image}
    configdrive = i_info.get('configdrive')
    if configdrive:
        if urlparse.urlparse(configdrive).scheme in ('http', 'https'):
            # the node can fetch the configdrive itself
            cfgdrv_type = 'url'
            cfgdrv_location = configdrive
        else:
            # inline content: stash it into a tempdir file that the
            # playbook will push to the node
            cfgdrv_location = _get_configdrive_path(node.uuid)
            with open(cfgdrv_location, 'w') as f:
                f.write(configdrive)
            cfgdrv_type = 'file'
        variables['configdrive'] = {'type': cfgdrv_type,
                                    'location': cfgdrv_location}
    return variables
def _validate_clean_steps(steps, node_uuid):
    """Sanity-check clean steps loaded from the YAML config file.

    Every step must declare an 'interface', and each argument marked as
    required must carry a 'value'. All problems are collected and reported
    together in a single failure.

    :param steps: list of step dicts loaded from the clean steps file
    :param node_uuid: UUID of the node, for the error report
    :raises: NodeCleaningFailure when any step is malformed
    """
    problems = []
    for step in steps:
        step_name = step.setdefault('name', 'unnamed')
        if 'interface' not in step:
            problems.append({'name': step_name, 'field': 'interface'})
        for arg_name, arg_spec in step.get('args', {}).items():
            if arg_spec.get('required', False) and 'value' not in arg_spec:
                problems.append({'name': step_name,
                                 'field': '%s.value' % arg_name})
    if problems:
        err_string = ', '.join(
            'name %(name)s, field %(field)s' % i for i in problems)
        msg = _("Malformed clean_steps file: %s") % err_string
        LOG.error(msg)
        raise exception.NodeCleaningFailure(node=node_uuid,
                                            reason=msg)
def _get_clean_steps(task, interface=None, override_priorities=None):
    """Get cleaning steps.

    Loads the YAML clean-steps file referenced by the node's driver_info
    (or the default one), validates it and converts it into the step
    dicts expected by the conductor.

    :param task: a TaskManager instance with the node
    :param interface: if set, only steps for this interface are returned
    :param override_priorities: optional dict mapping step names to
        priorities that replace those from the file
    :returns: list of clean step dictionaries
    :raises: NodeCleaningFailure if the steps file cannot be loaded or
        is malformed
    """
    clean_steps_file = task.node.driver_info.get('ansible_clean_steps_config',
                                                 DEFAULT_CLEAN_STEPS)
    path = os.path.join(CONF.ansible.playbooks_path, clean_steps_file)
    try:
        with open(path) as f:
            internal_steps = yaml.safe_load(f)
    except Exception as e:
        msg = _('Failed to load clean steps from file '
                '%(file)s: %(exc)s') % {'file': path, 'exc': e}
        raise exception.NodeCleaningFailure(node=task.node.uuid, reason=msg)

    _validate_clean_steps(internal_steps, task.node.uuid)

    steps = []
    override = override_priorities or {}
    for params in internal_steps:
        name = params['name']
        clean_if = params['interface']
        if interface is not None and interface != clean_if:
            continue
        new_priority = override.get(name)
        priority = (new_priority if new_priority is not None else
                    params.get('priority', 0))
        args = {}
        argsinfo = params.get('args', {})
        # split each arg spec into its concrete value (goes to 'args')
        # and the remaining metadata (stays in 'argsinfo')
        for arg, arg_info in argsinfo.items():
            args[arg] = arg_info.pop('value', None)
        step = {
            'interface': clean_if,
            'step': name,
            'priority': priority,
            'abortable': False,
            'argsinfo': argsinfo,
            'args': args
        }
        steps.append(step)
    return steps
# taken from agent driver
def _notify_conductor_resume_clean(task):
    """Ask the conductor over RPC to continue cleaning the node."""
    node = task.node
    LOG.debug('Sending RPC to conductor to resume cleaning for node %s',
              node.uuid)
    client = rpcapi.ConductorAPI()
    topic = client.get_topic_for(node)
    node_uuid = node.uuid
    context = task.context
    # Need to release the lock to let the conductor take it
    task.release_resources()
    client.continue_node_clean(context, node_uuid, topic=topic)
def _deploy(task, node_address):
    """Internal function for deployment to a node.

    Runs the deploy playbook against the ramdisk reachable at
    node_address, then reboots the node into the deployed instance.

    :param task: a TaskManager instance
    :param node_address: IP address of the booted deploy ramdisk
    """
    notags = ['wait'] if CONF.ansible.use_ramdisk_callback else []
    node = task.node
    LOG.debug('IP of node %(node)s is %(ip)s',
              {'node': node.uuid, 'ip': node_address})
    iwdi = node.driver_internal_info.get('is_whole_disk_image')
    variables = _prepare_variables(task)
    if iwdi:
        # whole-disk images carry their own partition table,
        # skip the partitioning tasks
        notags.append('parted')
    else:
        variables.update(_parse_partitioning_info(task.node))
    playbook, user, key = _parse_ansible_driver_info(task.node)
    node_list = [(node.uuid, node_address, user, node.extra)]
    extra_vars = _prepare_extra_vars(node_list, variables=variables)

    LOG.debug('Starting deploy on node %s', node.uuid)
    # any caller should manage exceptions raised from here
    _run_playbook(playbook, extra_vars, key, notags=notags)
    LOG.info(_LI('Ansible complete deploy on node %s'), node.uuid)

    LOG.debug('Rebooting node %s to instance', node.uuid)
    manager_utils.node_set_boot_device(task, 'disk', persistent=True)
    _reboot_and_finish_deploy(task)
    task.driver.boot.clean_up_ramdisk(task)
class AnsibleDeploy(base.DeployInterface):
    """Interface for deploy-related actions."""

    def get_properties(self):
        """Return the properties of the interface."""
        return COMMON_PROPERTIES

    def validate(self, task):
        """Validate the driver-specific Node deployment info.

        :param task: a TaskManager instance
        :raises: InvalidParameterValue if netboot is requested for a
            partition image, or required image info is missing
        """
        task.driver.boot.validate(task)

        node = task.node
        iwdi = node.driver_internal_info.get('is_whole_disk_image')
        # netboot of partition images is not supported by this driver
        if not iwdi and deploy_utils.get_boot_option(node) == "netboot":
            raise exception.InvalidParameterValue(_(
                "Node %(node)s is configured to use the %(driver)s driver "
                "which does not support netboot.") % {'node': node.uuid,
                                                      'driver': node.driver})

        params = {}
        image_source = node.instance_info.get('image_source')
        params['instance_info.image_source'] = image_source
        error_msg = _('Node %s failed to validate deploy image info. Some '
                      'parameters were missing') % node.uuid
        deploy_utils.check_for_missing_params(params, error_msg)

    @task_manager.require_exclusive_lock
    def deploy(self, task):
        """Perform a deployment to a node.

        :param task: a TaskManager instance
        :returns: states.DEPLOYWAIT when waiting for the ramdisk callback,
            states.DEPLOYDONE on successful synchronous deploy
        """
        manager_utils.node_power_action(task, states.REBOOT)
        if CONF.ansible.use_ramdisk_callback:
            # deployment resumes in heartbeat() once the ramdisk
            # calls back to the Ironic API
            return states.DEPLOYWAIT

        # no callback: deploy synchronously over the DHCP-reported IP
        node = task.node
        ip_addr = _get_node_ip(task)
        try:
            _deploy(task, ip_addr)
        except Exception as e:
            error = _('Deploy failed for node %(node)s: '
                      'Error: %(exc)s') % {'node': node.uuid,
                                           'exc': six.text_type(e)}
            LOG.exception(error)
            self._set_failed_state(task, error)
        else:
            LOG.info(_LI('Deployment to node %s done'), node.uuid)
            return states.DEPLOYDONE

    @task_manager.require_exclusive_lock
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        :param task: a TaskManager instance
        :returns: states.DELETED
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        task.driver.network.unconfigure_tenant_networks(task)
        return states.DELETED

    def prepare(self, task):
        """Prepare the deployment environment for this node.

        :param task: a TaskManager instance
        """
        node = task.node
        # TODO(pas-ha) investigate takeover scenario
        if node.provision_state == states.DEPLOYING:
            # adding network-driver dependent provisioning ports
            manager_utils.node_power_action(task, states.POWER_OFF)
            task.driver.network.add_provisioning_network(task)
        if node.provision_state not in [states.ACTIVE, states.ADOPTING]:
            node.instance_info = build_instance_info_for_deploy(task)
            node.save()
            boot_opt = deploy_utils.build_agent_options(node)
            task.driver.boot.prepare_ramdisk(task, boot_opt)

    def clean_up(self, task):
        """Clean up the deployment environment for this node.

        Removes the PXE/ramdisk setup, DHCP options and a possibly
        left-over local config drive file.

        :param task: a TaskManager instance
        """
        task.driver.boot.clean_up_ramdisk(task)
        provider = dhcp_factory.DHCPFactory()
        provider.clean_dhcp(task)
        irlib_utils.unlink_without_raise(
            _get_configdrive_path(task.node.uuid))

    def take_over(self, task):
        """Take over management of the node (not supported)."""
        LOG.error(_LE("Ansible deploy does not support take over. "
                      "You must redeploy the node %s explicitly."),
                  task.node.uuid)

    def get_clean_steps(self, task):
        """Get the list of clean steps from the file.

        :param task: a TaskManager object containing the node
        :returns: A list of clean step dictionaries
        """
        new_priorities = {
            'erase_devices': CONF.deploy.erase_devices_priority,
            'erase_devices_metadata':
                CONF.deploy.erase_devices_metadata_priority
        }
        return _get_clean_steps(task, interface='deploy',
                                override_priorities=new_priorities)

    def execute_clean_step(self, task, step):
        """Execute a clean step.

        :param task: a TaskManager object containing the node
        :param step: a clean step dictionary to execute
        :returns: None
        """
        node = task.node
        playbook, user, key = _parse_ansible_driver_info(
            task.node, action='clean')
        stepname = step['step']

        try:
            ip_addr = node.driver_internal_info['ansible_cleaning_ip']
        except KeyError:
            raise exception.NodeCleaningFailure(node=node.uuid,
                                                reason='undefined node IP '
                                                'addresses')
        node_list = [(node.uuid, ip_addr, user, node.extra)]
        extra_vars = _prepare_extra_vars(node_list)

        LOG.debug('Starting cleaning step %(step)s on node %(node)s',
                  {'node': node.uuid, 'step': stepname})
        step_tags = step['args'].get('tags', [])
        try:
            _run_playbook(playbook, extra_vars, key,
                          tags=step_tags)
        except exception.InstanceDeployFailure as e:
            LOG.error(_LE("Ansible failed cleaning step %(step)s "
                          "on node %(node)s."), {
                              'node': node.uuid, 'step': stepname})
            manager_utils.cleaning_error_handler(task, six.text_type(e))
            # NOTE: the step has failed and was handled above; do not fall
            # through to the success log message below
            return
        LOG.info(_LI('Ansible completed cleaning step %(step)s '
                     'on node %(node)s.'),
                 {'node': node.uuid, 'step': stepname})

    def prepare_cleaning(self, task):
        """Boot into the ramdisk to prepare for cleaning.

        :param task: a TaskManager object containing the node
        :raises NodeCleaningFailure: if the previous cleaning ports cannot
            be removed or if new cleaning ports cannot be created
        :returns: None or states.CLEANWAIT for async prepare.
        """
        node = task.node
        use_callback = CONF.ansible.use_ramdisk_callback
        if use_callback:
            manager_utils.set_node_cleaning_steps(task)
            if not node.driver_internal_info['clean_steps']:
                # no clean steps configured, nothing to do.
                return
        deploy_utils.prepare_cleaning_ports(task)
        boot_opt = deploy_utils.build_agent_options(node)
        task.driver.boot.prepare_ramdisk(task, boot_opt)
        manager_utils.node_power_action(task, states.REBOOT)
        if use_callback:
            return states.CLEANWAIT

        # no callback: wait synchronously until the ramdisk is reachable
        ip_addr = _get_node_ip(task)
        LOG.debug('IP of node %(node)s is %(ip)s',
                  {'node': node.uuid, 'ip': ip_addr})
        driver_internal_info = node.driver_internal_info
        driver_internal_info['ansible_cleaning_ip'] = ip_addr
        node.driver_internal_info = driver_internal_info
        node.save()
        playbook, user, key = _parse_ansible_driver_info(
            task.node, action='clean')
        node_list = [(node.uuid, ip_addr, user, node.extra)]
        extra_vars = _prepare_extra_vars(node_list)

        LOG.debug('Waiting ramdisk on node %s for cleaning', node.uuid)
        _run_playbook(playbook, extra_vars, key, tags=['wait'])
        LOG.info(_LI('Node %s is ready for cleaning'), node.uuid)

    def tear_down_cleaning(self, task):
        """Clean up the PXE and DHCP files after cleaning.

        :param task: a TaskManager object containing the node
        :raises NodeCleaningFailure: if the cleaning ports cannot be
            removed
        """
        node = task.node
        driver_internal_info = node.driver_internal_info
        driver_internal_info.pop('ansible_cleaning_ip', None)
        node.driver_internal_info = driver_internal_info
        node.save()

        manager_utils.node_power_action(task, states.POWER_OFF)
        task.driver.boot.clean_up_ramdisk(task)
        deploy_utils.tear_down_cleaning_ports(task)

    # FIXME(pas-ha): remove this workaround after nearest Ironic release
    # that contains the specified commit (next after 6.1.0)
    # and require this Ironic release
    def _upgrade_lock(self, task, purpose=None):
        # the 'purpose' kwarg is only available in newer Ironic
        try:
            task.upgrade_lock(purpose=purpose)
        except TypeError:
            LOG.warning(_LW("To have better logging please update your "
                            "Ironic installation to contain commit "
                            "2a73b50a7fb29c4e73511d2294aa19c37d96c969."))
            task.upgrade_lock()

    # FIXME(pas-ha): remove this workaround after nearest Ironic release
    # that contains the specified commit (next after 6.1.0)
    # and require this Ironic release
    def _set_failed_state(self, task, error):
        # the 'collect_logs' kwarg is only available in newer Ironic
        try:
            deploy_utils.set_failed_state(task, error, collect_logs=False)
        except TypeError:
            LOG.warning(_LW("To have proper error handling please update "
                            "your Ironic installation to contain commit "
                            "bb62f256f7aa55c292ebeae73ca25a4a9f0ec8c0."))
            deploy_utils.set_failed_state(task, error)

    def heartbeat(self, task, callback_url):
        """Method for ansible ramdisk callback.

        :param task: a TaskManager instance
        :param callback_url: the URL the ramdisk posted back; its host
            part is used as the node address for running playbooks
        """
        node = task.node
        address = urlparse.urlparse(callback_url).netloc.split(':')[0]
        if node.maintenance:
            # this shouldn't happen often, but skip the rest if it does.
            LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
                      'not taking any action.', {'node': node.uuid})
        elif node.provision_state == states.DEPLOYWAIT:
            LOG.debug('Heartbeat from %(node)s.', {'node': node.uuid})
            self._upgrade_lock(task, purpose='deploy')
            node = task.node
            task.process_event('resume')
            try:
                _deploy(task, address)
            except Exception as e:
                error = _('Deploy failed for node %(node)s: '
                          'Error: %(exc)s') % {'node': node.uuid,
                                               'exc': six.text_type(e)}
                LOG.exception(error)
                self._set_failed_state(task, error)
            else:
                LOG.info(_LI('Deployment to node %s done'), node.uuid)
                task.process_event('done')
        elif node.provision_state == states.CLEANWAIT:
            LOG.debug('Node %s just booted to start cleaning.',
                      node.uuid)
            self._upgrade_lock(task, purpose='clean')
            node = task.node
            # remember the ramdisk address for the subsequent clean steps
            driver_internal_info = node.driver_internal_info
            driver_internal_info['ansible_cleaning_ip'] = address
            node.driver_internal_info = driver_internal_info
            node.save()
            try:
                _notify_conductor_resume_clean(task)
            except Exception as e:
                error = _('cleaning failed for node %(node)s: '
                          'Error: %(exc)s') % {'node': node.uuid,
                                               'exc': six.text_type(e)}
                LOG.exception(error)
                manager_utils.cleaning_error_handler(task, error)
        else:
            LOG.warning(_LW('Call back from %(node)s in invalid provision '
                            'state %(state)s'),
                        {'node': node.uuid, 'state': node.provision_state})

@ -0,0 +1,11 @@
# Register the nodes passed in via the "ironic_nodes" extra-var as Ansible
# inventory hosts in the "ironic" group, so subsequent plays can target
# them. Runs locally (on the conductor); tagged "always" so it executes
# regardless of which tags were requested.
- hosts: conductor
  gather_facts: no
  tasks:
    - add_host:
        group: ironic
        hostname: "{{ item.name }}"
        ansible_ssh_host: "{{ item.ip }}"
        ansible_ssh_user: "{{ item.user }}"
        ironic_extra: "{{ item.extra | default({}) }}"
      with_items: "{{ ironic_nodes }}"
      tags: always

@ -0,0 +1,24 @@
[defaults]
# retries through the ansible-deploy driver are not supported
retry_files_enabled = False
# this is using supplied callback_plugin to interleave ansible event logs
# into Ironic-conductor log as set in ironic configuration file,
# see callback_plugin/ironic_log.ini for some options to set
# (DevStack _needs_ some tweaks)
callback_whitelist = ironic_log
# For better security, bake SSH host keys into bootstrap image,
# add those to ~/.ssh/known_hosts for user running ironic-conductor service
# on all nodes where ironic-conductor and ansible-deploy driver are installed,
# and set the host_key_checking to True (or comment it out, it is the default)
host_key_checking = False
# uncomment if you have problem with ramdisk locale on ansible >= 2.1
#module_set_locale=False
[ssh_connection]
# pipelining greatly increases speed of deployment, disable it only when
# your version of ssh client on ironic node or server in bootstrap image
# do not support it or if you can not disable "requiretty" for the
# passwordless sudoer user in the bootstrap image.
# See Ansible documentation for more info:
# http://docs.ansible.com/ansible/intro_configuration.html#pipelining
pipelining = True

@ -0,0 +1,8 @@
[ironic]
# If Ironic's config is not in one of default oslo_config locations,
# specify the path to it here
#config_file = None
# If running a testing system with only stderr logging (e.g. DevStack)
# specify an actual file to log into here, for example Ironic-Conductor one.
#log_file = None

@ -0,0 +1,122 @@
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
try:
    # Python 3
    import configparser
except ImportError:
    # Python 2: the module was named ConfigParser
    import ConfigParser as configparser
import os

from oslo_config import cfg
from oslo_log import log as logging

from ironic.common import i18n
from ironic import version

# Read this plugin's own settings from the .ini file sitting next to it
# (see ironic_log.ini): optional path to the Ironic config file and to a
# dedicated log file.
basename = os.path.splitext(__file__)[0]
config = configparser.ConfigParser()
ironic_config = None
ironic_log_file = None
try:
    # NOTE: config.read() silently ignores a missing ini file and works on
    # both Python 2 and 3 (readfp is deprecated and removed in recent
    # Python 3 releases).
    config.read(basename + ".ini")
    if config.has_option('ironic', 'config_file'):
        ironic_config = config.get('ironic', 'config_file')
    if config.has_option('ironic', 'log_file'):
        ironic_log_file = config.get('ironic', 'log_file')
except Exception:
    # best effort: fall back to defaults when the ini cannot be parsed
    pass

CONF = cfg.CONF
DOMAIN = 'ironic'
LOG = logging.getLogger(__name__, project=DOMAIN,
                        version=version.version_info.release_string())

logging.register_options(CONF)

conf_kwargs = dict(args=[], project=DOMAIN,
                   version=version.version_info.release_string())
if ironic_config:
    conf_kwargs['default_config_files'] = [ironic_config]
CONF(**conf_kwargs)

if ironic_log_file:
    # log to the requested file instead of stderr
    CONF.set_override("log_file", ironic_log_file)
    CONF.set_override("use_stderr", False)

logging.setup(CONF, DOMAIN)
class CallbackModule(object):
    """Ansible callback plugin writing task events into the Ironic log.

    Relies on oslo.log being configured at module import time, so the
    messages share format and destination with Ironic-conductor logs.
    """

    CALLBACK_VERSION = 2.0
    CALLBACK_TYPE = 'notification'
    CALLBACK_NAME = 'ironic_log'
    CALLBACK_NEEDS_WHITELIST = True

    def __init__(self, display=None):
        # Name of the node the play targets; learned from the first
        # task result that is processed.
        self.node = None

    def runner_msg_dict(self, result):
        """Build the common logging context from a task result."""
        self.node = result._host.get_name()
        return {'node': self.node,
                'name': result._task.get_name(),
                'res': str(result._result)}

    def v2_playbook_on_task_start(self, task, is_conditional):
        # NOTE(pas-ha) I do not know (yet) how to obtain a ref to host
        # until first task is processed
        task_name = task.get_name()
        if task_name == 'setup':
            LOG.debug("Processing task %(name)s.", dict(name=task_name))
            return
        LOG.debug("Processing task %(name)s on node %(node)s.",
                  dict(name=task_name, node=self.node or "Node"))

    def v2_runner_on_failed(self, result, *args, **kwargs):
        LOG.error(i18n._LE(
            "Ansible task %(name)s failed on node %(node)s: %(res)s"),
            self.runner_msg_dict(result))

    def v2_runner_on_ok(self, result):
        msg = self.runner_msg_dict(result)
        # The implicit fact-gathering task gets a shorter message since
        # its result dict is huge and not interesting.
        if msg['name'] == 'setup':
            LOG.info(i18n._LI(
                "Ansible task 'setup' complete on node %(node)s"),
                msg)
        else:
            LOG.info(i18n._LI(
                "Ansible task %(name)s complete on node %(node)s: %(res)s"),
                msg)

    def v2_runner_on_unreachable(self, result):
        LOG.error(i18n._LE(
            "Node %(node)s was unreachable for Ansible task %(name)s: "
            "%(res)s"),
            self.runner_msg_dict(result))

    def v2_runner_on_async_poll(self, result):
        LOG.debug("Polled ansible task %(name)s for complete "
                  "on node %(node)s: %(res)s",
                  self.runner_msg_dict(result))

    def v2_runner_on_async_ok(self, result):
        LOG.info(i18n._LI(
            "Async Ansible task %(name)s complete on node %(node)s: %(res)s"),
            self.runner_msg_dict(result))

    def v2_runner_on_async_failed(self, result):
        LOG.error(i18n._LE(
            "Async Ansible task %(name)s failed on node %(node)s: %(res)s"),
            self.runner_msg_dict(result))

    def v2_runner_on_skipped(self, result):
        LOG.debug("Ansible task %(name)s skipped on node %(node)s: %(res)s",
                  self.runner_msg_dict(result))

@ -0,0 +1,12 @@
---
# Node cleaning playbook:
# register the node(s) into the in-memory inventory, wait for the
# bootstrap ramdisk to become reachable over SSH, then run the cleaning
# tasks (which tasks actually run is selected via playbook tags).
- include: add-ironic-nodes.yaml

- hosts: ironic
  gather_facts: no
  roles:
    - role: wait
      tags: wait

- hosts: ironic
  roles:
    - clean

@ -0,0 +1,19 @@
# Cleaning steps exposed by the ansible-deploy driver.
# 'priority' orders the steps (higher runs first); the 'tags' argument
# selects which tagged tasks of the clean playbook are executed.
- name: erase_devices_metadata
  priority: 99
  interface: deploy
  args:
    tags:
      required: true
      description: list of playbook tags used to erase partition table on disk devices
      value:
        - zap
- name: erase_devices
  priority: 10
  interface: deploy
  args:
    tags:
      required: true
      description: list of playbook tags used to erase disk devices
      value:
        - shred

@ -0,0 +1,13 @@
---
# Image deployment playbook:
# register the node(s) into the in-memory inventory, wait for the
# bootstrap ramdisk to become reachable over SSH, write the image to
# disk (deploy role) and finally power the node off (shutdown role).
- include: add-ironic-nodes.yaml

- hosts: ironic
  gather_facts: no
  roles:
    - role: wait
      tags: wait

- hosts: ironic
  roles:
    - deploy
    - shutdown

@ -0,0 +1 @@
conductor ansible_connection=local

@ -0,0 +1,111 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
PARTITION_TYPES = ('primary', 'logical', 'extended')
def construct_parted_args(device):
    """Build the argument list for a single 'parted' invocation.

    :param device: dict with keys 'device' (block device path),
                   'label' (disk label to create, or falsy to keep the
                   existing one) and 'partitions' (list of normalized
                   partition dicts, see validate_partitions).
    :returns: list of string arguments to append after the parted binary.
    """
    args = ['-s', device['device']]
    if device['label']:
        args += ['mklabel', device['label']]
    if device['partitions']:
        # Align optimally and express all offsets in MiB.
        args += ['-a', 'optimal', '--', 'unit', 'MiB']
        # Start at 1 MiB to keep the first partition aligned.
        offset = 1
        for num, part in enumerate(device['partitions'], start=1):
            args += ['mkpart', part['type']]
            if part['swap']:
                args.append('linux-swap')
            next_offset = offset + part['size_mib']
            args += ["%i" % offset, "%i" % next_offset]
            offset = next_offset
            if part['boot']:
                args += ['set', str(num), 'boot', 'on']
    return args
def validate_partitions(module, partitions):
    """Normalize and sanity-check the requested partitions in place.

    Fills in defaults (name, type, swap and boot flags), coerces sizes
    to int, and fails the module run on any invalid value.

    :param module: AnsibleModule instance (used for fail_json/boolean)
    :param partitions: list of partition dicts, mutated in place
    """
    for num, part in enumerate(partitions, start=1):
        # partition name might be an empty string
        part['name'] = part.get('name') or str(num)
        size = part.get('size_mib', None)
        if not size:
            module.fail_json(msg="Partition size must be provided")
        try:
            part['size_mib'] = int(size)
        except ValueError:
            module.fail_json(msg="Can not cast partition size to INT.")
        part.setdefault('type', 'primary')
        if part['type'] not in PARTITION_TYPES:
            module.fail_json(msg="Partition type must be one of "
                                 "%s." % PARTITION_TYPES)
        part['swap'] = module.boolean(part.get('swap', False))
        part['boot'] = module.boolean(part.get('boot', False))
        if part['boot'] and part['swap']:
            module.fail_json(msg="Can not set partition to "
                                 "boot and swap simultaneously.")
    # TODO(pas-ha) add more validation, e.g.
    # - only one boot partition?
    # - no more than 4 primary partitions on msdos table
    # - no more than one extended partition on msdos table
    # - estimate and validate available space
def main():
    """Ansible module entry point: partition a single block device.

    Optionally (re)creates the disk label, then creates the requested
    partitions with parted, and returns a mapping of partition names to
    the device nodes expected to be created.
    """
    module = AnsibleModule(
        argument_spec=dict(
            device=dict(required=True, type='str'),
            dryrun=dict(required=False, default=False, type='bool'),
            new_label=dict(required=False, default=False, type='bool'),
            # FIX: keyword was misspelled as 'requred', which is not a
            # valid argument-spec key and was silently ignored.
            label=dict(required=False, default='msdos', choices=[
                "bsd", "dvh", "gpt", "loop", "mac", "msdos", "pc98", "sun"]),
            partitions=dict(
                required=False, type='list')
        ),
        supports_check_mode=True)

    device = module.params['device']
    dryrun = module.params['dryrun']
    new_label = module.params['new_label']
    label = module.params['label']
    # Only (re)create a disk label when explicitly requested.
    if not new_label:
        label = False
    partitions = module.params['partitions'] or []
    try:
        validate_partitions(module, partitions)
    except Exception as e:
        module.fail_json(msg="Malformed partitions arguments: %s" % e)

    parted_args = construct_parted_args(dict(device=device, label=label,
                                             partitions=partitions))
    command = [module.get_bin_path('parted', required=True)]
    # Skip the actual parted run in check mode or dry run.
    if not (module.check_mode or dryrun):
        command.extend(parted_args)
        module.run_command(command, check_rc=True)

    # Report the device nodes parted should have created, keyed by the
    # (possibly auto-generated) partition names.
    partitions_created = {p['name']: '%s%i' % (device, i + 1)
                          for i, p in enumerate(partitions)}
    module.exit_json(changed=not dryrun, created=partitions_created)
from ansible.module_utils.basic import * # noqa
if __name__ == '__main__':
main()

@ -0,0 +1,104 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
import string
import requests
# adapted from IPA
DEFAULT_CHUNK_SIZE = 1024 * 1024 # 1MB
class StreamingDownloader(object):
    """Iterable wrapper around a streaming HTTP GET request.

    Yields the response body in fixed-size chunks; when a hash algorithm
    is given, every chunk is also fed into a hash object so the checksum
    of the downloaded data is available afterwards.
    """

    def __init__(self, url, chunksize, hash_algo=None):
        self.hasher = hashlib.new(hash_algo) if hash_algo is not None else None
        self.chunksize = chunksize
        resp = requests.get(url, stream=True)
        if resp.status_code != 200:
            raise Exception('Invalid response code: %s' % resp.status_code)
        self._request = resp

    def __iter__(self):
        hasher = self.hasher
        for chunk in self._request.iter_content(chunk_size=self.chunksize):
            if hasher is not None:
                hasher.update(chunk)
            yield chunk

    def checksum(self):
        """Hex digest of the data yielded so far, or None if not hashing."""
        if self.hasher is not None:
            return self.hasher.hexdigest()
def stream_to_dest(url, dest, chunksize, hash_algo):
    """Stream *url* into the *dest* file in chunks.

    :returns: the hex checksum of the written data, or None when no
              hash algorithm was requested.
    """
    downloader = StreamingDownloader(url, chunksize, hash_algo)
    with open(dest, 'wb+') as out:
        for piece in downloader:
            out.write(piece)
    return downloader.checksum()
def main():
    """Ansible module entry point: stream a URL to a local file.

    Optionally verifies the download against a "<algorithm>:<checksum>"
    value supplied by the caller.
    """
    module = AnsibleModule(
        argument_spec=dict(
            url=dict(required=True, type='str'),
            dest=dict(required=True, type='str'),
            checksum=dict(required=False, type='str', default=''),
            chunksize=dict(required=False, type='int',
                           default=DEFAULT_CHUNK_SIZE)
        ))

    params = module.params
    checksum = params['checksum']
    if checksum == '':
        # No verification requested.
        hash_algo = checksum = None
    else:
        try:
            hash_algo, checksum = checksum.rsplit(':', 1)
        except ValueError:
            module.fail_json(msg='The checksum parameter has to be in format '
                                 '"<algorithm>:<checksum>"')
        checksum = checksum.lower()
        if not all(c in string.hexdigits for c in checksum):
            module.fail_json(msg='The checksum must be valid HEX number')
        if hash_algo not in hashlib.algorithms_available:
            module.fail_json(msg="%s checksums are not supported" % hash_algo)

    try:
        actual_checksum = stream_to_dest(
            params['url'], params['dest'], params['chunksize'], hash_algo)
    except Exception as e:
        module.fail_json(msg=str(e))
    else:
        if hash_algo and actual_checksum != checksum:
            module.fail_json(msg='Invalid dest checksum')
        else:
            module.exit_json(changed=True)
# NOTE(pas-ha) Ansible's module_utils.basic is licensed under BSD (2 clause)
from ansible.module_utils.basic import * # noqa
if __name__ == '__main__':
main()

@ -0,0 +1,6 @@
# Entry point of the 'clean' role. Which cleaning actions actually run
# is selected via the tags the driver passes for each clean step
# (see clean_steps.yaml).
- include: zap.yaml
  tags:
    - zap
- include: shred.yaml
  tags:
    - shred

@ -0,0 +1,6 @@
# Securely overwrite all block devices discovered during fact gathering.
# shred can take a long time on large disks, so the task is run
# asynchronously (up to 1 hour) and polled every 30 seconds.
- name: clean block devices
  become: yes
  command: shred -f -z /dev/{{ item }}
  async: 3600
  poll: 30
  with_items: "{{ ansible_devices }}"

@ -0,0 +1,4 @@
# Destroy GPT and MBR partitioning metadata on every discovered block
# device (sgdisk -Z zaps both GPT structures and the protective MBR).
- name: wipe partition metadata
  become: yes
  command: sgdisk -Z /dev/{{ item }}
  with_items: "{{ ansible_devices }}"

@ -0,0 +1,54 @@
#!/bin/sh
# code from DIB bash ramdisk
readonly target_disk=$1
readonly root_part=$2
readonly root_part_mount=/mnt/rootfs
# We need to run partprobe to ensure all partitions are visible
partprobe $target_disk
mkdir -p $root_part_mount
mount $root_part $root_part_mount
if [ $? != "0" ]; then
echo "Failed to mount root partition $root_part on $root_part_mount"
exit 1
fi
mkdir -p $root_part_mount/dev
mkdir -p $root_part_mount/sys
mkdir -p $root_part_mount/proc
mount -o bind /dev $root_part_mount/dev
mount -o bind /sys $root_part_mount/sys
mount -o bind /proc $root_part_mount/proc
# Find grub version
V=
if [ -x $root_part_mount/usr/sbin/grub2-install ]; then
V=2
fi
# Install grub
ret=1
if chroot $root_part_mount /bin/sh -c "/usr/sbin/grub$V-install ${target_disk}"; then
echo "Generating the grub configuration file"
# tell GRUB2 to preload its "lvm" module to gain LVM booting on direct-attached disks
if [ "$V" = "2" ]; then
echo "GRUB_PRELOAD_MODULES=lvm" >> $root_part_mount/etc/default/grub
fi
chroot $root_part_mount /bin/sh -c "/usr/sbin/grub$V-mkconfig -o /boot/grub$V/grub.cfg"
ret=$?
fi
umount $root_part_mount/dev
umount $root_part_mount/sys
umount $root_part_mount/proc
umount $root_part_mount
if [ $ret != "0" ]; then
echo "Installing grub bootloader failed"