tripleo-ansible/tripleo_ansible/ansible_plugins/modules/metalsmith_instances.py
Steve Baker 3ba91a22a4 Capture metalsmith python logging
The metalsmith_instances module calls into metalsmith and there is
currently no logging mechanism to see what is happening. This change
is an attempt to improve this by configuring logging and adding the
output to the result dict as 'logging'.

Change-Id: I3da2e72383787f96bf4d930e416b8fb5c0a4ff72
Related-Bug: #1879472
2020-07-16 10:55:44 +12:00

421 lines
13 KiB
Python

#!/usr/bin/python
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Under Python 2 this makes every class defined in this module new-style;
# it has no effect on class creation under Python 3.
__metaclass__ = type
from concurrent import futures
import io
import logging
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.openstack import openstack_cloud_from_module
from ansible.module_utils.openstack import openstack_full_argument_spec
from ansible.module_utils.openstack import openstack_module_kwargs
import metalsmith
from metalsmith import instance_config
from metalsmith import sources
import yaml
# Module metadata block; kept as a plain dict literal of constants.
ANSIBLE_METADATA = {
    'metadata_version': '1.1',
    'status': ['preview'],
    'supported_by': 'community',
}
# Module documentation in YAML. Besides documenting the module, the
# 'options' mapping is parsed with yaml.safe_load() in main() and passed to
# openstack_full_argument_spec(), so the nesting, types and defaults below
# define the module's real argument spec.
DOCUMENTATION = '''
---
module: metalsmith_instances
short_description: Manage baremetal instances with metalsmith
version_added: "2.9"
author: "Steve Baker (@stevebaker)"
description:
  - Provision and unprovision ironic baremetal instances using metalsmith,
    which is a simple tool to provision bare metal machines using
    OpenStack Bare Metal Service (ironic) and, optionally, OpenStack
    Image Service (glance) and OpenStack Networking Service (neutron).
options:
  instances:
    description:
      - List of node description dicts to perform operations on
    type: list
    default: []
    elements: dict
    suboptions:
      hostname:
        description:
          - Host name to use, defaults to Node's name or UUID
        type: str
      name:
        description:
          - The name of an existing node to provision
        type: str
      image:
        description:
          - Details of the image you want to provision onto the node
        type: dict
        required: True
        suboptions:
          href:
            description:
              - Image to use (name, UUID or URL)
            type: str
            required: True
          checksum :
            description:
              - Image MD5 checksum or URL with checksums
            type: str
          kernel:
            description:
              - URL of the image's kernel
            type: str
          ramdisk:
            description:
              - URL of the image's ramdisk
            type: str
      nics:
        description:
          - List of requested NICs
        type: list
        elements: dict
        suboptions:
          network:
            description:
              - Network to create a port on (name or UUID)
          subnet:
            description:
              - Subnet to create a port on (name or UUID)
          port:
            description:
              - Port to attach (name or UUID)
          fixed_ip:
            description:
              - Attach IP from the network
      netboot:
        description:
          - Boot from network instead of local disk
        default: no
        type: bool
      root_size_gb:
        description:
          - Root partition size (in GiB), defaults to (local_gb - 1)
        type: int
      swap_size_mb:
        description:
          - Swap partition size (in MiB), defaults to no swap
        type: int
      capabilities:
        description:
          - Selection criteria to match the node capabilities
        type: dict
      traits:
        description:
          - Traits the node should have
        type: list
        elements: str
      ssh_public_keys:
        description:
          - SSH public keys to load
        type: str
      resource_class:
        description:
          - Node resource class to provision
        type: str
        default: baremetal
      conductor_group:
        description:
          - Conductor group to pick the node from
        type: str
      user_name:
        description:
          - Name of the admin user to create
        type: str
      passwordless_sudo:
        description:
          - Allow password-less sudo for the user
        default: yes
        type: bool
  clean_up:
    description:
      - Clean up resources on failure
    default: yes
    type: bool
  state:
    description:
      - Desired provision state, "present" to provision,
        "absent" to unprovision, "reserved" to create an allocation
        record without changing the node state
    default: present
    choices:
      - present
      - absent
      - reserved
  wait:
    description:
      - A boolean value instructing the module to wait for node provision
        to complete before returning. A 'yes' is implied if the number of
        instances is more than the concurrency.
    type: bool
    default: no
  timeout:
    description:
      - An integer value representing the number of seconds to wait for the
        node provision to complete.
    type: int
    default: 3660
  concurrency:
    description:
      - Maximum number of instances to provision at once. Set to 0 to have no
        concurrency limit
    type: int
    default: 0
  log_level:
    description:
      - Set the logging level for the log which is available in the
        returned 'logging' result.
    default: info
    choices:
      - debug
      - info
      - warning
      - error
'''
# Level set on the 'metalsmith' logger for each accepted log_level value
# (looked up in _configure_logging).
METALSMITH_LOG_MAP = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR,
}
# Level passed to logging.basicConfig() for each accepted log_level value
# (looked up in _configure_logging). Note that 'debug' and 'info' map to a
# higher threshold here than in METALSMITH_LOG_MAP.
BASE_LOG_MAP = {
    'debug': logging.INFO,
    'info': logging.WARNING,
    'warning': logging.WARNING,
    'error': logging.ERROR,
}
def _get_source(instance):
    """Return the image source detected for an instance description.

    The ``href``, ``kernel``, ``ramdisk`` and ``checksum`` entries of the
    instance's ``image`` dict are handed to ``sources.detect``; entries
    that are absent are passed as ``None``.

    :param instance: instance description dict containing an ``image`` dict
    :returns: whatever ``sources.detect`` returns
    """
    image_spec = instance.get('image')
    detect_kwargs = {
        'image': image_spec.get('href'),
        'kernel': image_spec.get('kernel'),
        'ramdisk': image_spec.get('ramdisk'),
        'checksum': image_spec.get('checksum'),
    }
    return sources.detect(**detect_kwargs)
def reserve(provisioner, instances, clean_up):
    """Reserve a node for every instance description.

    Each successfully reserved node's ``id`` is written back into the
    instance dict under ``name`` so that a later provision step targets
    exactly that node.

    :param provisioner: object providing ``reserve_node`` (and
        ``unprovision_node``, used for clean-up)
    :param instances: list of instance description dicts; mutated in place
    :param clean_up: when true, reservations already made by this call are
        released before a failure is re-raised
    :returns: tuple ``(changed, nodes)`` where ``changed`` is true when at
        least one node was reserved
    :raises Exception: whatever ``reserve_node`` raised, unchanged
    """
    nodes = []
    for instance in instances:
        name = instance.get('name')
        # NOTE(dtantsur): metalsmith accepts list of instances to pick
        # from. We implement a simplest case when a user can pick a
        # node by its name (actually, UUID will also work).
        candidates = [name] if name is not None else None
        try:
            # No trailing comma after the call: the result must be the node
            # itself, not a 1-tuple wrapping it.
            node = provisioner.reserve_node(
                resource_class=instance.get('resource_class', 'baremetal'),
                capabilities=instance.get('capabilities'),
                candidates=candidates,
                traits=instance.get('traits'),
                conductor_group=instance.get('conductor_group'))
            nodes.append(node)
            # side-effect of populating the instance name, which is passed to
            # a later provision step
            instance['name'] = node.id
        except Exception:
            if clean_up:
                # Remove all reservations on failure
                _release_nodes(provisioner, [n.id for n in nodes])
            # bare raise keeps the original traceback intact
            raise
    return bool(nodes), nodes
def _release_nodes(provisioner, node_ids):
    """Best-effort unprovision of every node in ``node_ids``.

    A failure on one node is logged and does not stop the remaining nodes
    from being attempted; nothing is raised to the caller.

    :param provisioner: object providing ``unprovision_node``
    :param node_ids: iterable of node identifiers to release
    """
    log = logging.getLogger(__name__)
    for node in node_ids:
        try:
            provisioner.unprovision_node(node)
        except Exception as exc:
            # Deliberately not re-raised: this runs while handling an
            # earlier failure, which is the error the caller reports.
            log.warning('Failed to release node %s: %s', node, exc)
def provision(provisioner, instances, timeout, concurrency, clean_up, wait):
    """Reserve (where needed) and provision every instance concurrently.

    :param provisioner: provisioner object handed to ``reserve`` and
        ``_provision_instance``
    :param instances: list of instance description dicts
    :param timeout: seconds to wait for provisioning when waiting
    :param concurrency: maximum number of worker threads; ``None``, zero or
        a negative value means one worker per instance
    :param clean_up: when true, a failure cancels jobs which have not
        started and unprovisions every node provisioned by this call
    :param wait: wait for each node to finish provisioning; forced on when
        ``concurrency`` is lower than the number of instances
    :returns: tuple ``(changed, nodes)``; ``nodes`` holds the objects
        appended by ``_provision_instance``
    :raises Exception: the first exception raised by a provisioning job
    """
    if not instances:
        return False, []

    # first, ensure all instances are reserved
    reserve(provisioner, [i for i in instances if not i.get('name')], clean_up)

    nodes = []

    # no limit on concurrency, create a worker for every instance
    # (an unset concurrency of None is treated the same as 0)
    if not concurrency or concurrency < 1:
        concurrency = len(instances)

    # if concurrency is less than instances, need to wait for
    # instance completion
    if concurrency < len(instances):
        wait = True

    exceptions = []
    with futures.ThreadPoolExecutor(max_workers=concurrency) as p:
        provision_jobs = [
            p.submit(_provision_instance, provisioner, i, nodes, timeout, wait)
            for i in instances
        ]
        for job in futures.as_completed(provision_jobs):
            if job.cancelled():
                # Cancelled below after an earlier failure. Calling
                # exception() on it would raise CancelledError and mask the
                # failure that is actually reported.
                continue
            e = job.exception()
            if e is None:
                continue
            exceptions.append(e)
            if clean_up:
                # stop every job which has not started yet
                for pending in provision_jobs:
                    pending.cancel()

    if exceptions:
        if clean_up:
            # All workers have finished once the executor block exits, so
            # ``nodes`` now lists every node provisioned by this call,
            # including ones which completed after the first failure.
            _release_nodes(provisioner, [i.uuid for i in nodes])
        # TODO(sbaker) future enhancement to tolerate a proportion of failures
        # so that provisioning and deployment can continue
        raise exceptions[0]
    return bool(nodes), nodes
def _provision_instance(provisioner, instance, nodes, timeout, wait):
    """Provision one instance and record the result in ``nodes``.

    A cloud-init config is built from the instance's ``ssh_public_keys``;
    when ``user_name`` is set, an admin user is added to it with sudo
    controlled by ``passwordless_sudo`` (default true).

    :param provisioner: object providing ``provision_node`` and
        ``wait_for_provisioning``
    :param instance: instance description dict
    :param nodes: shared list the provisioned node is appended to
    :param timeout: timeout passed to ``wait_for_provisioning``
    :param wait: when true, block until provisioning of this node completes
    """
    source = _get_source(instance)
    cloud_init = instance_config.CloudInitConfig(
        ssh_keys=instance.get('ssh_public_keys'))

    admin_user = instance.get('user_name')
    if admin_user:
        cloud_init.add_user(
            admin_user,
            admin=True,
            sudo=instance.get('passwordless_sudo', True))

    provisioned = provisioner.provision_node(
        instance.get('name'),
        config=cloud_init,
        hostname=instance.get('hostname'),
        image=source,
        nics=instance.get('nics'),
        root_size_gb=instance.get('root_size_gb'),
        swap_size_mb=instance.get('swap_size_mb'),
        netboot=instance.get('netboot', False)
    )
    # Recorded before any waiting so the caller's list already contains this
    # node while the wait is still in progress.
    nodes.append(provisioned)

    if not wait:
        return
    provisioner.wait_for_provisioning([provisioned.uuid], timeout=timeout)
def unprovision(provisioner, instances):
    """Unprovision the node named by each instance description.

    :param provisioner: object providing ``unprovision_node``
    :param instances: iterable of instance description dicts; the ``name``
        entry of each is passed to ``unprovision_node``
    :returns: ``True`` when at least one node was unprovisioned, ``False``
        when ``instances`` was empty (consistent with the ``changed`` flag
        returned by ``reserve`` and ``provision``)
    """
    changed = False
    for instance in instances:
        provisioner.unprovision_node(instance.get('name'))
        changed = True
    return changed
def _configure_logging(log_level):
    """Configure logging and return a buffer collecting metalsmith logs.

    ``logging.basicConfig`` is called with the level from ``BASE_LOG_MAP``,
    the ``urllib3.connectionpool`` logger is set to CRITICAL, and the
    ``metalsmith`` logger gets the level from ``METALSMITH_LOG_MAP`` plus a
    stream handler writing into the returned buffer.

    :param log_level: one of the keys of the two level maps
    :returns: ``io.StringIO`` receiving records from the metalsmith logger
    :raises KeyError: if ``log_level`` is not a key of the level maps
    """
    # Both lookups happen first so an unknown level fails before any logger
    # is touched.
    root_level = BASE_LOG_MAP[log_level]
    metalsmith_level = METALSMITH_LOG_MAP[log_level]

    logging.basicConfig(
        level=root_level,
        format='%(asctime)s %(levelname)s %(name)s: %(message)s')
    logging.getLogger('urllib3.connectionpool').setLevel(logging.CRITICAL)

    captured = io.StringIO()
    metalsmith_logger = logging.getLogger('metalsmith')
    metalsmith_logger.setLevel(metalsmith_level)
    metalsmith_logger.addHandler(logging.StreamHandler(captured))
    return captured
def main():
    """Module entry point.

    Builds the argument spec from the ``options`` section of
    ``DOCUMENTATION``, configures logging, then provisions, reserves or
    unprovisions the requested instances according to ``state``. Every
    result, success or failure, carries the captured log text under the
    ``logging`` key.
    """
    option_spec = yaml.safe_load(DOCUMENTATION)['options']
    module = AnsibleModule(
        argument_spec=openstack_full_argument_spec(**option_spec),
        supports_check_mode=False,
        **openstack_module_kwargs()
    )
    params = module.params
    log_stream = _configure_logging(params['log_level'])

    try:
        _sdk, cloud = openstack_cloud_from_module(module)
        provisioner = metalsmith.Provisioner(cloud_region=cloud.config)

        instances = params['instances']
        state = params['state']
        concurrency = params['concurrency']
        timeout = params['timeout']
        wait = params['wait']
        clean_up = params['clean_up']

        if state == 'present':
            changed, nodes = provision(
                provisioner, instances, timeout, concurrency, clean_up, wait)
            provisioned = [
                {
                    'name': node.node.name or node.uuid,
                    'hostname': node.hostname,
                    'id': node.uuid,
                }
                for node in nodes
            ]
            module.exit_json(
                changed=changed,
                msg="{} instances provisioned".format(len(nodes)),
                instances=provisioned,
                logging=log_stream.getvalue()
            )
        elif state == 'reserved':
            changed, nodes = reserve(provisioner, instances, clean_up)
            module.exit_json(
                changed=changed,
                msg="{} instances reserved".format(len(nodes)),
                ids=[node.id for node in nodes],
                instances=instances,
                logging=log_stream.getvalue()
            )
        elif state == 'absent':
            changed = unprovision(provisioner, instances)
            module.exit_json(
                changed=changed,
                msg="{} nodes unprovisioned".format(len(instances)),
                logging=log_stream.getvalue()
            )
    except Exception as e:
        # Any failure is reported together with the log captured so far.
        module.fail_json(
            msg=str(e),
            logging=log_stream.getvalue()
        )


if __name__ == '__main__':
    main()