Merge branch 'master' into module_rename

commit f1d39608eb
Scott Hussey, 2017-06-23 07:54:14 -05:00 (committed by GitHub)
19 changed files with 618 additions and 75 deletions

.gitignore (new file, vendored)
@@ -0,0 +1,102 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# IDEA IDE
.idea/

@@ -27,8 +27,7 @@ class DrydockConfig(object):
node_driver = {
'maasdriver': {
'api_key': 'KTMHgA42cNSMnfmJ82:cdg4yQUhp542aHsCTV:7Dc2KB9hQpWq3LfQAAAKAj6wdg22yWxZ',
'api_url': 'http://localhost:5240/MAAS/api/2.0/',
},
}

@@ -31,17 +31,21 @@ def start_api(state_manager=None, ingester=None, orchestrator=None):
control_api = falcon.API(request_type=DrydockRequest,
middleware=[AuthMiddleware(), ContextMiddleware(), LoggingMiddleware()])
# v1.0 of Drydock API
v1_0_routes = [
# API for managing orchestrator tasks
control_api.add_route('/tasks', TasksResource(state_manager=state_manager, orchestrator=orchestrator))
control_api.add_route('/tasks/{task_id}', TaskResource(state_manager=state_manager))
('/tasks', TasksResource(state_manager=state_manager, orchestrator=orchestrator)),
('/tasks/{task_id}', TaskResource(state_manager=state_manager)),
# API for managing site design data
control_api.add_route('/designs', DesignsResource(state_manager=state_manager))
control_api.add_route('/designs/{design_id}', DesignResource(state_manager=state_manager, orchestrator=orchestrator))
control_api.add_route('/designs/{design_id}/parts', DesignsPartsResource(state_manager=state_manager, ingester=ingester))
control_api.add_route('/designs/{design_id}/parts/{kind}', DesignsPartsKindsResource(state_manager=state_manager))
('/designs', DesignsResource(state_manager=state_manager)),
('/designs/{design_id}', DesignResource(state_manager=state_manager, orchestrator=orchestrator)),
('/designs/{design_id}/parts', DesignsPartsResource(state_manager=state_manager, ingester=ingester)),
('/designs/{design_id}/parts/{kind}', DesignsPartsKindsResource(state_manager=state_manager)),
('/designs/{design_id}/parts/{kind}/{name}', DesignsPartResource(state_manager=state_manager, orchestrator=orchestrator))
]
control_api.add_route('/designs/{design_id}/parts/{kind}/{name}',
DesignsPartResource(state_manager=state_manager, orchestrator=orchestrator))
for path, res in v1_0_routes:
control_api.add_route('/api/v1.0' + path, res)
return control_api

@@ -118,6 +118,7 @@ class DrydockRequestContext(object):
self.user = None
self.roles = ['anyone']
self.request_id = str(uuid.uuid4())
self.external_marker = None
def set_log_level(self, level):
@@ -138,7 +139,7 @@ class DrydockRequestContext(object):
if x != role]
def set_external_marker(self, marker):
self.external_marker = str(marker)[:32]
self.external_marker = str(marker)[:20]
class DrydockRequest(request.Request):
context_type = DrydockRequestContext

@@ -72,11 +72,9 @@ class ContextMiddleware(object):
elif requested_logging == 'INFO':
ctx.set_log_level('INFO')
ctx.req_id = str(uuid.uuid4())
ext_marker = req.get_header('X-Context-Marker')
ctx.external_ctx = ext_marker if ext_marker is not None else ''
ctx.set_external_marker(ext_marker if ext_marker is not None else '')
class LoggingMiddleware(object):
@@ -88,7 +86,7 @@ class LoggingMiddleware(object):
extra = {
'user': ctx.user,
'req_id': ctx.req_id,
'external_ctx': ctx.external_ctx,
'external_ctx': ctx.external_marker,
}
resp.append_header('X-Drydock-Req', ctx.req_id)
self.logger.info("%s - %s" % (req.uri, resp.status), extra=extra)

@@ -3,28 +3,30 @@
This is the external-facing API service to control the rest
of Drydock and query Drydock-managed data.
## Endpoints ##
### /tasks ###
## v1.0 Endpoints ##
### /api/v1.0/tasks ###
POST - Create a new orchestration task and submit it for execution
GET - Get status of a task
DELETE - Cancel execution of a task if permitted
### /designs ###
### /api/v1.0/designs ###
POST - Create a new site design so design parts can be added
### /designs/{id}
### /api/v1.0/designs/{id}
GET - Get a current design if available. Param 'source=compiled' to calculate the inheritance chain and compile the effective design.
### /designs/{id}/parts
### /api/v1.0/designs/{id}/parts
POST - Submit a new design part to be ingested and added to this design
GET - View a currently defined design part
PUT - Replace an existing design part *Not Implemented*
### /designs/{id}/parts/{kind}/{name}
### /api/v1.0/designs/{id}/parts/{kind}/{name}
GET - View a single design part. param 'source=compiled' to calculate the inheritance chain and compile the effective configuration for the design part.
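
As a quick illustration of the versioned routes above, a client call might look like the sketch below. The paths come from this document; the host and port, the task payload fields, and the marker value are illustrative assumptions rather than part of this change.

```python
# Hypothetical client sketch for the /api/v1.0 routes; base URL, payload
# fields, and header values are assumptions for illustration only.
import requests

BASE = 'http://localhost:9000/api/v1.0'            # assumed Drydock host/port
HEADERS = {'X-Context-Marker': 'demo-marker-001'}  # picked up by ContextMiddleware

# Create an empty site design, then submit an orchestration task referencing it.
design_resp = requests.post(BASE + '/designs', headers=HEADERS)
design_id = design_resp.json().get('id')           # assumed response field

task_resp = requests.post(
    BASE + '/tasks',
    json={'action': 'verify_site',                 # assumed payload shape
          'design_id': design_id,
          'sitename': 'demo-site'},
    headers=HEADERS)
print(task_resp.status_code, task_resp.json())
```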

@@ -28,6 +28,7 @@ class NodeDriver(ProviderDriver):
hd_fields.OrchestratorAction.CreateStorageTemplate,
hd_fields.OrchestratorAction.CreateBootMedia,
hd_fields.OrchestratorAction.PrepareHardwareConfig,
hd_fields.OrchestratorAction.IdentifyNode,
hd_fields.OrchestratorAction.ConfigureHardware,
hd_fields.OrchestratorAction.InterrogateNode,
hd_fields.OrchestratorAction.ApplyNodeNetworking,

@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
import drydock_provisioner.error as errors
import drydock_provisioner.config as config
import drydock_provisioner.drivers as drivers
@@ -22,6 +25,7 @@ from .api_client import MaasRequestFactory
import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
class MaasNodeDriver(NodeDriver):
@@ -34,6 +38,8 @@ class MaasNodeDriver(NodeDriver):
self.config = config.DrydockConfig.node_driver[self.driver_key]
self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
def execute_task(self, task_id):
task = self.state_manager.get_task(task_id)
@@ -104,6 +110,9 @@ class MaasNodeDriver(NodeDriver):
site_design = self.orchestrator.get_effective_site(design_id)
if task.action == hd_fields.OrchestratorAction.CreateNetworkTemplate:
self.orchestrator.task_field_update(task.get_id(), status=hd_fields.TaskStatus.Running)
subtask = self.orchestrator.create_task(task_model.DriverTask,
parent_task_id=task.get_id(), design_id=design_id,
action=task.action, site_name=task.site_name,
@@ -111,8 +120,14 @@
runner = MaasTaskRunner(state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id(),config=self.config)
self.logger.info("Starting thread for task %s to create network templates" % (subtask.get_id()))
runner.start()
# TODO Figure out coherent system for putting all the timeouts in
# the config
runner.join(timeout=120)
if runner.is_alive():
@@ -120,18 +135,89 @@
'retry': False,
'detail': 'MaaS Network creation timed-out'
}
self.logger.warn("Thread for task %s timed out after 120s" % (subtask.get_id()))
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail=result)
else:
subtask = self.state_manager.get_task(subtask.get_id())
self.logger.info("Thread for task %s completed - result %s" % (subtask.get_id(), subtask.get_result()))
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=subtask.get_result())
return
elif task.action == hd_fields.OrchestratorAction.IdentifyNode:
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Running)
subtasks = []
result_detail = {
'detail': []
}
for n in task.node_list:
subtask = self.orchestrator.create_task(task_model.DriverTask,
parent_task_id=task.get_id(), design_id=design_id,
action=hd_fields.OrchestratorAction.IdentifyNode,
site_name=task.site_name,
task_scope={'site': task.site_name, 'node_names': [n]})
runner = MaasTaskRunner(state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id(),config=self.config)
self.logger.info("Starting thread for task %s to identify node %s" % (subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
running_subtasks = len(subtasks)
attempts = 0
worked = failed = False
#TODO Add timeout to config
while running_subtasks > 0 and attempts < 3:
for t in subtasks:
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info("Task %s to identify node %s complete - status %s" %
(subtask.get_id(), n, subtask.get_result()))
result_detail['detail'].extend(subtask.result_detail['detail'])
running_subtasks = running_subtasks - 1
if subtask.result in [hd_fields.ActionResult.Success,
hd_fields.ActionResult.PartialSuccess]:
worked = True
elif subtask.result in [hd_fields.ActionResult.Failure,
hd_fields.ActionResult.PartialSuccess]:
failed = True
time.sleep(1 * 60)
attempts = attempts + 1
if running_subtasks > 0:
self.logger.warn("Time out for task %s before all subtask threads complete" % (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append('Some subtasks did not complete before the timeout threshold')
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
class MaasTaskRunner(drivers.DriverTaskRunner):
def __init__(self, config=None, **kwargs):
@@ -139,6 +225,8 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
self.driver_config = config
self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
def execute_task(self):
task_action = self.task.action
@@ -314,4 +402,48 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=action_result,
result_detail=result_detail)
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.IdentifyNode:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except:
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error accessing MaaS Machines API', 'retry': True})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
try:
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(node)
if machine is not None:
worked = True
result_detail['detail'].append("Node %s identified in MaaS" % n)
else:
failed = True
result_detail['detail'].append("Node %s not found in MaaS" % n)
except Exception as ex:
failed = True
result_detail['detail'].append("Error identifying node %s: %s" % (n, str(ex)))
result = None
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
elif failed:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)

@@ -13,6 +13,7 @@
# limitations under the License.
import json
import re
import logging
import drydock_provisioner.error as errors
"""
@@ -28,6 +29,7 @@ class ResourceBase(object):
def __init__(self, api_client, **kwargs):
self.api_client = api_client
self.logger = logging.getLogger('drydock.drivers.maasdriver')
for f in self.fields:
if f in kwargs.keys():
@@ -143,13 +145,15 @@ class ResourceBase(object):
return i
"""
A collection of MaaS resources.
Rather than a simple list, we will key the collection on resource
ID for more efficient access.
"""
class ResourceCollectionBase(object):
"""
A collection of MaaS resources.
Rather than a simple list, we will key the collection on resource
ID for more efficient access.
:param api_client: An instance of api_client.MaasRequestFactory
"""
collection_url = ''
collection_resource = ResourceBase
@@ -157,12 +161,14 @@
def __init__(self, api_client):
self.api_client = api_client
self.resources = {}
self.logger = logging.getLogger('drydock.drivers.maasdriver')
"""
Parse URL for placeholders and replace them with current
instance values
"""
def interpolate_url(self):
"""
Parse URL for placeholders and replace them with current
instance values
"""
pattern = '\{([a-z_]+)\}'
regex = re.compile(pattern)
start = 0

@@ -0,0 +1,34 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
class Interface(model_base.ResourceBase):
resource_url = 'nodes/{system_id}/interfaces/{resource_id}/'
fields = ['resource_id', 'system_id', 'name', 'type', 'mac_address', 'vlan',
'links', 'effective_mtu']
json_fields = ['name', 'type', 'mac_address', 'vlan', 'links', 'effective_mtu']
def __init__(self, api_client, **kwargs):
super(Interface, self).__init__(api_client, **kwargs)
class Interfaces(model_base.ResourceCollectionBase):
collection_url = 'nodes/{system_id}/interfaces/'
collection_resource = Interface
def __init__(self, api_client, **kwargs):
super(Interfaces, self).__init__(api_client)
self.system_id = kwargs.get('system_id', None)

@@ -0,0 +1,185 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface
import bson
import yaml
class Machine(model_base.ResourceBase):
resource_url = 'machines/{resource_id}/'
fields = ['resource_id', 'hostname', 'power_type', 'power_state', 'power_parameters', 'interfaces',
'boot_interface', 'memory', 'cpu_count', 'tag_names']
json_fields = ['hostname', 'power_type']
def __init__(self, api_client, **kwargs):
super(Machine, self).__init__(api_client, **kwargs)
# Replace generic dicts with interface collection model
if getattr(self, 'resource_id', None) is not None:
self.interfaces = maas_interface.Interfaces(api_client, system_id=self.resource_id)
self.interfaces.refresh()
def get_power_params(self):
url = self.interpolate_url()
resp = self.api_client.get(url, op='power_parameters')
if resp.status_code == 200:
self.power_parameters = resp.json()
def commission(self, debug=False):
url = self.interpolate_url()
# If we want to debug this node commissioning, enable SSH
# after commissioning and leave the node powered up
options = {'enable_ssh': '1' if debug else '0'}
resp = self.api_client.post(url, op='commission', files=options)
# Need to sort out how to handle exceptions
if not resp.ok:
raise Exception()
def get_details(self):
url = self.interpolate_url()
resp = self.api_client.get(url, op='details')
if resp.status_code == 200:
detail_config = bson.loads(resp.text)
return detail_config
def to_dict(self):
"""
Serialize this resource instance into a dict matching the
MAAS representation of the resource
"""
data_dict = {}
for f in self.json_fields:
if getattr(self, f, None) is not None:
if f == 'resource_id':
data_dict['system_id'] = getattr(self, f)
else:
data_dict[f] = getattr(self, f)
return data_dict
@classmethod
def from_dict(cls, api_client, obj_dict):
"""
Create a instance of this resource class based on a dict
of MaaS type attributes
Customized for Machine due to use of system_id instead of id
as resource key
:param api_client: Instance of api_client.MaasRequestFactory for accessing MaaS API
:param obj_dict: Python dict as parsed from MaaS API JSON representing this resource type
"""
refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
if 'system_id' in obj_dict.keys():
refined_dict['resource_id'] = obj_dict.get('system_id')
i = cls(api_client, **refined_dict)
return i
class Machines(model_base.ResourceCollectionBase):
collection_url = 'machines/'
collection_resource = Machine
def __init__(self, api_client, **kwargs):
super(Machines, self).__init__(api_client)
# Add the OOB power parameters to each machine instance
def collect_power_params(self):
for k, v in self.resources.items():
v.get_power_params()
def identify_baremetal_node(self, node_model, update_name=True):
"""
Search all the defined MaaS Machines and attempt to match
one against the provided Drydock BaremetalNode model. Update
the MaaS instance with the correct hostname
:param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource
:param update_name: Whether Drydock should update the MaaS resource name to match the Drydock design
"""
node_oob_network = node_model.oob_network
node_oob_ip = node_model.get_network_address(node_oob_network)
if node_oob_ip is None:
self.logger.warn("Node model missing OOB IP address")
raise ValueError('Node model missing OOB IP address')
try:
self.collect_power_params()
maas_node = self.singleton({'power_params.power_address': node_oob_ip})
self.logger.debug("Found MaaS resource %s matching Node %s" % (maas_node.resource_id, node_model.get_id()))
if maas_node.hostname != node_model.name and update_name:
maas_node.hostname = node_model.name
maas_node.update()
self.logger.debug("Updated MaaS resource %s hostname to %s" % (maas_node.resource_id, node_model.name))
return maas_node
except ValueError as ve:
self.logger.warn("Error locating matching MaaS resource for OOB IP %s" % (node_oob_ip))
return None
def query(self, query):
"""
Custom query method to deal with complex fields
"""
result = list(self.resources.values())
for (k, v) in query.items():
if k.startswith('power_params.'):
field = k[13:]
result = [i for i in result
if str(getattr(i,'power_parameters', {}).get(field, None)) == str(v)]
else:
result = [i for i in result
if str(getattr(i, k, None)) == str(v)]
return result
def add(self, res):
"""
Create a new resource in this collection in MaaS
Customize as Machine resources use 'system_id' instead of 'id'
"""
data_dict = res.to_dict()
url = self.interpolate_url()
resp = self.api_client.post(url, files=data_dict)
if resp.status_code == 200:
resp_json = resp.json()
res.set_resource_id(resp_json.get('system_id'))
return res
raise errors.DriverError("Failed updating MAAS url %s - return code %s"
% (url, resp.status_code))

@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
from pyghmi.ipmi.command import Command
@@ -34,15 +35,19 @@ class PyghmiDriver(oob.OobDriver):
self.driver_key = "pyghmi_driver"
self.driver_desc = "Pyghmi OOB Driver"
self.logger = logging.getLogger('drydock.oobdriver.pyghmi')
self.config = config.DrydockConfig.node_driver.get(self.driver_key, {})
def execute_task(self, task_id):
task = self.state_manager.get_task(task_id)
if task is None:
self.logger.error("Invalid task %s" % (task_id))
raise errors.DriverError("Invalid task %s" % (task_id))
if task.action not in self.supported_actions:
self.logger.error("Driver %s doesn't support task action %s"
% (self.driver_desc, task.action))
raise errors.DriverError("Driver %s doesn't support task action %s"
% (self.driver_desc, task.action))
@@ -66,7 +71,7 @@
result=hd_fields.ActionResult.Success)
return
site_design = self.orchestrator.get_effective_site(design_id, task.site_name)
site_design = self.orchestrator.get_effective_site(design_id)
target_nodes = []
@@ -118,13 +123,6 @@
if x.get_result() in [hd_fields.ActionResult.PartialSuccess,
hd_fields.ActionResult.Failure]]
print("Task %s successful subtasks: %s" %
(task.get_id(), len(success_subtasks)))
print("Task %s unsuccessful subtasks: %s" %
(task.get_id(), len(nosuccess_subtasks)))
print("Task %s total subtasks: %s" %
(task.get_id(), len(task.get_subtasks())))
task_result = None
if len(success_subtasks) > 0 and len(nosuccess_subtasks) > 0:
task_result = hd_fields.ActionResult.PartialSuccess
@@ -145,9 +143,11 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
def __init__(self, node=None, **kwargs):
super(PyghmiTaskRunner, self).__init__(**kwargs)
self.logger = logging.getLogger('drydock.oobdriver.pyghmi')
# We cheat here by providing the Node model instead
# of making the runner source it from statemgmt
if node is None:
self.logger.error("Did not specify target node")
raise errors.DriverError("Did not specify target node")
self.node = node
@@ -171,8 +171,7 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
raise errors.DriverError("Runner node does not match " \
"task node scope")
ipmi_network = self.node.applied.get('oob_network')
ipmi_network = self.node.oob_network
ipmi_address = self.node.get_network_address(ipmi_network)
if ipmi_address is None:
@@ -184,8 +183,8 @@
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Running)
ipmi_account = self.node.applied.get('oob_account', '')
ipmi_credential = self.node.applied.get('oob_credential', '')
ipmi_account = self.node.oob_account
ipmi_credential = self.node.oob_credential
ipmi_session = Command(bmc=ipmi_address, userid=ipmi_account,
password=ipmi_credential)

@@ -32,6 +32,7 @@ and storage.
* CreateStorageTemplate - Configure site-wide storage information in bootstrapper
* CreateBootMedia - Ensure all needed boot media is available to the bootstrapper including external repositories
* PrepareHardwareConfig - Prepare the bootstrapper to handle all hardware configuration actions (firmware updates, RAID configuration, driver installation)
* IdentifyNode - Correlate a node definition in the Drydock internal model with a node detected by the downstream node bootstrapper.
* ConfigureHardware - Update and validate all hardware configurations on a node prior to deploying the OS on it
* InterrogateNode - Interrogate the bootstrapper about node information. Depending on the current state of the node, this interrogation will produce different information.
* ApplyNodeNetworking - Configure networking for a node
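
The new IdentifyNode action in this list is driven by the orchestrator through the node driver, following the same create-task/execute-task pattern shown in the driver and orchestrator hunks elsewhere in this commit. A condensed, hypothetical sketch of that flow, with illustrative names and assumed import paths, might look like:

```python
# Condensed sketch of dispatching the new IdentifyNode action; import paths
# and variable names are assumptions based on the hunks in this commit.
import drydock_provisioner.objects.task as tasks
import drydock_provisioner.objects.fields as hd_fields

def identify_nodes(orchestrator, node_driver, parent_task, design_id, node_names):
    # Create a driver-scoped subtask under the parent orchestration task
    subtask = orchestrator.create_task(
        tasks.DriverTask,
        parent_task_id=parent_task.get_id(),
        design_id=design_id,
        action=hd_fields.OrchestratorAction.IdentifyNode,
        task_scope={'site': parent_task.site_name, 'node_names': node_names})

    # The MaaS node driver spawns a MaasTaskRunner thread per node and
    # matches each Drydock BaremetalNode against a MaaS Machine by OOB IP
    node_driver.execute_task(subtask.get_id())

    # Re-read the task from state management to pick up the recorded result
    subtask = orchestrator.state_manager.get_task(subtask.get_id())
    return subtask.get_result()
```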

@@ -28,7 +28,7 @@ def start_drydock():
logger.setLevel(config.DrydockConfig.global_config.get('log_level'))
ch = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

@@ -83,7 +83,11 @@ class Ingester(object):
self.logger.debug("Ingester:ingest_data ingesting design parts for design %s" % design_id)
if plugin_name in self.registered_plugins:
design_items = self.registered_plugins[plugin_name].ingest_data(**kwargs)
try:
design_items = self.registered_plugins[plugin_name].ingest_data(**kwargs)
except ValueError as vex:
self.logger.warn("Ingester:ingest_data - Error process data - %s" % (str(vex)))
return None
self.logger.debug("Ingester:ingest_data parsed %s design parts" % str(len(design_items)))
for m in design_items:
if context is not None:

@@ -69,7 +69,7 @@ class YamlIngester(IngesterPlugin):
"""
def parse_docs(self, yaml_string):
models = []
self.logger.debug("yamlingester:parse_docs - Parsing YAML string \n%s" % (yaml_string))
try:
parsed_data = yaml.load_all(yaml_string)
except yaml.YAMLError as err:

@@ -44,6 +44,7 @@ class OrchestratorAction(BaseDrydockEnum):
CreateStorageTemplate = 'create_storage_template'
CreateBootMedia = 'create_boot_media'
PrepareHardwareConfig = 'prepare_hardware_config'
IdentifyNode = 'identify_node'
ConfigureHardware = 'configure_hardware'
InterrogateNode = 'interrogate_node'
ApplyNodeNetworking = 'apply_node_networking'

@@ -16,6 +16,7 @@ import uuid
import time
import threading
import importlib
import logging
from copy import deepcopy
@@ -32,6 +33,7 @@ class Orchestrator(object):
self.enabled_drivers = {}
self.state_manager = state_manager
self.logger = logging.getLogger('drydock.orchestrator')
if enabled_drivers is not None:
oob_driver_name = enabled_drivers.get('oob', None)
@@ -155,10 +157,14 @@ class Orchestrator(object):
task_scope=task_scope,
action=hd_fields.OrchestratorAction.CreateNetworkTemplate)
self.logger.info("Starting node driver task %s to create network templates" % (driver_task.get_id()))
driver.execute_task(driver_task.get_id())
driver_task = self.state_manager.get_task(driver_task.get_id())
self.logger.info("Node driver task %s complete" % (driver_task.get_id()))
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=driver_task.get_result())
@@ -166,13 +172,13 @@
elif task.action == hd_fields.OrchestratorAction.VerifyNode:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Running)
oob_driver = self.enabled_drivers['oob']
driver = self.enabled_drivers['oob']
if driver is None:
if oob_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure)
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No oob driver configured', 'retry': False})
return
site_design = self.get_effective_site(design_id)
@@ -186,30 +192,42 @@
task_scope = {'site' : task_site,
'node_names' : target_names}
driver_task = self.create_task(tasks.DriverTask,
oob_driver_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.InterrogateNode,
action=hd_fields.OrchestratorAction.InterrogateOob,
task_scope=task_scope)
driver.execute_task(driver_task.get_id())
oob_driver.execute_task(oob_driver_task.get_id())
driver_task = self.state_manager.get_task(driver_task.get_id())
oob_driver_task = self.state_manager.get_task(oob_driver_task.get_id())
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=driver_task.get_result())
result=oob_driver_task.get_result())
return
elif task.action == hd_fields.OrchestratorAction.PrepareNode:
failed = worked = False
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Running)
driver = self.enabled_drivers['oob']
oob_driver = self.enabled_drivers['oob']
if driver is None:
if oob_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure)
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No oob driver configured', 'retry': False})
return
node_driver = self.enabled_drivers['node']
if node_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No node driver configured', 'retry': False})
return
site_design = self.get_effective_site(design_id)
@@ -228,34 +246,89 @@
design_id=design_id,
action=hd_fields.OrchestratorAction.SetNodeBoot,
task_scope=task_scope)
self.logger.info("Starting OOB driver task %s to set PXE boot" % (setboot_task.get_id()))
driver.execute_task(setboot_task.get_id())
oob_driver.execute_task(setboot_task.get_id())
self.logger.info("OOB driver task %s complete" % (setboot_task.get_id()))
setboot_task = self.state_manager.get_task(setboot_task.get_id())
if setboot_task.get_result() == hd_fields.ActionResult.Success:
worked = True
elif setboot_task.get_result() == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
elif setboot_task.get_result() == hd_fields.ActionResult.Failure:
failed = True
cycle_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.PowerCycleNode,
task_scope=task_scope)
driver.execute_task(cycle_task.get_id())
self.logger.info("Starting OOB driver task %s to power cycle nodes" % (cycle_task.get_id()))
oob_driver.execute_task(cycle_task.get_id())
self.logger.info("OOB driver task %s complete" % (cycle_task.get_id()))
cycle_task = self.state_manager.get_task(cycle_task.get_id())
if (setboot_task.get_result() == hd_fields.ActionResult.Success and
cycle_task.get_result() == hd_fields.ActionResult.Success):
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Success)
elif (setboot_task.get_result() == hd_fields.ActionResult.Success or
cycle_task.get_result() == hd_fields.ActionResult.Success):
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.PartialSuccess)
if cycle_task.get_result() == hd_fields.ActionResult.Success:
worked = True
elif cycle_task.get_result() == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
elif cycle_task.get_result() == hd_fields.ActionResult.Failure:
failed = True
# IdentifyNode success will take some time after PowerCycleNode finishes
# Retry the operation a few times if it fails before considering it a final failure
# Each attempt is a new task which might make the final task tree a bit confusing
node_identify_attempts = 0
while True:
node_identify_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.IdentifyNode,
task_scope=task_scope)
self.logger.info("Starting node driver task %s to identify node - attempt %s" %
(node_identify_task.get_id(), node_identify_attempts+1))
node_driver.execute_task(node_identify_task.get_id())
node_identify_attempts = node_identify_attempts + 1
node_identify_task = self.state_manager.get_task(node_identify_task.get_id())
if node_identify_task.get_result() == hd_fields.ActionResult.Success:
worked = True
break
elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
hd_fields.ActionResult.Failure]:
# TODO This threshold should be a configurable default and tunable by task API
if node_identify_attempts > 2:
failed = True
break
time.sleep(5 * 60)
final_result = None
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif worked:
final_result = hd_fields.ActionResult.Success
else:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure)
final_result = hd_fields.ActionResult.Failure
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=final_result)
return
else:

@@ -62,6 +62,7 @@ setup(name='drydock_provisioner',
'requests',
'oauthlib',
'uwsgi>1.4',
'bson===0.4.7'
]
)