
Shipyard deployment configuration

Puts into place the DeploymentConfiguration YAML that
provides the options the site design should supply to the
deployment (and update) workflows.

This change additionally refactors reused pieces of the
xcom information-passing logic into common modules.

Change-Id: Ib6470899b204dbc18d2a9a2e4f95540b3b0032b0
changes/87/569187/1
Bryan Strassner 5 years ago
commit a88a5cf15a
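
For orientation, the defaults added in the new DeploymentConfigurationOperator (see its diff below) imply site-design data shaped roughly as follows. This is a hypothetical minimal example expressed as Python data: the key names come from config_keys_defaults, the nesting is an assumption, and the actual schema lives in shipyard_airflow/schemas/deploymentConfiguration.yaml, which is not shown in this excerpt.

# Hypothetical minimal deployment-configuration data; the key names are
# taken from config_keys_defaults in the new operator below, and the
# nesting is an assumption, not the schema definition.
deployment_configuration_data = {
    'physical_provisioner': {
        'deployment_strategy': 'all-at-once',
        'deploy_timeout': 3600,
    },
    'armada': {
        'manifest': 'full-site',
    },
}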
39 changed files (lines changed per file):

 59  etc/shipyard/shipyard.conf.sample
 67  shipyard_airflow/conf/config.py
 10  shipyard_airflow/control/action/actions_api.py
  5  shipyard_airflow/dags/armada_deploy_site.py
175  shipyard_airflow/dags/common_step_factory.py
 18  shipyard_airflow/dags/config_path.py
 36  shipyard_airflow/dags/dag_deployment_configuration.py
 26  shipyard_airflow/dags/dag_names.py
  6  shipyard_airflow/dags/deckhand_get_design.py
 95  shipyard_airflow/dags/deploy_site.py
  6  shipyard_airflow/dags/destroy_node.py
  6  shipyard_airflow/dags/drydock_deploy_site.py
  6  shipyard_airflow/dags/preflight_checks.py
 94  shipyard_airflow/dags/redeploy_server.py
 87  shipyard_airflow/dags/update_site.py
  5  shipyard_airflow/dags/validate_site_design.py
 38  shipyard_airflow/plugins/armada_operator.py
 18  shipyard_airflow/plugins/concurrency_check_operator.py
 20  shipyard_airflow/plugins/deckhand_base_operator.py
 66  shipyard_airflow/plugins/deckhand_client_factory.py
178  shipyard_airflow/plugins/deployment_configuration_operator.py
128  shipyard_airflow/plugins/drydock_operators.py
 27  shipyard_airflow/plugins/promenade_base_operator.py
  4  shipyard_airflow/plugins/promenade_check_etcd.py
  4  shipyard_airflow/plugins/promenade_clear_labels.py
  5  shipyard_airflow/plugins/promenade_drain_node.py
 84  shipyard_airflow/plugins/xcom_puller.py
 76  shipyard_airflow/schemas/deploymentConfiguration.yaml
  4  test-requirements.txt
  2  tests/unit/control/test.conf
 15  tests/unit/plugins/test.conf
 29  tests/unit/plugins/test_deckhand_client_factory.py
158  tests/unit/plugins/test_deployment_configuration_operator.py
  0  tests/unit/schemas/__init__.py
 78  tests/unit/schemas/test_deployment_configuration.py
 13  tests/unit/yaml_samples/deploymentConfiguration_bad_manifest.yaml
 31  tests/unit/yaml_samples/deploymentConfiguration_full_valid.yaml
 12  tests/unit/yaml_samples/deploymentConfiguration_minimal_valid.yaml
 21  tox.ini

59  etc/shipyard/shipyard.conf.sample

@@ -20,7 +20,13 @@
#
# The web server for Airflow (string value)
#web_server = http://localhost:32080
#web_server = http://localhost:32080/
# Seconds to wait to connect to the airflow api (integer value)
#airflow_api_connect_timeout = 5
# Seconds to wait for a response from the airflow api (integer value)
#airflow_api_read_timeout = 60
# The database for shipyard (string value)
#postgresql_db = postgresql+psycopg2://shipyard:changeme@postgresql.ucp:5432/shipyard
@@ -31,9 +37,6 @@
# The directory containing the alembic.ini file (string value)
#alembic_ini_path = /home/shipyard/shipyard
# Upgrade the database on startup (boolean value)
#upgrade_db = true
[deckhand]
@@ -58,39 +61,6 @@
# (string value)
#service_type = physicalprovisioner
# Query interval (in seconds) for verify_site task (integer value)
#verify_site_query_interval = 10
# Time out (in seconds) for verify_site task (integer value)
#verify_site_task_timeout = 60
# Query interval (in seconds) for prepare_site task (integer value)
#prepare_site_query_interval = 10
# Time out (in seconds) for prepare_site task (integer value)
#prepare_site_task_timeout = 300
# Query interval (in seconds) for prepare_node task (integer value)
#prepare_node_query_interval = 30
# Time out (in seconds) for prepare_node task (integer value)
#prepare_node_task_timeout = 1800
# Query interval (in seconds) for deploy_node task (integer value)
#deploy_node_query_interval = 30
# Time out (in seconds) for deploy_node task (integer value)
#deploy_node_task_timeout = 3600
# Query interval (in seconds) for destroy_node task (integer value)
#destroy_node_query_interval = 30
# Time out (in seconds) for destroy_node task (integer value)
#destroy_node_task_timeout = 900
# Backoff time (in seconds) before checking cluster join (integer value)
#cluster_join_check_backoff_time = 120
[keystone_authtoken]
@@ -278,17 +248,22 @@
[requests_config]
# Deckhand client connect timeout (in seconds)
#
# From shipyard_airflow
#
# Deckhand client connect timeout (in seconds) (integer value)
#deckhand_client_connect_timeout = 5
# Deckhand client timeout (in seconds) for GET,
# PUT, POST and DELETE request
# Deckhand client timeout (in seconds) for GET, PUT, POST and DELETE request
# (integer value)
#deckhand_client_read_timeout = 300
# UCP component validation connect timeout (in seconds)
# UCP component validation connect timeout (in seconds) (integer value)
#validation_connect_timeout = 5
# UCP component validation timeout (in seconds)
# UCP component validation timeout (in seconds) (integer value)
#validation_read_timeout = 300

67  shipyard_airflow/conf/config.py

@@ -32,6 +32,16 @@ SECTIONS = [
default='http://localhost:32080/',
help='The web server for Airflow'
),
cfg.IntOpt(
'airflow_api_connect_timeout',
default=5,
help='Seconds to wait to connect to the airflow api'
),
cfg.IntOpt(
'airflow_api_read_timeout',
default=60,
help='Seconds to wait for a response from the airflow api'
),
cfg.StrOpt(
'postgresql_db',
default=(
@@ -52,7 +62,7 @@ SECTIONS = [
'alembic_ini_path',
default='/home/shipyard/shipyard',
help='The directory containing the alembic.ini file'
)
),
]
),
ConfigSection(
@@ -125,61 +135,6 @@ SECTIONS = [
'the service lookup in the Keystone service catalog.'
)
),
cfg.IntOpt(
'verify_site_query_interval',
default=10,
help='Query interval (in seconds) for verify_site task'
),
cfg.IntOpt(
'verify_site_task_timeout',
default=60,
help='Time out (in seconds) for verify_site task'
),
cfg.IntOpt(
'prepare_site_query_interval',
default=10,
help='Query interval (in seconds) for prepare_site task'
),
cfg.IntOpt(
'prepare_site_task_timeout',
default=300,
help='Time out (in seconds) for prepare_site task'
),
cfg.IntOpt(
'prepare_node_query_interval',
default=30,
help='Query interval (in seconds) for prepare_node task'
),
cfg.IntOpt(
'prepare_node_task_timeout',
default=1800,
help='Time out (in seconds) for prepare_node task'
),
cfg.IntOpt(
'deploy_node_query_interval',
default=30,
help='Query interval (in seconds) for deploy_node task'
),
cfg.IntOpt(
'deploy_node_task_timeout',
default=3600,
help='Time out (in seconds) for deploy_node task'
),
cfg.IntOpt(
'destroy_node_query_interval',
default=30,
help='Query interval (in seconds) for destroy_node task'
),
cfg.IntOpt(
'destroy_node_task_timeout',
default=900,
help='Time out (in seconds) for destroy_node task'
),
cfg.IntOpt(
'cluster_join_check_backoff_time',
default=120,
help='Backoff time (in seconds) before checking cluster join'
),
]
),
ConfigSection(

10  shipyard_airflow/control/action/actions_api.py

@@ -221,8 +221,13 @@ class ActionsResource(BaseResource):
:param dag_id: the name of the dag to invoke
:param action: the action structure to invoke the dag with
"""
# TODO(bryan-strassner) refactor the mechanics of this method to an
# airflow api client module
# Retrieve URL
web_server_url = CONF.base.web_server
c_timeout = CONF.base.airflow_api_connect_timeout
r_timeout = CONF.base.airflow_api_read_timeout
if 'Error' in web_server_url:
raise ApiError(
@@ -232,7 +237,6 @@ class ActionsResource(BaseResource):
'value'),
status=falcon.HTTP_503,
retry=True, )
else:
conf_value = {'action': action}
# "conf" - JSON string that gets pickled into the DagRun's
@@ -242,7 +246,7 @@ class ActionsResource(BaseResource):
dag_id, self.to_json(conf_value)))
try:
resp = requests.get(req_url, timeout=(5, 15))
resp = requests.get(req_url, timeout=(c_timeout, r_timeout))
LOG.info('Response code from Airflow trigger_dag: %s',
resp.status_code)
# any 4xx/5xx will be HTTPError, which are RequestException
@@ -268,6 +272,8 @@ class ActionsResource(BaseResource):
return dag_execution_date
def _exhume_date(self, dag_id, log_string):
# TODO(bryan-strassner) refactor this to an airflow api client module
# we are unable to use the response time because that
# does not match the time when the dag was recorded.
# We have to parse the stdout returned to find the

5  shipyard_airflow/dags/armada_deploy_site.py

@@ -15,10 +15,7 @@
from airflow.models import DAG
from airflow.operators import ArmadaOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def deploy_site_armada(parent_dag_name, child_dag_name, args):

175  shipyard_airflow/dags/common_step_factory.py

@@ -0,0 +1,175 @@
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from airflow.operators import ConcurrencyCheckOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.subdag_operator import SubDagOperator
from armada_deploy_site import deploy_site_armada
import dag_names as dn
from deckhand_get_design import get_design_deckhand
from destroy_node import destroy_server
from drydock_deploy_site import deploy_site_drydock
from failure_handlers import step_failure_handler
from dag_deployment_configuration import get_deployment_configuration
from preflight_checks import all_preflight_checks
from validate_site_design import validate_site_design
class CommonStepFactory(object):
"""Common step factory
A factory to generate steps that are reused among multiple dags
"""
def __init__(self, parent_dag_name, dag, default_args):
"""Creates a factory
Uses the specified parent_dag_name
"""
self.parent_dag_name = parent_dag_name
self.dag = dag
self.default_args = default_args
def get_action_xcom(self, task_id=dn.ACTION_XCOM):
"""Generate the action_xcom step
Step responsible for getting the action information passed
by the invocation of the dag, which includes any options.
"""
def xcom_push(**kwargs):
"""xcom_push function
Defines a push function to store the content of 'action' that is
defined via 'dag_run' in XCOM so that it can be used by the
Operators
"""
kwargs['ti'].xcom_push(key='action',
value=kwargs['dag_run'].conf['action'])
return PythonOperator(task_id=task_id,
dag=self.dag,
python_callable=xcom_push)
def get_concurrency_check(self, task_id=dn.DAG_CONCURRENCY_CHECK_DAG_NAME):
"""Generate the concurrency check step
Concurrency check prevents simultaneous execution of dags that should
not execute together.
"""
return ConcurrencyCheckOperator(
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_preflight(self, task_id=dn.ALL_PREFLIGHT_CHECKS_DAG_NAME):
"""Generate the preflight step
Preflight checks the preconditions for running a DAG
"""
return SubDagOperator(
subdag=all_preflight_checks(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_get_design_version(self, task_id=dn.DECKHAND_GET_DESIGN_VERSION):
"""Generate the get design version step
Retrieves the version of the design to use from deckhand
"""
return SubDagOperator(
subdag=get_design_deckhand(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_validate_site_design(self,
task_id=dn.VALIDATE_SITE_DESIGN_DAG_NAME):
"""Generate the validate site design step
Validation of the site design ensures that the design to be used
for a deployment passes checks before it is used.
"""
return SubDagOperator(
subdag=validate_site_design(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_deployment_configuration(self,
task_id=dn.GET_DEPLOY_CONF_DAG_NAME):
"""Generate the step to retrieve the deployment configuration
This step provides the timings and strategies that will be used in
subsequent steps
"""
return SubDagOperator(
subdag=get_deployment_configuration(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_drydock_build(self, task_id=dn.DRYDOCK_BUILD_DAG_NAME):
"""Generate the drydock build step
Drydock build does the hardware provisioning.
"""
return SubDagOperator(
subdag=deploy_site_drydock(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_armada_build(self, task_id=dn.ARMADA_BUILD_DAG_NAME):
"""Generate the armada build step
Armada build deploys the Helm charts.
"""
return SubDagOperator(
subdag=deploy_site_armada(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)
def get_destroy_server(self, task_id=dn.DESTROY_SERVER_DAG_NAME):
"""Generate a destroy server step
Destroy server tears down Kubernetes and the hardware.
"""
return SubDagOperator(
subdag=destroy_server(
self.parent_dag_name,
task_id,
args=self.default_args),
task_id=task_id,
on_failure_callback=step_failure_handler,
dag=self.dag)

18  shipyard_airflow/dags/config_path.py

@@ -0,0 +1,18 @@
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'

36  shipyard_airflow/dags/dag_deployment_configuration.py

@@ -0,0 +1,36 @@
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from airflow.models import DAG
from airflow.operators import DeploymentConfigurationOperator
from config_path import config_path
GET_DEPLOYMENT_CONFIGURATION_NAME = 'get_deployment_configuration'
def get_deployment_configuration(parent_dag_name, child_dag_name, args):
"""DAG to retrieve deployment configuration"""
dag = DAG(
'{}.{}'.format(parent_dag_name, child_dag_name),
default_args=args)
deployment_configuration = DeploymentConfigurationOperator(
task_id=GET_DEPLOYMENT_CONFIGURATION_NAME,
shipyard_conf=config_path,
main_dag_name=parent_dag_name,
dag=dag)
return dag

26  shipyard_airflow/dags/dag_names.py

@@ -0,0 +1,26 @@
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Subdags
ALL_PREFLIGHT_CHECKS_DAG_NAME = 'preflight'
ARMADA_BUILD_DAG_NAME = 'armada_build'
DAG_CONCURRENCY_CHECK_DAG_NAME = 'dag_concurrency_check'
DECKHAND_GET_DESIGN_VERSION = 'deckhand_get_design_version'
GET_DEPLOY_CONF_DAG_NAME = 'dag_deployment_configuration'
DRYDOCK_BUILD_DAG_NAME = 'drydock_build'
VALIDATE_SITE_DESIGN_DAG_NAME = 'validate_site_design'
DESTROY_SERVER_DAG_NAME = 'destroy_server'
# Steps
ACTION_XCOM = 'action_xcom'

6  shipyard_airflow/dags/deckhand_get_design.py

@@ -16,11 +16,7 @@ from airflow.models import DAG
from airflow.operators import DeckhandGetDesignOperator
from airflow.operators import DeckhandRetrieveRenderedDocOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def get_design_deckhand(parent_dag_name, child_dag_name, args):

95  shipyard_airflow/dags/deploy_site.py

@@ -1,4 +1,4 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,29 +14,16 @@
from datetime import timedelta
import airflow
import failure_handlers
from airflow import DAG
from airflow.operators import ConcurrencyCheckOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.subdag_operator import SubDagOperator
from armada_deploy_site import deploy_site_armada
from deckhand_get_design import get_design_deckhand
from drydock_deploy_site import deploy_site_drydock
from preflight_checks import all_preflight_checks
from validate_site_design import validate_site_design
"""
deploy_site is the top-level orchestration DAG for deploying a site using the
Undercloud platform.
from common_step_factory import CommonStepFactory
"""deploy_site
The top-level orchestration DAG for deploying a site using the Undercloud
platform.
"""
ALL_PREFLIGHT_CHECKS_DAG_NAME = 'preflight'
ARMADA_BUILD_DAG_NAME = 'armada_build'
DAG_CONCURRENCY_CHECK_DAG_NAME = 'dag_concurrency_check'
DECKHAND_GET_DESIGN_VERSION = 'deckhand_get_design_version'
DRYDOCK_BUILD_DAG_NAME = 'drydock_build'
PARENT_DAG_NAME = 'deploy_site'
VALIDATE_SITE_DESIGN_DAG_NAME = 'validate_site_design'
default_args = {
'owner': 'airflow',
@@ -51,66 +38,28 @@ default_args = {
}
dag = DAG(PARENT_DAG_NAME, default_args=default_args, schedule_interval=None)
"""
Define push function to store the content of 'action' that is
defined via 'dag_run' in XCOM so that it can be used by the
Operators
"""
def xcom_push(**kwargs):
# Pushes action XCom
kwargs['ti'].xcom_push(key='action',
value=kwargs['dag_run'].conf['action'])
action_xcom = PythonOperator(
task_id='action_xcom', dag=dag, python_callable=xcom_push)
concurrency_check = ConcurrencyCheckOperator(
task_id=DAG_CONCURRENCY_CHECK_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
preflight = SubDagOperator(
subdag=all_preflight_checks(
PARENT_DAG_NAME, ALL_PREFLIGHT_CHECKS_DAG_NAME, args=default_args),
task_id=ALL_PREFLIGHT_CHECKS_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
get_design_version = SubDagOperator(
subdag=get_design_deckhand(
PARENT_DAG_NAME, DECKHAND_GET_DESIGN_VERSION, args=default_args),
task_id=DECKHAND_GET_DESIGN_VERSION,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
validate_site_design = SubDagOperator(
subdag=validate_site_design(
PARENT_DAG_NAME, VALIDATE_SITE_DESIGN_DAG_NAME, args=default_args),
task_id=VALIDATE_SITE_DESIGN_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
drydock_build = SubDagOperator(
subdag=deploy_site_drydock(
PARENT_DAG_NAME, DRYDOCK_BUILD_DAG_NAME, args=default_args),
task_id=DRYDOCK_BUILD_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
step_factory = CommonStepFactory(parent_dag_name=PARENT_DAG_NAME,
dag=dag,
default_args=default_args)
armada_build = SubDagOperator(
subdag=deploy_site_armada(
PARENT_DAG_NAME, ARMADA_BUILD_DAG_NAME, args=default_args),
task_id=ARMADA_BUILD_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
action_xcom = step_factory.get_action_xcom()
concurrency_check = step_factory.get_concurrency_check()
preflight = step_factory.get_preflight()
get_design_version = step_factory.get_get_design_version()
validate_site_design = step_factory.get_validate_site_design()
deployment_configuration = step_factory.get_deployment_configuration()
drydock_build = step_factory.get_drydock_build()
armada_build = step_factory.get_armada_build()
# DAG Wiring
concurrency_check.set_upstream(action_xcom)
preflight.set_upstream(concurrency_check)
get_design_version.set_upstream(preflight)
validate_site_design.set_upstream(get_design_version)
drydock_build.set_upstream(validate_site_design)
deployment_configuration.set_upstream(get_design_version)
drydock_build.set_upstream([
validate_site_design,
deployment_configuration
])
armada_build.set_upstream(drydock_build)

6  shipyard_airflow/dags/destroy_node.py

@@ -20,11 +20,7 @@ from airflow.operators import PromenadeDecommissionNodeOperator
from airflow.operators import PromenadeDrainNodeOperator
from airflow.operators import PromenadeShutdownKubeletOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def destroy_server(parent_dag_name, child_dag_name, args):

6  shipyard_airflow/dags/drydock_deploy_site.py

@@ -15,11 +15,7 @@
from airflow.models import DAG
from airflow.operators import DryDockOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def deploy_site_drydock(parent_dag_name, child_dag_name, args):

6  shipyard_airflow/dags/preflight_checks.py

@@ -16,11 +16,7 @@ from airflow.models import DAG
from airflow.operators import K8sHealthCheckOperator
from airflow.operators import UcpHealthCheckOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def all_preflight_checks(parent_dag_name, child_dag_name, args):

94  shipyard_airflow/dags/redeploy_server.py

@@ -1,4 +1,4 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,29 +14,16 @@
from datetime import timedelta
import airflow
import failure_handlers
from airflow import DAG
from airflow.operators import ConcurrencyCheckOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.subdag_operator import SubDagOperator
from deckhand_get_design import get_design_deckhand
from destroy_node import destroy_server
from drydock_deploy_site import deploy_site_drydock
from preflight_checks import all_preflight_checks
from validate_site_design import validate_site_design
"""
redeploy_server is the top-level orchestration DAG for redeploying a
server using the Undercloud platform.
from common_step_factory import CommonStepFactory
"""redeploy_server
The top-level orchestration DAG for redeploying a server using the Undercloud
platform.
"""
ALL_PREFLIGHT_CHECKS_DAG_NAME = 'preflight'
DAG_CONCURRENCY_CHECK_DAG_NAME = 'dag_concurrency_check'
DECKHAND_GET_DESIGN_VERSION = 'deckhand_get_design_version'
DESTROY_SERVER_DAG_NAME = 'destroy_server'
DRYDOCK_BUILD_DAG_NAME = 'drydock_build'
PARENT_DAG_NAME = 'redeploy_server'
VALIDATE_SITE_DESIGN_DAG_NAME = 'validate_site_design'
default_args = {
'owner': 'airflow',
@@ -51,66 +38,29 @@ default_args = {
}
dag = DAG(PARENT_DAG_NAME, default_args=default_args, schedule_interval=None)
"""
Define push function to store the content of 'action' that is
defined via 'dag_run' in XCOM so that it can be used by the
Operators
"""
def xcom_push(**kwargs):
# Pushes action XCom
kwargs['ti'].xcom_push(key='action',
value=kwargs['dag_run'].conf['action'])
action_xcom = PythonOperator(
task_id='action_xcom', dag=dag, python_callable=xcom_push)
concurrency_check = ConcurrencyCheckOperator(
task_id=DAG_CONCURRENCY_CHECK_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
preflight = SubDagOperator(
subdag=all_preflight_checks(
PARENT_DAG_NAME, ALL_PREFLIGHT_CHECKS_DAG_NAME, args=default_args),
task_id=ALL_PREFLIGHT_CHECKS_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
get_design_version = SubDagOperator(
subdag=get_design_deckhand(
PARENT_DAG_NAME, DECKHAND_GET_DESIGN_VERSION, args=default_args),
task_id=DECKHAND_GET_DESIGN_VERSION,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
validate_site_design = SubDagOperator(
subdag=validate_site_design(
PARENT_DAG_NAME, VALIDATE_SITE_DESIGN_DAG_NAME, args=default_args),
task_id=VALIDATE_SITE_DESIGN_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
step_factory = CommonStepFactory(parent_dag_name=PARENT_DAG_NAME,
dag=dag,
default_args=default_args)
destroy_server = SubDagOperator(
subdag=destroy_server(
PARENT_DAG_NAME, DESTROY_SERVER_DAG_NAME, args=default_args),
task_id=DESTROY_SERVER_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
drydock_build = SubDagOperator(
subdag=deploy_site_drydock(
PARENT_DAG_NAME, DRYDOCK_BUILD_DAG_NAME, args=default_args),
task_id=DRYDOCK_BUILD_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
action_xcom = step_factory.get_action_xcom()
concurrency_check = step_factory.get_concurrency_check()
preflight = step_factory.get_preflight()
get_design_version = step_factory.get_get_design_version()
validate_site_design = step_factory.get_validate_site_design()
deployment_configuration = step_factory.get_deployment_configuration()
destroy_server = step_factory.get_destroy_server()
drydock_build = step_factory.get_drydock_build()
# DAG Wiring
concurrency_check.set_upstream(action_xcom)
preflight.set_upstream(concurrency_check)
get_design_version.set_upstream(preflight)
validate_site_design.set_upstream(get_design_version)
destroy_server.set_upstream(validate_site_design)
deployment_configuration.set_upstream(get_design_version)
destroy_server.set_upstream([
validate_site_design,
deployment_configuration
])
drydock_build.set_upstream(destroy_server)

87  shipyard_airflow/dags/update_site.py

@@ -1,4 +1,4 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,32 +14,21 @@
from datetime import timedelta
import airflow
import failure_handlers
from airflow import DAG
from airflow.operators import ConcurrencyCheckOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.subdag_operator import SubDagOperator
from armada_deploy_site import deploy_site_armada
from deckhand_get_design import get_design_deckhand
from drydock_deploy_site import deploy_site_drydock
from validate_site_design import validate_site_design
"""
update_site is the top-level orchestration DAG for updating a site using the
Undercloud platform.
from common_step_factory import CommonStepFactory
"""update_site
The top-level orchestration DAG for updating a site using the Undercloud
platform.
TODO: We will disable pre-flight checks for now and will revisit it at
a later date. The pre-flight checks will be more targeted in the
case of 'update_site' and will include specific checks on things
like coredns, calico and ceph.
"""
ARMADA_BUILD_DAG_NAME = 'armada_build'
DAG_CONCURRENCY_CHECK_DAG_NAME = 'dag_concurrency_check'
DECKHAND_GET_DESIGN_VERSION = 'deckhand_get_design_version'
DRYDOCK_BUILD_DAG_NAME = 'drydock_build'
PARENT_DAG_NAME = 'update_site'
VALIDATE_SITE_DESIGN_DAG_NAME = 'validate_site_design'
default_args = {
'owner': 'airflow',
@@ -54,58 +43,26 @@ default_args = {
}
dag = DAG(PARENT_DAG_NAME, default_args=default_args, schedule_interval=None)
"""
Define push function to store the content of 'action' that is
defined via 'dag_run' in XCOM so that it can be used by the
Operators
"""
def xcom_push(**kwargs):
# Pushes action XCom
kwargs['ti'].xcom_push(key='action',
value=kwargs['dag_run'].conf['action'])
action_xcom = PythonOperator(
task_id='action_xcom', dag=dag, python_callable=xcom_push)
concurrency_check = ConcurrencyCheckOperator(
task_id=DAG_CONCURRENCY_CHECK_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
get_design_version = SubDagOperator(
subdag=get_design_deckhand(
PARENT_DAG_NAME, DECKHAND_GET_DESIGN_VERSION, args=default_args),
task_id=DECKHAND_GET_DESIGN_VERSION,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
validate_site_design = SubDagOperator(
subdag=validate_site_design(
PARENT_DAG_NAME, VALIDATE_SITE_DESIGN_DAG_NAME, args=default_args),
task_id=VALIDATE_SITE_DESIGN_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
drydock_build = SubDagOperator(
subdag=deploy_site_drydock(
PARENT_DAG_NAME, DRYDOCK_BUILD_DAG_NAME, args=default_args),
task_id=DRYDOCK_BUILD_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
step_factory = CommonStepFactory(parent_dag_name=PARENT_DAG_NAME,
dag=dag,
default_args=default_args)
armada_build = SubDagOperator(
subdag=deploy_site_armada(
PARENT_DAG_NAME, ARMADA_BUILD_DAG_NAME, args=default_args),
task_id=ARMADA_BUILD_DAG_NAME,
on_failure_callback=failure_handlers.step_failure_handler,
dag=dag)
action_xcom = step_factory.get_action_xcom()
concurrency_check = step_factory.get_concurrency_check()
get_design_version = step_factory.get_get_design_version()
validate_site_design = step_factory.get_validate_site_design()
deployment_configuration = step_factory.get_deployment_configuration()
drydock_build = step_factory.get_drydock_build()
armada_build = step_factory.get_armada_build()
# DAG Wiring
concurrency_check.set_upstream(action_xcom)
get_design_version.set_upstream(concurrency_check)
validate_site_design.set_upstream(get_design_version)
drydock_build.set_upstream(validate_site_design)
deployment_configuration.set_upstream(get_design_version)
drydock_build.set_upstream([
validate_site_design,
deployment_configuration
])
armada_build.set_upstream(drydock_build)

5  shipyard_airflow/dags/validate_site_design.py

@@ -17,10 +17,7 @@ from airflow.operators import ArmadaOperator
from airflow.operators import DeckhandValidateSiteDesignOperator
from airflow.operators import DryDockOperator
# Location of shipyard.conf
# Note that the shipyard.conf file needs to be placed on a volume
# that can be accessed by the containers
config_path = '/usr/local/airflow/plugins/shipyard.conf'
from config_path import config_path
def validate_site_design(parent_dag_name, child_dag_name, args):

38  shipyard_airflow/plugins/armada_operator.py

@@ -28,6 +28,7 @@ import armada.common.session as session
from get_k8s_pod_port_ip import get_pod_port_ip
from service_endpoint import ucp_service_endpoint
from service_token import shipyard_service_token
from xcom_puller import XcomPuller
class ArmadaOperator(BaseOperator):
@@ -37,6 +38,9 @@ class ArmadaOperator(BaseOperator):
:param main_dag_name: Parent Dag
:param shipyard_conf: Location of shipyard.conf
:param sub_dag_name: Child Dag
This operator assumes that prior steps have set xcoms for
the action and the deployment configuration
"""
@apply_defaults
@@ -46,7 +50,6 @@
shipyard_conf=None,
svc_token=None,
sub_dag_name=None,
workflow_info={},
xcom_push=True,
*args, **kwargs):
@@ -56,7 +59,6 @@
self.shipyard_conf = shipyard_conf
self.svc_token = svc_token
self.sub_dag_name = sub_dag_name
self.workflow_info = workflow_info
self.xcom_push_flag = xcom_push
def execute(self, context):
@@ -67,16 +69,12 @@
# Define task_instance
task_instance = context['task_instance']
# Extract information related to current workflow
# The workflow_info variable will be a dictionary
# that contains information about the workflow such
# as action_id, name and other related parameters
workflow_info = task_instance.xcom_pull(
task_ids='action_xcom', key='action',
dag_id=self.main_dag_name)
# Set up and retrieve values from xcom
self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
self.action_info = self.xcom_puller.get_action_info()
# Logs uuid of action performed by the Operator
logging.info("Armada Operator for action %s", workflow_info['id'])
logging.info("Armada Operator for action %s", self.action_info['id'])
# Retrieve Deckhand Design Reference
design_ref = self.get_deckhand_design_ref(context)
@@ -108,6 +106,10 @@
return site_design_validity
# Set up target manifest (only if not doing validate)
self.dc = self.xcom_puller.get_deployment_configuration()
self.target_manifest = self.dc['armada.manifest']
# Create Armada Client
# Retrieve Endpoint Information
svc_type = 'armada'
@@ -128,13 +130,8 @@
# Armada Apply
elif self.action == 'armada_apply':
# TODO (bryan-strassner) externalize the name of the manifest to
# use this needs to come from a site configuration document for
# consumption by shipyard/airflow. For now. "full-site" is the
# only value that will work.
target_manifest = 'full-site'
self.armada_apply(context, armada_client, design_ref,
target_manifest)
self.target_manifest)
# Armada Get Releases
elif self.action == 'armada_get_releases':
@@ -268,14 +265,7 @@
logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)
# Retrieve revision_id from xcom
# Note that in the case of 'deploy_site', the dag_id will
# be 'deploy_site.deckhand_get_design_version' for the
# 'deckhand_get_design_version' task. We need to extract
# the xcom value from it in order to get the value of the
# last committed revision ID
committed_revision_id = context['task_instance'].xcom_pull(
task_ids='deckhand_get_design_version',
dag_id=self.main_dag_name + '.deckhand_get_design_version')
committed_revision_id = self.xcom_puller.get_design_version()
# Form Design Reference Path that we will use to retrieve
# the Design YAMLs
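
The new xcom_puller.py module (84 lines in the file summary above) does not appear in this excerpt. Judging from its call sites in this diff, a minimal sketch could look like the following: the constructor and method names come from the call sites, while the internals and the exact task/dag ids used for the deployment configuration pull are assumptions.

# Sketch of xcom_puller.py inferred from its call sites in this diff; the
# deployment-configuration task/dag ids are assumptions, only the method
# names are confirmed by this change.
class XcomPuller(object):
    """Centralize the xcom_pull calls used across the operators."""

    def __init__(self, main_dag_name, task_instance):
        self.mdn = main_dag_name
        self.ti = task_instance

    def _pull(self, source_task, dag_id=None, key='return_value'):
        return self.ti.xcom_pull(task_ids=source_task,
                                 dag_id=dag_id or self.mdn,
                                 key=key)

    def get_action_info(self):
        # Pushed by the action_xcom step under the 'action' key.
        return self._pull(source_task='action_xcom', key='action')

    def get_design_version(self):
        # The committed revision id from the deckhand_get_design_version
        # subdag; previously each operator pulled this inline.
        return self._pull(
            source_task='deckhand_get_design_version',
            dag_id=self.mdn + '.deckhand_get_design_version')

    def get_deployment_configuration(self):
        # The dict returned by DeploymentConfigurationOperator; an
        # operator's return value lands on xcom under 'return_value'.
        return self._pull(
            source_task='get_deployment_configuration',
            dag_id=self.mdn + '.dag_deployment_configuration')

Centralizing these pulls keeps the task-id and dag-id conventions in one place instead of repeating them in every operator, which is the refactoring the commit message describes.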

18  shipyard_airflow/plugins/concurrency_check_operator.py

@@ -56,9 +56,14 @@ class ConcurrencyCheckOperator(BaseOperator):
@apply_defaults
def __init__(self, conflicting_dag_set=None, *args, **kwargs):
super(ConcurrencyCheckOperator, self).__init__(*args, **kwargs)
if conflicting_dag_set is not None:
self.conflicting_dag_set = conflicting_dag_set
else:
self.conflicting_dag_set = conflicting_dag_set
def execute(self, context):
"""
Run the check to see if this DAG has any concurrency issues with other
DAGs. Stop the workflow if there are.
"""
if self.conflicting_dag_set is None:
self.check_dag_id = self.dag.dag_id
logging.debug('dag_id is %s', self.check_dag_id)
if '.' in self.dag.dag_id:
@@ -70,11 +75,6 @@ class ConcurrencyCheckOperator(BaseOperator):
self.conflicting_dag_set = find_conflicting_dag_set(
self.check_dag_id)
def execute(self, context):
"""
Run the check to see if this DAG has any concurrency issues with other
DAGs. Stop the workflow if there are.
"""
logging.info('Checking for running of dags: %s',
', '.join(self.conflicting_dag_set))
@@ -123,7 +123,7 @@
"""
conflict_string = '{} conflicts with running {}. Aborting run'.format(
dag_name, conflict)
logging.warning(conflict_string)
logging.error(conflict_string)
raise AirflowException(conflict_string)

20  shipyard_airflow/plugins/deckhand_base_operator.py

@@ -23,6 +23,7 @@ from airflow.exceptions import AirflowException
from deckhand.client import client as deckhand_client
from service_endpoint import ucp_service_endpoint
from service_token import shipyard_service_token
from xcom_puller import XcomPuller
class DeckhandBaseOperator(BaseOperator):
@@ -49,7 +50,6 @@ class DeckhandBaseOperator(BaseOperator):
svc_session=None,
svc_token=None,
validation_read_timeout=None,
workflow_info={},
xcom_push=True,
*args, **kwargs):
"""Initialization of DeckhandBaseOperator object.
@@ -66,7 +66,6 @@
:param svc_session: Keystone Session
:param svc_token: Keystone Token
:param validation_read_timeout: Deckhand validation timeout
:param workflow_info: Information related to current workflow
:param xcom_push: xcom usage
"""
@@ -84,7 +83,6 @@
self.svc_session = svc_session
self.svc_token = svc_token
self.validation_read_timeout = validation_read_timeout
self.workflow_info = workflow_info
self.xcom_push_flag = xcom_push
def execute(self, context):
@@ -117,17 +115,13 @@
# Define task_instance
task_instance = context['task_instance']
# Extract information related to current workflow
# The workflow_info variable will be a dictionary
# that contains information about the workflow such
# as action_id, name and other related parameters
self.workflow_info = task_instance.xcom_pull(
task_ids='action_xcom', key='action',
dag_id=self.main_dag_name)
# Set up and retrieve values from xcom
self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
self.action_info = self.xcom_puller.get_action_info()
# Logs uuid of Shipyard action
logging.info("Executing Shipyard Action %s",
self.workflow_info['id'])
self.action_info['id'])
# Retrieve Endpoint Information
self.deckhand_svc_endpoint = ucp_service_endpoint(
@@ -158,9 +152,7 @@
if self.task_id != 'deckhand_get_design_version':
# Retrieve 'revision_id' from xcom
self.revision_id = task_instance.xcom_pull(
task_ids='deckhand_get_design_version',
dag_id=self.main_dag_name + '.deckhand_get_design_version')
self.revision_id = self.xcom_puller.get_design_version()
if self.revision_id:
logging.info("Revision ID is %d", self.revision_id)

66  shipyard_airflow/plugins/deckhand_client_factory.py

@@ -0,0 +1,66 @@
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import configparser
import logging
from keystoneauth1.identity import v3 as keystone_v3
from keystoneauth1 import session as keystone_session
from deckhand.client import client as deckhand_client
LOG = logging.getLogger(__name__)
class DeckhandClientFactory(object):
"""Factory for DeckhandClient to encapsulate commonly reused setup"""
def __init__(self,
shipyard_conf,
*args, **kwargs):
"""Deckhand Client Factory
Creates a client factory to retrieve clients
:param shipyard_conf: Location of shipyard.conf
"""
self.config = configparser.ConfigParser()
self.config.read(shipyard_conf)
def get_client(self):
"""Retrieve a deckhand client"""
"""
Notes:
TODO(bryan-strassner): If/when the airflow plugin modules move to using
oslo config, consider using the example here:
https://github.com/att-comdev/deckhand/blob/cef3b52a104e620e88a24caf70ed2bb1297c268f/deckhand/barbican/client_wrapper.py#L53
which will load the attributes from the config more flexibly.
Keystoneauth1 also provides for a simpler solution with:
https://docs.openstack.org/keystoneauth/latest/api/keystoneauth1.loading.html
if oslo config is used.
"""
keystone_auth = {}
# Construct Session Argument
for attr in ('auth_url', 'password', 'project_domain_name',
'project_name', 'username', 'user_domain_name'):
keystone_auth[attr] = self.config.get('keystone_authtoken', attr)
# Set up keystone session
auth = keystone_v3.Password(**keystone_auth)
sess = keystone_session.Session(auth=auth)
LOG.info("Setting up Deckhand client with parameters")
for attr in keystone_auth:
if attr != 'password':
LOG.debug('%s = %s', attr, keystone_auth[attr])
return deckhand_client.Client(session=sess, endpoint_type='internal')
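
As a usage note, retrieving a client with the factory above is a short sequence; this sketch assumes the standard config path established in config_path.py earlier in this change.

# Minimal usage sketch for DeckhandClientFactory; the conf path is the
# one defined in config_path.py.
from deckhand_client_factory import DeckhandClientFactory

factory = DeckhandClientFactory('/usr/local/airflow/plugins/shipyard.conf')
client = factory.get_client()  # an authenticated deckhand_client.Client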

178  shipyard_airflow/plugins/deployment_configuration_operator.py

@@ -0,0 +1,178 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Deployment Configuration
Retrieves the deployment configuration from Deckhand and places the values
retrieved into a dictionary
"""
import logging
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.plugins_manager import AirflowPlugin
from airflow.utils.decorators import apply_defaults
try:
from deckhand_client_factory import DeckhandClientFactory
except ImportError:
from shipyard_airflow.plugins.deckhand_client_factory import (
DeckhandClientFactory
)
LOG = logging.getLogger(__name__)
class DeploymentConfigurationOperator(BaseOperator):
"""Deployment Configuration Operator
Retrieve the deployment configuration from Deckhand for use throughout
the workflow. Put the configuration into a dictionary.
Failures are raised:
- when Deckhand cannot be contacted
- when the DeploymentConfiguration (deployment-configuration) document
cannot be retrieved
"""
config_keys_defaults = {
"physical_provisioner.deployment_strategy": "all-at-once",
"physical_provisioner.deploy_interval": 30,
"physical_provisioner.deploy_timeout": 3600,
"physical_provisioner.destroy_interval": 30,
"physical_provisioner.destroy_timeout": 900,
"physical_provisioner.join_wait": 120,
"physical_provisioner.prepare_node_interval": 30,
"physical_provisioner.prepare_node_timeout": 1000,
"physical_provisioner.prepare_site_interval": 10,
"physical_provisioner.prepare_site_timeout": 300,
"physical_provisioner.verify_interval": 10,
"physical_provisioner.verify_timeout": 60,
"kubernetes.node_status_interval": 30,
"kubernetes.node_status_timeout": 1800,
"kubernetes_provisioner.drain_timeout": 3600,
"kubernetes_provisioner.drain_grace_period": 1800,
"kubernetes_provisioner.clear_labels_timeout": 1800,
"kubernetes_provisioner.remove_etcd_timeout": 1800,
"kubernetes_provisioner.etcd_ready_timeout": 600,
"armada.manifest": "full-site"
}
@apply_defaults
def __init__(self,
main_dag_name=None,
shipyard_conf=None,
*args, **kwargs):
"""Deployment Configuration Operator
Generate a DeploymentConfigurationOperator to read the deployment's
configuration for use by other operators
:param main_dag_name: Parent Dag
:param shipyard_conf: Location of shipyard.conf
"""
super(DeploymentConfigurationOperator, self).__init__(*args, **kwargs)
self.main_dag_name = main_dag_name
self.shipyard_conf = shipyard_conf
def execute(self, context):
"""Perform Deployment Configuration extraction"""
revision_id = self.get_revision_id(context.get('task_instance'))
doc = self.get_doc(revision_id)
converted = self.map_config_keys(doc)
# return the mapped configuration so that it can be placed on xcom
return converted
def get_revision_id(self, task_instance):
"""Get the revision id from xcom"""
if task_instance:
LOG.debug("task_instance found, extracting design version")
# Set the revision_id to the revision on the xcom
revision_id = task_instance.xcom_pull(
task_ids='deckhand_get_design_version',
dag_id=self.main_dag_name + '.deckhand_get_design_version')
if revision_id:
LOG.info("Revision is set to: %s for deployment configuration",
revision_id)
return revision_id
# either the revision id was not on xcom, or the task_instance is malformed
raise AirflowException(
"Design_revision is not set. Cannot proceed with retrieval of"
" the design configuration"
)
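
The diff for this file is truncated here; the get_doc and map_config_keys methods called from execute are not shown. Based on config_keys_defaults above and the flattened lookup seen in armada_operator.py (self.dc['armada.manifest']), map_config_keys plausibly resolves each dotted key against the document data with a fallback to the default. A sketch under those assumptions:

# Hedged sketch of map_config_keys; the real implementation is not shown
# in this excerpt. Only the method name, config_keys_defaults, and the
# flattened-key consumption (e.g. dc['armada.manifest']) appear above.
def map_config_keys(self, cfg_data):
    """Return a flat dict of dotted keys, preferring the value from the
    deployment-configuration document over the coded default."""
    cfg = {}
    for key, default in self.config_keys_defaults.items():
        # 'physical_provisioner.deploy_timeout' becomes a nested lookup
        # of cfg_data['physical_provisioner']['deploy_timeout'].
        section, _, name = key.partition('.')
        value = cfg_data.get(section, {}).get(name)
        cfg[key] = value if value is not None else default
    return cfg

The dict this returns is what execute hands back, and Airflow places an operator's return value on xcom, which is how XcomPuller.get_deployment_configuration (used by the other operators) would find it.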