Add Retry Logic for Armada Apply

We have seen intermittent network connectivity issues during
integration testing where Armada Apply errors out due to git
clone failures.

This patch set allows up to 10 retries for Armada Apply. There
will be a 30-second back-off between attempts.

It also collapses the DAG structure by doing away with the
extra sub-dag layer for Armada Build. Note that this extra
layer is not really needed from a functionality perspective.

Note also that the default retry interval will be changed from
60 seconds to 30 seconds for the 'deploy_site' dag.

Change-Id: Icb1fd2d1a8e319330c93ac73a4f05b7e49987efd
This commit is contained in:
Anthony Lin 2017-12-13 04:37:09 +00:00
parent 1ab35fbf68
commit c224a9e045
3 changed files with 34 additions and 68 deletions

View File

@ -14,91 +14,56 @@
from airflow.models import DAG
from airflow.operators import ArmadaOperator
from airflow.operators.subdag_operator import SubDagOperator
# Location of shipyard.conf
config_path = '/usr/local/airflow/plugins/shipyard.conf'
# Names used for sub-subdags in the armada site deployment subdag
CREATE_ARMADA_CLIENT_DAG_NAME = 'create_armada_client'
GET_ARMADA_STATUS_DAG_NAME = 'armada_status'
ARMADA_VALIDATE_DAG_NAME = 'armada_validate'
ARMADA_APPLY_DAG_NAME = 'armada_apply'
ARMADA_GET_RELEASES_DAG_NAME = 'armada_get_releases'
def get_armada_subdag_step(parent_dag_name, child_dag_name, args):
'''
Execute Armada Subdag
'''
dag = DAG(
'{}.{}'.format(parent_dag_name, child_dag_name),
default_args=args)
# Note that in the event where the 'deploy_site' Action is
# triggered from Shipyard, the 'parent_dag_name' variable
# gets assigned with 'deploy_site.create_armada_client'.
# This is the name that we want to assign to the subdag so
# that we can reference it for xcom. The name of the main
# dag will be the front part of that value, i.e. 'deploy_site'.
# Hence we will extract the front part and assign it to main_dag.
# We will reuse this pattern for other Actions, e.g. update_site,
# redeploy_site as well.
operator = ArmadaOperator(
task_id=child_dag_name,
shipyard_conf=config_path,
action=child_dag_name,
main_dag_name=parent_dag_name[0:parent_dag_name.find('.')],
sub_dag_name=parent_dag_name,
dag=dag)
return dag
def deploy_site_armada(parent_dag_name, child_dag_name, args):
'''
Puts into atomic unit
Armada Subdag
'''
dag = DAG(
'{}.{}'.format(parent_dag_name, child_dag_name),
default_args=args)
armada_client = SubDagOperator(
subdag=get_armada_subdag_step(dag.dag_id,
CREATE_ARMADA_CLIENT_DAG_NAME,
args),
task_id=CREATE_ARMADA_CLIENT_DAG_NAME,
# Create Armada Client
armada_client = ArmadaOperator(
task_id='create_armada_client',
shipyard_conf=config_path,
action='create_armada_client',
dag=dag)
armada_status = SubDagOperator(
subdag=get_armada_subdag_step(dag.dag_id,
GET_ARMADA_STATUS_DAG_NAME,
args),
task_id=GET_ARMADA_STATUS_DAG_NAME,
# Get Tiller Status
armada_status = ArmadaOperator(
task_id='armada_status',
shipyard_conf=config_path,
action='armada_status',
dag=dag)
armada_validate = SubDagOperator(
subdag=get_armada_subdag_step(dag.dag_id,
ARMADA_VALIDATE_DAG_NAME,
args),
task_id=ARMADA_VALIDATE_DAG_NAME,
# Validate Armada YAMLs
armada_validate = ArmadaOperator(
task_id='armada_validate',
shipyard_conf=config_path,
action='armada_validate',
dag=dag)
armada_apply = SubDagOperator(
subdag=get_armada_subdag_step(dag.dag_id,
ARMADA_APPLY_DAG_NAME,
args),
task_id=ARMADA_APPLY_DAG_NAME,
# Armada Apply
armada_apply = ArmadaOperator(
task_id='armada_apply',
shipyard_conf=config_path,
action='armada_apply',
retries=10,
dag=dag)
armada_get_releases = SubDagOperator(
subdag=get_armada_subdag_step(dag.dag_id,
ARMADA_GET_RELEASES_DAG_NAME,
args),
task_id=ARMADA_GET_RELEASES_DAG_NAME,
# Get Helm Releases
armada_get_releases = ArmadaOperator(
task_id='armada_get_releases',
shipyard_conf=config_path,
action='armada_get_releases',
dag=dag)
# DAG Wiring
# Define dependencies
armada_status.set_upstream(armada_client)
armada_validate.set_upstream(armada_status)
armada_apply.set_upstream(armada_validate)

View File

@ -52,7 +52,7 @@ default_args = {
'email_on_retry': False,
'provide_context': True,
'retries': 0,
'retry_delay': timedelta(minutes=1),
'retry_delay': timedelta(seconds=30),
}
dag = DAG(PARENT_DAG_NAME, default_args=default_args, schedule_interval=None)

View File

@ -221,10 +221,11 @@ class ArmadaOperator(BaseOperator):
context['query'])
# We will expect Armada to return the releases that it is
# deploying. An empty value for 'install' means that armada
# deploy has failed. Note that if we try and deploy the same
# release twice, we will end up with empty response on our
# second attempt and that will be treated as a failure scenario.
# deploying. An empty value for 'install' means that armada
# deploy has failed. Note that if we try and deploy the
# same release twice, we will end up with empty response on
# our second attempt and that will be treated as a failure
# scenario.
if armada_post_apply['message']['install']:
logging.info("Armada Apply Successfully Executed")
logging.info(armada_post_apply)