NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Hunk line counts match the @@ headers, but
leading whitespace and blank-line placement in context lines are assumed;
confirm against the target tree (e.g. `git apply --ignore-whitespace`).
The drydock_operators.py hunk now converts the backoff value to int once
before logging and sleeping, so the `index` blob ids for edited files are
stale.

diff --git a/charts/shipyard/values.yaml b/charts/shipyard/values.yaml
index 04c87254..20103bc3 100644
--- a/charts/shipyard/values.yaml
+++ b/charts/shipyard/values.yaml
@@ -328,6 +328,7 @@ conf:
       prepare_node_task_timeout: 1800
       deploy_node_query_interval: 30
       deploy_node_task_timeout: 3600
+      cluster_join_check_backoff_time: 120
     healthcheck:
       schema: http
       endpoint: /api/v1.0/health
diff --git a/etc/shipyard/shipyard.conf.sample b/etc/shipyard/shipyard.conf.sample
index e2e5423a..6a2b3c2d 100644
--- a/etc/shipyard/shipyard.conf.sample
+++ b/etc/shipyard/shipyard.conf.sample
@@ -82,6 +82,8 @@
 
 # Time out (in seconds) for deploy_node task (integer value)
 #deploy_node_task_timeout = 3600
+# Backoff time (in seconds) before checking cluster join (integer value)
+#cluster_join_check_backoff_time = 120
 
 
 [healthcheck]
diff --git a/shipyard_airflow/conf/config.py b/shipyard_airflow/conf/config.py
index 66bcc9cc..a49093c1 100644
--- a/shipyard_airflow/conf/config.py
+++ b/shipyard_airflow/conf/config.py
@@ -170,6 +170,11 @@ SECTIONS = [
                 default=3600,
                 help='Time out (in seconds) for deploy_node task'
             ),
+            cfg.IntOpt(
+                'cluster_join_check_backoff_time',
+                default=120,
+                help='Backoff time (in seconds) before checking cluster join'
+            ),
         ]
     ),
     ConfigSection(
diff --git a/shipyard_airflow/plugins/check_k8s_node_status.py b/shipyard_airflow/plugins/check_k8s_node_status.py
index b4d2359c..6268c164 100644
--- a/shipyard_airflow/plugins/check_k8s_node_status.py
+++ b/shipyard_airflow/plugins/check_k8s_node_status.py
@@ -31,8 +31,14 @@ def check_node_status(time_out, interval):
 
     Example::
 
+        import time
         from check_k8s_node_status import check_node_status
 
+        # Wait for a while before checking the cluster-join process, as
+        # it takes time for the process to be triggered across all nodes.
+        # We will wait for 120 seconds in this example.
+        time.sleep(120)
+
         # Calls function to check that all nodes are in Ready State
         # Time out in this case is set to 15 mins, the time interval
         # has been set to 60 seconds
@@ -48,8 +54,9 @@ def check_node_status(time_out, interval):
 
     # Logs initial state of all nodes in the cluster
     ret_init = v1.list_node(watch=False)
+    logging.info("Current state of nodes in Cluster is")
+
     for i in ret_init.items:
-        logging.info("Current state of nodes in Cluster is")
         logging.info("%s\t%s\t%s", i.metadata.name,
                      i.status.conditions[-1].status,
                      i.status.conditions[-1].type)
diff --git a/shipyard_airflow/plugins/drydock_operators.py b/shipyard_airflow/plugins/drydock_operators.py
index 2b5cd4e6..bc1002df 100644
--- a/shipyard_airflow/plugins/drydock_operators.py
+++ b/shipyard_airflow/plugins/drydock_operators.py
@@ -165,6 +165,16 @@ class DryDockOperator(BaseOperator):
             self.drydock_action(drydock_client, context, self.action,
                                 query_interval, task_timeout)
 
+            # Back off (120 seconds by default) before checking the cluster
+            # join process, as it takes time for the process to be triggered
+            # across all nodes. Coerce once: the raw value may be a string.
+            cluster_join_check_backoff_time = int(config.get(
+                'drydock', 'cluster_join_check_backoff_time'))
+            logging.info("All nodes deployed in MAAS")
+            logging.info("Wait for %d seconds before checking node state...",
+                         cluster_join_check_backoff_time)
+            time.sleep(cluster_join_check_backoff_time)
+
             # Check that cluster join process is completed before declaring
             # deploy_node as 'completed'. Set time out to 30 minutes and set
             # polling interval to 30 seconds.
diff --git a/tests/unit/control/test.conf b/tests/unit/control/test.conf
index ee3309e6..2001ad0e 100644
--- a/tests/unit/control/test.conf
+++ b/tests/unit/control/test.conf
@@ -8,6 +8,7 @@ web_server = http://airflow-web-int.ucp.svc.cluster.local:8080/
 [deckhand]
 service_type = deckhand
 [drydock]
+cluster_join_check_backoff_time = 120
 deploy_node_query_interval = 30
 deploy_node_task_timeout = 3600
 prepare_node_query_interval = 30
@@ -36,4 +37,5 @@ project_name = service
 user_domain_name = default
 username = shipyard
 [shipyard]
-service_type = shipyard
\ No newline at end of file
+service_type = shipyard
+