Update DryDock Operator & Shipyard Chart

This Patch Set exposes the 'query_interval' and 'task_timeout'
parameters for each Drydock task as Shipyard configuration
options, so that the values can be tuned for a particular
site instead of being hard-coded in the operator.

The corresponding changes for the Helm Chart are included in
this Patch Set as well.

The Drydock task names 'prepare_node' and 'deploy_node' have also
been renamed to 'prepare_nodes' and 'deploy_nodes'.

A finished task can be in either the 'complete' or 'terminated'
state. The task polling loop in this Patch Set now exits on both.

Change-Id: I1b446f7bcf493bc8e5bbfdba842158797f0e3594
This commit is contained in:
Anthony Lin 2017-10-23 18:01:37 +00:00
parent 1a07818711
commit dfa7cedb19
6 changed files with 106 additions and 20 deletions

View File

@ -51,7 +51,15 @@
{{ if not .armada.service_type }}#{{ end }}service_type = {{ .armada.service_type | default "armada" }}
[drydock]
{{ if not .drydock.service_type }}#{{ end }}service_type = {{ .drydock.service_type | default "drydock" }}
{{ if not .drydock.service_type }}#{{ end }}service_type = {{ .drydock.service_type | default "physicalprovisioner" }}
{{ if not .drydock.verify_site_query_interval }}#{{ end }}verify_site_query_interval = {{ .drydock.verify_site_query_interval | default "10" }}
{{ if not .drydock.verify_site_task_timeout }}#{{ end }}verify_site_task_timeout = {{ .drydock.verify_site_task_timeout | default "60" }}
{{ if not .drydock.prepare_site_query_interval }}#{{ end }}prepare_site_query_interval = {{ .drydock.prepare_site_query_interval | default "10" }}
{{ if not .drydock.prepare_site_task_timeout }}#{{ end }}prepare_site_task_timeout = {{ .drydock.prepare_site_task_timeout | default "120" }}
{{ if not .drydock.prepare_node_query_interval }}#{{ end }}prepare_node_query_interval = {{ .drydock.prepare_node_query_interval | default "30" }}
{{ if not .drydock.prepare_node_task_timeout }}#{{ end }}prepare_node_task_timeout = {{ .drydock.prepare_node_task_timeout | default "1800" }}
{{ if not .drydock.deploy_node_query_interval }}#{{ end }}deploy_node_query_interval = {{ .drydock.deploy_node_query_interval | default "30" }}
{{ if not .drydock.deploy_node_task_timeout }}#{{ end }}deploy_node_task_timeout = {{ .drydock.deploy_node_task_timeout | default "3600" }}
[healthcheck]
{{ if not .healthcheck.schema }}#{{ end }}schema = {{ .healthcheck.schema | default "<None>" }}

View File

@ -297,7 +297,15 @@ conf:
armada:
service_type: armada
drydock:
service_type: drydock
service_type: physicalprovisioner
verify_site_query_interval: 10
verify_site_task_timeout: 60
prepare_site_query_interval: 10
prepare_site_task_timeout: 120
prepare_node_query_interval: 30
prepare_node_task_timeout: 1800
deploy_node_query_interval: 30
deploy_node_task_timeout: 3600
healthcheck:
schema: http
endpoint: /api/v1.0/health

View File

@ -130,6 +130,46 @@ SECTIONS = [
'the service lookup in the Keystone service catalog. '
)
),
cfg.IntOpt(
'verify_site_query_interval',
default=10,
help='Query interval (in seconds) for verify_site task'
),
cfg.IntOpt(
'verify_site_task_timeout',
default=60,
help='Time out (in seconds) for verify_site task'
),
cfg.IntOpt(
'prepare_site_query_interval',
default=10,
help='Query interval (in seconds) for prepare_site task'
),
cfg.IntOpt(
'prepare_site_task_timeout',
default=120,
help='Time out (in seconds) for prepare_site task'
),
cfg.IntOpt(
'prepare_node_query_interval',
default=30,
help='Query interval (in seconds) for prepare_node task'
),
cfg.IntOpt(
'prepare_node_task_timeout',
default=1800,
help='Time out (in seconds) for prepare_node task'
),
cfg.IntOpt(
'deploy_node_query_interval',
default=30,
help='Query interval (in seconds) for deploy_node task'
),
cfg.IntOpt(
'deploy_node_task_timeout',
default=3600,
help='Time out (in seconds) for deploy_node task'
),
]
),
ConfigSection(

View File

@ -24,8 +24,8 @@ config_path = '/usr/local/airflow/plugins/shipyard.conf'
CREATE_DRYDOCK_CLIENT_DAG_NAME = 'create_drydock_client'
DRYDOCK_VERIFY_SITE_DAG_NAME = 'verify_site'
DRYDOCK_PREPARE_SITE_DAG_NAME = 'prepare_site'
DRYDOCK_PREPARE_NODE_DAG_NAME = 'prepare_node'
DRYDOCK_DEPLOY_NODE_DAG_NAME = 'deploy_node'
DRYDOCK_PREPARE_NODE_DAG_NAME = 'prepare_nodes'
DRYDOCK_DEPLOY_NODE_DAG_NAME = 'deploy_nodes'
def get_drydock_subdag_step(parent_dag_name, child_dag_name, args):
@ -84,14 +84,14 @@ def deploy_site_drydock(parent_dag_name, child_dag_name, args):
task_id=DRYDOCK_PREPARE_SITE_DAG_NAME,
dag=dag)
drydock_prepare_node = SubDagOperator(
drydock_prepare_nodes = SubDagOperator(
subdag=get_drydock_subdag_step(dag.dag_id,
DRYDOCK_PREPARE_NODE_DAG_NAME,
args),
task_id=DRYDOCK_PREPARE_NODE_DAG_NAME,
dag=dag)
drydock_deploy_node = SubDagOperator(
drydock_deploy_nodes = SubDagOperator(
subdag=get_drydock_subdag_step(dag.dag_id,
DRYDOCK_DEPLOY_NODE_DAG_NAME,
args),
@ -101,7 +101,7 @@ def deploy_site_drydock(parent_dag_name, child_dag_name, args):
# DAG Wiring
drydock_verify_site.set_upstream(drydock_client)
drydock_prepare_site.set_upstream(drydock_verify_site)
drydock_prepare_node.set_upstream(drydock_prepare_site)
drydock_deploy_node.set_upstream(drydock_prepare_node)
drydock_prepare_nodes.set_upstream(drydock_prepare_site)
drydock_deploy_nodes.set_upstream(drydock_prepare_nodes)
return dag

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import configparser
import logging
import os
import time
@ -135,32 +136,57 @@ class DryDockOperator(BaseOperator):
else:
raise AirflowException("Unable to Retrieve Genesis Node IP!")
# Read shipyard.conf
config = configparser.ConfigParser()
config.read(self.shipyard_conf)
if not config.read(self.shipyard_conf):
raise AirflowException("Unable to read content of shipyard.conf")
# Create Task for verify_site
if self.action == 'verify_site':
# Default settings for 'verify_site' execution is to query
# the task every 10 seconds and to time out after 60 seconds
# TODO: Need to decide if we want to make polling interval and
# time out a variable in the Dags
self.drydock_action(drydock_client, context, self.action, 10, 60)
query_interval = config.get('drydock',
'verify_site_query_interval')
task_timeout = config.get('drydock', 'verify_site_task_timeout')
self.drydock_action(drydock_client, context, self.action,
query_interval, task_timeout)
# Create Task for prepare_site
elif self.action == 'prepare_site':
# Default settings for 'prepare_site' execution is to query
# the task every 10 seconds and to time out after 120 seconds
self.drydock_action(drydock_client, context, self.action, 10, 120)
query_interval = config.get('drydock',
'prepare_site_query_interval')
task_timeout = config.get('drydock', 'prepare_site_task_timeout')
self.drydock_action(drydock_client, context, self.action,
query_interval, task_timeout)
# Create Task for prepare_node
elif self.action == 'prepare_node':
elif self.action == 'prepare_nodes':
# Default settings for 'prepare_node' execution is to query
# the task every 30 seconds and to time out after 1800 seconds
self.drydock_action(drydock_client, context, self.action, 30, 1800)
query_interval = config.get('drydock',
'prepare_node_query_interval')
task_timeout = config.get('drydock', 'prepare_node_task_timeout')
self.drydock_action(drydock_client, context, self.action,
query_interval, task_timeout)
# Create Task for deploy_node
elif self.action == 'deploy_node':
elif self.action == 'deploy_nodes':
# Default settings for 'deploy_node' execution is to query
# the task every 30 seconds and to time out after 3600 seconds
self.drydock_action(drydock_client, context, self.action, 30, 3600)
query_interval = config.get('drydock',
'deploy_node_query_interval')
task_timeout = config.get('drydock', 'deploy_node_task_timeout')
self.drydock_action(drydock_client, context, self.action,
query_interval, task_timeout)
# Do not perform any action
else:
@ -247,7 +273,10 @@ class DryDockOperator(BaseOperator):
def drydock_query_task(self, drydock_client, interval, time_out, task_id):
# Calculate number of times to execute the 'for' loop
end_range = int(time_out / interval)
# Convert 'time_out' and 'interval' from string into integer
# The result from the division will be a floating number which
# We will round off to nearest whole number
end_range = round(int(time_out) / int(interval))
# Query task status
for i in range(0, end_range + 1):
@ -264,11 +293,12 @@ class DryDockOperator(BaseOperator):
if task_status == 'running' and i == end_range:
raise AirflowException("Task Execution Timed Out!")
# Exit 'for' loop if task is in 'complete' state
if task_status == 'complete':
# Exit 'for' loop if the task is in 'complete' or 'terminated'
# state
if task_status in ['complete', 'terminated']:
break
else:
time.sleep(interval)
time.sleep(int(interval))
# Get final task state
# NOTE: There is a known bug in Drydock where the task result