From 44f7022df6438de541501c2fdd5c46df198b82bf Mon Sep 17 00:00:00 2001 From: Bryan Strassner Date: Wed, 3 Oct 2018 19:40:21 -0500 Subject: [PATCH] [FIX] Tune Airflow scheduler to be less aggressive Tunes the interval between checks for tasks to run to every 5 seconds instead of a continuous loop. In practice this looks like it should be somewhere between 6 - 10 times less active against the database, and likewise spawn fewer processes by the same factor. Additionally tunes and adds notes about some of the other tuning parameters. Change-Id: I5e28b4c081c53a553072470ae53302915a90dd1a --- charts/shipyard/values.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/charts/shipyard/values.yaml b/charts/shipyard/values.yaml index b97b00a5..059a6ec9 100644 --- a/charts/shipyard/values.yaml +++ b/charts/shipyard/values.yaml @@ -575,10 +575,14 @@ conf: job_heartbeat_sec: 5 scheduler_heartbeat_sec: 5 run_duration: -1 - min_file_process_interval: 0 + # Check for pending tasks no more than every 5 seconds + min_file_process_interval: 5 + # This is part of 1.10, but disabled in 1.10.1 (pending) See: + # https://github.com/apache/incubator-airflow/blob/master/UPDATING.md#min_file_parsing_loop_time-config-option-temporarily-disabled min_file_parsing_loop_time: 1 dag_dir_list_interval: 300 - print_stats_interval: 120 + # Stats for the scheduler are minimally useful - every 5 mins is enough + print_stats_interval: 300 child_process_log_directory: /usr/local/airflow/logs/scheduler scheduler_zombie_task_threshold: 300 catchup_by_default: "True" @@ -587,7 +591,8 @@ conf: statsd_host: "localhost" statsd_port: 8125 statsd_prefix: "airflow" - max_threads: 2 + # Shipyard's use of Airflow is low volume. 1 Thread is probably enough. + max_threads: 1 authenticate: "False" ldap: # Shipyard is not using this