diff --git a/charts/shipyard/templates/statefulset-airflow-worker.yaml b/charts/shipyard/templates/statefulset-airflow-worker.yaml
index 64a3997e..12c738d7 100644
--- a/charts/shipyard/templates/statefulset-airflow-worker.yaml
+++ b/charts/shipyard/templates/statefulset-airflow-worker.yaml
@@ -149,6 +149,8 @@ spec:
           imagePullPolicy: {{ .Values.images.pull_policy }}
 {{ tuple $envAll $envAll.Values.pod.resources.airflow.logrotate | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
           env:
+          - name: PERCENT_MAX_LOG_FS_USAGE
+            value: {{ .Values.logrotate.percent_max_log_fs_usage | quote }}
           - name: DAYS_BEFORE_LOG_DELETION
             value: {{ .Values.logrotate.days_before_deletion | quote }}
           - name: LOGROTATE_PATH
diff --git a/charts/shipyard/values.yaml b/charts/shipyard/values.yaml
index 38f4c564..06601ceb 100644
--- a/charts/shipyard/values.yaml
+++ b/charts/shipyard/values.yaml
@@ -146,6 +146,7 @@ volume_worker:
 
 logrotate:
   days_before_deletion: 30
+  percent_max_log_fs_usage: 80
 
 # typically overriden by environmental
 # values, but should include all endpoints
diff --git a/images/airflow/script/airflow_logrotate.sh b/images/airflow/script/airflow_logrotate.sh
index fee7f1f0..40924ee2 100755
--- a/images/airflow/script/airflow_logrotate.sh
+++ b/images/airflow/script/airflow_logrotate.sh
@@ -16,6 +16,12 @@
 
 set -ex
 
+# Print the digits of the "Use%" figure that df reports for the
+# filesystem containing /usr/local/airflow/logs/
+get_usage() {
+    df /usr/local/airflow/logs/ --output='pcent' | grep -o '[0-9]*'
+}
+
 while true; do
 
     # Delete logs that are more than 30 days old in the directories
@@ -23,6 +29,20 @@ while true; do
     # Delete empty directories under the Airflow log path
     find ${LOGROTATE_PATH} \( -type f -name '*.log' -mtime +${DAYS_BEFORE_LOG_DELETION} -o -type d -empty \) -print -delete
 
+    # Delete oldest logs and empty directories when
+    # the Airflow log path filesystem reaches max usage
+    CURR_USAGE=$(get_usage)
+    while [ "${CURR_USAGE}" -gt "${PERCENT_MAX_LOG_FS_USAGE}" ]; do
+        # Oldest entry first; -mindepth 1 keeps the log root itself
+        # out of the candidate list
+        OLDEST=$(find ${LOGROTATE_PATH} -mindepth 1 \( -type f -name '*.log' -o -type d -empty \) -printf '%T+ %p\n' | sort | head -n 1 | cut -d ' ' -f 2-)
+        # Nothing left to delete: stop rather than spin forever
+        # while usage stays above the limit
+        [ -n "${OLDEST}" ] || break
+        rm -rf -- "${OLDEST}"
+        CURR_USAGE=$(get_usage)
+    done
+
     # Sleep for 1 hr between each wait loop
     sleep 3600
 