Browse Source

Update Airflow logrotate logic

The current logrotate logic deletes logs that are
more than X days old in the Airflow log path; however,
the Airflow log archive may still reach 100%
usage and cause the airflow-worker to crashloop.

This PS adds logic to logrotate.sh to delete the oldest
logs and empty dirs when the Airflow log archive
reaches the max usage specified in values.yaml.

Change-Id: I3dcb80901d7dd36da6812850a1f54e7ebf3b1cf2
changes/33/656033/11
anthony.bellino 2 years ago
parent
commit
5f92be2f07
3 changed files with 15 additions and 0 deletions
  1. +2
    -0
      charts/shipyard/templates/statefulset-airflow-worker.yaml
  2. +1
    -0
      charts/shipyard/values.yaml
  3. +12
    -0
      images/airflow/script/airflow_logrotate.sh

+ 2
- 0
charts/shipyard/templates/statefulset-airflow-worker.yaml View File

@ -149,6 +149,8 @@ spec:
imagePullPolicy: {{ .Values.images.pull_policy }}
{{ tuple $envAll $envAll.Values.pod.resources.airflow.logrotate | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
- name: PERCENT_MAX_LOG_FS_USAGE
value: {{ .Values.logrotate.percent_max_log_fs_usage | quote }}
- name: DAYS_BEFORE_LOG_DELETION
value: {{ .Values.logrotate.days_before_deletion | quote }}
- name: LOGROTATE_PATH


+ 1
- 0
charts/shipyard/values.yaml View File

@ -146,6 +146,7 @@ volume_worker:
logrotate:
days_before_deletion: 30
percent_max_log_fs_usage: 80
# typically overridden by environmental
# values, but should include all endpoints


+ 12
- 0
images/airflow/script/airflow_logrotate.sh View File

@ -16,6 +16,10 @@
set -ex
# Print the used-space percentage (digits only, without the '%' sign) that
# df reports for the filesystem holding the given path.
# Arguments: $1 - path to inspect (optional; defaults to
#                 /usr/local/airflow/logs/ so existing callers are unchanged)
# Outputs:   the integer percentage on stdout
# Returns:   the exit status of grep (non-zero when df printed no digits)
get_usage() {
    # --output=pcent restricts df to its "Use%" column; the header line holds
    # no digits, so grep -o leaves only the numeric value.
    df --output=pcent -- "${1:-/usr/local/airflow/logs/}" | grep -o '[0-9]*'
}
while true; do
# Delete logs that are more than ${DAYS_BEFORE_LOG_DELETION} days old in the directories
@ -23,6 +27,14 @@ while true; do
# Delete empty directories under the Airflow log path
find ${LOGROTATE_PATH} \( -type f -name '*.log' -mtime +${DAYS_BEFORE_LOG_DELETION} -o -type d -empty \) -print -delete
# Delete oldest logs and empty directories when
# the Airflow log path filesystem reaches max usage
CURR_USAGE=$(get_usage)
while [ "${CURR_USAGE}" -gt "${PERCENT_MAX_LOG_FS_USAGE}" ]; do
    # Pick the single oldest candidate by modification time. '%T+' prints a
    # sortable timestamp with no spaces, so everything after the first space
    # is the path (which may itself contain spaces).
    # -mindepth 1 keeps the log root itself from ever being selected once it
    # becomes empty; ':?' aborts instead of scanning the current directory
    # when LOGROTATE_PATH is unset or empty.
    OLDEST=$(find "${LOGROTATE_PATH:?}" -mindepth 1 \( -type f -name '*.log' -o -type d -empty \) -printf '%T+ %p\n' | sort | head -n 1 | cut -d ' ' -f 2-)
    # Nothing left to remove: stop rather than spin forever while usage
    # stays above the limit for reasons this script cannot fix.
    if [ -z "${OLDEST}" ]; then
        echo "Log filesystem usage is ${CURR_USAGE}% but no logs remain to delete under ${LOGROTATE_PATH}" >&2
        break
    fi
    rm -rf -- "${OLDEST}"
    CURR_USAGE=$(get_usage)
done
# Sleep for 1 hr between each wait loop
sleep 3600


Loading…
Cancel
Save