From 51733e562391a702b4b31385d3f5977102be54ae Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 21 Jan 2021 11:58:52 +1100 Subject: [PATCH] borg-backup: implement saving a stream, use for database backups Add a facility to the borg-backup role to run a command and save its output to a separate archive file during the backup process. This is mostly useful for database backups. Compressed on-disk logs are terrible for differential backups because revisions have essentially no common data. By saving the uncompressed stream directly from mysqldump, we allow borg the chance to de-duplicate, saving considerable space on the backup servers. This is implemented for our ansible-managed servers currently doing dumps. We also add it to the testinfra. This also separates the archive names for the filesystem and stream backup with unique prefixes so they can be pruned separately. Otherwise we end up keeping only one of the stream or filesystem backups, which isn't the intention. However, due to issues with --append-only mode we are not issuing prune commands at this time. Note the dump commands are updated slightly, particularly with "--skip-extended-insert", which was suggested by mordred and significantly improves incremental diff-ability by being slightly more verbose but keeping much more of the output stable across dumps. 
Change-Id: I500062c1c52c74a567621df9aaa716de804ffae7 --- .../host_vars/etherpad01.opendev.org.yaml | 2 + .../host_vars/gitea01.opendev.org.yaml | 2 + .../host_vars/review01.openstack.org.yaml | 2 + playbooks/roles/borg-backup/README.rst | 6 +++ .../borg-backup/templates/borg-backup.j2 | 37 +++++++++++-------- playbooks/roles/etherpad/tasks/main.yaml | 16 +++++++- playbooks/roles/gerrit/tasks/main.yaml | 13 +++++++ playbooks/roles/gitea/tasks/main.yaml | 13 +++++++ playbooks/test-borg-backup.yaml | 14 +++++++ zuul.d/system-config-run.yaml | 3 ++ 10 files changed, 91 insertions(+), 17 deletions(-) create mode 100644 playbooks/test-borg-backup.yaml diff --git a/inventory/service/host_vars/etherpad01.opendev.org.yaml b/inventory/service/host_vars/etherpad01.opendev.org.yaml index c7bd142b12..e3e4112a23 100644 --- a/inventory/service/host_vars/etherpad01.opendev.org.yaml +++ b/inventory/service/host_vars/etherpad01.opendev.org.yaml @@ -8,3 +8,5 @@ etherpad_redirect_vhost: etherpad.openstack.org borg_backup_excludes_extra: # live db; we store daily dumps - /var/etherpad/* + # local db backups, we store stream + - /var/backups/etherpad-mariadb diff --git a/inventory/service/host_vars/gitea01.opendev.org.yaml b/inventory/service/host_vars/gitea01.opendev.org.yaml index 9587736b73..5bc0b20d63 100644 --- a/inventory/service/host_vars/gitea01.opendev.org.yaml +++ b/inventory/service/host_vars/gitea01.opendev.org.yaml @@ -7,3 +7,5 @@ borg_backup_excludes_extra: - /var/gitea/data/ # db is backed up in dumps, don't capture live files - /var/gitea/db + # backed up by streaming backup + - /var/backups/gitea-mariadb diff --git a/inventory/service/host_vars/review01.openstack.org.yaml b/inventory/service/host_vars/review01.openstack.org.yaml index 39992072a8..cec2216945 100644 --- a/inventory/service/host_vars/review01.openstack.org.yaml +++ b/inventory/service/host_vars/review01.openstack.org.yaml @@ -76,3 +76,5 @@ borg_backup_excludes_extra: - /home/gerrit2/review_site/cache/* - 
/home/gerrit2/review_site/tmp/* - /home/gerrit2/review_site/index/* + # dump directly via stream + - /home/gerrit2/mysql_backups/* diff --git a/playbooks/roles/borg-backup/README.rst b/playbooks/roles/borg-backup/README.rst index c97c88161f..b751d9f477 100644 --- a/playbooks/roles/borg-backup/README.rst +++ b/playbooks/roles/borg-backup/README.rst @@ -15,6 +15,12 @@ correctly on the backup server. This role sets a tuple ``borg_user`` with the username and public key; the ``borg-backup-server`` role uses this variable for each host in the ``borg-backup`` group to initalise users. +Hosts can place files into ``/etc/borg-streams``; each should be a script +that writes to stdout the data to be fed into a backup archive on each +run. The output is saved to an archive named after the file. +This is useful for raw database dumps, which allow ``borg`` to +deduplicate as much as possible. + **Role Variables** .. zuul:rolevar:: borg_username diff --git a/playbooks/roles/borg-backup/templates/borg-backup.j2 b/playbooks/roles/borg-backup/templates/borg-backup.j2 index 4ac8225e52..5a05b6e3f9 100644 --- a/playbooks/roles/borg-backup/templates/borg-backup.j2 +++ b/playbooks/roles/borg-backup/templates/borg-backup.j2 @@ -9,6 +9,7 @@ if [ -z "$1" ]; then fi BORG="/opt/borg/bin/borg" +BORG_CREATE="${BORG} create --verbose --filter AME --list --stats --show-rc --compression lz4 --exclude-caches " # Setting this, so the repo does not need to be given on the commandline: export BORG_REPO="ssh://{{ borg_username}}@${1}/opt/backups/{{ borg_username }}/backup" @@ -24,31 +25,35 @@ export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=1 # Backup the most important directories into an archive named after # the machine this script is currently running on: -${BORG} create \ - --verbose \ - --filter AME \ - --list \ - --stats \ - --show-rc \ - --compression lz4 \ - --exclude-caches \ +${BORG_CREATE} \ {% for item in borg_backup_excludes + borg_backup_excludes_extra -%} --exclude '{{ item }}' \ {% endfor 
-%} - \ - ::'{hostname}-{now}' \ + ::'{hostname}-filesystem-{now}' \ {% for item in borg_backup_dirs + borg_backup_dirs_extra -%} {{ item }} {{ '\\' if not loop.last }} {% endfor -%} backup_exit=$? -if [ ${backup_exit} -eq 0 ]; then - info "Running prune" - ${BORG} prune --verbose --list --prefix '{hostname}-' \ - --show-rc --keep-daily 7 --keep-weekly 4 --keep-monthly 12 - backup_exit=$? -fi +for f in $(shopt -s nullglob; echo /etc/borg-streams/*) +do + stream_name=$(basename $f) + info "Backing up stream archive $stream_name" + bash $f | ${BORG_CREATE} --stdin-name ${stream_name} \ + ::"{hostname}-${stream_name}-{now}" - + _status=( "${PIPESTATUS[@]}" ) # snapshot now: every later command, including [[ ]], overwrites PIPESTATUS + if [[ ${_status[0]} -ne 0 ]]; then + info "Streaming script ${f} failed!" + stream_exit=${_status[0]} + elif [[ ${_status[1]} -ne 0 ]]; then + info "Borg failed!" + stream_exit=${_status[1]} + else + stream_exit=0 + fi + (( backup_exit = backup_exit || stream_exit )) +done if [ ${backup_exit} -eq 0 ]; then info "Backup finished successfully" diff --git a/playbooks/roles/etherpad/tasks/main.yaml b/playbooks/roles/etherpad/tasks/main.yaml index 4d5967aee3..7b118328f9 100644 --- a/playbooks/roles/etherpad/tasks/main.yaml +++ b/playbooks/roles/etherpad/tasks/main.yaml @@ -123,7 +123,7 @@ owner: root group: root -- name: Set up cron job to backup the database +- name: Set up cron job for local database backup cron: name: etherpad-db-backup state: present @@ -142,3 +142,17 @@ logrotate_rotate: 2 logrotate_file_name: /var/backups/etherpad-mariadb/etherpad-mariadb.sql.gz logrotate_compress: false + +- name: Setup db backup streaming job + block: + - name: Create backup streaming config dir + file: + path: /etc/borg-streams + state: directory + + - name: Create db streaming file + copy: + content: >- + /usr/local/bin/docker-compose -f /etc/etherpad-docker/docker-compose.yaml exec -T mariadb + bash -c '/usr/bin/mysqldump --skip-extended-insert --databases etherpad-lite --single-transaction -uroot -p"$MYSQL_ROOT_PASSWORD"' + dest: 
/etc/borg-streams/mysql diff --git a/playbooks/roles/gerrit/tasks/main.yaml b/playbooks/roles/gerrit/tasks/main.yaml index 1c4f244a60..edad89e89a 100644 --- a/playbooks/roles/gerrit/tasks/main.yaml +++ b/playbooks/roles/gerrit/tasks/main.yaml @@ -338,3 +338,16 @@ job: 'find /home/gerrit2/review_site/logs/*.gz -mtime +30 -exec rm -f {} \;' minute: 1 hour: 6 + +- name: Setup db backup streaming job + block: + - name: Create backup streaming config dir + file: + path: /etc/borg-streams + state: directory + + - name: Create db streaming file + copy: + content: >- + /usr/bin/mysqldump --defaults-file=/root/.gerrit_db.cnf --skip-extended-insert --ignore-table mysql.event --all-databases --single-transaction + dest: /etc/borg-streams/mysql diff --git a/playbooks/roles/gitea/tasks/main.yaml b/playbooks/roles/gitea/tasks/main.yaml index 60164f0be3..6221e3d242 100644 --- a/playbooks/roles/gitea/tasks/main.yaml +++ b/playbooks/roles/gitea/tasks/main.yaml @@ -186,3 +186,16 @@ vars: logrotate_file_name: /var/backups/gitea-mariadb/gitea-mariadb.sql.gz logrotate_compress: false +- name: Setup db backup streaming job + block: + - name: Create backup streaming config dir + file: + path: /etc/borg-streams + state: directory + + - name: Create db streaming file + copy: + content: >- + /usr/local/bin/docker-compose -f /etc/gitea-docker/docker-compose.yaml exec -T mariadb + bash -c '/usr/bin/mysqldump --skip-extended-insert --databases gitea --single-transaction -uroot -p"$MYSQL_ROOT_PASSWORD"' + dest: /etc/borg-streams/mysql diff --git a/playbooks/test-borg-backup.yaml b/playbooks/test-borg-backup.yaml new file mode 100644 index 0000000000..3b449a3e07 --- /dev/null +++ b/playbooks/test-borg-backup.yaml @@ -0,0 +1,14 @@ +- hosts: "borg-backup" + tasks: + - name: Setup db backup streaming job + block: + - name: Create backup streaming config dir + file: + path: /etc/borg-streams + state: directory + + - name: Create sample streaming file + copy: + content: >- + dd 
if=/dev/urandom bs=1M count=5 + dest: /etc/borg-streams/random diff --git a/zuul.d/system-config-run.yaml b/zuul.d/system-config-run.yaml index 45413988b0..f89c7b696d 100644 --- a/zuul.d/system-config-run.yaml +++ b/zuul.d/system-config-run.yaml @@ -347,7 +347,10 @@ vars: run_playbooks: - playbooks/service-borg-backup.yaml + run_test_playbook: playbooks/test-borg-backup.yaml files: + - playbooks/service-borg-backup.yaml + - playbooks/test-borg-backup.yaml - playbooks/install-ansible.yaml - playbooks/roles/install-borg - playbooks/roles/borg-backup