From fbb9790d49ebd485798316c1556b405129d97def Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 6 Feb 2020 15:16:23 +1100 Subject: [PATCH] Allow for periodic afs releases from mirror-update This is a migration of the current periodic "vos release" script to mirror-update.opendev.org. The current script is deployed by puppet and run by a cron job on afsdb01.dfw.openstack.org. My initial motivation for this was wanting to better track our release of these various volumes. With tarballs and releases moving to AFS publishing, we are going to want to track the release process more carefully. Initially, I wanted to send timing statistics to graphite so we could build a dashboard and track the release times of all volumes. Because this requires additional libraries and since we are deprecating puppet, further development there is unappealing and it would better live in ansible. Since I6c96f89c6f113362e6085febca70d58176f678e7 we have the ability to call "vos release" with "-localauth" permissions via ssh on mirror-update; this avoids various timeout issues (see the changelog comment there for more details). So we do not need to run this script directly on the afsdb server. We are already publishing mirror update logs from mirror-update, and it would be good to also publish these release logs so anyone can see if there are problems. All this points to mirror-update.opendev.org being a good future home for this script. The script has been refactored some to - have a no-op mode - send timing stats for each volume release - call "vos release" via the ssh mechanism we created - use an advisory lock to avoid running over itself It runs from a virtualenv and its logs are published via the same mechanism as the mirror logs (slightly misnamed now). Note this script is currently a no-op to test the deployment, running and log publishing. A follow-up will disable the old job and make this active. 
Change-Id: I62ae941e70c7d58e00bc663a50d52e79dfa5a684 --- playbooks/roles/afs-release/README.rst | 9 + .../afs-release/files/release-volumes.py | 177 ++++++++++++++++++ .../roles/afs-release/files/requirements.txt | 2 + playbooks/roles/afs-release/tasks/main.yaml | 44 +++++ .../mirror-update/files/publish-mirror-logs | 1 + playbooks/roles/mirror-update/tasks/main.yaml | 4 + testinfra/test_mirror-update.py | 10 + 7 files changed, 247 insertions(+) create mode 100644 playbooks/roles/afs-release/README.rst create mode 100644 playbooks/roles/afs-release/files/release-volumes.py create mode 100644 playbooks/roles/afs-release/files/requirements.txt create mode 100644 playbooks/roles/afs-release/tasks/main.yaml diff --git a/playbooks/roles/afs-release/README.rst b/playbooks/roles/afs-release/README.rst new file mode 100644 index 0000000000..ce1707cbdf --- /dev/null +++ b/playbooks/roles/afs-release/README.rst @@ -0,0 +1,9 @@ +afs-release + +Install the script and related bits and pieces for periodic release of +various AFS volumes. This role is really only intended to be run on +the `mirror-update` host, as it uses the ssh-key installed by that +host to run `vos release` under `-localauth` on the remote AFS +servers. + +**Role Variables** diff --git a/playbooks/roles/afs-release/files/release-volumes.py b/playbooks/roles/afs-release/files/release-volumes.py new file mode 100644 index 0000000000..a4db5029e9 --- /dev/null +++ b/playbooks/roles/afs-release/files/release-volumes.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# Copyright 2020 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

#
# This script is intended to run on mirror-update.opendev.org
# periodically called from a cron job.
#

import argparse
import fcntl
import logging
import os
import re
import subprocess
import sys

from contextlib import contextmanager
from datetime import datetime

# NOTE: the third-party dependencies (statsd, paramiko) are imported inside
# main() and release() rather than here, so that the parsing and locking
# helpers below can be imported with only the standard library available.


# Read/write volumes that are checked, and released when their read-only
# replicas are stale.
VOLUMES = [
    'docs',
    'docs.dev',
    'project.governance',
    'project.opendev',
    'project.security',
    'project.service-types',
    'project.specs',
    'project.starlingx',
    'project.zuul',
]

# Prefix for the per-volume statsd timer names.
STATSD_PREFIX = 'afs.release'

# Matches the indented "Last Update <timestamp>" line of "vos examine".
UPDATE_RE = re.compile(r"^\s+Last Update (.*)$")

# Timestamp layout of the text captured by UPDATE_RE.
UPDATE_FORMAT = '%a %b %d %H:%M:%S %Y'

log = logging.getLogger("release")


def parse_vos_examine(volume, output):
    """Extract the "Last Update" time of every site in "vos examine" output.

    A line starting with ``volume`` opens a stanza; the line following it
    is recorded (stripped) as the site.  Every line matching ``UPDATE_RE``
    produces one result entry tagged with the most recently seen site.

    :param volume: volume name the stanza header lines start with
    :param output: decoded text output of ``vos examine <volume>``
    :returns: list of dicts with keys ``site``, ``volume`` and ``updated``
        (a naive ``datetime``); ``site`` is ``None`` if no site line was
        seen before the update line
    :raises ValueError: if a timestamp does not match ``UPDATE_FORMAT``
    """
    ret = []
    # Initialised up front so an update line seen before any stanza header
    # yields site=None instead of raising UnboundLocalError.
    site = None
    expect_site = False
    for line in output.split('\n'):
        if not expect_site and line.startswith(volume):
            expect_site = True
            site = None
        elif expect_site:
            site = line.strip()
            expect_site = False
        m = UPDATE_RE.match(line)
        if m:
            ret.append(dict(site=site,
                            volume=volume,
                            updated=datetime.strptime(m.group(1).strip(),
                                                      UPDATE_FORMAT)))
    return ret


def get_last_update(volume):
    """Run ``vos examine`` on a volume and return its last-update times.

    :param volume: volume name passed to ``vos examine``
    :returns: list of dicts as produced by :func:`parse_vos_examine`
    :raises subprocess.CalledProcessError: if ``vos examine`` exits non-zero
    """
    out = subprocess.check_output(['vos', 'examine', volume],
                                  stderr=subprocess.STDOUT).decode('utf-8')
    return parse_vos_examine(volume, out)


def release(volume, host, key, stats):
    """Run ``vos release`` for a volume on a remote host over ssh.

    The whole ssh session is timed with ``stats.timer`` under the name
    ``<STATSD_PREFIX>.<volume with dots replaced by underscores>``.

    :param volume: name of the volume to release
    :param host: remote host to connect to
    :param key: path of the ssh private key (identity) file to log in with
    :param stats: object providing a ``timer(name)`` context manager
    :returns: True if the remote command exited 0, False otherwise
    """
    # Imported lazily; see the note next to the module imports.
    from paramiko import SSHClient

    log.info("Releasing %s", volume)

    timer_name = '%s.%s' % (STATSD_PREFIX, volume.replace('.', '_'))

    with stats.timer(timer_name):
        client = SSHClient()
        try:
            # "key" is the private identity file, so it is handed to
            # connect(); the server itself is verified against the
            # invoking user's known_hosts file.
            client.load_system_host_keys()
            client.connect(host, key_filename=key)
            _, stdout, stderr = client.exec_command(
                'vos release %s' % volume)
            for line in stdout.readlines():
                log.debug(line.rstrip())
            errors = ''.join(stderr.readlines()).strip()
            status = stdout.channel.recv_exit_status()
        finally:
            # Always drop the connection, even if the session blew up.
            client.close()

    if status != 0:
        log.error("Release of %s failed with exit status %s: %s",
                  volume, status, errors)
        return False

    log.info("Release of %s successful", volume)
    return True


def check_release(volume):
    """Check if a volume needs release.

    The volume needs a release when any site of ``<volume>.readonly`` has
    an older "Last Update" time than the first entry reported for the
    volume itself.

    :param volume: name of the read/write volume
    :returns: True if it needs to be released, False if not
    """
    log.info("Checking %s", volume)
    rw_sites = get_last_update(volume)
    if not rw_sites:
        # Nothing to compare against; report it rather than die with an
        # opaque IndexError.
        log.error("No last update time found for %s", volume)
        return False
    rw = rw_sites[0]
    log.debug("  %s %s %s", rw['site'], rw['updated'], rw['volume'])

    stale = False
    for ro in get_last_update(volume + '.readonly'):
        log.debug("  %s %s %s", ro['site'], ro['updated'], ro['volume'])
        if ro['updated'] < rw['updated']:
            stale = True

    if not stale:
        log.info("... no release required")
    return stale


@contextmanager
def get_lock(path):
    """Hold an exclusive, non-blocking advisory lock on ``path``.

    The PID of the holder is written to the file.  If the lock is already
    held elsewhere the process exits with status 1.

    :param path: lockfile path; created if it does not exist
    """
    # Append mode: opening must not truncate the file, otherwise a process
    # that loses the race would wipe the PID recorded by the lock holder.
    with open(path, 'a') as f:
        try:
            fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            log.error("Unable to get lockfile!")
            sys.exit(1)
        # We own the lock now, so it is safe to replace the old contents.
        f.seek(0)
        f.truncate()
        f.write("%s\n" % os.getpid())
        f.flush()
        log.debug("Acquired release lock")
        try:
            yield
        finally:
            log.debug("Release lock")
            fcntl.flock(f, fcntl.LOCK_UN)


def main():
    """Check every volume in VOLUMES and release those that need it.

    :returns: 0 if every volume was processed without error, 1 otherwise
    """
    # Both third-party dependencies are imported up front so that even a
    # --skip-release smoke run fails fast when one of them is missing.
    import paramiko  # noqa: F401
    import statsd

    parser = argparse.ArgumentParser(
        description='Periodically release various OpenDev AFS volumes')
    parser.add_argument('-d', '--debug', action='store_true')
    parser.add_argument('--lockfile', action='store',
                        default='/var/run/release-volumes.lock',
                        help='volume release lockfile')
    parser.add_argument('--force-release', action='store_true',
                        help="Force vos release, even if not required")
    parser.add_argument('--skip-release', action='store_true',
                        help="Skip vos release, even if required")
    parser.add_argument('--ssh-identity', action='store',
                        default='/root/.ssh/id_vos_release',
                        help="SSH identity file for remote vos release")
    parser.add_argument('--ssh-server', action='store',
                        default='afs01.dfw.openstack.org',
                        help="Remote host to run vos release")
    parser.add_argument('--statsd-host', action='store',
                        default='graphite.opendev.org',
                        help='Remote host to send stats to')
    # type=int so a port given on the command line is not left as a string
    parser.add_argument('--statsd-port', action='store', type=int,
                        default=8125,
                        help='Remote port to send stats to')

    args = parser.parse_args()

    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level,
                        format='%(asctime)s %(name)s '
                               '%(levelname)-8s %(message)s')

    log.debug("--- Starting %s ---", datetime.now())
    log.debug("Sending stats to %s:%s", args.statsd_host, args.statsd_port)
    if args.force_release:
        log.info("Forcing release of all volumes")

    stats = statsd.StatsClient(host=args.statsd_host,
                               port=args.statsd_port)

    failed = []
    with get_lock(args.lockfile):
        for volume in VOLUMES:
            # One broken volume must not prevent the others from being
            # released; remember the failure and carry on.
            try:
                needed = check_release(volume)
                if not (needed or args.force_release):
                    continue
                if args.skip_release:
                    log.info("Force skipping release")
                elif not release(volume, args.ssh_server,
                                 args.ssh_identity, stats):
                    failed.append(volume)
            except Exception:
                log.exception("Failed to process %s", volume)
                failed.append(volume)

    if failed:
        log.error("Volumes with errors: %s", ', '.join(failed))
    log.debug("--- Complete %s ---", datetime.now())
    return 1 if failed else 0


if __name__ == '__main__':
    sys.exit(main())
/var/log/afs-release/afs-release.log' + hour: '*' + minute: '5' + +- name: Install logrotate rules + include_role: + name: logrotate + vars: + logrotate_file_name: '/var/log/afs-release/afs-release.log' \ No newline at end of file diff --git a/playbooks/roles/mirror-update/files/publish-mirror-logs b/playbooks/roles/mirror-update/files/publish-mirror-logs index 9eaae829db..adbfe5c4f2 100755 --- a/playbooks/roles/mirror-update/files/publish-mirror-logs +++ b/playbooks/roles/mirror-update/files/publish-mirror-logs @@ -19,6 +19,7 @@ K5START="k5start -t -f /etc/logs.keytab service/logs-mirror --" RSYNC="rsync -avz --no-perms --no-owner --no-group" $K5START $RSYNC /var/log/rsync-mirrors $DEST +$K5START $RSYNC /var/log/afs-release $DEST # NOTE(ianw) : r/w volume as this is infrequently accessed; thus no # replications and no "vos release" etc required. diff --git a/playbooks/roles/mirror-update/tasks/main.yaml b/playbooks/roles/mirror-update/tasks/main.yaml index db7eaefac0..f95df72b42 100644 --- a/playbooks/roles/mirror-update/tasks/main.yaml +++ b/playbooks/roles/mirror-update/tasks/main.yaml @@ -33,4 +33,8 @@ - name: Setup rsync mirror scripts include_tasks: rsync.yaml +- name: Setup periodic AFS release script + include_role: + name: afs-release + # TODO: reprepro and other mirror components diff --git a/testinfra/test_mirror-update.py b/testinfra/test_mirror-update.py index e7178ad8b6..4fd5740bbe 100644 --- a/testinfra/test_mirror-update.py +++ b/testinfra/test_mirror-update.py @@ -53,3 +53,13 @@ def test_keytabs(host): assert f.sha256sum == KEYTAB_SHA256 assert f.mode == 0o400 +def test_afs_release_script(host): + f = host.file('/opt/afs-release/release-volumes.py') + assert f.exists + +def test_afs_release_script_run(host): + # This will just run the command in a no-op mode to make sure deps + # are installed, etc. + cmd = host.run('/opt/afs-release/release-volumes.py ' + '--debug --skip-release') + assert cmd.succeeded