From 85297878f56783e24ade39771ce1b6df4d18da08 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Fri, 16 Nov 2018 10:28:11 +1100 Subject: [PATCH] Retire bandersnatch mirroring Bandersnatch mirroring has been disabled since change I88a838cb28fee3bd16b2b0a26e614ac5c2f23241, almost 6 months ago as of this writing. Since then we have been running a reverse caching proxy. Although bandersnatch served us well, it seems PyPI has become impractical to mirror locally. This is partially due to the 2TB volume limitations of OpenAFS and partially due to our not having a sane way to filter large, frequently updated packages. With the reverse proxy working, there are no plans to restore our local mirror. Retire the references to it before we clean up the AFS volumes. Change-Id: Ia23828328dd859bbf26f95735c1c2e99c573d10e --- doc/source/bandersnatch.rst | 57 --------- doc/source/systems.rst | 1 - manifests/site.pp | 1 - modules.env | 1 - .../files/bandersnatch-mirror-update.sh | 119 ------------------ .../manifests/mirror_update.pp | 64 ---------- 6 files changed, 243 deletions(-) delete mode 100644 doc/source/bandersnatch.rst delete mode 100644 modules/openstack_project/files/bandersnatch-mirror-update.sh diff --git a/doc/source/bandersnatch.rst b/doc/source/bandersnatch.rst deleted file mode 100644 index 32370d8719..0000000000 --- a/doc/source/bandersnatch.rst +++ /dev/null @@ -1,57 +0,0 @@ -:title: Bandersnatch - -.. 
_bandersnatch: - -Bandersnatch -############ - -A pypi mirror tool - -At a Glance -=========== - -:Hosts: - * http://mirror.bhs1.ovh.openstack.org/pypi - * http://mirror.ca-ymq-1.vexxhost.openstack.org/pypi - * http://mirror.dfw.rax.openstack.org/pypi - * http://mirror.gra1.ovh.openstack.org/pypi - * http://mirror.iad.rax.openstack.org/pypi - * http://mirror.ord.rax.openstack.org/pypi -:Puppet: - * :cgit_file:`modules/openstack_project/manifests/static.pp` -:Projects: - * https://pypi.python.org/pypi/bandersnatch - * https://git.openstack.org/cgit/openstack-infra/puppet-bandersnatch -:Documentation: - * https://pypi.python.org/pypi/bandersnatch#configuration -:Bugs: - * https://storyboard.openstack.org/#!/project/748 - * https://bitbucket.org/pypa/bandersnatch/issues?status=new&status=open - -Overview -======== - -Bandersnatch is a tool we run on the static.openstack.org host to -build a complete mirror of pypi.python.org. Cron execs bandersnatch -on an interval with logs going to ``/var/log/bandersnatch``. - -Stale Packages -============== - -There is an issue with pypi.python.org syncing to its CDN occasionally -resulting in stale package artifacts. You will notice this in the -bandersnatch logs as:: - - 2014-07-11 01:30:04,592 INFO: Syncing package: python-novaclient (serial 1154164) - 2014-07-11 01:30:04,592 DEBUG: Getting /pypi/python-novaclient/json (serial 1154164) - 2014-07-11 01:30:04,599 DEBUG: Expected PyPI serial 1154164 for request https://pypi.python.org/pypi/python-novaclient/json but got 1154163 - 2014-07-11 01:30:04,599 ERROR: Stale serial for package python-novaclient - 2014-07-11 01:30:04,599 ERROR: Stale serial for python-novaclient (1154164) not updating. Giving up. - -The fix for this is to issue a PURGE against the url specified above:: - - curl -X PURGE https://pypi.python.org/pypi/python-novaclient/json - -The next run of bandersnatch will sync the package. 
Note this PURGE -step should be performed automatically by our bandersnatch wrapper -script, but can be performed by hand safely if necessary. diff --git a/doc/source/systems.rst b/doc/source/systems.rst index 1ae21e6344..8f006a33f8 100644 --- a/doc/source/systems.rst +++ b/doc/source/systems.rst @@ -26,7 +26,6 @@ Major Systems planet puppet static - bandersnatch reprepro lists wiki diff --git a/manifests/site.pp b/manifests/site.pp index fb682a38d0..45f4b56ef0 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -508,7 +508,6 @@ node /^mirror-update\d*\.openstack\.org$/ { $group = "afsadmin" class { 'openstack_project::mirror_update': - bandersnatch_keytab => hiera('bandersnatch_keytab'), admin_keytab => hiera('afsadmin_keytab'), fedora_keytab => hiera('fedora_keytab'), opensuse_keytab => hiera('opensuse_keytab'), diff --git a/modules.env b/modules.env index 26505ba2d5..8d9fe1dda1 100644 --- a/modules.env +++ b/modules.env @@ -83,7 +83,6 @@ INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-accessbot"]="ori INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-ansible"]="origin/master" INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-askbot"]="origin/master" INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-asterisk"]="origin/master" -INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bandersnatch"]="origin/master" INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bugdaystats"]="origin/master" INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bup"]="origin/master" INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-cgit"]="origin/master" diff --git a/modules/openstack_project/files/bandersnatch-mirror-update.sh b/modules/openstack_project/files/bandersnatch-mirror-update.sh deleted file mode 100644 index caf36c6f21..0000000000 --- a/modules/openstack_project/files/bandersnatch-mirror-update.sh +++ /dev/null @@ -1,119 +0,0 @@ -#!/bin/bash - -# Copyright 
2016 IBM Corp. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -set -e - -# Set up logging, see: -# http://www.tldp.org/LDP/abs/html/x17974.html -LOG_FILE=$1 -# Open STDOUT as $LOG_FILE file for write appending. -exec 1>>$LOG_FILE -# Redirect STDERR to STDOUT -exec 2>&1 - -START_TIME=$(date --iso-8601=ns) -echo $START_TIME -echo "Obtaining bandersnatch tokens and running bandersnatch." -# Note that the set -e is important above as it will cause us -# to not do a vos release if bandersnatch fails. Below we check -# additional conditions on whether or not to do a vos release. -k5start -t -f /etc/bandersnatch.keytab service/bandersnatch -- timeout -k 2m 4h run-bandersnatch - -# Make sure logs made it to disk -sync - -# This is what it looks like when bandersnatch logs it. -# 2017-06-09 19:40:02,545 INFO: Syncing package: shodan (serial 2939083) -# Need to get package name (shodan) and compare it to upper-constraints in -# openstack/requirements (all branches) - -# We get the list of packages out of our own log. There is a lot happening -# with sed below so lets talk about it. -# First we don't print every line we process (-n) we only print those lines -# that match using the trailing /p -# Next we only match beginning at our start time to the end of the file -# (/from_pattern/,to_pattern) -# Then we oonly look for lines that say Syncing package as these actually -# give us the package name. We extract the package name from here and print -# it. 
-sed -n -e "/$START_TIME/,\$s/.*Syncing\spackage:\s\(.*\)\s(serial\s[0-9]\+)/\1/p" $LOG_FILE | sort -u > /tmp/bandersnatch_updated_packages - -LAST_VOS_RELEASE=$(vos examine mirror.pypi.readonly -format | grep 'updateDate' | head -1 | sed -e 's/updateDate\s\([0-9]\+\)\s.*/\1/') -NOW=$(date +%s) -DELTA=$((NOW - LAST_VOS_RELEASE)) - -NEED_RELEASE="no" -if [[ "$DELTA" -gt "14400" ]] ; then - NEED_RELEASE="yes" -elif [[ $(wc -l /tmp/bandersnatch_updated_packages | cut -d' ' -f 1) -gt "512" ]] ; then - # If there are a lot of packages updated just go ahead and sync. - NEED_RELEASE="yes" -else - date --iso-8601=ns - echo "Checking package updates against requirements" - - REPO_PATH=/opt/pypi_mirror_update/requirements - if ! [ -d $REPO_PATH ] ; then - mkdir -p $REPO_PATH - fi - - export GIT_DIR="$REPO_PATH/.git" - if ! [ -d $GIT_DIR ] ; then - git clone https://git.openstack.org/openstack/requirements $REPO_PATH - fi - - # Ensure repo contents are up to date - git remote update - git prune - - PACKAGES=$(cat /tmp/bandersnatch_updated_packages) - for BRANCH in `git branch -a | grep 'remotes/origin' | grep -v 'HEAD'` ; do - # Make best effort at listing packages but don't fail if we can't - # list then on a specific branch for some reason. - set +e - git show "$BRANCH:upper-constraints.txt" > /tmp/pypi-update-constraints - git show "$BRANCH:blacklist.txt" > /tmp/pypi-update-blacklist - set -e - for PACKAGE in $PACKAGES ; do - # Note this regex is as specific as possible to avoid matching - # substrings inadverdently. - if grep -q -i "^$PACKAGE\(=\+.*\)\?$" /tmp/pypi-update-constraints /tmp/pypi-update-blacklist ; then - NEED_RELEASE="yes" - break - fi - done - if [[ "$NEED_RELEASE" == "yes" ]] ; then - break - fi - done - - unset GIT_DIR -fi - -date --iso-8601=ns -if [[ "$NEED_RELEASE" == "yes" ]] ; then - echo "Bandersnatch completed successfully, running vos release." 
- k5start -t -f /etc/afsadmin.keytab service/afsadmin -- vos release -v mirror.pypi -else - # We minimize vos releases as a release causes the remote AFS caches to - # update file metadata on reads. This significantly tanks the performance - # of remote caches on the other side of the world. Note this appears to - # happen even if vos release doesn't update any data. - echo "Bandersnatch completed successfully, not updating as no constrained package was updated." -fi - -date --iso-8601=ns -echo "Done." diff --git a/modules/openstack_project/manifests/mirror_update.pp b/modules/openstack_project/manifests/mirror_update.pp index 57b1dab18e..93c0936554 100644 --- a/modules/openstack_project/manifests/mirror_update.pp +++ b/modules/openstack_project/manifests/mirror_update.pp @@ -1,7 +1,6 @@ # == Class: openstack_project::mirror_update # class openstack_project::mirror_update ( - $bandersnatch_keytab = '', $reprepro_keytab = '', $admin_keytab = '', $gem_keytab = '', @@ -20,45 +19,6 @@ class openstack_project::mirror_update ( class { 'openstack_project::gem_mirror': } - class { 'bandersnatch': - bandersnatch_source => 'pip3', - } - - class { 'bandersnatch::mirror': - mirror_root => '/afs/.openstack.org/mirror/pypi', - static_root => '/afs/.openstack.org/mirror', - hash_index => true, - package_blacklist => [ - # These packages are quite large and release often. Ignore them. 
- tensorflow, - tf-nightly, - tf-nightly-gpu, - tfp-nightly, - tfp-nightly-gpu, - tensorboard, - tb-nightly, - mxnet, - mxnet-mkl, - mxnet-cu75, - mxnet-cu75mkl, - mxnet-cu80, - mxnet-cu80mkl, - mxnet-cu80-win, - mxnet-cu90, - mxnet-cu90mkl, - mxnet-cu91, - mxnet-cu91mkl, - ], - require => Class['bandersnatch'], - } - - file { '/etc/bandersnatch.keytab': - owner => 'root', - group => 'root', - mode => '0400', - content => $bandersnatch_keytab, - } - file { '/etc/gem.keytab': owner => 'rubygems', group => 'root', @@ -74,14 +34,6 @@ class openstack_project::mirror_update ( content => $admin_keytab, } - file { '/usr/local/bin/bandersnatch-mirror-update': - ensure => present, - owner => 'root', - group => 'root', - mode => '0755', - source => 'puppet:///modules/openstack_project/bandersnatch-mirror-update.sh', - } - file { '/usr/local/bin/gem-mirror-update': ensure => present, owner => 'root', @@ -90,22 +42,6 @@ class openstack_project::mirror_update ( source => 'puppet:///modules/openstack_project/gem-mirror-update.sh', } - cron { 'bandersnatch': - # Disabled until we sort out how to mirror without unbound growth. - # We may just switch to caching proxy long term. - ensure => absent, - user => $user, - minute => '*/5', - command => 'flock -n /var/run/bandersnatch/mirror.lock bandersnatch-mirror-update /var/log/bandersnatch/mirror.log', - environment => 'PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin', - require => [ - File['/usr/local/bin/bandersnatch-mirror-update'], - File['/etc/afsadmin.keytab'], - File['/etc/bandersnatch.keytab'], - Class['bandersnatch::mirror'] - ] - } - file { '/etc/reprepro.keytab': owner => 'root', group => 'root',