Merge "Retire bandersnatch mirroring"

This commit is contained in:
Zuul 2018-11-28 07:22:27 +00:00 committed by Gerrit Code Review
commit 0626de0b59
6 changed files with 0 additions and 243 deletions

View File

@ -1,57 +0,0 @@
:title: Bandersnatch
.. _bandersnatch:
Bandersnatch
############
A pypi mirror tool
At a Glance
===========
:Hosts:
* http://mirror.bhs1.ovh.openstack.org/pypi
* http://mirror.ca-ymq-1.vexxhost.openstack.org/pypi
* http://mirror.dfw.rax.openstack.org/pypi
* http://mirror.gra1.ovh.openstack.org/pypi
* http://mirror.iad.rax.openstack.org/pypi
* http://mirror.ord.rax.openstack.org/pypi
:Puppet:
* :cgit_file:`modules/openstack_project/manifests/static.pp`
:Projects:
* https://pypi.python.org/pypi/bandersnatch
* https://git.openstack.org/cgit/openstack-infra/puppet-bandersnatch
:Documentation:
* https://pypi.python.org/pypi/bandersnatch#configuration
:Bugs:
* https://storyboard.openstack.org/#!/project/748
* https://bitbucket.org/pypa/bandersnatch/issues?status=new&status=open
Overview
========
Bandersnatch is a tool we run on the static.openstack.org host to
build a complete mirror of pypi.python.org. Cron execs bandersnatch
on an interval with logs going to ``/var/log/bandersnatch``.
Stale Packages
==============
Occasionally pypi.python.org fails to sync to its CDN, which results
in stale package artifacts. You will notice this in the bandersnatch
logs as::
2014-07-11 01:30:04,592 INFO: Syncing package: python-novaclient (serial 1154164)
2014-07-11 01:30:04,592 DEBUG: Getting /pypi/python-novaclient/json (serial 1154164)
2014-07-11 01:30:04,599 DEBUG: Expected PyPI serial 1154164 for request https://pypi.python.org/pypi/python-novaclient/json but got 1154163
2014-07-11 01:30:04,599 ERROR: Stale serial for package python-novaclient
2014-07-11 01:30:04,599 ERROR: Stale serial for python-novaclient (1154164) not updating. Giving up.
The fix for this is to issue a PURGE against the url specified above::
curl -X PURGE https://pypi.python.org/pypi/python-novaclient/json
The next run of bandersnatch will sync the package. Note this PURGE
step should be performed automatically by our bandersnatch wrapper
script, but can be performed by hand safely if necessary.

View File

@ -26,7 +26,6 @@ Major Systems
planet
puppet
static
bandersnatch
reprepro
lists
wiki

View File

@ -508,7 +508,6 @@ node /^mirror-update\d*\.openstack\.org$/ {
$group = "afsadmin"
class { 'openstack_project::mirror_update':
bandersnatch_keytab => hiera('bandersnatch_keytab'),
admin_keytab => hiera('afsadmin_keytab'),
fedora_keytab => hiera('fedora_keytab'),
opensuse_keytab => hiera('opensuse_keytab'),

View File

@ -83,7 +83,6 @@ INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-accessbot"]="ori
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-ansible"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-askbot"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-asterisk"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bandersnatch"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bugdaystats"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-bup"]="origin/master"
INTEGRATION_MODULES["$OPENSTACK_GIT_ROOT/openstack-infra/puppet-cgit"]="origin/master"

View File

@ -1,119 +0,0 @@
#!/bin/bash
# Copyright 2016 IBM Corp.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Runs bandersnatch and then decides whether to "vos release" the
# mirror.pypi volume.
#
# Usage: pass the log file path as the first (and only) argument.
# stdout and stderr of this script are appended to that file, and the
# same file is re-read below to find which packages were synced.
#
# A vos release is performed when any one of these holds:
# * the difference between now and the read-only volume's updateDate
# is greater than 14400,
# * more than 512 distinct packages were synced in this run, or
# * a synced package matches a line of upper-constraints.txt or
# blacklist.txt on any remote branch of openstack/requirements.
# Otherwise the release is skipped (see the comment near the end for why).
set -e
# Set up logging, see:
# http://www.tldp.org/LDP/abs/html/x17974.html
LOG_FILE=$1
# Open STDOUT as $LOG_FILE file for write appending.
exec 1>>$LOG_FILE
# Redirect STDERR to STDOUT
exec 2>&1
# The start timestamp is written to the log so that the sed range further
# down can find where this run's output begins.
START_TIME=$(date --iso-8601=ns)
echo $START_TIME
echo "Obtaining bandersnatch tokens and running bandersnatch."
# Note that the set -e is important above as it will cause us
# to not do a vos release if bandersnatch fails. Below we check
# additional conditions on whether or not to do a vos release.
# timeout limits the run to 4 hours and follows up with a KILL signal
# 2 minutes later if the command is still running.
k5start -t -f /etc/bandersnatch.keytab service/bandersnatch -- timeout -k 2m 4h run-bandersnatch
# Make sure logs made it to disk
sync
# This is what it looks like when bandersnatch logs it.
# 2017-06-09 19:40:02,545 INFO: Syncing package: shodan (serial 2939083)
# Need to get package name (shodan) and compare it to upper-constraints in
# openstack/requirements (all branches)
# We get the list of packages out of our own log. There is a lot happening
# with sed below so let's talk about it.
# First we don't print every line we process (-n) we only print those lines
# that match using the trailing /p
# Next we only match beginning at our start time to the end of the file
# (/from_pattern/,to_pattern)
# Then we only look for lines that say Syncing package as these actually
# give us the package name. We extract the package name from here and print
# it.
sed -n -e "/$START_TIME/,\$s/.*Syncing\spackage:\s\(.*\)\s(serial\s[0-9]\+)/\1/p" $LOG_FILE | sort -u > /tmp/bandersnatch_updated_packages
# Take the numeric value from the first updateDate line that
# "vos examine -format" prints for the read-only volume.
LAST_VOS_RELEASE=$(vos examine mirror.pypi.readonly -format | grep 'updateDate' | head -1 | sed -e 's/updateDate\s\([0-9]\+\)\s.*/\1/')
NOW=$(date +%s)
# NOTE(review): assumes updateDate is in epoch seconds like NOW, which
# would make the 14400 threshold below 4 hours -- confirm.
DELTA=$((NOW - LAST_VOS_RELEASE))
NEED_RELEASE="no"
if [[ "$DELTA" -gt "14400" ]] ; then
NEED_RELEASE="yes"
elif [[ $(wc -l /tmp/bandersnatch_updated_packages | cut -d' ' -f 1) -gt "512" ]] ; then
# If there are a lot of packages updated just go ahead and sync.
NEED_RELEASE="yes"
else
date --iso-8601=ns
echo "Checking package updates against requirements"
REPO_PATH=/opt/pypi_mirror_update/requirements
if ! [ -d $REPO_PATH ] ; then
mkdir -p $REPO_PATH
fi
# Exporting GIT_DIR points the git commands below at the requirements
# clone without changing the working directory.
export GIT_DIR="$REPO_PATH/.git"
if ! [ -d $GIT_DIR ] ; then
git clone https://git.openstack.org/openstack/requirements $REPO_PATH
fi
# Ensure repo contents are up to date
git remote update
git prune
PACKAGES=$(cat /tmp/bandersnatch_updated_packages)
for BRANCH in `git branch -a | grep 'remotes/origin' | grep -v 'HEAD'` ; do
# Make best effort at listing packages but don't fail if we can't
# list them on a specific branch for some reason.
# The output redirections truncate both temp files before git show runs,
# so a failed git show leaves an empty file rather than the previous
# branch's contents.
set +e
git show "$BRANCH:upper-constraints.txt" > /tmp/pypi-update-constraints
git show "$BRANCH:blacklist.txt" > /tmp/pypi-update-blacklist
set -e
for PACKAGE in $PACKAGES ; do
# Note this regex is as specific as possible to avoid matching
# substrings inadvertently.
if grep -q -i "^$PACKAGE\(=\+.*\)\?$" /tmp/pypi-update-constraints /tmp/pypi-update-blacklist ; then
NEED_RELEASE="yes"
break
fi
done
# One match is enough; stop scanning the remaining branches.
if [[ "$NEED_RELEASE" == "yes" ]] ; then
break
fi
done
unset GIT_DIR
fi
date --iso-8601=ns
if [[ "$NEED_RELEASE" == "yes" ]] ; then
echo "Bandersnatch completed successfully, running vos release."
k5start -t -f /etc/afsadmin.keytab service/afsadmin -- vos release -v mirror.pypi
else
# We minimize vos releases as a release causes the remote AFS caches to
# update file metadata on reads. This significantly tanks the performance
# of remote caches on the other side of the world. Note this appears to
# happen even if vos release doesn't update any data.
echo "Bandersnatch completed successfully, not updating as no constrained package was updated."
fi
date --iso-8601=ns
echo "Done."

View File

@ -1,7 +1,6 @@
# == Class: openstack_project::mirror_update
#
class openstack_project::mirror_update (
$bandersnatch_keytab = '',
$reprepro_keytab = '',
$admin_keytab = '',
$gem_keytab = '',
@ -20,45 +19,6 @@ class openstack_project::mirror_update (
class { 'openstack_project::gem_mirror': }
class { 'bandersnatch':
bandersnatch_source => 'pip3',
}
class { 'bandersnatch::mirror':
mirror_root => '/afs/.openstack.org/mirror/pypi',
static_root => '/afs/.openstack.org/mirror',
hash_index => true,
package_blacklist => [
# These packages are quite large and release often. Ignore them.
tensorflow,
tf-nightly,
tf-nightly-gpu,
tfp-nightly,
tfp-nightly-gpu,
tensorboard,
tb-nightly,
mxnet,
mxnet-mkl,
mxnet-cu75,
mxnet-cu75mkl,
mxnet-cu80,
mxnet-cu80mkl,
mxnet-cu80-win,
mxnet-cu90,
mxnet-cu90mkl,
mxnet-cu91,
mxnet-cu91mkl,
],
require => Class['bandersnatch'],
}
file { '/etc/bandersnatch.keytab':
owner => 'root',
group => 'root',
mode => '0400',
content => $bandersnatch_keytab,
}
file { '/etc/gem.keytab':
owner => 'rubygems',
group => 'root',
@ -74,14 +34,6 @@ class openstack_project::mirror_update (
content => $admin_keytab,
}
file { '/usr/local/bin/bandersnatch-mirror-update':
ensure => present,
owner => 'root',
group => 'root',
mode => '0755',
source => 'puppet:///modules/openstack_project/bandersnatch-mirror-update.sh',
}
file { '/usr/local/bin/gem-mirror-update':
ensure => present,
owner => 'root',
@ -90,22 +42,6 @@ class openstack_project::mirror_update (
source => 'puppet:///modules/openstack_project/gem-mirror-update.sh',
}
cron { 'bandersnatch':
# Disabled until we sort out how to mirror without unbounded growth.
# We may just switch to caching proxy long term.
ensure => absent,
user => $user,
minute => '*/5',
command => 'flock -n /var/run/bandersnatch/mirror.lock bandersnatch-mirror-update /var/log/bandersnatch/mirror.log',
environment => 'PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin',
require => [
File['/usr/local/bin/bandersnatch-mirror-update'],
File['/etc/afsadmin.keytab'],
File['/etc/bandersnatch.keytab'],
Class['bandersnatch::mirror']
]
}
file { '/etc/reprepro.keytab':
owner => 'root',
group => 'root',