From bcc262371be8ba4573adf6ab2b60032a65635eec Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Sat, 3 Aug 2013 21:42:29 -0700 Subject: [PATCH] Add a helper pip downloading tool. This tool helps remove duplicate downloads (keeping the latest version) as well as automatically creates and uses a cache and download cache directory. Change-Id: Ibcce2d69d4e99a6f1ad787787a903fdd70b294cb --- smithy | 27 +------- tools/pip-download | 160 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 25 deletions(-) create mode 100755 tools/pip-download diff --git a/smithy b/smithy index d8603f3f..4340b719 100755 --- a/smithy +++ b/smithy @@ -10,10 +10,9 @@ cd "$(dirname "$0")" VERBOSE="${VERBOSE:-0}" PY2RPM_CMD="$PWD/tools/py2rpm" YUMFIND_CMD="$PWD/tools/yumfind" -PIP_CMD="" +PIPDOWNLOAD_CMD="$PWD/tools/pip-download" YUM_OPTS="--assumeyes --nogpgcheck" -PIP_OPTS="" RPM_OPTS="" CURL_OPTS="" @@ -36,7 +35,6 @@ fi if [ "$VERBOSE" == "0" ]; then YUM_OPTS="$YUM_OPTS -q" - PIP_OPTS="-q" RPM_OPTS="-q" CURL_OPTS="-s" fi @@ -58,25 +56,6 @@ if [ -z "$BOOT_FILES" ]; then BOOT_FILES="${PWD}/$BOOT_FN" fi -find_pip() -{ - if [ -n "$PIP_CMD" ]; then - return - fi - # Handle how RHEL likes to rename it. - PIP_CMD="" - for name in pip pip-python; do - if which "$name" &>/dev/null; then - PIP_CMD=$name - break - fi - done - if [ -z "$PIP_CMD" ]; then - echo -e "${COL_RED}pip/pip-python${COL_RESET} command not found!" - exit 1 - fi -} - clean_pip() { # https://github.com/pypa/pip/issues/982 @@ -195,10 +174,8 @@ except KeyError: echo -e "Building ${COL_YELLOW}missing${COL_RESET} python requirements:" dump_list "$missing_packages" local pip_tmp_dir=$(mktemp -d) - find_pip - local pip_opts="$PIP_OPTS -U -I" echo "Downloading..." - $PIP_CMD install $pip_opts $missing_packages --download "$pip_tmp_dir" + $PIPDOWNLOAD_CMD -d "$pip_tmp_dir" $missing_packages | grep "^Saved" echo "Building RPMs..." 
local rpm_names=$("$PY2RPM_CMD" --package-map $package_map --scripts-dir "conf/templates/packaging/scripts" -- "$pip_tmp_dir/"* 2>/dev/null | awk '/^Wrote: /{ print $2 }' | grep -v '.src.rpm' | sort -u) diff --git a/tools/pip-download b/tools/pip-download new file mode 100755 index 00000000..41bde2ce --- /dev/null +++ b/tools/pip-download @@ -0,0 +1,160 @@ +#!/usr/bin/python + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright (C) 2012 Yahoo! Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
import collections
import optparse
import os
import subprocess

# Candidate executable names for pip, tried in order by find_pip().
PIP_CMDS = ['pip-python', 'pip']
# Filename suffixes that remove_archive_ext() strips from archive names.
ARCHIVE_EXTS = ['.zip', '.tgz', '.tbz', '.tar.gz', '.tar', '.gz', '.bz2']


def call(cmd):
    """Run ``cmd`` (an argv list) and wait for it to finish.

    The child inherits this process's stdin/stdout/stderr (nothing is
    piped), so the returned ``communicate()`` tuple is ``(None, None)``.

    Raises RuntimeError if the command exits with a non-zero status.
    """
    proc = subprocess.Popen(cmd, stderr=None, stdin=None, stdout=None)
    ret = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("Failed running %s" % (" ".join(cmd)))
    return ret


def find_pip():
    """Return the path of the first executable from PIP_CMDS that is found.

    Raises RuntimeError when none of the candidate names can be located.
    """
    # Import the submodule explicitly: a bare ``import distutils`` does not
    # load ``distutils.spawn``, so ``distutils.spawn.find_executable`` only
    # worked when some other import happened to pull the submodule in first.
    from distutils import spawn

    for name in PIP_CMDS:
        bin_name = spawn.find_executable(name)
        if bin_name:
            return bin_name
    raise RuntimeError("Unable to find pip via any of %s commands" % (PIP_CMDS))


def download_list(options, deps, download_dir, cache_dir):
    """Invoke the pip executable to download ``deps`` into ``download_dir``.

    ``options.verbose`` selects between pip's ``-v`` and ``-q`` flags and
    ``cache_dir`` is handed to pip as its ``--download-cache``.

    Raises RuntimeError (from find_pip()/call()) if pip cannot be found or
    exits with a non-zero status.
    """
    cmd = [find_pip()]
    if options.verbose:
        cmd.append('-v')
    else:
        cmd.append('-q')
    cmd.extend(['install', '-I', '-U',
                '--download', download_dir,
                '--exists-action', 'w',
                '--download-cache', cache_dir])
    cmd.extend([str(d) for d in deps])
    call(cmd)


def remove_archive_ext(path):
    """Return ``path`` with every trailing suffix from ARCHIVE_EXTS removed.

    Suffixes are stripped repeatedly until none matches, so compound names
    such as ``foo.tar.bz2`` reduce to ``foo``. (A single pass in list order
    would stop at ``foo.tar`` because ``.tar`` is tested before ``.bz2``.)
    """
    stripped = True
    while stripped:
        stripped = False
        for ext in ARCHIVE_EXTS:
            if path.endswith(ext):
                path = path[:-len(ext)]
                stripped = True
    return path


def extract_requirement(path, source_dir):
    """Build a pip InstallRequirement for ``path`` and run its egg-info step.

    ``source_dir`` is assigned to the requirement's ``source_dir`` before
    ``run_egg_info()`` is called; the requirement object is returned.
    """
    # pip internals are imported where they are used so that importing this
    # module (and option parsing) does not depend on them.
    from pip import req as pip_req

    req = pip_req.InstallRequirement.from_line(path)
    req.source_dir = source_dir
    req.run_egg_info()
    return req


def perform_download(options, deps, extract_dir, download_dir, cache_dir):
    """Download ``deps`` and examine every regular file in ``download_dir``.

    Each non-hidden regular file is unpacked into a directory under
    ``extract_dir`` (named after the file minus its archive suffix) unless
    that directory already exists, and is then turned into a requirement via
    extract_requirement().

    Returns a dict mapping the downloaded file path to its requirement.
    """
    # Imported first so an unusable pip is reported before anything is
    # downloaded.
    from pip import util as pip_util

    download_list(options, deps, download_dir, cache_dir)
    files_examined = {}
    for basename in os.listdir(download_dir):
        # Skip dot entries (the script's own .extract/.cache directories
        # live in the download directory).
        if basename.startswith("."):
            continue
        filename = os.path.join(download_dir, basename)
        if not os.path.isfile(filename):
            continue
        untar_dir = os.path.join(extract_dir, remove_archive_ext(basename))
        if not os.path.isdir(untar_dir):
            if options.verbose:
                print("Extracting %s -> %s" % (filename, untar_dir))
            pip_util.unpack_file(filename, untar_dir, content_type='', link='')
        if options.verbose:
            print("Examining %s" % (untar_dir))
        files_examined[filename] = extract_requirement(filename, untar_dir)
    return files_examined


def evict_equivalent(options, downloaded):
    """Delete all but the highest version of each multiply-downloaded package.

    ``downloaded`` maps file path -> requirement (as returned by
    perform_download()). Entries sharing a requirement name are compared by
    ``LooseVersion(req.installed_version)``; the losing files are unlinked
    from disk and removed from ``downloaded`` in place.
    """
    from distutils.version import LooseVersion

    duplicates = collections.defaultdict(list)
    for (filename, req) in downloaded.items():
        duplicates[req.name].append((filename, req))
    dups_found = sum(1 for matches in duplicates.values() if len(matches) > 1)
    if not dups_found:
        return
    if options.verbose:
        print("%s duplicate found..." % (dups_found))
    for (name, matches) in duplicates.items():
        if len(matches) <= 1:
            continue
        versions = []
        for (filename, req) in matches:
            if options.verbose:
                print("Duplicate %s at %s with version %s" % (name, filename, req.installed_version))
            versions.append((filename, LooseVersion(req.installed_version)))
        # ``key=`` replaces the Python-2-only ``cmp=`` argument. The sort is
        # stable, so among equal versions the last one listed is still kept.
        selected_filename = sorted(versions, key=lambda item: item[1])[-1][0]
        if options.verbose:
            print('Keeping %s' % (selected_filename))
        for (filename, req) in matches:
            if filename != selected_filename:
                if options.verbose:
                    print("Deleting %s" % (filename))
                os.unlink(filename)
                downloaded.pop(filename)


def main():
    """Parse the command line, download the requirements and report them.

    Prints one ``Saved <path>`` line per file kept in the download directory.

    Raises IOError when the download directory option is missing, the
    directory does not exist, or no requirements were given.
    """
    usage = "usage: %prog [options] req req ..."
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", action="store", dest="download_dir",
                      help='directory to download dependencies too', metavar="DIR")
    parser.add_option("-v", '--verbose', action="store_true", help='enable verbose output',
                      dest="verbose", default=False)
    (options, args) = parser.parse_args()
    download_dir = options.download_dir
    if not download_dir:
        raise IOError("Download directory required")
    if not os.path.isdir(download_dir):
        raise IOError("Download directory '%s' not found" % (download_dir))
    if not args:
        raise IOError("Download requirement/s expected")
    # Working directories are dot-prefixed so perform_download() ignores them
    # when listing the download directory.
    extract_dir = os.path.join(download_dir, '.extract')
    cache_dir = os.path.join(download_dir, '.cache')
    for d in [extract_dir, cache_dir]:
        if not os.path.isdir(d):
            os.makedirs(d)
    downloaded = perform_download(options, list(args), extract_dir, download_dir, cache_dir)
    evict_equivalent(options, downloaded)
    for filename in sorted(downloaded.keys()):
        print("Saved %s" % (filename))


if __name__ == '__main__':
    main()