From 0cb928cd5389ddc2fda60bc9d7447390cd65f0bf Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Wed, 18 Nov 2015 10:19:52 -0500 Subject: [PATCH] download_bower.py: Fix hash_dir We were passing the wrong directory name, resulting in hashing a nonexistent directory and all components having the same SHA-1. Unfortunately os.walk succeeds silently in this case. Move hash_dir and hash_file to util so they can be used by other scripts. (Migration of existing hashfile implementations will come later.) Restructure the main function to look a little more like the logic in download_file.py, where we download to the cached location first and then copy it after. Fix all the broken SHA-1s in lib/js/BUCK. Change-Id: I58bc62f84b62169bf18695a6a6704c989036c4f6 --- lib/js/BUCK | 20 +++++----- tools/js/BUCK | 1 + tools/js/download_bower.py | 82 ++++++++++++++------------------------ tools/util.py | 51 ++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 63 deletions(-) diff --git a/lib/js/BUCK b/lib/js/BUCK index 0520e33e98..5a516c05e9 100644 --- a/lib/js/BUCK +++ b/lib/js/BUCK @@ -50,7 +50,7 @@ bower_component( ':polymer', ], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = 'cf7fdf9ffa3349d28632fc3e86b84300d1439e29', ) bower_component( @@ -59,7 +59,7 @@ bower_component( version = '1.0.8', deps = [':polymer'], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = '2f1ea0b4542e2949de195dff5cbe02b7cb953eff', ) bower_component( @@ -71,7 +71,7 @@ bower_component( ':promise-polyfill', ], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = 'f94a3a3d847842c49def41e27da42c7c94f8d7c7', ) bower_component( @@ -83,7 +83,7 @@ bower_component( ':polymer', ], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = '2d3eedf0a26046c0e828b1ce3d5b102ee1d0ab19', ) bower_component( @@ -92,7 +92,7 @@ bower_component( version = '1.1.1', deps = [':polymer'], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = 'e06281b6ddb3355ceca44975a167381b1fd72ce5', ) bower_component( @@ -104,7 +104,7 @@ bower_component( ':polymer', ], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = '5a68250d6d9abcd576f116dc4fc7312426323883', ) bower_component( @@ -112,7 +112,7 @@ bower_component( package = 'visionmedia/page.js', version = '1.6.4', license = 'page.js', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = 'cc442386d4e392be26c85873f463db76fafbaeaf', ) bower_component( @@ -121,7 +121,7 @@ bower_component( version = '1.2.2', deps = [':webcomponentsjs'], license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = '7f4033438425584d8912a80614d1a4f754438e15', ) bower_component( @@ -130,7 +130,7 @@ bower_component( version = '1.0.0', deps = [':polymer'], license = 'promise-polyfill', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = 'a3b598c06cbd7f441402e666ff748326030905d6', ) bower_component( @@ -138,5 +138,5 @@ bower_component( package = 'webcomponentsjs', version = '0.7.17', license = 'polymer', - sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709', + sha1 = '36e29cfe21caa71322a0b5026d7d423c33c0426f', ) diff --git a/tools/js/BUCK b/tools/js/BUCK index 3c71704499..87999d8f46 100644 --- a/tools/js/BUCK +++ b/tools/js/BUCK @@ -1,6 +1,7 @@ python_binary( name = 'download_bower', main = 'download_bower.py', + deps = ['//tools:util'], visibility = ['PUBLIC'], ) diff --git a/tools/js/download_bower.py b/tools/js/download_bower.py index e794cc0be4..80720d78fc 100644 --- a/tools/js/download_bower.py +++ b/tools/js/download_bower.py @@ -23,34 +23,13 @@ import shutil import subprocess import sys +from tools import util + + CACHE_DIR = os.path.expanduser(os.path.join( '~', '.gerritcodereview', 'buck-cache', 'downloaded-artifacts')) -def hash_file(h, p): - with open(p, 'rb') as f: - while True: - b = f.read(8192) - if not b: - break - h.update(p) - - -def hash_dir(dir): - # It's hard to get zipfiles to hash deterministically. Instead, do a sorted - # walk and hash filenames and contents together. - h = hashlib.sha1() - - for root, dirs, files in os.walk(dir): - dirs.sort() - for f in sorted(files): - p = os.path.join(root, f) - h.update(p) - hash_file(h, p) - - return h.hexdigest() - - def bower_cmd(bower, *args): cmd = bower.split(' ') cmd.extend(args) @@ -89,11 +68,10 @@ def ignore_deps(info): json.dump({'ignoredDependencies': deps.keys()}, f) -def cache_entry(name, version, sha1): - c = os.path.join(CACHE_DIR, '%s-%s.zip' % (name, version)) - if sha1: - c += '-%s' % sha1 - return c +def cache_entry(name, package, version, sha1): + if not sha1: + sha1 = hashlib.sha1('%s#%s' % (package, version)).hexdigest() + return os.path.join(CACHE_DIR, '%s-%s.zip-%s' % (name, version, sha1)) def main(args): @@ -106,33 +84,31 @@ def main(args): opts.add_option('-o', help='output file location') opts, _ = opts.parse_args() - outzip = os.path.join(os.getcwd(), opts.o) - # TODO(dborowitz): match download_file behavior of pulling any old file from - # the cache if there is no -s - # Also don't double-append sha1. - cached = cache_entry(opts.n, opts.v, opts.s) - if os.path.isfile(cached): - shutil.copyfile(cached, outzip) - return 0 + cwd = os.getcwd() + outzip = os.path.join(cwd, opts.o) + cached = cache_entry(opts.n, opts.p, opts.v, opts.s) - info = bower_info(opts.b, opts.n, opts.p, opts.v) - ignore_deps(info) - subprocess.check_call( - bower_cmd(opts.b, '--quiet', 'install', '%s#%s' % (opts.p, opts.v))) - name = info['name'] - sha1 = hash_dir(name) + if not os.path.exists(cached): + info = bower_info(opts.b, opts.n, opts.p, opts.v) + ignore_deps(info) + subprocess.check_call( + bower_cmd(opts.b, '--quiet', 'install', '%s#%s' % (opts.p, opts.v))) + bc = os.path.join(cwd, 'bower_components') + subprocess.check_call( + ['zip', '-q', '--exclude', '.bower.json', '-r', cached, opts.n], + cwd=bc) - if opts.s and sha1 != opts.s: - print(( - '%s#%s:\n' - 'expected %s\n' - 'received %s\n') % (opts.p, opts.v, opts.s, sha1), file=sys.stderr) - return 1 + if opts.s: + path = os.path.join(bc, opts.n) + sha1 = util.hash_bower_component(hashlib.sha1(), path).hexdigest() + if opts.s != sha1: + print(( + '%s#%s:\n' + 'expected %s\n' + 'received %s\n') % (opts.p, opts.v, opts.s, sha1), file=sys.stderr) + return 1 - os.chdir('bower_components') - cmd = ['zip', '-q', '-r', outzip, opts.n] - subprocess.check_call(cmd) - shutil.copyfile(outzip, cache_entry(opts.n, opts.v, sha1)) + shutil.copyfile(cached, outzip) return 0 diff --git a/tools/util.py b/tools/util.py index ec895dddfa..96f60471eb 100644 --- a/tools/util.py +++ b/tools/util.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from os import path REPO_ROOTS = { @@ -49,3 +50,53 @@ def resolve_url(url, redirects): root = root.rstrip('/') rest = rest.lstrip('/') return '/'.join([root, rest]) + + +def hash_file(hash_obj, path): + """Hash the contents of a file. + + Args: + hash_obj: an open hash object, e.g. hashlib.sha1(). + path: path to the file to hash. + + Returns: + The passed-in hash_obj. + """ + with open(path, 'rb') as f: + while True: + b = f.read(8192) + if not b: + break + hash_obj.update(b) + return hash_obj + + +def hash_bower_component(hash_obj, path): + """Hash the contents of a bower component directory. + + This is a stable hash of a directory downloaded with `bower install`, minus + the .bower.json file, which is autogenerated each time by bower. Used in lieu + of hashing a zipfile of the contents, since zipfiles are difficult to hash in + a stable manner. + + Args: + hash_obj: an open hash object, e.g. hashlib.sha1(). + path: path to the directory to hash. + + Returns: + The passed-in hash_obj. + """ + if not os.path.isdir(path): + raise ValueError('Not a directory: %s' % path) + + path = os.path.abspath(path) + for root, dirs, files in os.walk(path): + dirs.sort() + for f in sorted(files): + if f == '.bower.json': + continue + p = os.path.join(root, f) + hash_obj.update(p[len(path)+1:]) + hash_file(hash_obj, p) + + return hash_obj