Merge "Add option to skip downloading/uploading identical files"
This commit is contained in:
commit
0aa0f4f6cc
62
bin/swift
62
bin/swift
@@ -21,7 +21,8 @@ from errno import EEXIST, ENOENT
|
|||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from optparse import OptionParser, SUPPRESS_HELP
|
from optparse import OptionParser, SUPPRESS_HELP
|
||||||
from os import environ, listdir, makedirs, utime, _exit as os_exit
|
from os import environ, listdir, makedirs, utime, _exit as os_exit
|
||||||
from os.path import basename, dirname, getmtime, getsize, isdir, join
|
from os.path import basename, dirname, getmtime, getsize, isdir, join, \
|
||||||
|
sep as os_path_sep
|
||||||
from random import shuffle
|
from random import shuffle
|
||||||
from sys import argv, exit, stderr, stdout
|
from sys import argv, exit, stderr, stdout
|
||||||
from time import sleep, time, gmtime, strftime
|
from time import sleep, time, gmtime, strftime
|
||||||
@@ -274,7 +275,7 @@ Optional arguments:
|
|||||||
Adds a customized request header to the query, like
|
Adds a customized request header to the query, like
|
||||||
"Range" or "If-Match". This argument is repeatable.
|
"Range" or "If-Match". This argument is repeatable.
|
||||||
Example --header "content-type:text/plain"
|
Example --header "content-type:text/plain"
|
||||||
|
--skip-identical Skip downloading files that are identical on both sides
|
||||||
'''.strip("\n")
|
'''.strip("\n")
|
||||||
|
|
||||||
|
|
||||||
@@ -310,6 +311,10 @@ def st_download(parser, args, thread_manager):
|
|||||||
'Adds a customized request header to the query, like "Range" or '
|
'Adds a customized request header to the query, like "Range" or '
|
||||||
'"If-Match". This argument is repeatable. Example'
|
'"If-Match". This argument is repeatable. Example'
|
||||||
' --header "content-type:text/plain"')
|
' --header "content-type:text/plain"')
|
||||||
|
parser.add_option(
|
||||||
|
'--skip-identical', action='store_true', dest='skip_identical',
|
||||||
|
default=False, help='Skip downloading files that are identical on '
|
||||||
|
'both sides')
|
||||||
(options, args) = parse_args(parser, args)
|
(options, args) = parse_args(parser, args)
|
||||||
args = args[1:]
|
args = args[1:]
|
||||||
if options.out_file == '-':
|
if options.out_file == '-':
|
||||||
@@ -330,6 +335,23 @@ def st_download(parser, args, thread_manager):
|
|||||||
container, obj, out_file = queue_arg
|
container, obj, out_file = queue_arg
|
||||||
else:
|
else:
|
||||||
raise Exception("Invalid queue_arg length of %s" % len(queue_arg))
|
raise Exception("Invalid queue_arg length of %s" % len(queue_arg))
|
||||||
|
path = options.yes_all and join(container, obj) or obj
|
||||||
|
path = path.lstrip(os_path_sep)
|
||||||
|
if options.skip_identical and out_file != '-':
|
||||||
|
filename = out_file if out_file else path
|
||||||
|
try:
|
||||||
|
fp = open(filename, 'rb')
|
||||||
|
except IOError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
with fp:
|
||||||
|
md5sum = md5()
|
||||||
|
while True:
|
||||||
|
data = fp.read(65536)
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
md5sum.update(data)
|
||||||
|
req_headers['If-None-Match'] = md5sum.hexdigest()
|
||||||
try:
|
try:
|
||||||
start_time = time()
|
start_time = time()
|
||||||
headers, body = \
|
headers, body = \
|
||||||
@@ -342,9 +364,6 @@ def st_download(parser, args, thread_manager):
|
|||||||
else:
|
else:
|
||||||
content_length = None
|
content_length = None
|
||||||
etag = headers.get('etag')
|
etag = headers.get('etag')
|
||||||
path = options.yes_all and join(container, obj) or obj
|
|
||||||
if path[:1] in ('/', '\\'):
|
|
||||||
path = path[1:]
|
|
||||||
md5sum = None
|
md5sum = None
|
||||||
make_dir = not options.no_download and out_file != "-"
|
make_dir = not options.no_download and out_file != "-"
|
||||||
if content_type.split(';', 1)[0] == 'text/directory':
|
if content_type.split(';', 1)[0] == 'text/directory':
|
||||||
@@ -409,6 +428,9 @@ def st_download(parser, args, thread_manager):
|
|||||||
else:
|
else:
|
||||||
thread_manager.print_msg('%s [%s]', path, time_str)
|
thread_manager.print_msg('%s [%s]', path, time_str)
|
||||||
except ClientException as err:
|
except ClientException as err:
|
||||||
|
if err.http_status == 304 and options.skip_identical:
|
||||||
|
thread_manager.print_msg("Skipped identical file '%s'", path)
|
||||||
|
return
|
||||||
if err.http_status != 404:
|
if err.http_status != 404:
|
||||||
raise
|
raise
|
||||||
thread_manager.error("Object '%s/%s' not found", container, obj)
|
thread_manager.error("Object '%s/%s' not found", container, obj)
|
||||||
@@ -762,7 +784,7 @@ def st_post(parser, args, thread_manager):
|
|||||||
thread_manager.error('Usage: %s post %s\n%s', basename(argv[0]),
|
thread_manager.error('Usage: %s post %s\n%s', basename(argv[0]),
|
||||||
st_post_options, st_post_help)
|
st_post_options, st_post_help)
|
||||||
|
|
||||||
st_upload_options = '''[--changed] [--segment-size <size>]
|
st_upload_options = '''[--changed] [--skip-identical] [--segment-size <size>]
|
||||||
[--segment-container <container>] [--leave-segments]
|
[--segment-container <container>] [--leave-segments]
|
||||||
[--object-threads <thread>] [--segment-threads <threads>]
|
[--object-threads <thread>] [--segment-threads <threads>]
|
||||||
[--header <header>] [--use-slo]
|
[--header <header>] [--use-slo]
|
||||||
@@ -781,6 +803,7 @@ Positional arguments:
|
|||||||
Optional arguments:
|
Optional arguments:
|
||||||
--changed Only upload files that have changed since the last
|
--changed Only upload files that have changed since the last
|
||||||
upload
|
upload
|
||||||
|
--skip-identical Skip uploading files that are identical on both sides
|
||||||
--segment-size <size> Upload files in segments no larger than <size> and
|
--segment-size <size> Upload files in segments no larger than <size> and
|
||||||
then create a "manifest" file that will download all
|
then create a "manifest" file that will download all
|
||||||
the segments as if it were the original file
|
the segments as if it were the original file
|
||||||
@@ -815,6 +838,10 @@ def st_upload(parser, args, thread_manager):
|
|||||||
'-c', '--changed', action='store_true', dest='changed',
|
'-c', '--changed', action='store_true', dest='changed',
|
||||||
default=False, help='Will only upload files that have changed since '
|
default=False, help='Will only upload files that have changed since '
|
||||||
'the last upload')
|
'the last upload')
|
||||||
|
parser.add_option(
|
||||||
|
'--skip-identical', action='store_true', dest='skip_identical',
|
||||||
|
default=False, help='Skip uploading files that are identical on '
|
||||||
|
'both sides')
|
||||||
parser.add_option(
|
parser.add_option(
|
||||||
'-S', '--segment-size', dest='segment_size', help='Will '
|
'-S', '--segment-size', dest='segment_size', help='Will '
|
||||||
'upload files in segments no larger than <size> and then create a '
|
'upload files in segments no larger than <size> and then create a '
|
||||||
@@ -922,11 +949,32 @@ def st_upload(parser, args, thread_manager):
|
|||||||
old_manifest = None
|
old_manifest = None
|
||||||
old_slo_manifest_paths = []
|
old_slo_manifest_paths = []
|
||||||
new_slo_manifest_paths = set()
|
new_slo_manifest_paths = set()
|
||||||
if options.changed or not options.leave_segments:
|
if options.changed or options.skip_identical \
|
||||||
|
or not options.leave_segments:
|
||||||
|
if options.skip_identical:
|
||||||
|
checksum = None
|
||||||
|
try:
|
||||||
|
fp = open(path, 'rb')
|
||||||
|
except IOError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
with fp:
|
||||||
|
md5sum = md5()
|
||||||
|
while True:
|
||||||
|
data = fp.read(65536)
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
md5sum.update(data)
|
||||||
|
checksum = md5sum.hexdigest()
|
||||||
try:
|
try:
|
||||||
headers = conn.head_object(container, obj)
|
headers = conn.head_object(container, obj)
|
||||||
cl = int(headers.get('content-length'))
|
cl = int(headers.get('content-length'))
|
||||||
mt = headers.get('x-object-meta-mtime')
|
mt = headers.get('x-object-meta-mtime')
|
||||||
|
if (options.skip_identical and
|
||||||
|
checksum == headers.get('etag')):
|
||||||
|
thread_manager.print_msg(
|
||||||
|
"Skipped identical file '%s'", path)
|
||||||
|
return
|
||||||
if options.changed and cl == getsize(path) and \
|
if options.changed and cl == getsize(path) and \
|
||||||
mt == put_headers['x-object-meta-mtime']:
|
mt == put_headers['x-object-meta-mtime']:
|
||||||
return
|
return
|
||||||
|
Loading…
x
Reference in New Issue
Block a user