Update container on fast-POST

This patch makes a number of changes to enable content-type
metadata to be updated when using the fast-POST mode of
operation, as proposed in the associated spec [1].

* the object server and diskfile are modified to allow
  content-type to be updated by a POST and the updated value
  to be stored in .meta files.

* the object server accepts PUTs and DELETEs with older
  timestamps than existing .meta files. This is consistent
  with replication, which will leave a later .meta file in
  place when replicating a .data file.

* the diskfile interface is modified to provide accessor
  methods for the content-type and its timestamp.

* the naming of .meta files is modified to encode two
  timestamps when the .meta file contains a content-type value
  that was set prior to the latest metadata update; this
  enables consistency to be achieved when rsync is used for
  replication.

* ssync is modified to sync meta files when content-type
  differs between local and remote copies of objects.

* the object server issues container updates when handling
  POST requests, notifying the container server of the current
  immutable metadata (etag, size, hash, swift_bytes),
  content-type with their respective timestamps, and the
  mutable metadata timestamp.

* the container server maintains the most recently reported
  values for immutable metadata, content-type and mutable
  metadata, each with their respective timestamps, in a single
  db row.

* new probe tests verify that replication achieves eventual
  consistency of containers and objects after discrete updates
  to content-type and mutable metadata, and that container-sync
  syncs objects after fast-POST updates.

[1] spec change-id: I60688efc3df692d3a39557114dca8c5490f7837e

Change-Id: Ia597cd460bb5fd40aa92e886e3e18a7542603d01
Alistair Coles 2015-08-10 10:30:10 -05:00 committed by Alistair Coles
parent 30624a866a
commit e91de49d68
26 changed files with 3141 additions and 251 deletions

@@ -700,6 +700,7 @@ def drop_buffer_cache(fd, offset, length):
NORMAL_FORMAT = "%016.05f"
INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x'
SHORT_FORMAT = NORMAL_FORMAT + '_%x'
MAX_OFFSET = (16 ** 16) - 1
PRECISION = 1e-5
# Setting this to True will cause the internal format to always display
@@ -820,6 +821,13 @@ class Timestamp(object):
else:
return self.normal
@property
def short(self):
if self.offset or FORCE_INTERNAL:
return SHORT_FORMAT % (self.timestamp, self.offset)
else:
return self.normal
@property
def isoformat(self):
t = float(self.normal)
@@ -849,16 +857,22 @@ class Timestamp(object):
return isoformat
def __eq__(self, other):
if other is None:
return False
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal == other.internal
def __ne__(self, other):
if other is None:
return True
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal != other.internal
def __lt__(self, other):
if other is None:
return False
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal < other.internal
@@ -867,6 +881,94 @@ class Timestamp(object):
return hash(self.internal)
def encode_timestamps(t1, t2=None, t3=None, explicit=False):
"""
Encode up to three timestamps into a string. Unlike a Timestamp object, the
encoded string does NOT use fixed width fields and consequently no
relative chronology of the timestamps can be inferred from lexicographic
sorting of encoded timestamp strings.
The format of the encoded string is:
<t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]]
i.e. if t1 = t2 = t3 then just the string representation of t1 is returned,
otherwise the time offsets for t2 and t3 are appended. If explicit is True
then the offsets for t2 and t3 are always appended even if zero.
Note: any offset value in t1 will be preserved, but offsets on t2 and t3
are not preserved. In the anticipated use cases for this method (and the
inverse decode_timestamps method) the timestamps passed as t2 and t3 are
not expected to have offsets as they will be timestamps associated with a
POST request. In the case where the encoding is used in a container objects
table row, t1 could be the PUT or DELETE time but t2 and t3 represent the
content type and metadata times (if different from the data file) i.e.
correspond to POST timestamps. In the case where the encoded form is used
in a .meta file name, t1 and t2 both correspond to POST timestamps.
"""
form = '{0}'
values = [t1.short]
if t2 is not None:
t2_t1_delta = t2.raw - t1.raw
explicit = explicit or (t2_t1_delta != 0)
values.append(t2_t1_delta)
if t3 is not None:
t3_t2_delta = t3.raw - t2.raw
explicit = explicit or (t3_t2_delta != 0)
values.append(t3_t2_delta)
if explicit:
form += '{1:+x}'
if t3 is not None:
form += '{2:+x}'
return form.format(*values)
def decode_timestamps(encoded, explicit=False):
"""
Parses a string of the form generated by encode_timestamps and returns
a tuple of the three component timestamps. If explicit is False, component
timestamps that are not explicitly encoded will be assumed to have zero
delta from the previous component and therefore take the value of the
previous component. If explicit is True, component timestamps that are
not explicitly encoded will be returned with value None.
"""
# TODO: some tests, e.g. in test_replicator, put float timestamps values
# into container db's, hence this defensive check, but in real world
# this may never happen.
if not isinstance(encoded, basestring):
ts = Timestamp(encoded)
return ts, ts, ts
parts = []
signs = []
pos_parts = encoded.split('+')
for part in pos_parts:
# parse time components and their signs
# e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1]
neg_parts = part.split('-')
parts = parts + neg_parts
signs = signs + [1] + [-1] * (len(neg_parts) - 1)
t1 = Timestamp(parts[0])
t2 = t3 = None
if len(parts) > 1:
t2 = t1
delta = signs[1] * int(parts[1], 16)
# if delta = 0 we want t2 = t3 = t1 in order to
# preserve any offset in t1 - only construct a distinct
# timestamp if there is a non-zero delta.
if delta:
t2 = Timestamp((t1.raw + delta) * PRECISION)
elif not explicit:
t2 = t1
if len(parts) > 2:
t3 = t2
delta = signs[2] * int(parts[2], 16)
if delta:
t3 = Timestamp((t2.raw + delta) * PRECISION)
elif not explicit:
t3 = t2
return t1, t2, t3
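As an illustration of the encoding and its inverse (not part of the patch; the import assumes this patched swift tree is on the path and the timestamp values are arbitrary examples):

from swift.common.utils import Timestamp, encode_timestamps, decode_timestamps

t_data = Timestamp(1440000000.00000)   # PUT time of the .data file
t_ctype = Timestamp(1440000001.00000)  # later POST that changed the content-type
t_meta = Timestamp(1440000002.00000)   # latest POST that changed user metadata

encoded = encode_timestamps(t_data, t_ctype, t_meta)
print(encoded)
# expect something like '1440000000.00000+186a0+186a0' (hex deltas in raw,
# i.e. 10-microsecond, units)
print(decode_timestamps(encoded))
# recovers three Timestamp instances equal to t_data, t_ctype and t_meta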
def normalize_timestamp(timestamp):
"""
Format a timestamp (string or numeric) into a standardized
@@ -3357,6 +3459,25 @@ def parse_content_type(content_type):
return content_type, parm_list
def extract_swift_bytes(content_type):
"""
Parse a content-type and return a tuple containing:
- the content_type string minus any swift_bytes param,
- the swift_bytes value or None if the param was not found
:param content_type: a content-type string
:return: a tuple of (content-type, swift_bytes or None)
"""
content_type, params = parse_content_type(content_type)
swift_bytes = None
for k, v in params:
if k == 'swift_bytes':
swift_bytes = v
else:
content_type += ';%s=%s' % (k, v)
return content_type, swift_bytes
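A quick usage sketch for the helper above (values are examples; assumes the patched swift.common.utils is importable):

from swift.common.utils import extract_swift_bytes

print(extract_swift_bytes('text/plain; charset=utf-8; swift_bytes=1024'))
# expected: ('text/plain;charset=utf-8', '1024')
print(extract_swift_bytes('image/jpeg'))
# expected: ('image/jpeg', None)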
def override_bytes_from_content_type(listing_dict, logger=None):
"""
Takes a dict from a container listing and overrides the content_type,

@@ -25,7 +25,8 @@ import six.moves.cPickle as pickle
from six.moves import range
import sqlite3
from swift.common.utils import Timestamp
from swift.common.utils import Timestamp, encode_timestamps, decode_timestamps, \
extract_swift_bytes
from swift.common.db import DatabaseBroker, utf8encode
@@ -137,6 +138,90 @@ CONTAINER_STAT_VIEW_SCRIPT = '''
'''
def update_new_item_from_existing(new_item, existing):
"""
Compare the data and meta related timestamps of a new object item with
the timestamps of an existing object record, and update the new item
with data and/or meta related attributes from the existing record if
their timestamps are newer.
The multiple timestamps are encoded into a single string for storing
in the 'created_at' column of the objects db table.
:param new_item: A dict of object update attributes
:param existing: A dict of existing object attributes
:return: True if any attributes of the new item dict were found to be
newer than the existing and therefore not updated, otherwise
False implying that the updated item is equal to the existing.
"""
# item[created_at] may be updated so keep a copy of the original
# value in case we process this item again
new_item.setdefault('data_timestamp', new_item['created_at'])
# content-type and metadata timestamps may be encoded in
# item[created_at], or may be set explicitly.
item_ts_data, item_ts_ctype, item_ts_meta = decode_timestamps(
new_item['data_timestamp'])
if new_item.get('ctype_timestamp'):
item_ts_ctype = Timestamp(new_item.get('ctype_timestamp'))
item_ts_meta = item_ts_ctype
if new_item.get('meta_timestamp'):
item_ts_meta = Timestamp(new_item.get('meta_timestamp'))
if not existing:
# encode new_item timestamps into one string for db record
new_item['created_at'] = encode_timestamps(
item_ts_data, item_ts_ctype, item_ts_meta)
return True
# decode existing timestamp into separate data, content-type and
# metadata timestamps
rec_ts_data, rec_ts_ctype, rec_ts_meta = decode_timestamps(
existing['created_at'])
# Extract any swift_bytes values from the content_type values. This is
# necessary because the swift_bytes value to persist should be that at the
# most recent data timestamp whereas the content-type value to persist is
# that at the most recent content-type timestamp. The two values happen to
# be stored in the same database column for historical reasons.
for item in (new_item, existing):
content_type, swift_bytes = extract_swift_bytes(item['content_type'])
item['content_type'] = content_type
item['swift_bytes'] = swift_bytes
newer_than_existing = [True, True, True]
if rec_ts_data >= item_ts_data:
# apply data attributes from existing record
new_item.update([(k, existing[k])
for k in ('size', 'etag', 'deleted', 'swift_bytes')])
item_ts_data = rec_ts_data
newer_than_existing[0] = False
if rec_ts_ctype >= item_ts_ctype:
# apply content-type attribute from existing record
new_item['content_type'] = existing['content_type']
item_ts_ctype = rec_ts_ctype
newer_than_existing[1] = False
if rec_ts_meta >= item_ts_meta:
# apply metadata timestamp from existing record
item_ts_meta = rec_ts_meta
newer_than_existing[2] = False
# encode updated timestamps into one string for db record
new_item['created_at'] = encode_timestamps(
item_ts_data, item_ts_ctype, item_ts_meta)
# append the most recent swift_bytes onto the most recent content_type in
# new_item and restore existing to its original state
for item in (new_item, existing):
if item['swift_bytes']:
item['content_type'] += ';swift_bytes=%s' % item['swift_bytes']
del item['swift_bytes']
return any(newer_than_existing)
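A minimal sketch of how the broker uses this when merging an update that carries only a newer content-type (illustrative values; assumes this module is importable as swift.container.backend):

from swift.container.backend import update_new_item_from_existing

existing = {'name': 'o', 'created_at': '1440000000.00000', 'size': 1024,
            'etag': 'd41d8cd98f00b204e9800998ecf8427e', 'deleted': 0,
            'content_type': 'text/plain'}
new_item = {'name': 'o', 'created_at': '1440000000.00000', 'size': 0,
            'etag': None, 'deleted': 0, 'content_type': 'image/jpeg',
            'ctype_timestamp': '1440000001.00000',
            'meta_timestamp': '1440000002.00000'}
changed = update_new_item_from_existing(new_item, existing)
# changed is True; new_item keeps the existing size/etag but takes the new
# content-type, and new_item['created_at'] becomes an encoded triple of the
# form '1440000000.00000+186a0+186a0'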
class ContainerBroker(DatabaseBroker):
"""Encapsulates working with a container database."""
db_type = 'container'
@@ -284,13 +369,20 @@ class ContainerBroker(DatabaseBroker):
storage_policy_index = data[6]
else:
storage_policy_index = 0
content_type_timestamp = meta_timestamp = None
if len(data) > 7:
content_type_timestamp = data[7]
if len(data) > 8:
meta_timestamp = data[8]
item_list.append({'name': name,
'created_at': timestamp,
'size': size,
'content_type': content_type,
'etag': etag,
'deleted': deleted,
'storage_policy_index': storage_policy_index})
'storage_policy_index': storage_policy_index,
'ctype_timestamp': content_type_timestamp,
'meta_timestamp': meta_timestamp})
def empty(self):
"""
@@ -325,10 +417,13 @@ class ContainerBroker(DatabaseBroker):
def make_tuple_for_pickle(self, record):
return (record['name'], record['created_at'], record['size'],
record['content_type'], record['etag'], record['deleted'],
record['storage_policy_index'])
record['storage_policy_index'],
record['ctype_timestamp'],
record['meta_timestamp'])
def put_object(self, name, timestamp, size, content_type, etag, deleted=0,
storage_policy_index=0):
storage_policy_index=0, ctype_timestamp=None,
meta_timestamp=None):
"""
Creates an object in the DB with its metadata.
@@ -340,11 +435,16 @@ class ContainerBroker(DatabaseBroker):
:param deleted: if True, marks the object as deleted and sets the
deleted_at timestamp to timestamp
:param storage_policy_index: the storage policy index for the object
:param ctype_timestamp: timestamp of when content_type was last
updated
:param meta_timestamp: timestamp of when metadata was last updated
"""
record = {'name': name, 'created_at': timestamp, 'size': size,
'content_type': content_type, 'etag': etag,
'deleted': deleted,
'storage_policy_index': storage_policy_index}
'storage_policy_index': storage_policy_index,
'ctype_timestamp': ctype_timestamp,
'meta_timestamp': meta_timestamp}
self.put_record(record)
def _is_deleted_info(self, object_count, put_timestamp, delete_timestamp,
@@ -647,7 +747,7 @@ class ContainerBroker(DatabaseBroker):
# is no delimiter then we can simply return the result as
# prefixes are now handled in the SQL statement.
if prefix is None or not delimiter:
return [r for r in curs]
return [self._transform_record(r) for r in curs]
# We have a delimiter and a prefix (possibly empty string) to
# handle
@@ -686,18 +786,35 @@ class ContainerBroker(DatabaseBroker):
results.append([dir_name, '0', 0, None, ''])
curs.close()
break
results.append(row)
results.append(self._transform_record(row))
if not rowcount:
break
return results
def _transform_record(self, record):
"""
Decode the created_at timestamp into separate data, content-type and
meta timestamps and replace the created_at timestamp with the
metadata timestamp i.e. the last-modified time.
"""
t_data, t_ctype, t_meta = decode_timestamps(record[1])
return (record[0], t_meta.internal) + record[2:]
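For example, a row whose created_at encodes data, content-type and metadata times of T, T+1s and T+2s is reported in listings with the T+2s value as its last-modified time (illustrative sketch mirroring the transform above, using the helper from swift.common.utils):

from swift.common.utils import decode_timestamps

row = ('o', '1440000000.00000+186a0+186a0', 1024, 'text/plain', 'etag', 0)
t_data, t_ctype, t_meta = decode_timestamps(row[1])
listing_row = (row[0], t_meta.internal) + row[2:]
print(listing_row[1])  # roughly '1440000002.00000'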
def _record_to_dict(self, rec):
if rec:
keys = ('name', 'created_at', 'size', 'content_type', 'etag',
'deleted', 'storage_policy_index')
return dict(zip(keys, rec))
return None
def merge_items(self, item_list, source=None):
"""
Merge items into the object table.
:param item_list: list of dictionaries of {'name', 'created_at',
'size', 'content_type', 'etag', 'deleted',
'storage_policy_index'}
'storage_policy_index', 'ctype_timestamp',
'meta_timestamp'}
:param source: if defined, update incoming_sync with the source
"""
for item in item_list:
@@ -711,15 +828,16 @@ class ContainerBroker(DatabaseBroker):
else:
query_mod = ''
curs.execute('BEGIN IMMEDIATE')
# Get created_at times for objects in item_list that already exist.
# Get sqlite records for objects in item_list that already exist.
# We must chunk it up to avoid sqlite's limit of 999 args.
created_at = {}
records = {}
for offset in range(0, len(item_list), SQLITE_ARG_LIMIT):
chunk = [rec['name'] for rec in
item_list[offset:offset + SQLITE_ARG_LIMIT]]
created_at.update(
records.update(
((rec[0], rec[1]), rec[2]) for rec in curs.execute(
((rec[0], rec[6]), rec) for rec in curs.execute(
'SELECT name, storage_policy_index, created_at '
'SELECT name, created_at, size, content_type,'
'etag, deleted, storage_policy_index '
'FROM object WHERE ' + query_mod + ' name IN (%s)' %
','.join('?' * len(chunk)), chunk))
# Sort item_list into things that need adding and deleting, based
@@ -729,14 +847,13 @@ class ContainerBroker(DatabaseBroker):
for item in item_list:
item.setdefault('storage_policy_index', 0) # legacy
item_ident = (item['name'], item['storage_policy_index'])
if created_at.get(item_ident) < item['created_at']:
existing = self._record_to_dict(records.get(item_ident))
if item_ident in created_at: # exists with older timestamp
if update_new_item_from_existing(item, existing):
if item_ident in records: # exists with older timestamp
to_delete[item_ident] = item
if item_ident in to_add: # duplicate entries in item_list
to_add[item_ident] = max(item, to_add[item_ident],
update_new_item_from_existing(item, to_add[item_ident])
key=lambda i: i['created_at'])
to_add[item_ident] = item
else:
to_add[item_ident] = item
if to_delete:
curs.executemany(
'DELETE FROM object WHERE ' + query_mod +

@@ -27,8 +27,7 @@ from swift.common.direct_client import (
from swift.common.internal_client import InternalClient, UnexpectedResponse
from swift.common.utils import get_logger, split_path, quorum_size, \
FileLikeIter, Timestamp, last_modified_date_to_timestamp, \
LRUCache
LRUCache, decode_timestamps
MISPLACED_OBJECTS_ACCOUNT = '.misplaced_objects'
MISPLACED_OBJECTS_CONTAINER_DIVISOR = 3600 # 1 hour
@@ -116,7 +115,18 @@ def best_policy_index(headers):
def get_reconciler_container_name(obj_timestamp):
return str(int(Timestamp(obj_timestamp)) //
"""
Get the name of a container into which a misplaced object should be
enqueued. The name is the object's last modified time rounded down to the
nearest hour.
:param obj_timestamp: a string representation of the object's 'created_at'
time from its container db row.
:return: a container name
"""
# Use last modified time of object to determine reconciler container name
_junk, _junk, ts_meta = decode_timestamps(obj_timestamp)
return str(int(ts_meta) //
MISPLACED_OBJECTS_CONTAINER_DIVISOR *
MISPLACED_OBJECTS_CONTAINER_DIVISOR)
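A brief sketch of the effect (assuming this module is swift.container.reconciler; the argument is an example row 'created_at' whose metadata time is two seconds after the data time):

from swift.container.reconciler import get_reconciler_container_name

print(get_reconciler_container_name('1440000000.00000+186a0+186a0'))
# expected: '1440000000', i.e. the last-modified time rounded down to a
# 3600-second boundary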
@ -262,7 +272,7 @@ def parse_raw_obj(obj_info):
'container': container, 'container': container,
'obj': obj, 'obj': obj,
'q_op': q_op, 'q_op': q_op,
'q_ts': Timestamp(obj_info['hash']), 'q_ts': decode_timestamps((obj_info['hash']))[0],
'q_record': last_modified_date_to_timestamp( 'q_record': last_modified_date_to_timestamp(
obj_info['last_modified']), obj_info['last_modified']),
'path': '/%s/%s/%s' % (account, container, obj) 'path': '/%s/%s/%s' % (account, container, obj)

@@ -368,7 +368,9 @@ class ContainerController(BaseStorageServer):
int(req.headers['x-size']),
req.headers['x-content-type'],
req.headers['x-etag'], 0,
obj_policy_index)
obj_policy_index,
req.headers.get('x-content-type-timestamp'),
req.headers.get('x-meta-timestamp'))
return HTTPCreated(request=req)
else: # put container
if requested_policy_index is None:

@@ -36,7 +36,7 @@ from swift.common.ring.utils import is_local_device
from swift.common.utils import (
clean_content_type, config_true_value,
FileLikeIter, get_logger, hash_path, quote, urlparse, validate_sync_to,
whataremyips, Timestamp)
whataremyips, Timestamp, decode_timestamps)
from swift.common.daemon import Daemon
from swift.common.http import HTTP_UNAUTHORIZED, HTTP_NOT_FOUND
from swift.common.storage_policy import POLICIES
@ -431,9 +431,14 @@ class ContainerSync(Daemon):
""" """
try: try:
start_time = time() start_time = time()
# extract last modified time from the created_at value
ts_data, ts_ctype, ts_meta = decode_timestamps(
row['created_at'])
if row['deleted']: if row['deleted']:
# when sync'ing a deleted object, use ts_data - this is the
# timestamp of the source tombstone
try: try:
headers = {'x-timestamp': row['created_at']} headers = {'x-timestamp': ts_data.internal}
if realm and realm_key: if realm and realm_key:
nonce = uuid.uuid4().hex nonce = uuid.uuid4().hex
path = urlparse(sync_to).path + '/' + quote( path = urlparse(sync_to).path + '/' + quote(
@ -456,13 +461,14 @@ class ContainerSync(Daemon):
self.logger.increment('deletes') self.logger.increment('deletes')
self.logger.timing_since('deletes.timing', start_time) self.logger.timing_since('deletes.timing', start_time)
else: else:
# when sync'ing a live object, use ts_meta - this is the time
# at which the source object was last modified by a PUT or POST
part, nodes = \ part, nodes = \
self.get_object_ring(info['storage_policy_index']). \ self.get_object_ring(info['storage_policy_index']). \
get_nodes(info['account'], info['container'], get_nodes(info['account'], info['container'],
row['name']) row['name'])
shuffle(nodes) shuffle(nodes)
exc = None exc = None
looking_for_timestamp = Timestamp(row['created_at'])
# look up for the newest one # look up for the newest one
headers_out = {'X-Newest': True, headers_out = {'X-Newest': True,
'X-Backend-Storage-Policy-Index': 'X-Backend-Storage-Policy-Index':
@ -479,7 +485,7 @@ class ContainerSync(Daemon):
body = None body = None
exc = err exc = err
timestamp = Timestamp(headers.get('x-timestamp', 0)) timestamp = Timestamp(headers.get('x-timestamp', 0))
if timestamp < looking_for_timestamp: if timestamp < ts_meta:
if exc: if exc:
raise exc raise exc
raise Exception( raise Exception(

@ -56,7 +56,7 @@ from swift.common.utils import mkdirs, Timestamp, \
storage_directory, hash_path, renamer, fallocate, fsync, fdatasync, \ storage_directory, hash_path, renamer, fallocate, fsync, fdatasync, \
fsync_dir, drop_buffer_cache, ThreadPool, lock_path, write_pickle, \ fsync_dir, drop_buffer_cache, ThreadPool, lock_path, write_pickle, \
config_true_value, listdir, split_path, ismount, remove_file, \ config_true_value, listdir, split_path, ismount, remove_file, \
get_md5_socket, F_SETPIPE_SZ get_md5_socket, F_SETPIPE_SZ, decode_timestamps, encode_timestamps
from swift.common.splice import splice, tee from swift.common.splice import splice, tee
from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist, \ from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist, \
DiskFileCollision, DiskFileNoSpace, DiskFileDeviceUnavailable, \ DiskFileCollision, DiskFileNoSpace, DiskFileDeviceUnavailable, \
@@ -76,7 +76,7 @@ METADATA_KEY = 'user.swift.metadata'
DROP_CACHE_WINDOW = 1024 * 1024
# These are system-set metadata keys that cannot be changed with a POST.
# They should be lowercase.
DATAFILE_SYSTEM_META = set('content-length content-type deleted etag'.split())
DATAFILE_SYSTEM_META = set('content-length deleted etag'.split())
DATADIR_BASE = 'objects'
ASYNCDIR_BASE = 'async_pending'
TMP_BASE = 'tmp'
@@ -442,23 +442,78 @@ class BaseDiskFileManager(object):
max_pipe_size = int(f.read())
self.pipe_size = min(max_pipe_size, self.disk_chunk_size)
def make_on_disk_filename(self, timestamp, ext=None,
ctype_timestamp=None, *a, **kw):
"""
Returns filename for given timestamp.
:param timestamp: the object timestamp, an instance of
:class:`~swift.common.utils.Timestamp`
:param ext: an optional string representing a file extension to be
appended to the returned file name
:param ctype_timestamp: an optional content-type timestamp, an instance
of :class:`~swift.common.utils.Timestamp`
:returns: a file name
"""
rv = timestamp.internal
if ext == '.meta' and ctype_timestamp:
# If ctype_timestamp is None then the filename is simply the
# internal form of the timestamp. If ctype_timestamp is not None
# then the difference between the raw values of the two timestamps
# is appended as a hex number, with its sign.
#
# There are two reasons for encoding the content-type timestamp
# in the filename in this way. First, it means that two .meta files
# having the same timestamp but different content-type timestamps
# (and potentially different content-type values) will be distinct
# and therefore will be independently replicated when rsync
# replication is used. That ensures that all nodes end up having
# all content-type values after replication (with the most recent
# value being selected when the diskfile is opened). Second, having
# the content-type timestamp encoded in the filename makes it
# possible for the on disk file search code to determine that
# timestamp by inspecting only the filename, and not needing to
# open the file and read its xattrs.
rv = encode_timestamps(timestamp, ctype_timestamp, explicit=True)
if ext:
rv = '%s%s' % (rv, ext)
return rv
def parse_on_disk_filename(self, filename):
"""
Parse an on disk file name.
:param filename: the data file name including extension
:param filename: the file name including extension
:returns: a dict, with keys for timestamp, and ext:
:returns: a dict, with keys for timestamp, ext and ctype_timestamp:
* timestamp is a :class:`~swift.common.utils.Timestamp`
* ctype_timestamp is a :class:`~swift.common.utils.Timestamp` or
None for .meta files, otherwise None
* ext is a string, the file extension including the leading dot or
the empty string if the filename has no extension.
Subclases may add further keys to the returned dict.
Subclasses may override this method to add further keys to the
returned dict.
:raises DiskFileError: if any part of the filename is not able to be
validated.
"""
raise NotImplementedError
ts_ctype = None
fname, ext = splitext(filename)
try:
if ext == '.meta':
timestamp, ts_ctype = decode_timestamps(
fname, explicit=True)[:2]
else:
timestamp = Timestamp(fname)
except ValueError:
raise DiskFileError('Invalid Timestamp value in filename %r'
% filename)
return {
'timestamp': timestamp,
'ext': ext,
'ctype_timestamp': ts_ctype
}
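To illustrate the naming scheme: a .meta file written by a POST at T+2s that carries a content-type last set by an earlier POST at T+1s gets both times encoded in its name. make_on_disk_filename() uses the same encode_timestamps() helper with explicit=True, so a sketch of the round trip (values are arbitrary examples) is:

from swift.common.utils import Timestamp, encode_timestamps

t_meta = Timestamp(1440000002.00000)   # time of the latest POST
t_ctype = Timestamp(1440000001.00000)  # earlier POST that set the content-type
name = encode_timestamps(t_meta, t_ctype, explicit=True) + '.meta'
print(name)
# expect something like '1440000002.00000-186a0.meta'; parse_on_disk_filename()
# recovers {'timestamp': t_meta, 'ctype_timestamp': t_ctype, 'ext': '.meta'}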
def _process_ondisk_files(self, exts, results, **kwargs): def _process_ondisk_files(self, exts, results, **kwargs):
""" """
@ -592,18 +647,45 @@ class BaseDiskFileManager(object):
# the results dict is used to collect results of file filtering # the results dict is used to collect results of file filtering
results = {} results = {}
# non-tombstones older than or equal to latest tombstone are obsolete
if exts.get('.ts'): if exts.get('.ts'):
# non-tombstones older than or equal to latest tombstone are
# obsolete
for ext in filter(lambda ext: ext != '.ts', exts.keys()): for ext in filter(lambda ext: ext != '.ts', exts.keys()):
exts[ext], older = self._split_gt_timestamp( exts[ext], older = self._split_gt_timestamp(
exts[ext], exts['.ts'][0]['timestamp']) exts[ext], exts['.ts'][0]['timestamp'])
results.setdefault('obsolete', []).extend(older) results.setdefault('obsolete', []).extend(older)
# all but most recent .ts are obsolete
results.setdefault('obsolete', []).extend(exts['.ts'][1:])
exts['.ts'] = exts['.ts'][:1]
# all but most recent .meta and .ts are obsolete if exts.get('.meta'):
for ext in ('.meta', '.ts'): # retain the newest meta file
if ext in exts: retain = 1
results.setdefault('obsolete', []).extend(exts[ext][1:]) if exts['.meta'][1:]:
exts[ext] = exts[ext][:1] # there are other meta files so find the one with newest
# ctype_timestamp...
exts['.meta'][1:] = sorted(
exts['.meta'][1:],
key=lambda info: info['ctype_timestamp'],
reverse=True)
# ...and retain this IFF its ctype_timestamp is greater than
# that of the newest meta file
if (exts['.meta'][1]['ctype_timestamp'] >
exts['.meta'][0]['ctype_timestamp']):
if (exts['.meta'][1]['timestamp'] ==
exts['.meta'][0]['timestamp']):
# both at same timestamp so retain only the one with
# newest ctype
exts['.meta'][:2] = [exts['.meta'][1],
exts['.meta'][0]]
retain = 1
else:
# retain both - first has newest metadata, second has
# newest ctype
retain = 2
# discard all meta files not being retained...
results.setdefault('obsolete', []).extend(exts['.meta'][retain:])
exts['.meta'] = exts['.meta'][:retain]
# delegate to subclass handler # delegate to subclass handler
self._process_ondisk_files(exts, results, **kwargs) self._process_ondisk_files(exts, results, **kwargs)
@ -612,11 +694,16 @@ class BaseDiskFileManager(object):
if exts.get('.ts'): if exts.get('.ts'):
results['ts_info'] = exts['.ts'][0] results['ts_info'] = exts['.ts'][0]
if 'data_info' in results and exts.get('.meta'): if 'data_info' in results and exts.get('.meta'):
# only report a meta file if there is a data file # only report meta files if there is a data file
results['meta_info'] = exts['.meta'][0] results['meta_info'] = exts['.meta'][0]
ctype_info = exts['.meta'].pop()
if (ctype_info['ctype_timestamp']
> results['data_info']['timestamp']):
results['ctype_info'] = ctype_info
# set ts_file, data_file and meta_file with path to chosen file or None # set ts_file, data_file, meta_file and ctype_file with path to
for info_key in ('data_info', 'meta_info', 'ts_info'): # chosen file or None
for info_key in ('data_info', 'meta_info', 'ts_info', 'ctype_info'):
info = results.get(info_key) info = results.get(info_key)
key = info_key[:-5] + '_file' key = info_key[:-5] + '_file'
results[key] = join(datadir, info['filename']) if info else None results[key] = join(datadir, info['filename']) if info else None
@ -742,6 +829,16 @@ class BaseDiskFileManager(object):
# delegate to subclass for data file related updates... # delegate to subclass for data file related updates...
self._update_suffix_hashes(hashes, ondisk_info) self._update_suffix_hashes(hashes, ondisk_info)
if 'ctype_info' in ondisk_info:
# We have a distinct content-type timestamp so update the
# hash. As a precaution, append '_ctype' to differentiate this
# value from any other timestamp value that might be included in
# the hash in future. There is no .ctype file so use _ctype to
# avoid any confusion.
info = ondisk_info['ctype_info']
hashes[None].update(info['ctype_timestamp'].internal
+ '_ctype')
try: try:
os.rmdir(path) os.rmdir(path)
except OSError as e: except OSError as e:
@ -1066,6 +1163,8 @@ class BaseDiskFileManager(object):
ts_data -> timestamp of data or tombstone file, ts_data -> timestamp of data or tombstone file,
ts_meta -> timestamp of meta file, if one exists ts_meta -> timestamp of meta file, if one exists
ts_ctype -> timestamp of meta file containing most recent
content-type value, if one exists
where timestamps are instances of where timestamps are instances of
:class:`~swift.common.utils.Timestamp` :class:`~swift.common.utils.Timestamp`
@ -1088,9 +1187,10 @@ class BaseDiskFileManager(object):
(os.path.join(partition_path, suffix), suffix) (os.path.join(partition_path, suffix), suffix)
for suffix in suffixes) for suffix in suffixes)
key_preference = ( key_preference = (
('ts_meta', 'meta_info'), ('ts_meta', 'meta_info', 'timestamp'),
('ts_data', 'data_info'), ('ts_data', 'data_info', 'timestamp'),
('ts_data', 'ts_info'), ('ts_data', 'ts_info', 'timestamp'),
('ts_ctype', 'ctype_info', 'ctype_timestamp'),
) )
for suffix_path, suffix in suffixes: for suffix_path, suffix in suffixes:
for object_hash in self._listdir(suffix_path): for object_hash in self._listdir(suffix_path):
@ -1099,10 +1199,10 @@ class BaseDiskFileManager(object):
results = self.cleanup_ondisk_files( results = self.cleanup_ondisk_files(
object_path, self.reclaim_age, **kwargs) object_path, self.reclaim_age, **kwargs)
timestamps = {} timestamps = {}
for ts_key, info_key in key_preference: for ts_key, info_key, info_ts_key in key_preference:
if info_key not in results: if info_key not in results:
continue continue
timestamps[ts_key] = results[info_key]['timestamp'] timestamps[ts_key] = results[info_key][info_ts_key]
if 'ts_data' not in timestamps: if 'ts_data' not in timestamps:
# file sets that do not include a .data or .ts # file sets that do not include a .data or .ts
# file cannot be opened and therefore cannot # file cannot be opened and therefore cannot
@ -1226,6 +1326,34 @@ class BaseDiskFileWriter(object):
except OSError: except OSError:
logging.exception(_('Problem cleaning up %s'), self._datadir) logging.exception(_('Problem cleaning up %s'), self._datadir)
def _put(self, metadata, cleanup=True, *a, **kw):
"""
Helper method for subclasses.
For this implementation, this method is responsible for renaming the
temporary file to the final name and directory location. This method
should be called after the final call to
:func:`swift.obj.diskfile.DiskFileWriter.write`.
:param metadata: dictionary of metadata to be associated with the
object
:param cleanup: a Boolean. If True then obsolete files will be removed
from the object dir after the put completes, otherwise
obsolete files are left in place.
"""
timestamp = Timestamp(metadata['X-Timestamp'])
ctype_timestamp = metadata.get('Content-Type-Timestamp')
if ctype_timestamp:
ctype_timestamp = Timestamp(ctype_timestamp)
filename = self.manager.make_on_disk_filename(
timestamp, self._extension, ctype_timestamp=ctype_timestamp,
*a, **kw)
metadata['name'] = self._name
target_path = join(self._datadir, filename)
self._threadpool.force_run_in_thread(
self._finalize_put, metadata, target_path, cleanup)
def put(self, metadata): def put(self, metadata):
""" """
Finalize writing the file on disk. Finalize writing the file on disk.
@ -1682,6 +1810,20 @@ class BaseDiskFile(object):
def fragments(self): def fragments(self):
return None return None
@property
def content_type(self):
if self._metadata is None:
raise DiskFileNotOpen()
return self._metadata.get('Content-Type')
@property
def content_type_timestamp(self):
if self._metadata is None:
raise DiskFileNotOpen()
t = self._metadata.get('Content-Type-Timestamp',
self._datafile_metadata.get('X-Timestamp'))
return Timestamp(t)
@classmethod @classmethod
def from_hash_dir(cls, mgr, hash_dir_path, device_path, partition, policy): def from_hash_dir(cls, mgr, hash_dir_path, device_path, partition, policy):
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path, return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
@ -1920,14 +2062,36 @@ class BaseDiskFile(object):
quarantine_filename, quarantine_filename,
"Exception reading metadata: %s" % err) "Exception reading metadata: %s" % err)
def _construct_from_data_file(self, data_file, meta_file, **kwargs): def _merge_content_type_metadata(self, ctype_file):
"""
When a second .meta file is providing the most recent Content-Type
metadata then merge it into the metafile_metadata.
:param ctype_file: An on-disk .meta file
"""
ctypefile_metadata = self._failsafe_read_metadata(
ctype_file, ctype_file)
if ('Content-Type' in ctypefile_metadata
and (ctypefile_metadata.get('Content-Type-Timestamp') >
self._metafile_metadata.get('Content-Type-Timestamp'))
and (ctypefile_metadata.get('Content-Type-Timestamp') >
self.data_timestamp)):
self._metafile_metadata['Content-Type'] = \
ctypefile_metadata['Content-Type']
self._metafile_metadata['Content-Type-Timestamp'] = \
ctypefile_metadata.get('Content-Type-Timestamp')
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
**kwargs):
""" """
Open the `.data` file to fetch its metadata, and fetch the metadata Open the `.data` file to fetch its metadata, and fetch the metadata
from the fast-POST `.meta` file as well if it exists, merging them from fast-POST `.meta` files as well if any exist, merging them
properly. properly.
:param data_file: on-disk `.data` file being considered :param data_file: on-disk `.data` file being considered
:param meta_file: on-disk fast-POST `.meta` file being considered :param meta_file: on-disk fast-POST `.meta` file being considered
:param ctype_file: on-disk fast-POST `.meta` file being considered that
contains content-type and content-type timestamp
:returns: an opened data file pointer :returns: an opened data file pointer
:raises DiskFileError: various exceptions from :raises DiskFileError: various exceptions from
:func:`swift.obj.diskfile.DiskFile._verify_data_file` :func:`swift.obj.diskfile.DiskFile._verify_data_file`
@ -1938,6 +2102,8 @@ class BaseDiskFile(object):
if meta_file: if meta_file:
self._metafile_metadata = self._failsafe_read_metadata( self._metafile_metadata = self._failsafe_read_metadata(
meta_file, meta_file) meta_file, meta_file)
if ctype_file and ctype_file != meta_file:
self._merge_content_type_metadata(ctype_file)
sys_metadata = dict( sys_metadata = dict(
[(key, val) for key, val in self._datafile_metadata.items() [(key, val) for key, val in self._datafile_metadata.items()
if key.lower() in DATAFILE_SYSTEM_META if key.lower() in DATAFILE_SYSTEM_META
@ -1946,6 +2112,14 @@ class BaseDiskFile(object):
self._metadata.update(sys_metadata) self._metadata.update(sys_metadata)
# diskfile writer added 'name' to metafile, so remove it here # diskfile writer added 'name' to metafile, so remove it here
self._metafile_metadata.pop('name', None) self._metafile_metadata.pop('name', None)
# TODO: the check for Content-Type is only here for tests that
# create .data files without Content-Type
if ('Content-Type' in self._datafile_metadata and
(self.data_timestamp >
self._metafile_metadata.get('Content-Type-Timestamp'))):
self._metadata['Content-Type'] = \
self._datafile_metadata['Content-Type']
self._metadata.pop('Content-Type-Timestamp', None)
else: else:
self._metadata.update(self._datafile_metadata) self._metadata.update(self._datafile_metadata)
if self._name is None: if self._name is None:
@ -2144,21 +2318,10 @@ class DiskFileWriter(BaseDiskFileWriter):
""" """
Finalize writing the file on disk. Finalize writing the file on disk.
For this implementation, this method is responsible for renaming the
temporary file to the final name and directory location. This method
should be called after the final call to
:func:`swift.obj.diskfile.DiskFileWriter.write`.
:param metadata: dictionary of metadata to be associated with the :param metadata: dictionary of metadata to be associated with the
object object
""" """
timestamp = Timestamp(metadata['X-Timestamp']).internal super(DiskFileWriter, self)._put(metadata, True)
metadata['name'] = self._name
target_path = join(self._datadir, timestamp + self._extension)
cleanup = True
self._threadpool.force_run_in_thread(
self._finalize_put, metadata, target_path, cleanup)
class DiskFile(BaseDiskFile): class DiskFile(BaseDiskFile):
@ -2174,31 +2337,6 @@ class DiskFile(BaseDiskFile):
class DiskFileManager(BaseDiskFileManager): class DiskFileManager(BaseDiskFileManager):
diskfile_cls = DiskFile diskfile_cls = DiskFile
def parse_on_disk_filename(self, filename):
"""
Returns the timestamp extracted .data file name.
:param filename: the data file name including extension
:returns: a dict, with keys for timestamp, and ext:
* timestamp is a :class:`~swift.common.utils.Timestamp`
* ext is a string, the file extension including the leading dot or
the empty string if the filename has no extension.
:raises DiskFileError: if any part of the filename is not able to be
validated.
"""
float_part, ext = splitext(filename)
try:
timestamp = Timestamp(float_part)
except ValueError:
raise DiskFileError('Invalid Timestamp value in filename %r'
% filename)
return {
'timestamp': timestamp,
'ext': ext,
}
def _process_ondisk_files(self, exts, results, **kwargs): def _process_ondisk_files(self, exts, results, **kwargs):
""" """
Implement replication policy specific handling of .data files. Implement replication policy specific handling of .data files.
@ -2303,12 +2441,10 @@ class ECDiskFileWriter(BaseDiskFileWriter):
def put(self, metadata): def put(self, metadata):
""" """
The only difference between this method and the replication policy The only difference between this method and the replication policy
DiskFileWriter method is the call into manager.make_on_disk_filename DiskFileWriter method is adding the frag index to the metadata.
to construct the data file name.
:param metadata: dictionary of metadata to be associated with object :param metadata: dictionary of metadata to be associated with object
""" """
timestamp = Timestamp(metadata['X-Timestamp'])
fi = None fi = None
cleanup = True cleanup = True
if self._extension == '.data': if self._extension == '.data':
@ -2320,13 +2456,7 @@ class ECDiskFileWriter(BaseDiskFileWriter):
self._diskfile._frag_index) self._diskfile._frag_index)
# defer cleanup until commit() writes .durable # defer cleanup until commit() writes .durable
cleanup = False cleanup = False
filename = self.manager.make_on_disk_filename( super(ECDiskFileWriter, self)._put(metadata, cleanup, frag_index=fi)
timestamp, self._extension, frag_index=fi)
metadata['name'] = self._name
target_path = join(self._datadir, filename)
self._threadpool.force_run_in_thread(
self._finalize_put, metadata, target_path, cleanup)
class ECDiskFile(BaseDiskFile): class ECDiskFile(BaseDiskFile):
@ -2436,7 +2566,7 @@ class ECDiskFileManager(BaseDiskFileManager):
return frag_index return frag_index
def make_on_disk_filename(self, timestamp, ext=None, frag_index=None, def make_on_disk_filename(self, timestamp, ext=None, frag_index=None,
*a, **kw): ctype_timestamp=None, *a, **kw):
""" """
Returns the EC specific filename for given timestamp. Returns the EC specific filename for given timestamp.
@ -2446,32 +2576,36 @@ class ECDiskFileManager(BaseDiskFileManager):
appended to the returned file name appended to the returned file name
:param frag_index: a fragment archive index, used with .data extension :param frag_index: a fragment archive index, used with .data extension
only, must be a whole number. only, must be a whole number.
:param ctype_timestamp: an optional content-type timestamp, an instance
of :class:`~swift.common.utils.Timestamp`
:returns: a file name :returns: a file name
:raises DiskFileError: if ext=='.data' and the kwarg frag_index is not :raises DiskFileError: if ext=='.data' and the kwarg frag_index is not
a whole number a whole number
""" """
rv = timestamp.internal
if ext == '.data': if ext == '.data':
# for datafiles only we encode the fragment index in the filename # for datafiles only we encode the fragment index in the filename
# to allow archives of different indexes to temporarily be stored # to allow archives of different indexes to temporarily be stored
# on the same node in certain situations # on the same node in certain situations
frag_index = self.validate_fragment_index(frag_index) frag_index = self.validate_fragment_index(frag_index)
rv += '#' + str(frag_index) rv = timestamp.internal + '#' + str(frag_index)
if ext: return '%s%s' % (rv, ext or '')
rv = '%s%s' % (rv, ext) return super(ECDiskFileManager, self).make_on_disk_filename(
return rv timestamp, ext, ctype_timestamp, *a, **kw)
def parse_on_disk_filename(self, filename): def parse_on_disk_filename(self, filename):
""" """
Returns the timestamp extracted from a policy specific .data file name. Returns timestamp(s) and other info extracted from a policy specific
For EC policy the data file name includes a fragment index which must file name. For EC policy the data file name includes a fragment index
be stripped off to retrieve the timestamp. which must be stripped off to retrieve the timestamp.
:param filename: the data file name including extension :param filename: the file name including extension
:returns: a dict, with keys for timestamp, frag_index, and ext: :returns: a dict, with keys for timestamp, frag_index, ext and
ctype_timestamp:
* timestamp is a :class:`~swift.common.utils.Timestamp` * timestamp is a :class:`~swift.common.utils.Timestamp`
* frag_index is an int or None * frag_index is an int or None
* ctype_timestamp is a :class:`~swift.common.utils.Timestamp` or
None for .meta files, otherwise None
* ext is a string, the file extension including the leading dot or * ext is a string, the file extension including the leading dot or
the empty string if the filename has no extension. the empty string if the filename has no extension.
@ -2480,13 +2614,13 @@ class ECDiskFileManager(BaseDiskFileManager):
""" """
frag_index = None frag_index = None
float_frag, ext = splitext(filename) float_frag, ext = splitext(filename)
parts = float_frag.split('#', 1)
try:
timestamp = Timestamp(parts[0])
except ValueError:
raise DiskFileError('Invalid Timestamp value in filename %r'
% filename)
if ext == '.data': if ext == '.data':
parts = float_frag.split('#', 1)
try:
timestamp = Timestamp(parts[0])
except ValueError:
raise DiskFileError('Invalid Timestamp value in filename %r'
% filename)
# it is an error for an EC data file to not have a valid # it is an error for an EC data file to not have a valid
# fragment index # fragment index
try: try:
@ -2495,11 +2629,15 @@ class ECDiskFileManager(BaseDiskFileManager):
# expect validate_fragment_index raise DiskFileError # expect validate_fragment_index raise DiskFileError
pass pass
frag_index = self.validate_fragment_index(frag_index) frag_index = self.validate_fragment_index(frag_index)
return { return {
'timestamp': timestamp, 'timestamp': timestamp,
'frag_index': frag_index, 'frag_index': frag_index,
'ext': ext, 'ext': ext,
} 'ctype_timestamp': None
}
rv = super(ECDiskFileManager, self).parse_on_disk_filename(filename)
rv['frag_index'] = None
return rv
def _process_ondisk_files(self, exts, results, frag_index=None, **kwargs): def _process_ondisk_files(self, exts, results, frag_index=None, **kwargs):
""" """

@@ -443,3 +443,11 @@ class DiskFile(object):
data_timestamp = timestamp
durable_timestamp = timestamp
content_type_timestamp = timestamp
@property
def content_type(self):
if self._metadata is None:
raise DiskFileNotOpen()
return self._metadata.get('Content-Type')

@@ -33,7 +33,7 @@ from swift.common.utils import public, get_logger, \
config_true_value, timing_stats, replication, \
normalize_delete_at_timestamp, get_log_line, Timestamp, \
get_expirer_container, parse_mime_headers, \
iter_multipart_mime_documents
iter_multipart_mime_documents, extract_swift_bytes
from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_object_creation, \
valid_timestamp, check_utf8
@ -479,35 +479,103 @@ class ObjectController(BaseStorageServer):
except (DiskFileNotExist, DiskFileQuarantined): except (DiskFileNotExist, DiskFileQuarantined):
return HTTPNotFound(request=request) return HTTPNotFound(request=request)
orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0)) orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0))
if orig_timestamp >= req_timestamp: orig_ctype_timestamp = disk_file.content_type_timestamp
req_ctype_time = '0'
req_ctype = request.headers.get('Content-Type')
if req_ctype:
req_ctype_time = request.headers.get('Content-Type-Timestamp',
req_timestamp.internal)
req_ctype_timestamp = Timestamp(req_ctype_time)
if orig_timestamp >= req_timestamp \
and orig_ctype_timestamp >= req_ctype_timestamp:
return HTTPConflict( return HTTPConflict(
request=request, request=request,
headers={'X-Backend-Timestamp': orig_timestamp.internal}) headers={'X-Backend-Timestamp': orig_timestamp.internal})
metadata = {'X-Timestamp': req_timestamp.internal}
self._preserve_slo_manifest(metadata, orig_metadata) if req_timestamp > orig_timestamp:
metadata.update(val for val in request.headers.items() metadata = {'X-Timestamp': req_timestamp.internal}
if is_user_meta('object', val[0])) self._preserve_slo_manifest(metadata, orig_metadata)
headers_to_copy = ( metadata.update(val for val in request.headers.items()
request.headers.get( if is_user_meta('object', val[0]))
'X-Backend-Replication-Headers', '').split() + headers_to_copy = (
list(self.allowed_headers)) request.headers.get(
for header_key in headers_to_copy: 'X-Backend-Replication-Headers', '').split() +
if header_key in request.headers: list(self.allowed_headers))
header_caps = header_key.title() for header_key in headers_to_copy:
metadata[header_caps] = request.headers[header_key] if header_key in request.headers:
orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) header_caps = header_key.title()
if orig_delete_at != new_delete_at: metadata[header_caps] = request.headers[header_key]
if new_delete_at: orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0)
self.delete_at_update('PUT', new_delete_at, account, container, if orig_delete_at != new_delete_at:
obj, request, device, policy) if new_delete_at:
if orig_delete_at: self.delete_at_update(
self.delete_at_update('DELETE', orig_delete_at, account, 'PUT', new_delete_at, account, container, obj, request,
container, obj, request, device, device, policy)
policy) if orig_delete_at:
self.delete_at_update('DELETE', orig_delete_at, account,
container, obj, request, device,
policy)
else:
# preserve existing metadata, only content-type may be updated
metadata = dict(disk_file.get_metafile_metadata())
if req_ctype_timestamp > orig_ctype_timestamp:
# we have a new content-type, add to metadata and container update
content_type_headers = {
'Content-Type': request.headers['Content-Type'],
'Content-Type-Timestamp': req_ctype_timestamp.internal
}
metadata.update(content_type_headers)
else:
# send existing content-type with container update
content_type_headers = {
'Content-Type': disk_file.content_type,
'Content-Type-Timestamp': orig_ctype_timestamp.internal
}
if orig_ctype_timestamp != disk_file.data_timestamp:
# only add to metadata if it's not the datafile content-type
metadata.update(content_type_headers)
try: try:
disk_file.write_metadata(metadata) disk_file.write_metadata(metadata)
except (DiskFileXattrNotSupported, DiskFileNoSpace): except (DiskFileXattrNotSupported, DiskFileNoSpace):
return HTTPInsufficientStorage(drive=device, request=request) return HTTPInsufficientStorage(drive=device, request=request)
update_etag = orig_metadata['ETag']
if 'X-Object-Sysmeta-Ec-Etag' in orig_metadata:
# For EC policy, send X-Object-Sysmeta-Ec-Etag which is same as the
# X-Backend-Container-Update-Override-Etag value sent with the
# original PUT. We have to send Etag (and size etc) with a POST
# container update because the original PUT container update may
# have failed or be in async_pending.
update_etag = orig_metadata['X-Object-Sysmeta-Ec-Etag']
if (content_type_headers['Content-Type-Timestamp']
!= disk_file.data_timestamp):
# Current content-type is not from the datafile, but the datafile
# content-type may have a swift_bytes param that was appended by
# SLO and we must continue to send that with the container update.
# Do this (rather than use a separate header) for backwards
# compatibility because there may be 'legacy' container updates in
# async pending that have content-types with swift_bytes params, so
# we have to be able to handle those in container server anyway.
_, swift_bytes = extract_swift_bytes(
disk_file.get_datafile_metadata()['Content-Type'])
if swift_bytes:
content_type_headers['Content-Type'] += (';swift_bytes=%s'
% swift_bytes)
self.container_update(
'PUT', account, container, obj, request,
HeaderKeyDict({
'x-size': orig_metadata['Content-Length'],
'x-content-type': content_type_headers['Content-Type'],
'x-timestamp': disk_file.data_timestamp.internal,
'x-content-type-timestamp':
content_type_headers['Content-Type-Timestamp'],
'x-meta-timestamp': metadata['X-Timestamp'],
'x-etag': update_etag}),
device, policy)
return HTTPAccepted(request=request) return HTTPAccepted(request=request)
@public @public

@ -31,7 +31,7 @@ def decode_missing(line):
""" """
Parse a string of the form generated by Parse a string of the form generated by
:py:func:`~swift.obj.ssync_sender.encode_missing` and return a dict :py:func:`~swift.obj.ssync_sender.encode_missing` and return a dict
with keys ``object_hash``, ``ts_data``, ``ts_meta``. with keys ``object_hash``, ``ts_data``, ``ts_meta``, ``ts_ctype``.
The encoder for this line is The encoder for this line is
:py:func:`~swift.obj.ssync_sender.encode_missing` :py:func:`~swift.obj.ssync_sender.encode_missing`
@ -40,7 +40,8 @@ def decode_missing(line):
parts = line.split() parts = line.split()
result['object_hash'] = urllib.parse.unquote(parts[0]) result['object_hash'] = urllib.parse.unquote(parts[0])
t_data = urllib.parse.unquote(parts[1]) t_data = urllib.parse.unquote(parts[1])
result['ts_data'] = result['ts_meta'] = Timestamp(t_data) result['ts_data'] = Timestamp(t_data)
result['ts_meta'] = result['ts_ctype'] = result['ts_data']
if len(parts) > 2: if len(parts) > 2:
# allow for a comma separated list of k:v pairs to future-proof # allow for a comma separated list of k:v pairs to future-proof
subparts = urllib.parse.unquote(parts[2]).split(',') subparts = urllib.parse.unquote(parts[2]).split(',')
@ -48,6 +49,8 @@ def decode_missing(line):
k, v = item.split(':') k, v = item.split(':')
if k == 'm': if k == 'm':
result['ts_meta'] = Timestamp(t_data, delta=int(v, 16)) result['ts_meta'] = Timestamp(t_data, delta=int(v, 16))
elif k == 't':
result['ts_ctype'] = Timestamp(t_data, delta=int(v, 16))
return result return result
@ -71,6 +74,9 @@ def encode_wanted(remote, local):
want['data'] = True
if 'ts_meta' in local and remote['ts_meta'] > local['ts_meta']:
want['meta'] = True
if ('ts_ctype' in local and remote['ts_ctype'] > local['ts_ctype']
and remote['ts_ctype'] > remote['ts_data']):
want['meta'] = True
else:
# we got nothing, so we'll take whatever the remote has
want['data'] = True
@ -264,6 +270,7 @@ class Receiver(object):
return {
'ts_data': df.data_timestamp,
'ts_meta': df.timestamp,
'ts_ctype': df.content_type_timestamp,
}
def _check_missing(self, line):
@ -21,11 +21,12 @@ from swift.common import exceptions
from swift.common import http
def encode_missing(object_hash, ts_data, ts_meta=None):
def encode_missing(object_hash, ts_data, ts_meta=None, ts_ctype=None):
"""
Returns a string representing the object hash, its data file timestamp
and the delta forwards to its metafile timestamp, if non-zero, in the form:
``<hash> <timestamp> m:<hex delta>``
and the delta forwards to its metafile and content-type timestamps, if
non-zero, in the form:
``<hash> <ts_data> [m:<hex delta to ts_meta>[,t:<hex delta to ts_ctype>]]``
The decoder for this line is
:py:func:`~swift.obj.ssync_receiver.decode_missing`
@ -36,6 +37,9 @@ def encode_missing(object_hash, ts_data, ts_meta=None):
if ts_meta and ts_meta != ts_data:
delta = ts_meta.raw - ts_data.raw
msg = '%s m:%x' % (msg, delta)
if ts_ctype and ts_ctype != ts_data:
delta = ts_ctype.raw - ts_data.raw
msg = '%s,t:%x' % (msg, delta)
return msg
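As an illustration of the missing-check line format documented above, here is a standalone sketch (plain Python, not the Swift helpers; it assumes the deltas are hex counts of the Timestamp 1e-5 second resolution) of encoding and decoding a line that carries both the metafile and content-type deltas:

    # Illustrative sketch of '<hash> <ts_data> m:<hex delta>[,t:<hex delta>]'.
    RESOLUTION = 100000  # Timestamp values have 1e-5 second resolution

    def encode_line(object_hash, ts_data, ts_meta, ts_ctype):
        msg = '%s %.5f' % (object_hash, ts_data)
        if ts_meta != ts_data:
            msg += ' m:%x' % int(round((ts_meta - ts_data) * RESOLUTION))
            if ts_ctype != ts_data:
                msg += ',t:%x' % int(round((ts_ctype - ts_data) * RESOLUTION))
        return msg

    def decode_line(line):
        parts = line.split()
        ts_data = ts_meta = ts_ctype = float(parts[1])
        if len(parts) > 2:
            for item in parts[2].split(','):
                k, v = item.split(':')
                if k == 'm':
                    ts_meta = ts_data + int(v, 16) / float(RESOLUTION)
                elif k == 't':
                    ts_ctype = ts_data + int(v, 16) / float(RESOLUTION)
        return parts[0], ts_data, ts_meta, ts_ctype

    # encode_line('a83f...', 1000.0, 1001.0, 1000.5)
    # returns 'a83f... 1000.00000 m:186a0,t:c350'

The real sender and receiver quote the fields and work with Timestamp objects; this only shows the delta arithmetic.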
@ -2185,6 +2185,56 @@ class TestFile(Base):
info = file_item.info()
self.assertEqual(etag, info['etag'])
def test_POST(self):
# verify consistency between object and container listing metadata
file_name = Utils.create_name()
file_item = self.env.container.file(file_name)
file_item.content_type = 'text/foobar'
file_item.write_random(1024)
# sanity check
file_item = self.env.container.file(file_name)
file_item.initialize()
self.assertEqual('text/foobar', file_item.content_type)
self.assertEqual(1024, file_item.size)
etag = file_item.etag
# check container listing is consistent
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
if f_dict['name'] == file_name:
break
else:
self.fail('Failed to find file %r in listing' % file_name)
self.assertEqual(1024, f_dict['bytes'])
self.assertEqual('text/foobar', f_dict['content_type'])
self.assertEqual(etag, f_dict['hash'])
# now POST updated content-type to each file
file_item = self.env.container.file(file_name)
file_item.content_type = 'image/foobarbaz'
file_item.sync_metadata({'Test': 'blah'})
# sanity check object metadata
file_item = self.env.container.file(file_name)
file_item.initialize()
self.assertEqual(1024, file_item.size)
self.assertEqual('image/foobarbaz', file_item.content_type)
self.assertEqual(etag, file_item.etag)
self.assertIn('test', file_item.metadata)
# check for consistency between object and container listing
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
if f_dict['name'] == file_name:
break
else:
self.fail('Failed to find file %r in listing' % file_name)
self.assertEqual(1024, f_dict['bytes'])
self.assertEqual('image/foobarbaz', f_dict['content_type'])
self.assertEqual(etag, f_dict['hash'])
class TestFileUTF8(Base2, TestFile):
set_up = False
@ -2777,6 +2827,42 @@ class TestSlo(Base):
self.assertEqual('d', file_contents[-2])
self.assertEqual('e', file_contents[-1])
def test_slo_container_listing(self):
# the listing object size should equal the sum of the size of the
# segments, not the size of the manifest body
raise SkipTest('Only passes with object_post_as_copy=False')
file_item = self.env.container.file(Utils.create_name)
file_item.write(
json.dumps([self.env.seg_info['seg_a']]),
parms={'multipart-manifest': 'put'})
files = self.env.container.files(parms={'format': 'json'})
for f_dict in files:
if f_dict['name'] == file_item.name:
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual('application/octet-stream',
f_dict['content_type'])
break
else:
self.fail('Failed to find manifest file in container listing')
# now POST updated content-type file
file_item.content_type = 'image/jpeg'
file_item.sync_metadata({'X-Object-Meta-Test': 'blah'})
file_item.initialize()
self.assertEqual('image/jpeg', file_item.content_type) # sanity
# verify that the container listing is consistent with the file
files = self.env.container.files(parms={'format': 'json'})
for f_dict in files:
if f_dict['name'] == file_item.name:
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
break
else:
self.fail('Failed to find manifest file in container listing')
def test_slo_get_nested_manifest(self):
file_item = self.env.container.file('manifest-abcde-submanifest')
file_contents = file_item.read()
@ -17,6 +17,8 @@ from __future__ import print_function
import os
from subprocess import Popen, PIPE
import sys
from tempfile import mkdtemp
from textwrap import dedent
from time import sleep, time
from collections import defaultdict
import unittest
@ -25,8 +27,10 @@ from uuid import uuid4
from nose import SkipTest
from six.moves.http_client import HTTPConnection
import shutil
from swiftclient import get_auth, head_account
from swift.common import internal_client
from swift.obj.diskfile import get_data_dir
from swift.common.ring import Ring
from swift.common.utils import readconf, renamer, \
@ -430,6 +434,33 @@ class ProbeTest(unittest.TestCase):
else:
os.system('sudo mount %s' % device)
def make_internal_client(self, object_post_as_copy=True):
tempdir = mkdtemp()
try:
conf_path = os.path.join(tempdir, 'internal_client.conf')
conf_body = """
[DEFAULT]
swift_dir = /etc/swift
[pipeline:main]
pipeline = catch_errors cache proxy-server
[app:proxy-server]
use = egg:swift#proxy
object_post_as_copy = %s
[filter:cache]
use = egg:swift#memcache
[filter:catch_errors]
use = egg:swift#catch_errors
""" % object_post_as_copy
with open(conf_path, 'w') as f:
f.write(dedent(conf_body))
return internal_client.InternalClient(conf_path, 'test', 1)
finally:
shutil.rmtree(tempdir)
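Probe tests below use this helper to exercise POSTs through a proxy pipeline with a chosen object_post_as_copy setting; a typical usage, mirroring the container-sync test later in this change, is (sketch):

    int_client = self.make_internal_client(object_post_as_copy=False)
    int_client.set_object_metadata(self.account, source_container, object_name,
                                   {'Content-Type': 'image/jpeg',
                                    'X-Object-Meta-Test': 'post_value'})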
class ReplProbeTest(ProbeTest):
@ -81,20 +81,72 @@ class TestContainerSync(ReplProbeTest):
return source_container, dest_container
def test_sync(self):
def _test_sync(self, object_post_as_copy):
source_container, dest_container = self._setup_synced_containers()
# upload to source
object_name = 'object-%s' % uuid.uuid4()
put_headers = {'X-Object-Meta-Test': 'put_value'}
client.put_object(self.url, self.token, source_container, object_name,
'test-body')
'test-body', headers=put_headers)
# cycle container-sync
Manager(['container-sync']).once()
_junk, body = client.get_object(self.url, self.token,
resp_headers, body = client.get_object(self.url, self.token,
dest_container, object_name)
self.assertEqual(body, 'test-body')
self.assertIn('x-object-meta-test', resp_headers)
self.assertEqual('put_value', resp_headers['x-object-meta-test'])
# update metadata with a POST, using an internal client so we can
# vary the object_post_as_copy setting - first use post-as-copy
post_headers = {'Content-Type': 'image/jpeg',
'X-Object-Meta-Test': 'post_value'}
int_client = self.make_internal_client(
object_post_as_copy=object_post_as_copy)
int_client.set_object_metadata(self.account, source_container,
object_name, post_headers)
# sanity checks...
resp_headers = client.head_object(
self.url, self.token, source_container, object_name)
self.assertIn('x-object-meta-test', resp_headers)
self.assertEqual('post_value', resp_headers['x-object-meta-test'])
self.assertEqual('image/jpeg', resp_headers['content-type'])
# cycle container-sync
Manager(['container-sync']).once()
# verify that metadata changes were sync'd
resp_headers, body = client.get_object(self.url, self.token,
dest_container, object_name)
self.assertEqual(body, 'test-body')
self.assertIn('x-object-meta-test', resp_headers)
self.assertEqual('post_value', resp_headers['x-object-meta-test'])
self.assertEqual('image/jpeg', resp_headers['content-type'])
# delete the object
client.delete_object(
self.url, self.token, source_container, object_name)
with self.assertRaises(ClientException) as cm:
client.get_object(
self.url, self.token, source_container, object_name)
self.assertEqual(404, cm.exception.http_status) # sanity check
# cycle container-sync
Manager(['container-sync']).once()
# verify delete has been sync'd
with self.assertRaises(ClientException) as cm:
client.get_object(
self.url, self.token, dest_container, object_name)
self.assertEqual(404, cm.exception.http_status) # sanity check
def test_sync_with_post_as_copy(self):
self._test_sync(True)
def test_sync_with_fast_post(self):
self._test_sync(False)
def test_sync_lazy_skey(self):
# Create synced containers, but with no key at source
@ -14,19 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from io import StringIO
from tempfile import mkdtemp
from textwrap import dedent
import unittest
import os
import shutil
import uuid
from swift.common.direct_client import direct_get_suffix_hashes
from swift.common.exceptions import DiskFileDeleted
from swift.common.internal_client import UnexpectedResponse
from swift.container.backend import ContainerBroker
from swift.common import internal_client, utils
from swift.common import utils
from swiftclient import client
from swift.common.ring import Ring
from swift.common.utils import Timestamp, get_logger, hash_path
@ -48,40 +45,17 @@ class Test(ReplProbeTest):
self.brain = BrainSplitter(self.url, self.token, self.container_name,
self.object_name, 'object',
policy=self.policy)
self.tempdir = mkdtemp()
self.int_client = self.make_internal_client(object_post_as_copy=False)
conf_path = os.path.join(self.tempdir, 'internal_client.conf')
conf_body = """
[DEFAULT]
swift_dir = /etc/swift
[pipeline:main]
pipeline = catch_errors cache proxy-server
[app:proxy-server]
use = egg:swift#proxy
object_post_as_copy = false
[filter:cache]
use = egg:swift#memcache
[filter:catch_errors]
use = egg:swift#catch_errors
"""
with open(conf_path, 'w') as f:
f.write(dedent(conf_body))
self.int_client = internal_client.InternalClient(conf_path, 'test', 1)
def tearDown(self):
super(Test, self).tearDown()
shutil.rmtree(self.tempdir)
def _get_object_info(self, account, container, obj, number,
def _get_object_info(self, account, container, obj, number):
policy=None):
obj_conf = self.configs['object-server']
config_path = obj_conf[number]
options = utils.readconf(config_path, 'app:object-server')
swift_dir = options.get('swift_dir', '/etc/swift')
ring = POLICIES.get_object_ring(policy, swift_dir)
ring = POLICIES.get_object_ring(int(self.policy), swift_dir)
part, nodes = ring.get_nodes(account, container, obj)
for node in nodes:
# assumes one to one mapping
@ -92,7 +66,7 @@ class Test(ReplProbeTest):
return None
mgr = DiskFileManager(options, get_logger(options))
disk_file = mgr.get_diskfile(device, part, account, container, obj,
policy)
self.policy)
info = disk_file.read_metadata()
return info
@ -105,9 +79,7 @@ class Test(ReplProbeTest):
obj_info.append(info_i)
self.assertTrue(len(obj_info) > 1)
for other in obj_info[1:]:
self.assertEqual(obj_info[0], other,
self.assertDictEqual(obj_info[0], other)
'Object metadata mismatch: %s != %s'
% (obj_info[0], other))
def _assert_consistent_deleted_object(self):
for i in range(1, 5):
@ -275,6 +247,7 @@ class Test(ReplProbeTest):
self._assert_consistent_object_metadata()
self._assert_consistent_container_dbs()
self._assert_consistent_suffix_hashes()
def test_sysmeta_after_replication_with_subsequent_put(self):
sysmeta = {'x-object-sysmeta-foo': 'older'}
@ -332,9 +305,11 @@ class Test(ReplProbeTest):
for key in sysmeta2.keys():
self.assertTrue(key in metadata, key)
self.assertEqual(metadata[key], sysmeta2[key])
self.brain.start_handoff_half()
self._assert_consistent_object_metadata()
self._assert_consistent_container_dbs()
self._assert_consistent_suffix_hashes()
def test_sysmeta_after_replication_with_subsequent_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
@ -382,8 +357,11 @@ class Test(ReplProbeTest):
for key in expected.keys():
self.assertTrue(key in metadata, key)
self.assertEqual(metadata[key], expected[key])
self.brain.start_handoff_half()
self._assert_consistent_object_metadata()
self._assert_consistent_container_dbs()
self._assert_consistent_suffix_hashes()
def test_sysmeta_after_replication_with_prior_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
@ -433,8 +411,294 @@ class Test(ReplProbeTest):
self.assertEqual(metadata[key], sysmeta[key])
for key in usermeta:
self.assertFalse(key in metadata)
self.brain.start_handoff_half()
self._assert_consistent_object_metadata()
self._assert_consistent_container_dbs()
self._assert_consistent_suffix_hashes()
def test_post_ctype_replicated_when_previous_incomplete_puts(self):
# primary half handoff half
# ------------ ------------
# t0.data: ctype = foo
# t1.data: ctype = bar
# t2.meta: ctype = baz
#
# ...run replicator and expect...
#
# t1.data:
# t2.meta: ctype = baz
self.brain.put_container(policy_index=0)
# incomplete write to primary half
self.brain.stop_handoff_half()
self._put_object(headers={'Content-Type': 'foo'})
self.brain.start_handoff_half()
# handoff write
self.brain.stop_primary_half()
self._put_object(headers={'Content-Type': 'bar'})
self.brain.start_primary_half()
# content-type update to primary half
self.brain.stop_handoff_half()
self._post_object(headers={'Content-Type': 'baz'})
self.brain.start_handoff_half()
self.get_to_final_state()
# check object metadata
metadata = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
# check container listing metadata
container_metadata, objs = client.get_container(self.url, self.token,
self.container_name)
for obj in objs:
if obj['name'] == self.object_name:
break
expected = 'baz'
self.assertEqual(obj['content_type'], expected)
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_consistent_container_dbs()
self._assert_consistent_object_metadata()
self._assert_consistent_suffix_hashes()
def test_put_ctype_replicated_when_subsequent_post(self):
# primary half handoff half
# ------------ ------------
# t0.data: ctype = foo
# t1.data: ctype = bar
# t2.meta:
#
# ...run replicator and expect...
#
# t1.data: ctype = bar
# t2.meta:
self.brain.put_container(policy_index=0)
# incomplete write
self.brain.stop_handoff_half()
self._put_object(headers={'Content-Type': 'foo'})
self.brain.start_handoff_half()
# handoff write
self.brain.stop_primary_half()
self._put_object(headers={'Content-Type': 'bar'})
self.brain.start_primary_half()
# metadata update with newest data unavailable
self.brain.stop_handoff_half()
self._post_object(headers={'X-Object-Meta-Color': 'Blue'})
self.brain.start_handoff_half()
self.get_to_final_state()
# check object metadata
metadata = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
# check container listing metadata
container_metadata, objs = client.get_container(self.url, self.token,
self.container_name)
for obj in objs:
if obj['name'] == self.object_name:
break
else:
self.fail('obj not found in container listing')
expected = 'bar'
self.assertEqual(obj['content_type'], expected)
self.assertEqual(metadata['x-object-meta-color'], 'Blue')
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_consistent_container_dbs()
self._assert_consistent_object_metadata()
self._assert_consistent_suffix_hashes()
def test_post_ctype_replicated_when_subsequent_post_without_ctype(self):
# primary half handoff half
# ------------ ------------
# t0.data: ctype = foo
# t1.data: ctype = bar
# t2.meta: ctype = bif
# t3.data: ctype = baz, color = 'Red'
# t4.meta: color = Blue
#
# ...run replicator and expect...
#
# t1.data:
# t4-delta.meta: ctype = baz, color = Blue
self.brain.put_container(policy_index=0)
# incomplete write
self.brain.stop_handoff_half()
self._put_object(headers={'Content-Type': 'foo',
'X-Object-Sysmeta-Test': 'older'})
self.brain.start_handoff_half()
# handoff write
self.brain.stop_primary_half()
self._put_object(headers={'Content-Type': 'bar',
'X-Object-Sysmeta-Test': 'newer'})
self.brain.start_primary_half()
# incomplete post with content type
self.brain.stop_handoff_half()
self._post_object(headers={'Content-Type': 'bif'})
self.brain.start_handoff_half()
# incomplete post to handoff with content type
self.brain.stop_primary_half()
self._post_object(headers={'Content-Type': 'baz',
'X-Object-Meta-Color': 'Red'})
self.brain.start_primary_half()
# complete post with no content type
self._post_object(headers={'X-Object-Meta-Color': 'Blue',
'X-Object-Sysmeta-Test': 'ignored'})
# 'baz' wins over 'bar' but 'Blue' wins over 'Red'
self.get_to_final_state()
# check object metadata
metadata = self._get_object_metadata()
# check container listing metadata
container_metadata, objs = client.get_container(self.url, self.token,
self.container_name)
for obj in objs:
if obj['name'] == self.object_name:
break
expected = 'baz'
self.assertEqual(obj['content_type'], expected)
self.assertEqual(metadata['x-object-meta-color'], 'Blue')
self.assertEqual(metadata['x-object-sysmeta-test'], 'newer')
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_consistent_container_dbs()
self._assert_consistent_object_metadata()
self._assert_consistent_suffix_hashes()
def test_put_ctype_replicated_when_subsequent_posts_without_ctype(self):
# primary half handoff half
# ------------ ------------
# t0.data: ctype = foo
# t1.data: ctype = bar
# t2.meta:
# t3.meta
#
# ...run replicator and expect...
#
# t1.data: ctype = bar
# t3.meta
self.brain.put_container(policy_index=0)
self._put_object(headers={'Content-Type': 'foo',
'X-Object-Sysmeta-Test': 'older'})
# incomplete write to handoff half
self.brain.stop_primary_half()
self._put_object(headers={'Content-Type': 'bar',
'X-Object-Sysmeta-Test': 'newer'})
self.brain.start_primary_half()
# incomplete post with no content type to primary half
self.brain.stop_handoff_half()
self._post_object(headers={'X-Object-Meta-Color': 'Red',
'X-Object-Sysmeta-Test': 'ignored'})
self.brain.start_handoff_half()
# incomplete post with no content type to handoff half
self.brain.stop_primary_half()
self._post_object(headers={'X-Object-Meta-Color': 'Blue'})
self.brain.start_primary_half()
self.get_to_final_state()
# check object metadata
metadata = self._get_object_metadata()
# check container listing metadata
container_metadata, objs = client.get_container(self.url, self.token,
self.container_name)
for obj in objs:
if obj['name'] == self.object_name:
break
expected = 'bar'
self.assertEqual(obj['content_type'], expected)
self._assert_object_metadata_matches_listing(obj, metadata)
self.assertEqual(metadata['x-object-meta-color'], 'Blue')
self.assertEqual(metadata['x-object-sysmeta-test'], 'newer')
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_consistent_container_dbs()
self._assert_consistent_object_metadata()
self._assert_consistent_suffix_hashes()
def test_posted_metadata_only_persists_after_prior_put(self):
# newer metadata posted to subset of nodes should persist after an
# earlier put on other nodes, but older content-type on that subset
# should not persist
self.brain.put_container(policy_index=0)
# incomplete put to handoff
self.brain.stop_primary_half()
self._put_object(headers={'Content-Type': 'oldest',
'X-Object-Sysmeta-Test': 'oldest',
'X-Object-Meta-Test': 'oldest'})
self.brain.start_primary_half()
# incomplete put to primary
self.brain.stop_handoff_half()
self._put_object(headers={'Content-Type': 'oldest',
'X-Object-Sysmeta-Test': 'oldest',
'X-Object-Meta-Test': 'oldest'})
self.brain.start_handoff_half()
# incomplete post with content-type to handoff
self.brain.stop_primary_half()
self._post_object(headers={'Content-Type': 'newer',
'X-Object-Meta-Test': 'newer'})
self.brain.start_primary_half()
# incomplete put to primary
self.brain.stop_handoff_half()
self._put_object(headers={'Content-Type': 'newest',
'X-Object-Sysmeta-Test': 'newest',
'X-Object-Meta-Test': 'newer'})
self.brain.start_handoff_half()
# incomplete post with no content-type to handoff which still has
# out of date content-type
self.brain.stop_primary_half()
self._post_object(headers={'X-Object-Meta-Test': 'newest'})
metadata = self._get_object_metadata()
self.assertEqual(metadata['x-object-meta-test'], 'newest')
self.assertEqual(metadata['content-type'], 'newer')
self.brain.start_primary_half()
self.get_to_final_state()
# check object metadata
metadata = self._get_object_metadata()
self.assertEqual(metadata['x-object-meta-test'], 'newest')
self.assertEqual(metadata['x-object-sysmeta-test'], 'newest')
self.assertEqual(metadata['content-type'], 'newest')
# check container listing metadata
container_metadata, objs = client.get_container(self.url, self.token,
self.container_name)
for obj in objs:
if obj['name'] == self.object_name:
break
self.assertEqual(obj['content_type'], 'newest')
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_object_metadata_matches_listing(obj, metadata)
self._assert_consistent_container_dbs()
self._assert_consistent_object_metadata()
self._assert_consistent_suffix_hashes()
def test_post_trumped_by_prior_delete(self):
# new metadata and content-type posted to subset of nodes should not
@ -466,7 +730,7 @@ class Test(ReplProbeTest):
metadata = self._get_object_metadata()
self.assertEqual(metadata['x-object-sysmeta-test'], 'oldest')
self.assertEqual(metadata['x-object-meta-test'], 'newest')
self.assertEqual(metadata['content-type'], 'oldest')
self.assertEqual(metadata['content-type'], 'newest')
self.brain.start_primary_half()
@ -482,5 +746,6 @@ class Test(ReplProbeTest):
self._assert_consistent_deleted_object()
self._assert_consistent_suffix_hashes()
if __name__ == "__main__":
unittest.main()
@ -289,6 +289,8 @@ class TestTimestamp(unittest.TestCase):
self.assertIs(True, utils.Timestamp(ts) == ts) # sanity
self.assertIs(False, utils.Timestamp(ts) != utils.Timestamp(ts))
self.assertIs(False, utils.Timestamp(ts) != ts)
self.assertIs(False, utils.Timestamp(ts) is None)
self.assertIs(True, utils.Timestamp(ts) is not None)
def test_no_force_internal_no_offset(self):
"""Test that internal is the same as normal with no offset"""
@ -406,6 +408,15 @@ class TestTimestamp(unittest.TestCase):
'%r is not bigger than %f given %r' % (
timestamp, float(normal), value))
def test_short_format_with_offset(self):
expected = '1402436408.91203_f0'
timestamp = utils.Timestamp(1402436408.91203, 0xf0)
self.assertEqual(expected, timestamp.short)
expected = '1402436408.91203'
timestamp = utils.Timestamp(1402436408.91203)
self.assertEqual(expected, timestamp.short)
def test_raw(self):
expected = 140243640891203
timestamp = utils.Timestamp(1402436408.91203)
@ -694,6 +705,11 @@ class TestTimestamp(unittest.TestCase):
'%r is not smaller than %r given %r' % (
timestamp, int(other), value))
def test_cmp_with_none(self):
self.assertGreater(utils.Timestamp(0), None)
self.assertGreater(utils.Timestamp(1.0), None)
self.assertGreater(utils.Timestamp(1.0, 42), None)
def test_ordering(self):
given = [
'1402444820.62590_000000000000000a',
@ -789,6 +805,107 @@ class TestTimestamp(unittest.TestCase):
self.assertIn(ts_0_also, d)
class TestTimestampEncoding(unittest.TestCase):
def setUp(self):
t0 = utils.Timestamp(0.0)
t1 = utils.Timestamp(997.9996)
t2 = utils.Timestamp(999)
t3 = utils.Timestamp(1000, 24)
t4 = utils.Timestamp(1001)
t5 = utils.Timestamp(1002.00040)
# encodings that are expected when explicit = False
self.non_explicit_encodings = (
('0000001000.00000_18', (t3, t3, t3)),
('0000001000.00000_18', (t3, t3, None)),
)
# mappings that are expected when explicit = True
self.explicit_encodings = (
('0000001000.00000_18+0+0', (t3, t3, t3)),
('0000001000.00000_18+0', (t3, t3, None)),
)
# mappings that are expected when explicit = True or False
self.encodings = (
('0000001000.00000_18+0+186a0', (t3, t3, t4)),
('0000001000.00000_18+186a0+186c8', (t3, t4, t5)),
('0000001000.00000_18-186a0+0', (t3, t2, t2)),
('0000001000.00000_18+0-186a0', (t3, t3, t2)),
('0000001000.00000_18-186a0-186c8', (t3, t2, t1)),
('0000001000.00000_18', (t3, None, None)),
('0000001000.00000_18+186a0', (t3, t4, None)),
('0000001000.00000_18-186a0', (t3, t2, None)),
('0000001000.00000_18', (t3, None, t1)),
('0000001000.00000_18-5f5e100', (t3, t0, None)),
('0000001000.00000_18+0-5f5e100', (t3, t3, t0)),
('0000001000.00000_18-5f5e100+5f45a60', (t3, t0, t2)),
)
# decodings that are expected when explicit = False
self.non_explicit_decodings = (
('0000001000.00000_18', (t3, t3, t3)),
('0000001000.00000_18+186a0', (t3, t4, t4)),
('0000001000.00000_18-186a0', (t3, t2, t2)),
('0000001000.00000_18+186a0', (t3, t4, t4)),
('0000001000.00000_18-186a0', (t3, t2, t2)),
('0000001000.00000_18-5f5e100', (t3, t0, t0)),
)
# decodings that are expected when explicit = True
self.explicit_decodings = (
('0000001000.00000_18+0+0', (t3, t3, t3)),
('0000001000.00000_18+0', (t3, t3, None)),
('0000001000.00000_18', (t3, None, None)),
('0000001000.00000_18+186a0', (t3, t4, None)),
('0000001000.00000_18-186a0', (t3, t2, None)),
('0000001000.00000_18-5f5e100', (t3, t0, None)),
)
# decodings that are expected when explicit = True or False
self.decodings = (
('0000001000.00000_18+0+186a0', (t3, t3, t4)),
('0000001000.00000_18+186a0+186c8', (t3, t4, t5)),
('0000001000.00000_18-186a0+0', (t3, t2, t2)),
('0000001000.00000_18+0-186a0', (t3, t3, t2)),
('0000001000.00000_18-186a0-186c8', (t3, t2, t1)),
('0000001000.00000_18-5f5e100+5f45a60', (t3, t0, t2)),
)
def _assertEqual(self, expected, actual, test):
self.assertEqual(expected, actual,
'Got %s but expected %s for parameters %s'
% (actual, expected, test))
def test_encoding(self):
for test in self.explicit_encodings:
actual = utils.encode_timestamps(test[1][0], test[1][1],
test[1][2], True)
self._assertEqual(test[0], actual, test[1])
for test in self.non_explicit_encodings:
actual = utils.encode_timestamps(test[1][0], test[1][1],
test[1][2], False)
self._assertEqual(test[0], actual, test[1])
for explicit in (True, False):
for test in self.encodings:
actual = utils.encode_timestamps(test[1][0], test[1][1],
test[1][2], explicit)
self._assertEqual(test[0], actual, test[1])
def test_decoding(self):
for test in self.explicit_decodings:
actual = utils.decode_timestamps(test[0], True)
self._assertEqual(test[1], actual, test[0])
for test in self.non_explicit_decodings:
actual = utils.decode_timestamps(test[0], False)
self._assertEqual(test[1], actual, test[0])
for explicit in (True, False):
for test in self.decodings:
actual = utils.decode_timestamps(test[0], explicit)
self._assertEqual(test[1], actual, test[0])
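For reference, the composite values exercised above can be read with a standalone sketch like the following (plain Python, illustrative only; it ignores timestamp offsets and applies the non-explicit interpretation where a missing metadata delta defaults to the content-type time). The first token is the data timestamp, the first signed hex delta leads to the content-type timestamp, and the second signed hex delta leads from there to the metadata timestamp, in units of 1e-5 seconds:

    import re

    def split_composite(value):
        # e.g. '0000001000.00000_18+0+186a0' -> (1000.0, 1000.0, 1001.0)
        parts = re.split('([+-])', value)  # keep the signs
        data = float(parts[0].split('_')[0])
        ctype = meta = data
        deltas = [int(sign + mag, 16)
                  for sign, mag in zip(parts[1::2], parts[2::2])]
        if deltas:
            ctype = meta = data + deltas[0] / 100000.0
        if len(deltas) > 1:
            meta = ctype + deltas[1] / 100000.0
        return data, ctype, meta

    # split_composite('0000001000.00000_18-186a0') -> (1000.0, 999.0, 999.0)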
class TestUtils(unittest.TestCase):
"""Tests for swift.common.utils """
@ -28,8 +28,9 @@ import sqlite3
import pickle
import json
from swift.container.backend import ContainerBroker
from swift.container.backend import ContainerBroker, \
update_new_item_from_existing
from swift.common.utils import Timestamp
from swift.common.utils import Timestamp, encode_timestamps
from swift.common.storage_policy import POLICIES
import mock
@ -431,6 +432,357 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(conn.execute(
"SELECT deleted FROM object").fetchone()[0], 0)
def test_make_tuple_for_pickle(self):
record = {'name': 'obj',
'created_at': '1234567890.12345',
'size': 42,
'content_type': 'text/plain',
'etag': 'hash_test',
'deleted': '1',
'storage_policy_index': '2',
'ctype_timestamp': None,
'meta_timestamp': None}
broker = ContainerBroker(':memory:', account='a', container='c')
expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test',
'1', '2', None, None)
result = broker.make_tuple_for_pickle(record)
self.assertEqual(expect, result)
record['ctype_timestamp'] = '2233445566.00000'
expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test',
'1', '2', '2233445566.00000', None)
result = broker.make_tuple_for_pickle(record)
self.assertEqual(expect, result)
record['meta_timestamp'] = '5566778899.00000'
expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test',
'1', '2', '2233445566.00000', '5566778899.00000')
result = broker.make_tuple_for_pickle(record)
self.assertEqual(expect, result)
@with_tempdir
def test_load_old_record_from_pending_file(self, tempdir):
# Test reading old update record from pending file
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
broker.initialize(time(), 0)
record = {'name': 'obj',
'created_at': '1234567890.12345',
'size': 42,
'content_type': 'text/plain',
'etag': 'hash_test',
'deleted': '1',
'storage_policy_index': '2',
'ctype_timestamp': None,
'meta_timestamp': None}
# sanity check
self.assertFalse(os.path.isfile(broker.pending_file))
# simulate existing pending items written with old code,
# i.e. without content_type and meta timestamps
def old_make_tuple_for_pickle(_, record):
return (record['name'], record['created_at'], record['size'],
record['content_type'], record['etag'], record['deleted'],
record['storage_policy_index'])
_new = 'swift.container.backend.ContainerBroker.make_tuple_for_pickle'
with mock.patch(_new, old_make_tuple_for_pickle):
broker.put_record(dict(record))
self.assertTrue(os.path.getsize(broker.pending_file) > 0)
read_items = []
def mock_merge_items(_, item_list, *args):
# capture the items read from the pending file
read_items.extend(item_list)
with mock.patch('swift.container.backend.ContainerBroker.merge_items',
mock_merge_items):
broker._commit_puts()
self.assertEqual(1, len(read_items))
self.assertEqual(record, read_items[0])
self.assertTrue(os.path.getsize(broker.pending_file) == 0)
@with_tempdir
def test_save_and_load_record_from_pending_file(self, tempdir):
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
broker.initialize(time(), 0)
record = {'name': 'obj',
'created_at': '1234567890.12345',
'size': 42,
'content_type': 'text/plain',
'etag': 'hash_test',
'deleted': '1',
'storage_policy_index': '2',
'ctype_timestamp': '1234567890.44444',
'meta_timestamp': '1234567890.99999'}
# sanity check
self.assertFalse(os.path.isfile(broker.pending_file))
broker.put_record(dict(record))
self.assertTrue(os.path.getsize(broker.pending_file) > 0)
read_items = []
def mock_merge_items(_, item_list, *args):
# capture the items read from the pending file
read_items.extend(item_list)
with mock.patch('swift.container.backend.ContainerBroker.merge_items',
mock_merge_items):
broker._commit_puts()
self.assertEqual(1, len(read_items))
self.assertEqual(record, read_items[0])
self.assertTrue(os.path.getsize(broker.pending_file) == 0)
def _assert_db_row(self, broker, name, timestamp, size, content_type, hash,
deleted=0):
with broker.get() as conn:
self.assertEqual(conn.execute(
"SELECT name FROM object").fetchone()[0], name)
self.assertEqual(conn.execute(
"SELECT created_at FROM object").fetchone()[0], timestamp)
self.assertEqual(conn.execute(
"SELECT size FROM object").fetchone()[0], size)
self.assertEqual(conn.execute(
"SELECT content_type FROM object").fetchone()[0],
content_type)
self.assertEqual(conn.execute(
"SELECT etag FROM object").fetchone()[0], hash)
self.assertEqual(conn.execute(
"SELECT deleted FROM object").fetchone()[0], deleted)
def _test_put_object_multiple_encoded_timestamps(self, broker):
ts = (Timestamp(t) for t in itertools.count(int(time())))
broker.initialize(ts.next().internal, 0)
t = [ts.next() for _ in range(9)]
# Create initial object
broker.put_object('obj_name', t[0].internal, 123,
'application/x-test',
'5af83e3196bf99f440f31f2e1a6c9afe')
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t[0].internal, 123,
'application/x-test',
'5af83e3196bf99f440f31f2e1a6c9afe')
# hash and size change with same data timestamp are ignored
t_encoded = encode_timestamps(t[0], t[1], t[1])
broker.put_object('obj_name', t_encoded, 456,
'application/x-test-2',
'1234567890abcdeffedcba0987654321')
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 123,
'application/x-test-2',
'5af83e3196bf99f440f31f2e1a6c9afe')
# content-type change with same timestamp is ignored
t_encoded = encode_timestamps(t[0], t[1], t[2])
broker.put_object('obj_name', t_encoded, 456,
'application/x-test-3',
'1234567890abcdeffedcba0987654321')
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 123,
'application/x-test-2',
'5af83e3196bf99f440f31f2e1a6c9afe')
# update with differing newer timestamps
t_encoded = encode_timestamps(t[4], t[6], t[8])
broker.put_object('obj_name', t_encoded, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890')
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890')
# update with differing older timestamps should be ignored
t_encoded_older = encode_timestamps(t[3], t[5], t[7])
self.assertEqual(1, len(broker.get_items_since(0, 100)))
broker.put_object('obj_name', t_encoded_older, 9999,
'application/x-test-ignored',
'ignored_hash')
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890')
def test_put_object_multiple_encoded_timestamps_using_memory(self):
# Test ContainerBroker.put_object with differing data, content-type
# and metadata timestamps
broker = ContainerBroker(':memory:', account='a', container='c')
self._test_put_object_multiple_encoded_timestamps(broker)
@with_tempdir
def test_put_object_multiple_encoded_timestamps_using_file(self, tempdir):
# Test ContainerBroker.put_object with differing data, content-type
# and metadata timestamps, using file db to ensure that the code paths
# to write/read pending file are exercised.
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
self._test_put_object_multiple_encoded_timestamps(broker)
def _test_put_object_multiple_explicit_timestamps(self, broker):
ts = (Timestamp(t) for t in itertools.count(int(time())))
broker.initialize(ts.next().internal, 0)
t = [ts.next() for _ in range(11)]
# Create initial object
broker.put_object('obj_name', t[0].internal, 123,
'application/x-test',
'5af83e3196bf99f440f31f2e1a6c9afe',
ctype_timestamp=None,
meta_timestamp=None)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t[0].internal, 123,
'application/x-test',
'5af83e3196bf99f440f31f2e1a6c9afe')
# hash and size change with same data timestamp are ignored
t_encoded = encode_timestamps(t[0], t[1], t[1])
broker.put_object('obj_name', t[0].internal, 456,
'application/x-test-2',
'1234567890abcdeffedcba0987654321',
ctype_timestamp=t[1].internal,
meta_timestamp=t[1].internal)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 123,
'application/x-test-2',
'5af83e3196bf99f440f31f2e1a6c9afe')
# content-type change with same timestamp is ignored
t_encoded = encode_timestamps(t[0], t[1], t[2])
broker.put_object('obj_name', t[0].internal, 456,
'application/x-test-3',
'1234567890abcdeffedcba0987654321',
ctype_timestamp=t[1].internal,
meta_timestamp=t[2].internal)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 123,
'application/x-test-2',
'5af83e3196bf99f440f31f2e1a6c9afe')
# update with differing newer timestamps
t_encoded = encode_timestamps(t[4], t[6], t[8])
broker.put_object('obj_name', t[4].internal, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890',
ctype_timestamp=t[6].internal,
meta_timestamp=t[8].internal)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890')
# update with differing older timestamps should be ignored
broker.put_object('obj_name', t[3].internal, 9999,
'application/x-test-ignored',
'ignored_hash',
ctype_timestamp=t[5].internal,
meta_timestamp=t[7].internal)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 789,
'application/x-test-3',
'abcdef1234567890abcdef1234567890')
# content_type_timestamp == None defaults to data timestamp
t_encoded = encode_timestamps(t[9], t[9], t[8])
broker.put_object('obj_name', t[9].internal, 9999,
'application/x-test-new',
'new_hash',
ctype_timestamp=None,
meta_timestamp=t[7].internal)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 9999,
'application/x-test-new',
'new_hash')
# meta_timestamp == None defaults to data timestamp
t_encoded = encode_timestamps(t[9], t[10], t[10])
broker.put_object('obj_name', t[8].internal, 1111,
'application/x-test-newer',
'older_hash',
ctype_timestamp=t[10].internal,
meta_timestamp=None)
self.assertEqual(1, len(broker.get_items_since(0, 100)))
self._assert_db_row(broker, 'obj_name', t_encoded, 9999,
'application/x-test-newer',
'new_hash')
def test_put_object_multiple_explicit_timestamps_using_memory(self):
# Test ContainerBroker.put_object with differing data, content-type
# and metadata timestamps passed as explicit args
broker = ContainerBroker(':memory:', account='a', container='c')
self._test_put_object_multiple_explicit_timestamps(broker)
@with_tempdir
def test_put_object_multiple_explicit_timestamps_using_file(self, tempdir):
# Test ContainerBroker.put_object with differing data, content-type
# and metadata timestamps passed as explicit args, using file db to
# ensure that the code paths to write/read pending file are exercised.
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
self._test_put_object_multiple_explicit_timestamps(broker)
def test_last_modified_time(self):
# Test container listing reports the most recent of data or metadata
# timestamp as last-modified time
ts = (Timestamp(t) for t in itertools.count(int(time())))
broker = ContainerBroker(':memory:', account='a', container='c')
broker.initialize(ts.next().internal, 0)
# simple 'single' timestamp case
t0 = ts.next()
broker.put_object('obj1', t0.internal, 0, 'text/plain', 'hash1')
listing = broker.list_objects_iter(100, '', None, None, '')
self.assertEqual(len(listing), 1)
self.assertEqual(listing[0][0], 'obj1')
self.assertEqual(listing[0][1], t0.internal)
# content-type and metadata are updated at t1
t1 = ts.next()
t_encoded = encode_timestamps(t0, t1, t1)
broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1')
listing = broker.list_objects_iter(100, '', None, None, '')
self.assertEqual(len(listing), 1)
self.assertEqual(listing[0][0], 'obj1')
self.assertEqual(listing[0][1], t1.internal)
# used later
t2 = ts.next()
# metadata is updated at t3
t3 = ts.next()
t_encoded = encode_timestamps(t0, t1, t3)
broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1')
listing = broker.list_objects_iter(100, '', None, None, '')
self.assertEqual(len(listing), 1)
self.assertEqual(listing[0][0], 'obj1')
self.assertEqual(listing[0][1], t3.internal)
# all parts updated at t2, last-modified should remain at t3
t_encoded = encode_timestamps(t2, t2, t2)
broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1')
listing = broker.list_objects_iter(100, '', None, None, '')
self.assertEqual(len(listing), 1)
self.assertEqual(listing[0][0], 'obj1')
self.assertEqual(listing[0][1], t3.internal)
# all parts updated at t4, last-modified should be t4
t4 = ts.next()
t_encoded = encode_timestamps(t4, t4, t4)
broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1')
listing = broker.list_objects_iter(100, '', None, None, '')
self.assertEqual(len(listing), 1)
self.assertEqual(listing[0][0], 'obj1')
self.assertEqual(listing[0][1], t4.internal)
@patch_policies
def test_put_misplaced_object_does_not_effect_container_stats(self):
policy = random.choice(list(POLICIES))
@ -2172,3 +2524,298 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin,
info = broker.get_info()
self.assertEqual(info['object_count'], 1)
self.assertEqual(info['bytes_used'], 456)
class TestUpdateNewItemFromExisting(unittest.TestCase):
# TODO: add test scenarios that have swift_bytes in content_type
t0 = '1234567890.00000'
t1 = '1234567890.00001'
t2 = '1234567890.00002'
t3 = '1234567890.00003'
t4 = '1234567890.00004'
t5 = '1234567890.00005'
t6 = '1234567890.00006'
t7 = '1234567890.00007'
t8 = '1234567890.00008'
t20 = '1234567890.00020'
t30 = '1234567890.00030'
base_new_item = {'etag': 'New_item',
'size': 'nEw_item',
'content_type': 'neW_item',
'deleted': '0'}
base_existing = {'etag': 'Existing',
'size': 'eXisting',
'content_type': 'exIsting',
'deleted': '0'}
#
# each scenario is a tuple of:
# (existing time, new item times, expected updated item)
#
# e.g.:
# existing -> ({'created_at': t5},
# new_item -> {'created_at': t, 'ctype_timestamp': t, 'meta_timestamp': t},
# expected -> {'created_at': t,
# 'etag': <val>, 'size': <val>, 'content_type': <val>})
#
scenarios_when_all_existing_wins = (
#
# all new_item times <= all existing times -> existing values win
#
# existing has attrs at single time
#
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t0},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t1},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t1},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t2},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t3},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t0, 'ctype_timestamp': t3, 'meta_timestamp': t3},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t3},
{'created_at': t3,
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
#
# existing has attrs at multiple times:
# data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2
#
({'created_at': t3 + '+2+2'},
{'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t0},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t3},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t4},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t5},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t7},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t7},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t5, 'meta_timestamp': t5},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t5, 'meta_timestamp': t6},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t5, 'meta_timestamp': t7},
{'created_at': t3 + '+2+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
)
scenarios_when_all_new_item_wins = (
# no existing record
(None,
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t4},
{'created_at': t4,
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
(None,
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t5},
{'created_at': t4 + '+0+1',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
(None,
{'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t5},
{'created_at': t4 + '+1+0',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
(None,
{'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t6},
{'created_at': t4 + '+1+1',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
#
# all new_item times > all existing times -> new item values win
#
# existing has attrs at single time
#
({'created_at': t3},
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t4},
{'created_at': t4,
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3},
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t5},
{'created_at': t4 + '+0+1',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3},
{'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t5},
{'created_at': t4 + '+1+0',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3},
{'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t6},
{'created_at': t4 + '+1+1',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
#
# existing has attrs at multiple times:
# data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2
#
({'created_at': t3 + '+2+2'},
{'created_at': t4, 'ctype_timestamp': t6, 'meta_timestamp': t8},
{'created_at': t4 + '+2+2',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t6, 'ctype_timestamp': t6, 'meta_timestamp': t8},
{'created_at': t6 + '+0+2',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t4, 'ctype_timestamp': t8, 'meta_timestamp': t8},
{'created_at': t4 + '+4+0',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t6, 'ctype_timestamp': t8, 'meta_timestamp': t8},
{'created_at': t6 + '+2+0',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t8, 'ctype_timestamp': t8, 'meta_timestamp': t8},
{'created_at': t8,
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
)
scenarios_when_some_new_item_wins = (
#
# some but not all new_item times > existing times -> mixed updates
#
# existing has attrs at single time
#
({'created_at': t3},
{'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t4},
{'created_at': t3 + '+0+1',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t4},
{'created_at': t3 + '+1+0',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}),
({'created_at': t3},
{'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t5},
{'created_at': t3 + '+1+1',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}),
#
# existing has attrs at multiple times:
# data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2
#
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t8},
{'created_at': t3 + '+2+3',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t3, 'ctype_timestamp': t6, 'meta_timestamp': t8},
{'created_at': t3 + '+3+2',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t6},
{'created_at': t4 + '+1+2',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'exIsting'}),
({'created_at': t3 + '+2+2'},
{'created_at': t4, 'ctype_timestamp': t6, 'meta_timestamp': t6},
{'created_at': t4 + '+2+1',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}),
({'created_at': t3 + '+2+2'},
{'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t8},
{'created_at': t4 + '+1+3',
'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'exIsting'}),
# this scenario is to check that the deltas are in hex
({'created_at': t3 + '+2+2'},
{'created_at': t2, 'ctype_timestamp': t20, 'meta_timestamp': t30},
{'created_at': t3 + '+11+a',
'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}),
)
def _test_scenario(self, scenario, newer):
existing_time, new_item_times, expected_attrs = scenario
# this is the existing record...
existing = None
if existing_time:
existing = dict(self.base_existing)
existing.update(existing_time)
# this is the new item to update
new_item = dict(self.base_new_item)
new_item.update(new_item_times)
# this is the expected result of the update
expected = dict(new_item)
expected.update(expected_attrs)
expected['data_timestamp'] = new_item['created_at']
try:
self.assertIs(newer,
update_new_item_from_existing(new_item, existing))
self.assertDictEqual(expected, new_item)
except AssertionError as e:
msg = ('Scenario: existing %s, new_item %s, expected %s.'
% scenario)
msg = '%s Failed with: %s' % (msg, e.message)
raise AssertionError(msg)
def test_update_new_item_from_existing(self):
for scenario in self.scenarios_when_all_existing_wins:
self._test_scenario(scenario, False)
for scenario in self.scenarios_when_all_new_item_wins:
self._test_scenario(scenario, True)
for scenario in self.scenarios_when_some_new_item_wins:
self._test_scenario(scenario, True)
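To summarize the rule these scenarios encode: the newer of the data, content-type and metadata timestamps each wins independently, the etag and size follow the winning data timestamp, the content_type follows the winning content-type timestamp, and the merged times are re-encoded with hex deltas. A simplified standalone sketch (illustrative assumption only, plain floats with no offsets, not the backend implementation):

    def merge_times(existing, new_item):
        # each argument is a (data, ctype, meta) tuple; newest of each wins
        return tuple(max(e, n) for e, n in zip(existing, new_item))

    def encode_created_at(data, ctype, meta):
        value = '%016.5f' % data
        if ctype != data or meta != ctype:
            value += '%+x' % int(round((ctype - data) * 100000))
            value += '%+x' % int(round((meta - ctype) * 100000))
        return value

    # existing data@t3, ctype@t5, meta@t7 merged with a new item at
    # data@t2, ctype@t20, meta@t30 gives data@t3, ctype@t20, meta@t30,
    # encoded as t3 followed by '+11+a' (hex deltas), as in the scenarios above.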

@ -12,6 +12,8 @@
# limitations under the License.
import json
import numbers
import mock
import operator
import time
@ -29,7 +31,7 @@ from swift.container import reconciler
from swift.container.server import gen_resp_headers
from swift.common.direct_client import ClientException
from swift.common import swob
from swift.common.utils import split_path, Timestamp
from swift.common.utils import split_path, Timestamp, encode_timestamps
from test.unit import debug_logger, FakeRing, fake_http_connect
from test.unit.common.middleware.helpers import FakeSwift
@ -132,12 +134,16 @@ class FakeInternalClient(reconciler.InternalClient):
'DELETE', obj_path, swob.HTTPNoContent, {})
# container listing entry
last_modified = timestamp_to_last_modified(timestamp)
# some tests setup mock listings using floats, some use
# strings, so normalize here
if isinstance(timestamp, numbers.Number):
timestamp = '%f' % timestamp
obj_data = { obj_data = {
'bytes': 0, 'bytes': 0,
# listing data is unicode # listing data is unicode
'name': obj_name.decode('utf-8'), 'name': obj_name.decode('utf-8'),
'last_modified': last_modified, 'last_modified': last_modified,
'hash': timestamp, 'hash': timestamp.decode('utf-8'),
'content_type': content_type, 'content_type': content_type,
} }
container_listing_data.append(obj_data) container_listing_data.append(obj_data)
@ -210,6 +216,26 @@ class TestReconcilerUtils(unittest.TestCase):
self.assertEqual(got['q_record'], 1234.20192)
self.assertEqual(got['q_op'], 'PUT')
# the 'hash' field in object listing has the raw 'created_at' value
# which could be a composite of timestamps
timestamp_str = encode_timestamps(Timestamp(1234.20190),
Timestamp(1245.20190),
Timestamp(1256.20190),
explicit=True)
got = reconciler.parse_raw_obj({
'name': "1:/AUTH_bob/con/obj",
'hash': timestamp_str,
'last_modified': timestamp_to_last_modified(1234.20192),
'content_type': 'application/x-put',
})
self.assertEqual(got['q_policy_index'], 1)
self.assertEqual(got['account'], 'AUTH_bob')
self.assertEqual(got['container'], 'con')
self.assertEqual(got['obj'], 'obj')
self.assertEqual(got['q_ts'], 1234.20190)
self.assertEqual(got['q_record'], 1234.20192)
self.assertEqual(got['q_op'], 'PUT')
# negative test
obj_info = {
'name': "1:/AUTH_bob/con/obj",


@ -26,7 +26,7 @@ from swift.common import db_replicator
from swift.container import replicator, backend, server, sync_store
from swift.container.reconciler import (
MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name)
- from swift.common.utils import Timestamp
+ from swift.common.utils import Timestamp, encode_timestamps
from swift.common.storage_policy import POLICIES
from test.unit.common import test_db_replicator
@ -827,38 +827,52 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
self.assertEqual(info[key], value)
def test_misplaced_rows_replicate_and_enqueue(self):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time.time())))
+ # force all timestamps to fall in same hour
+ ts = (Timestamp(t) for t in
+ itertools.count(int(time.time()) // 3600 * 3600))
policy = random.choice(list(POLICIES))
broker = self._get_broker('a', 'c', node_index=0)
- broker.initialize(next(ts), policy.idx)
+ broker.initialize(next(ts).internal, policy.idx)
remote_policy = random.choice([p for p in POLICIES if p is not
policy])
remote_broker = self._get_broker('a', 'c', node_index=1)
- remote_broker.initialize(next(ts), remote_policy.idx)
+ remote_broker.initialize(next(ts).internal, remote_policy.idx)
# add a misplaced row to *local* broker
- obj_put_timestamp = next(ts)
+ obj_put_timestamp = next(ts).internal
broker.put_object(
'o', obj_put_timestamp, 0, 'content-type',
'etag', storage_policy_index=remote_policy.idx)
- misplaced = broker.get_misplaced_since(-1, 1)
+ misplaced = broker.get_misplaced_since(-1, 10)
self.assertEqual(len(misplaced), 1)
# since this row is misplaced it doesn't show up in count
self.assertEqual(broker.get_info()['object_count'], 0)
# add another misplaced row to *local* broker with composite timestamp
ts_data = next(ts)
ts_ctype = next(ts)
ts_meta = next(ts)
broker.put_object(
'o2', ts_data.internal, 0, 'content-type',
'etag', storage_policy_index=remote_policy.idx,
ctype_timestamp=ts_ctype.internal, meta_timestamp=ts_meta.internal)
misplaced = broker.get_misplaced_since(-1, 10)
self.assertEqual(len(misplaced), 2)
# since this row is misplaced it doesn't show up in count
self.assertEqual(broker.get_info()['object_count'], 0)
# replicate
part, node = self._get_broker_part_node(broker)
daemon = self._run_once(node)
# push to remote, and third node was missing (also maybe reconciler)
- self.assertTrue(2 < daemon.stats['rsync'] <= 3)
+ self.assertTrue(2 < daemon.stats['rsync'] <= 3, daemon.stats['rsync'])
# grab the rsynced instance of remote_broker
remote_broker = self._get_broker('a', 'c', node_index=1)
# remote has misplaced rows too now
- misplaced = remote_broker.get_misplaced_since(-1, 1)
- self.assertEqual(len(misplaced), 1)
+ misplaced = remote_broker.get_misplaced_since(-1, 10)
+ self.assertEqual(len(misplaced), 2)
# and the correct policy_index and object_count
info = remote_broker.get_info()
@ -869,22 +883,29 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
for key, value in expectations.items():
self.assertEqual(info[key], value)
- # and we should have also enqeued these rows in the reconciler
+ # and we should have also enqueued these rows in a single reconciler,
+ # since we forced the object timestamps to be in the same hour.
reconciler = daemon.get_reconciler_broker(misplaced[0]['created_at'])
# but it may not be on the same node as us anymore though...
reconciler = self._get_broker(reconciler.account,
reconciler.container, node_index=0)
- self.assertEqual(reconciler.get_info()['object_count'], 1)
+ self.assertEqual(reconciler.get_info()['object_count'], 2)
objects = reconciler.list_objects_iter(
- 1, '', None, None, None, None, storage_policy_index=0)
- self.assertEqual(len(objects), 1)
+ 10, '', None, None, None, None, storage_policy_index=0)
+ self.assertEqual(len(objects), 2)
expected = ('%s:/a/c/o' % remote_policy.idx, obj_put_timestamp, 0,
'application/x-put', obj_put_timestamp)
self.assertEqual(objects[0], expected)
# the second object's listing has ts_meta as its last modified time
# but its full composite timestamp is in the hash field.
expected = ('%s:/a/c/o2' % remote_policy.idx, ts_meta.internal, 0,
'application/x-put',
encode_timestamps(ts_data, ts_ctype, ts_meta))
self.assertEqual(objects[1], expected)
# having safely enqueued to the reconciler we can advance
# our sync pointer
- self.assertEqual(broker.get_reconciler_sync(), 1)
+ self.assertEqual(broker.get_reconciler_sync(), 2)
def test_multiple_out_sync_reconciler_enqueue_normalize(self):
ts = (Timestamp(t).internal for t in


@ -1619,6 +1619,203 @@ class TestContainerController(unittest.TestCase):
listing_data = json.loads(resp.body)
self.assertEqual(0, len(listing_data))
def test_object_update_with_multiple_timestamps(self):
def do_update(t_data, etag, size, content_type,
t_type=None, t_meta=None):
"""
Make a PUT request to container controller to update an object
"""
headers = {'X-Timestamp': t_data.internal,
'X-Size': size,
'X-Content-Type': content_type,
'X-Etag': etag}
if t_type:
headers['X-Content-Type-Timestamp'] = t_type.internal
if t_meta:
headers['X-Meta-Timestamp'] = t_meta.internal
req = Request.blank(
'/sda1/p/a/c/o', method='PUT', headers=headers)
self._update_object_put_headers(req)
return req.get_response(self.controller)
ts = (Timestamp(t) for t in itertools.count(int(time.time())))
t0 = ts.next()
# create container
req = Request.blank('/sda1/p/a/c', method='PUT', headers={
'X-Timestamp': t0.internal})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
# check status
req = Request.blank('/sda1/p/a/c', method='HEAD')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 204)
# create object at t1
t1 = ts.next()
resp = do_update(t1, 'etag_at_t1', 1, 'ctype_at_t1')
self.assertEqual(resp.status_int, 201)
# check listing, expect last_modified = t1
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 1)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 1)
self.assertEqual(obj['hash'], 'etag_at_t1')
self.assertEqual(obj['content_type'], 'ctype_at_t1')
self.assertEqual(obj['last_modified'], t1.isoformat)
# send an update with a content type timestamp at t4
t2 = ts.next()
t3 = ts.next()
t4 = ts.next()
resp = do_update(t1, 'etag_at_t1', 1, 'ctype_at_t4', t_type=t4)
self.assertEqual(resp.status_int, 201)
# check updated listing, expect last_modified = t4
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 1)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 1)
self.assertEqual(obj['hash'], 'etag_at_t1')
self.assertEqual(obj['content_type'], 'ctype_at_t4')
self.assertEqual(obj['last_modified'], t4.isoformat)
# now overwrite with an in-between data timestamp at t2
resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t2', t_type=t2)
self.assertEqual(resp.status_int, 201)
# check updated listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 2)
self.assertEqual(obj['hash'], 'etag_at_t2')
self.assertEqual(obj['content_type'], 'ctype_at_t4')
self.assertEqual(obj['last_modified'], t4.isoformat)
# now overwrite with an in-between content-type timestamp at t3
resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t3', t_type=t3)
self.assertEqual(resp.status_int, 201)
# check updated listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 2)
self.assertEqual(obj['hash'], 'etag_at_t2')
self.assertEqual(obj['content_type'], 'ctype_at_t4')
self.assertEqual(obj['last_modified'], t4.isoformat)
# now update with an in-between meta timestamp at t5
t5 = ts.next()
resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t3', t_type=t3,
t_meta=t5)
self.assertEqual(resp.status_int, 201)
# check updated listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 2)
self.assertEqual(obj['hash'], 'etag_at_t2')
self.assertEqual(obj['content_type'], 'ctype_at_t4')
self.assertEqual(obj['last_modified'], t5.isoformat)
# delete object at t6
t6 = ts.next()
req = Request.blank(
'/sda1/p/a/c/o', method='DELETE', headers={
'X-Timestamp': t6.internal})
self._update_object_put_headers(req)
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 204)
# check empty listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 0)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 0)
listing_data = json.loads(resp.body)
self.assertEqual(0, len(listing_data))
# subsequent content type timestamp at t8 should leave object deleted
t7 = ts.next()
t8 = ts.next()
t9 = ts.next()
resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t8', t_type=t8,
t_meta=t9)
self.assertEqual(resp.status_int, 201)
# check empty listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 0)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 0)
listing_data = json.loads(resp.body)
self.assertEqual(0, len(listing_data))
# object recreated at t7 should pick up existing, later content-type
resp = do_update(t7, 'etag_at_t7', 7, 'ctype_at_t7')
self.assertEqual(resp.status_int, 201)
# check listing
req = Request.blank('/sda1/p/a/c', method='GET',
query_string='format=json')
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 200)
self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1)
self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 7)
listing_data = json.loads(resp.body)
self.assertEqual(1, len(listing_data))
for obj in listing_data:
self.assertEqual(obj['name'], 'o')
self.assertEqual(obj['bytes'], 7)
self.assertEqual(obj['hash'], 'etag_at_t7')
self.assertEqual(obj['content_type'], 'ctype_at_t8')
self.assertEqual(obj['last_modified'], t9.isoformat)
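In short, the listing behaviour asserted above is that each column follows its own timestamp. A compact illustrative restatement (not the container broker code; assumes swift.common.utils.Timestamp arguments):

def expected_listing_entry(etag, size, t_data, ctype, t_ctype, t_meta):
    # etag/bytes come from the newest data update, content_type from the
    # newest content-type update, and last_modified reports the newest of
    # the three timestamps.
    return {'hash': etag,
            'bytes': size,
            'content_type': ctype,
            'last_modified': max(t_data, t_ctype, t_meta).isoformat}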
def test_DELETE_account_update(self):
bindsock = listen(('127.0.0.1', 0))


@ -20,6 +20,7 @@ from textwrap import dedent
import mock
import errno
from swift.common.utils import Timestamp
from test.unit import debug_logger
from swift.container import sync
from swift.common.db import DatabaseConnectionError
@ -750,6 +751,7 @@ class TestContainerSync(unittest.TestCase):
hex = 'abcdef'
sync.uuid = FakeUUID
ts_data = Timestamp(1.1)
def fake_delete_object(path, name=None, headers=None, proxy=None,
logger=None, timeout=None):
@ -758,12 +760,13 @@ class TestContainerSync(unittest.TestCase):
if realm:
self.assertEqual(headers, {
'x-container-sync-auth':
- 'US abcdef 90e95aabb45a6cdc0892a3db5535e7f918428c90',
- 'x-timestamp': '1.2'})
+ 'US abcdef a2401ecb1256f469494a0abcb0eb62ffa73eca63',
+ 'x-timestamp': ts_data.internal})
else:
self.assertEqual(
headers,
- {'x-container-sync-key': 'key', 'x-timestamp': '1.2'})
+ {'x-container-sync-key': 'key',
+ 'x-timestamp': ts_data.internal})
self.assertEqual(proxy, 'http://proxy')
self.assertEqual(timeout, 5.0)
self.assertEqual(logger, self.logger)
@ -774,11 +777,13 @@ class TestContainerSync(unittest.TestCase):
cs = sync.ContainerSync({}, container_ring=FakeRing(),
logger=self.logger)
cs.http_proxies = ['http://proxy']
- # Success
+ # Success.
# simulate a row with tombstone at 1.1 and later ctype, meta times
created_at = ts_data.internal + '+1388+1388' # last modified = 1.2
self.assertTrue(cs.container_sync_row(
{'deleted': True,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': created_at}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -858,6 +863,8 @@ class TestContainerSync(unittest.TestCase):
sync.uuid = FakeUUID
sync.shuffle = lambda x: x
ts_data = Timestamp(1.1)
timestamp = Timestamp(1.2)
def fake_put_object(sync_to, name=None, headers=None,
contents=None, proxy=None, logger=None,
@ -867,15 +874,15 @@ class TestContainerSync(unittest.TestCase):
if realm:
self.assertEqual(headers, {
'x-container-sync-auth':
- 'US abcdef ef62c64bb88a33fa00722daa23d5d43253164962',
- 'x-timestamp': '1.2',
+ 'US abcdef a5fb3cf950738e6e3b364190e246bd7dd21dad3c',
+ 'x-timestamp': timestamp.internal,
'etag': 'etagvalue',
'other-header': 'other header value',
'content-type': 'text/plain'})
else:
self.assertEqual(headers, {
'x-container-sync-key': 'key',
- 'x-timestamp': '1.2',
+ 'x-timestamp': timestamp.internal,
'other-header': 'other header value',
'etag': 'etagvalue',
'content-type': 'text/plain'})
@ -897,16 +904,19 @@ class TestContainerSync(unittest.TestCase):
'0')
return (200,
{'other-header': 'other header value',
- 'etag': '"etagvalue"', 'x-timestamp': '1.2',
+ 'etag': '"etagvalue"',
+ 'x-timestamp': timestamp.internal,
'content-type': 'text/plain; swift_bytes=123'},
iter('contents'))
cs.swift.get_object = fake_get_object
- # Success as everything says it worked
+ # Success as everything says it worked.
# simulate a row with data at 1.1 and later ctype, meta times
created_at = ts_data.internal + '+1388+1388' # last modified = 1.2
self.assertTrue(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': created_at}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -920,7 +930,7 @@ class TestContainerSync(unittest.TestCase):
return (200,
{'date': 'date value',
'last-modified': 'last modified value',
- 'x-timestamp': '1.2',
+ 'x-timestamp': timestamp.internal,
'other-header': 'other header value',
'etag': '"etagvalue"',
'content-type': 'text/plain; swift_bytes=123'},
@ -933,7 +943,7 @@ class TestContainerSync(unittest.TestCase):
self.assertTrue(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -967,7 +977,7 @@ class TestContainerSync(unittest.TestCase):
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -990,7 +1000,7 @@ class TestContainerSync(unittest.TestCase):
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -1003,7 +1013,8 @@ class TestContainerSync(unittest.TestCase):
self.assertEqual(headers['X-Backend-Storage-Policy-Index'],
'0')
return (200, {'other-header': 'other header value',
- 'x-timestamp': '1.2', 'etag': '"etagvalue"'},
+ 'x-timestamp': timestamp.internal,
+ 'etag': '"etagvalue"'},
iter('contents'))
def fake_put_object(*args, **kwargs):
@ -1015,7 +1026,7 @@ class TestContainerSync(unittest.TestCase):
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -1030,7 +1041,7 @@ class TestContainerSync(unittest.TestCase):
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
@ -1045,7 +1056,7 @@ class TestContainerSync(unittest.TestCase):
self.assertFalse(cs.container_sync_row(
{'deleted': False,
'name': 'object',
- 'created_at': '1.2'}, 'http://sync/to/path',
+ 'created_at': timestamp.internal}, 'http://sync/to/path',
'key', FakeContainerBroker('broker'),
{'account': 'a', 'container': 'c', 'storage_policy_index': 0},
realm, realm_key))
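For reference, the composite rows used above decode as follows (a quick check of the '# last modified = 1.2' comments; offsets are hex, in 10-microsecond units, and this is an illustration rather than Swift code):

ts_data = 1.1
offset = int('1388', 16) * 0.00001   # 0x1388 == 5000 units == 0.05s
ts_ctype = ts_data + offset          # 1.15
ts_meta = ts_ctype + offset          # 1.2, the x-timestamp the fakes assert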


@ -45,7 +45,7 @@ from test.unit import (FakeLogger, mock as unit_mock, temptree,
from nose import SkipTest
from swift.obj import diskfile
from swift.common import utils
- from swift.common.utils import hash_path, mkdirs, Timestamp
+ from swift.common.utils import hash_path, mkdirs, Timestamp, encode_timestamps
from swift.common import ring
from swift.common.splice import splice
from swift.common.exceptions import DiskFileNotExist, DiskFileQuarantined, \
@ -616,7 +616,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
def test_get_ondisk_files_with_empty_dir(self):
files = []
- expected = dict(data_file=None, meta_file=None, ts_file=None)
+ expected = dict(
+ data_file=None, meta_file=None, ctype_file=None, ts_file=None)
for policy in POLICIES:
for frag_index in (0, None, '14'):
# check manager
@ -1214,6 +1215,64 @@ class TestDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
def test_yield_hashes_yields_content_type_timestamp(self):
hash_ = '9373a92d072897b136b3fc06595b4abc'
ts_iter = make_timestamp_iter()
ts0, ts1, ts2, ts3, ts4 = (next(ts_iter) for _ in range(5))
data_file = ts1.internal + '.data'
# no content-type delta
meta_file = ts2.internal + '.meta'
suffix_map = {'abc': {hash_: [data_file, meta_file]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts2}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
# non-zero content-type delta
delta = ts3.raw - ts2.raw
meta_file = '%s-%x.meta' % (ts3.internal, delta)
suffix_map = {'abc': {hash_: [data_file, meta_file]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts3,
'ts_ctype': ts2}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
# zero content-type delta
meta_file = '%s+0.meta' % ts3.internal
suffix_map = {'abc': {hash_: [data_file, meta_file]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts3,
'ts_ctype': ts3}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
# content-type in second meta file
delta = ts3.raw - ts2.raw
meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
meta_file2 = '%s.meta' % ts4.internal
suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts4,
'ts_ctype': ts2}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
# obsolete content-type in second meta file, older than data file
delta = ts3.raw - ts0.raw
meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
meta_file2 = '%s.meta' % ts4.internal
suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts4}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
# obsolete content-type in second meta file, same time as data file
delta = ts3.raw - ts1.raw
meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
meta_file2 = '%s.meta' % ts4.internal
suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
expected = {hash_: {'ts_data': ts1,
'ts_meta': ts4}}
self._check_yield_hashes(POLICIES.default, suffix_map, expected)
def test_yield_hashes_suffix_filter(self):
# test again with limited suffixes
old_ts = '1383180000.12345'
@ -1611,6 +1670,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
'timestamp': ts,
'frag_index': int(frag),
'ext': '.data',
'ctype_timestamp': None
})
# these functions are inverse
self.assertEqual(
@ -1631,6 +1691,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
'timestamp': ts,
'frag_index': None,
'ext': ext,
'ctype_timestamp': None
})
# these functions are inverse
self.assertEqual(
@ -1662,6 +1723,30 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
actual = mgr.make_on_disk_filename(ts, ext, frag_index=frag)
self.assertEqual(expected, actual)
def test_make_on_disk_filename_for_meta_with_content_type(self):
# verify .meta filename encodes content-type timestamp
mgr = self.df_router[POLICIES.default]
time_ = 1234567890.00001
for delta in (0.0, .00001, 1.11111):
t_meta = Timestamp(time_)
t_type = Timestamp(time_ - delta)
sign = '-' if delta else '+'
expected = '%s%s%x.meta' % (t_meta.short, sign, 100000 * delta)
actual = mgr.make_on_disk_filename(
t_meta, '.meta', ctype_timestamp=t_type)
self.assertEqual(expected, actual)
parsed = mgr.parse_on_disk_filename(actual)
self.assertEqual(parsed, {
'timestamp': t_meta,
'frag_index': None,
'ext': '.meta',
'ctype_timestamp': t_type
})
# these functions are inverse
self.assertEqual(
mgr.make_on_disk_filename(**parsed),
expected)
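A worked example of the .meta name checked above, assuming the 10-microsecond units implied by the `100000 * delta` factor (illustrative only; actual naming goes through make_on_disk_filename):

t_meta = 1234567890.00001
delta = 1.11111                          # content-type set 1.11111s before the meta update
offset = int(round(delta * 100000))      # 111111 == 0x1b207
name = '%016.5f-%x.meta' % (t_meta, offset)
assert name == '1234567890.00001-1b207.meta'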
def test_yield_hashes(self):
old_ts = '1383180000.12345'
fresh_ts = Timestamp(time() - 10).internal
@ -1979,6 +2064,7 @@ class DiskFileMixin(BaseDiskFileTestMixin):
tpool.execute = self._orig_tpool_exc
def _create_ondisk_file(self, df, data, timestamp, metadata=None,
ctype_timestamp=None,
ext='.data'):
mkdirs(df._datadir)
if timestamp is None:
@ -1996,10 +2082,17 @@ class DiskFileMixin(BaseDiskFileTestMixin):
metadata['name'] = '/a/c/o'
if 'Content-Length' not in metadata:
metadata['Content-Length'] = str(len(data))
- filename = timestamp.internal + ext
+ filename = timestamp.internal
if ext == '.data' and df.policy.policy_type == EC_POLICY:
- filename = '%s#%s.data' % (timestamp.internal, df._frag_index)
- data_file = os.path.join(df._datadir, filename)
+ filename = '%s#%s' % (timestamp.internal, df._frag_index)
+ if ctype_timestamp:
+ metadata.update(
+ {'Content-Type-Timestamp':
+ Timestamp(ctype_timestamp).internal})
+ filename = encode_timestamps(timestamp,
+ Timestamp(ctype_timestamp),
+ explicit=True)
+ data_file = os.path.join(df._datadir, filename + ext)
with open(data_file, 'wb') as f:
f.write(data)
xattr.setxattr(f.fileno(), diskfile.METADATA_KEY,
@ -2779,6 +2872,99 @@ class DiskFileMixin(BaseDiskFileTestMixin):
exp_name = '%s.meta' % timestamp
self.assertTrue(exp_name in set(dl))
def test_write_metadata_with_content_type(self):
# if metadata has content-type then its time should be in file name
df = self._create_test_file('1234567890')
file_count = len(os.listdir(df._datadir))
timestamp = Timestamp(time())
metadata = {'X-Timestamp': timestamp.internal,
'X-Object-Meta-test': 'data',
'Content-Type': 'foo',
'Content-Type-Timestamp': timestamp.internal}
df.write_metadata(metadata)
dl = os.listdir(df._datadir)
self.assertEqual(len(dl), file_count + 1)
exp_name = '%s+0.meta' % timestamp.internal
self.assertTrue(exp_name in set(dl),
'Expected file %s not found in %s' % (exp_name, dl))
def test_write_metadata_with_older_content_type(self):
# if metadata has content-type then its time should be in file name
ts_iter = make_timestamp_iter()
df = self._create_test_file('1234567890', timestamp=ts_iter.next())
file_count = len(os.listdir(df._datadir))
timestamp = ts_iter.next()
timestamp2 = ts_iter.next()
metadata = {'X-Timestamp': timestamp2.internal,
'X-Object-Meta-test': 'data',
'Content-Type': 'foo',
'Content-Type-Timestamp': timestamp.internal}
df.write_metadata(metadata)
dl = os.listdir(df._datadir)
self.assertEqual(len(dl), file_count + 1, dl)
exp_name = '%s-%x.meta' % (timestamp2.internal,
timestamp2.raw - timestamp.raw)
self.assertTrue(exp_name in set(dl),
'Expected file %s not found in %s' % (exp_name, dl))
def test_write_metadata_with_content_type_removes_same_time_meta(self):
# a meta file without content-type should be cleaned up in favour of
# a meta file at same time with content-type
ts_iter = make_timestamp_iter()
df = self._create_test_file('1234567890', timestamp=ts_iter.next())
file_count = len(os.listdir(df._datadir))
timestamp = ts_iter.next()
timestamp2 = ts_iter.next()
metadata = {'X-Timestamp': timestamp2.internal,
'X-Object-Meta-test': 'data'}
df.write_metadata(metadata)
metadata = {'X-Timestamp': timestamp2.internal,
'X-Object-Meta-test': 'data',
'Content-Type': 'foo',
'Content-Type-Timestamp': timestamp.internal}
df.write_metadata(metadata)
dl = os.listdir(df._datadir)
self.assertEqual(len(dl), file_count + 1, dl)
exp_name = '%s-%x.meta' % (timestamp2.internal,
timestamp2.raw - timestamp.raw)
self.assertTrue(exp_name in set(dl),
'Expected file %s not found in %s' % (exp_name, dl))
def test_write_metadata_with_content_type_removes_multiple_metas(self):
# a combination of a meta file without content-type and an older meta
# file with content-type should be cleaned up in favour of a meta file
# at newer time with content-type
ts_iter = make_timestamp_iter()
df = self._create_test_file('1234567890', timestamp=ts_iter.next())
file_count = len(os.listdir(df._datadir))
timestamp = ts_iter.next()
timestamp2 = ts_iter.next()
metadata = {'X-Timestamp': timestamp2.internal,
'X-Object-Meta-test': 'data'}
df.write_metadata(metadata)
metadata = {'X-Timestamp': timestamp.internal,
'X-Object-Meta-test': 'data',
'Content-Type': 'foo',
'Content-Type-Timestamp': timestamp.internal}
df.write_metadata(metadata)
dl = os.listdir(df._datadir)
self.assertEqual(len(dl), file_count + 2, dl)
metadata = {'X-Timestamp': timestamp2.internal,
'X-Object-Meta-test': 'data',
'Content-Type': 'foo',
'Content-Type-Timestamp': timestamp.internal}
df.write_metadata(metadata)
dl = os.listdir(df._datadir)
self.assertEqual(len(dl), file_count + 1, dl)
exp_name = '%s-%x.meta' % (timestamp2.internal,
timestamp2.raw - timestamp.raw)
self.assertTrue(exp_name in set(dl),
'Expected file %s not found in %s' % (exp_name, dl))
def test_write_metadata_no_xattr(self):
timestamp = Timestamp(time()).internal
metadata = {'X-Timestamp': timestamp, 'X-Object-Meta-test': 'data'}
@ -3133,7 +3319,54 @@ class DiskFileMixin(BaseDiskFileTestMixin):
Timestamp(10).internal)
self.assertTrue('deleted' not in df._metadata)
- def test_ondisk_search_loop_data_meta_ts(self):
+ def test_ondisk_search_loop_multiple_meta_data(self):
df = self._simple_get_diskfile()
self._create_ondisk_file(df, '', ext='.meta', timestamp=10,
metadata={'X-Object-Meta-User': 'user-meta'})
self._create_ondisk_file(df, '', ext='.meta', timestamp=9,
ctype_timestamp=9,
metadata={'Content-Type': 'newest',
'X-Object-Meta-User': 'blah'})
self._create_ondisk_file(df, 'B', ext='.data', timestamp=8,
metadata={'Content-Type': 'newer'})
self._create_ondisk_file(df, 'A', ext='.data', timestamp=7,
metadata={'Content-Type': 'oldest'})
if df.policy.policy_type == EC_POLICY:
self._create_ondisk_file(df, '', ext='.durable', timestamp=8)
self._create_ondisk_file(df, '', ext='.durable', timestamp=7)
df = self._simple_get_diskfile()
with df.open():
self.assertTrue('X-Timestamp' in df._metadata)
self.assertEqual(df._metadata['X-Timestamp'],
Timestamp(10).internal)
self.assertTrue('Content-Type' in df._metadata)
self.assertEqual(df._metadata['Content-Type'], 'newest')
self.assertTrue('X-Object-Meta-User' in df._metadata)
self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta')
def test_ondisk_search_loop_stale_meta_data(self):
df = self._simple_get_diskfile()
self._create_ondisk_file(df, '', ext='.meta', timestamp=10,
metadata={'X-Object-Meta-User': 'user-meta'})
self._create_ondisk_file(df, '', ext='.meta', timestamp=9,
ctype_timestamp=7,
metadata={'Content-Type': 'older',
'X-Object-Meta-User': 'blah'})
self._create_ondisk_file(df, 'B', ext='.data', timestamp=8,
metadata={'Content-Type': 'newer'})
if df.policy.policy_type == EC_POLICY:
self._create_ondisk_file(df, '', ext='.durable', timestamp=8)
df = self._simple_get_diskfile()
with df.open():
self.assertTrue('X-Timestamp' in df._metadata)
self.assertEqual(df._metadata['X-Timestamp'],
Timestamp(10).internal)
self.assertTrue('Content-Type' in df._metadata)
self.assertEqual(df._metadata['Content-Type'], 'newer')
self.assertTrue('X-Object-Meta-User' in df._metadata)
self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta')
def test_ondisk_search_loop_data_ts_meta(self):
df = self._simple_get_diskfile()
self._create_ondisk_file(df, 'B', ext='.data', timestamp=10)
self._create_ondisk_file(df, 'A', ext='.data', timestamp=9)
@ -3295,6 +3528,37 @@ class DiskFileMixin(BaseDiskFileTestMixin):
with self.assertRaises(DiskFileNotOpen):
df.data_timestamp
def test_content_type_and_timestamp(self):
ts_1 = self.ts()
self._get_open_disk_file(ts=ts_1.internal,
extra_metadata={'Content-Type': 'image/jpeg'})
df = self._simple_get_diskfile()
with df.open():
self.assertEqual(ts_1.internal, df.data_timestamp)
self.assertEqual(ts_1.internal, df.timestamp)
self.assertEqual(ts_1.internal, df.content_type_timestamp)
self.assertEqual('image/jpeg', df.content_type)
ts_2 = self.ts()
ts_3 = self.ts()
df.write_metadata({'X-Timestamp': ts_3.internal,
'Content-Type': 'image/gif',
'Content-Type-Timestamp': ts_2.internal})
with df.open():
self.assertEqual(ts_1.internal, df.data_timestamp)
self.assertEqual(ts_3.internal, df.timestamp)
self.assertEqual(ts_2.internal, df.content_type_timestamp)
self.assertEqual('image/gif', df.content_type)
def test_content_type_timestamp_not_open(self):
df = self._simple_get_diskfile()
with self.assertRaises(DiskFileNotOpen):
df.content_type_timestamp
def test_content_type_not_open(self):
df = self._simple_get_diskfile()
with self.assertRaises(DiskFileNotOpen):
df.content_type
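For reference, a minimal usage sketch of the accessors these tests exercise (assumes `df` is a diskfile obtained from a DiskFileManager, as above):

def content_type_info(df):
    # the properties below raise DiskFileNotOpen unless the diskfile is open
    with df.open():
        return {'data_timestamp': df.data_timestamp,
                'metadata_timestamp': df.timestamp,
                'content_type_timestamp': df.content_type_timestamp,
                'content_type': df.content_type}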
def test_durable_timestamp(self):
ts_1 = self.ts()
df = self._get_open_disk_file(ts=ts_1.internal)
@ -4211,6 +4475,14 @@ class TestSuffixHashes(unittest.TestCase):
filename += '.data'
return filename
def _metafilename(self, meta_timestamp, ctype_timestamp=None):
filename = meta_timestamp.internal
if ctype_timestamp is not None:
delta = meta_timestamp.raw - ctype_timestamp.raw
filename = '%s-%x' % (filename, delta)
filename += '.meta'
return filename
def check_hash_cleanup_listdir(self, policy, input_files, output_files):
orig_unlink = os.unlink
file_list = list(input_files)
@ -4771,6 +5043,175 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual(sorted(os.listdir(df._datadir)),
sorted(expected_files))
def _verify_get_hashes(self, filenames, ts_data, ts_meta, ts_ctype,
policy):
"""
Helper method to create a set of ondisk files and verify suffix_hashes.
:param filenames: list of filenames to create in an object hash dir
:param ts_data: newest data timestamp, used for expected result
:param ts_meta: newest meta timestamp, used for expected result
:param ts_ctype: newest content-type timestamp, used for expected
result
:param policy: storage policy to use for test
"""
df_mgr = self.df_router[policy]
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy, frag_index=4)
suffix = os.path.basename(os.path.dirname(df._datadir))
mkdirs(df._datadir)
# calculate expected result
hasher = md5()
if policy.policy_type == EC_POLICY:
hasher.update(ts_meta.internal + '.meta')
hasher.update(ts_data.internal + '.durable')
if ts_ctype:
hasher.update(ts_ctype.internal + '_ctype')
expected = {
suffix: {
None: hasher.hexdigest(),
4: md5(ts_data.internal).hexdigest(),
}
}
elif policy.policy_type == REPL_POLICY:
hasher.update(ts_meta.internal + '.meta')
hasher.update(ts_data.internal + '.data')
if ts_ctype:
hasher.update(ts_ctype.internal + '_ctype')
expected = {suffix: hasher.hexdigest()}
else:
self.fail('unknown policy type %r' % policy.policy_type)
for fname in filenames:
open(os.path.join(df._datadir, fname), 'w').close()
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
msg = 'expected %r != %r for policy %r' % (
expected, hashes, policy)
self.assertEqual(hashes, expected, msg)
def test_hash_suffix_with_older_content_type_in_meta(self):
# single meta file having older content-type
for policy in self.iter_policies():
ts_data, ts_ctype, ts_meta = (
self.ts(), self.ts(), self.ts())
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_meta, ts_ctype)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_meta, ts_ctype, policy)
def test_hash_suffix_with_same_age_content_type_in_meta(self):
# single meta file having same age content-type
for policy in self.iter_policies():
ts_data, ts_meta = (self.ts(), self.ts())
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_meta, ts_meta)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_meta, ts_meta, policy)
def test_hash_suffix_with_obsolete_content_type_in_meta(self):
# After rsync replication we could have a single meta file having
# content-type older than a replicated data file
for policy in self.iter_policies():
ts_ctype, ts_data, ts_meta = (self.ts(), self.ts(), self.ts())
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_meta, ts_ctype)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_meta, None, policy)
def test_hash_suffix_with_older_content_type_in_newer_meta(self):
# After rsync replication we could have two meta files: newest
# content-type is in newer meta file, older than newer meta file
for policy in self.iter_policies():
ts_data, ts_older_meta, ts_ctype, ts_newer_meta = (
self.ts() for _ in range(4))
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_older_meta),
self._metafilename(ts_newer_meta, ts_ctype)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_newer_meta, ts_ctype, policy)
def test_hash_suffix_with_same_age_content_type_in_newer_meta(self):
# After rsync replication we could have two meta files: newest
# content-type is in newer meta file, at same age as newer meta file
for policy in self.iter_policies():
ts_data, ts_older_meta, ts_newer_meta = (
self.ts() for _ in range(3))
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_newer_meta, ts_newer_meta)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_newer_meta, ts_newer_meta, policy)
def test_hash_suffix_with_older_content_type_in_older_meta(self):
# After rsync replication we could have two meta files: newest
# content-type is in older meta file, older than older meta file
for policy in self.iter_policies():
ts_data, ts_ctype, ts_older_meta, ts_newer_meta = (
self.ts() for _ in range(4))
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_newer_meta),
self._metafilename(ts_older_meta, ts_ctype)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_newer_meta, ts_ctype, policy)
def test_hash_suffix_with_same_age_content_type_in_older_meta(self):
# After rsync replication we could have two meta files: newest
# content-type is in older meta file, at same age as older meta file
for policy in self.iter_policies():
ts_data, ts_older_meta, ts_newer_meta = (
self.ts() for _ in range(3))
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_newer_meta),
self._metafilename(ts_older_meta, ts_older_meta)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_newer_meta, ts_older_meta, policy)
def test_hash_suffix_with_obsolete_content_type_in_older_meta(self):
# After rsync replication we could have two meta files: newest
# content-type is in older meta file, but older than data file
for policy in self.iter_policies():
ts_ctype, ts_data, ts_older_meta, ts_newer_meta = (
self.ts() for _ in range(4))
filenames = [self._datafilename(ts_data, policy, frag_index=4),
self._metafilename(ts_newer_meta),
self._metafilename(ts_older_meta, ts_ctype)]
if policy.policy_type == EC_POLICY:
filenames.append(ts_data.internal + '.durable')
self._verify_get_hashes(
filenames, ts_data, ts_newer_meta, None, policy)
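The expected-hash construction used by _verify_get_hashes above boils down to the following for the replication-policy case (an illustrative restatement of the test helper, not the diskfile manager code; arguments are internal timestamp strings):

from hashlib import md5

def expected_suffix_hash(ts_data, ts_meta, ts_ctype=None):
    h = md5()
    h.update(ts_meta + '.meta')      # newest .meta timestamp
    h.update(ts_data + '.data')      # .data file timestamp
    if ts_ctype:                     # only when the content-type is newer than the data
        h.update(ts_ctype + '_ctype')
    return h.hexdigest()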
def test_hash_suffix_removes_empty_hashdir_and_suffix(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]


@ -51,7 +51,8 @@ from swift.obj import server as object_server
from swift.obj import diskfile
from swift.common import utils, bufferedhttp
from swift.common.utils import hash_path, mkdirs, normalize_timestamp, \
- NullLogger, storage_directory, public, replication
+ NullLogger, storage_directory, public, replication, encode_timestamps, \
+ Timestamp
from swift.common import constraints
from swift.common.swob import Request, HeaderKeyDict, WsgiBytesIO
from swift.common.splice import splice
@ -168,7 +169,7 @@ class TestObjectController(unittest.TestCase):
dah = ['content-disposition', 'content-encoding', 'x-delete-at',
'x-object-manifest', 'x-static-large-object']
conf = {'devices': self.testdir, 'mount_check': 'false',
- 'allowed_headers': ','.join(['content-type'] + dah)}
+ 'allowed_headers': ','.join(['content-length'] + dah)}
self.object_controller = object_server.ObjectController(
conf, logger=debug_logger())
self.assertEqual(self.object_controller.allowed_headers, set(dah))
@ -416,12 +417,14 @@ class TestObjectController(unittest.TestCase):
self.assertEqual(resp.status_int, 400)
def test_POST_container_connection(self):
# Test that POST does call container_update and returns success
# whether update to container server succeeds or fails
- def mock_http_connect(response, with_exc=False):
+ def mock_http_connect(calls, response, with_exc=False):
class FakeConn(object):
- def __init__(self, status, with_exc):
+ def __init__(self, calls, status, with_exc):
+ self.calls = calls
self.status = status
self.reason = 'Fake'
self.host = '1.2.3.4'
@ -429,6 +432,7 @@ class TestObjectController(unittest.TestCase):
self.with_exc = with_exc
def getresponse(self):
calls[0] += 1
if self.with_exc:
raise Exception('test')
return self
@ -436,7 +440,7 @@ class TestObjectController(unittest.TestCase):
def read(self, amt=None):
return ''
- return lambda *args, **kwargs: FakeConn(response, with_exc)
+ return lambda *args, **kwargs: FakeConn(calls, response, with_exc)
ts = time()
timestamp = normalize_timestamp(ts)
@ -456,8 +460,9 @@ class TestObjectController(unittest.TestCase):
'X-Container-Device': 'sda1',
'X-Container-Timestamp': '1',
'Content-Type': 'application/new1'})
calls = [0]
with mock.patch.object(object_server, 'http_connect',
- mock_http_connect(202)):
+ mock_http_connect(calls, 202)):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
req = Request.blank(
@ -469,8 +474,9 @@ class TestObjectController(unittest.TestCase):
'X-Container-Device': 'sda1',
'X-Container-Timestamp': '1',
'Content-Type': 'application/new1'})
calls = [0]
with mock.patch.object(object_server, 'http_connect',
- mock_http_connect(202, with_exc=True)):
+ mock_http_connect(calls, 202, with_exc=True)):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
req = Request.blank(
@ -482,11 +488,215 @@ class TestObjectController(unittest.TestCase):
'X-Container-Device': 'sda1',
'X-Container-Timestamp': '1',
'Content-Type': 'application/new2'})
calls = [0]
with mock.patch.object(object_server, 'http_connect',
- mock_http_connect(500)):
+ mock_http_connect(calls, 500)):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
def _test_POST_container_updates(self, policy, update_etag=None):
# Test that POST requests result in correct calls to container_update
ts_iter = (Timestamp(t) for t in itertools.count(int(time())))
t = [ts_iter.next() for _ in range(0, 5)]
calls_made = []
update_etag = update_etag or '098f6bcd4621d373cade4e832627b4f6'
def mock_container_update(ctlr, op, account, container, obj, request,
headers_out, objdevice, policy_idx):
calls_made.append(headers_out)
headers = {
'X-Timestamp': t[1].internal,
'Content-Type': 'application/octet-stream;swift_bytes=123456789',
'Content-Length': '4',
'X-Backend-Storage-Policy': int(policy)}
if policy.policy_type == EC_POLICY:
headers['X-Backend-Container-Update-Override-Etag'] = update_etag
headers['X-Object-Sysmeta-Ec-Etag'] = update_etag
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'PUT'},
headers=headers)
req.body = 'test'
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 201)
self.assertEqual(1, len(calls_made))
expected_headers = HeaderKeyDict({
'x-size': '4',
'x-content-type': 'application/octet-stream;swift_bytes=123456789',
'x-timestamp': t[1].internal,
'x-etag': update_etag})
self.assertDictEqual(expected_headers, calls_made[0])
# POST with no metadata newer than the data should return 409,
# container update not expected
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={'X-Timestamp': t[0].internal,
'X-Backend-Storage-Policy': int(policy)})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 409)
self.assertEqual(resp.headers['x-backend-timestamp'],
t[1].internal)
self.assertEqual(0, len(calls_made))
# POST with newer metadata returns success and container update
# is expected
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={'X-Timestamp': t[3].internal,
'X-Backend-Storage-Policy': int(policy)})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
self.assertEqual(1, len(calls_made))
expected_headers = HeaderKeyDict({
'x-size': '4',
'x-content-type': 'application/octet-stream;swift_bytes=123456789',
'x-timestamp': t[1].internal,
'x-content-type-timestamp': t[1].internal,
'x-meta-timestamp': t[3].internal,
'x-etag': update_etag})
self.assertDictEqual(expected_headers, calls_made[0])
# POST with no metadata newer than existing metadata should return
# 409, container update not expected
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={'X-Timestamp': t[2].internal,
'X-Backend-Storage-Policy': int(policy)})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 409)
self.assertEqual(resp.headers['x-backend-timestamp'],
t[3].internal)
self.assertEqual(0, len(calls_made))
# POST with newer content-type but older metadata returns success
# and container update is expected newer content-type should have
# existing swift_bytes appended
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={
'X-Timestamp': t[2].internal,
'Content-Type': 'text/plain',
'Content-Type-Timestamp': t[2].internal,
'X-Backend-Storage-Policy': int(policy)
})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
self.assertEqual(1, len(calls_made))
expected_headers = HeaderKeyDict({
'x-size': '4',
'x-content-type': 'text/plain;swift_bytes=123456789',
'x-timestamp': t[1].internal,
'x-content-type-timestamp': t[2].internal,
'x-meta-timestamp': t[3].internal,
'x-etag': update_etag})
self.assertDictEqual(expected_headers, calls_made[0])
# POST with older content-type but newer metadata returns success
# and container update is expected
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={
'X-Timestamp': t[4].internal,
'Content-Type': 'older',
'Content-Type-Timestamp': t[1].internal,
'X-Backend-Storage-Policy': int(policy)
})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
self.assertEqual(1, len(calls_made))
expected_headers = HeaderKeyDict({
'x-size': '4',
'x-content-type': 'text/plain;swift_bytes=123456789',
'x-timestamp': t[1].internal,
'x-content-type-timestamp': t[2].internal,
'x-meta-timestamp': t[4].internal,
'x-etag': update_etag})
self.assertDictEqual(expected_headers, calls_made[0])
# POST with content-type and metadata timestamps that match the
# existing values returns 409 and no container update is expected
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={
'X-Timestamp': t[4].internal,
'Content-Type': 'ignored',
'Content-Type-Timestamp': t[2].internal,
'X-Backend-Storage-Policy-Index': int(policy)
})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 409)
self.assertEqual(0, len(calls_made))
# POST with an implicitly newer content-type but older metadata
# returns success and a container update is expected; the update
# reports the existing metadata timestamp
calls_made = []
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'POST'},
headers={
'X-Timestamp': t[3].internal,
'Content-Type': 'text/newer',
'X-Backend-Storage-Policy-Index': int(policy)
})
with mock.patch('swift.obj.server.ObjectController.container_update',
mock_container_update):
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
self.assertEqual(1, len(calls_made))
expected_headers = HeaderKeyDict({
'x-size': '4',
'x-content-type': 'text/newer;swift_bytes=123456789',
'x-timestamp': t[1].internal,
'x-content-type-timestamp': t[3].internal,
'x-meta-timestamp': t[4].internal,
'x-etag': update_etag})
self.assertDictEqual(expected_headers, calls_made[0])
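# Editorial sketch (not part of this patch, and not Swift's implementation):
# the assertions above all follow one pattern, so the container update headers
# for a POST can be thought of as a function of the three timestamps the
# object now tracks. The helper name below is hypothetical, for illustration.
def _sketch_container_update_headers(t_data, t_ctype, t_meta,
                                     content_type, etag, size):
    # x-timestamp always carries the immutable .data timestamp, while the
    # content-type and the mutable metadata report their own timestamps
    return {
        'x-size': str(size),
        'x-content-type': content_type,
        'x-timestamp': t_data.internal,
        'x-content-type-timestamp': t_ctype.internal,
        'x-meta-timestamp': t_meta.internal,
        'x-etag': etag,
    }
# e.g. the final assertion above corresponds to
# _sketch_container_update_headers(t[1], t[3], t[4],
#     'text/newer;swift_bytes=123456789', update_etag, 4)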
def test_POST_container_updates_with_replication_policy(self):
self._test_POST_container_updates(POLICIES[0])
def test_POST_container_updates_with_EC_policy(self):
self._test_POST_container_updates(
POLICIES[1], update_etag='override_etag')
def test_POST_quarantine_zbyte(self):
timestamp = normalize_timestamp(time())
req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
@ -1332,6 +1542,84 @@ class TestObjectController(unittest.TestCase):
'name': '/a/c/o',
'X-Object-Meta-1': 'Not One'})
def test_POST_then_fetch_content_type(self):
# check that content_type is updated by a POST
timestamp1 = normalize_timestamp(time())
req = Request.blank(
'/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
headers={'X-Timestamp': timestamp1,
'Content-Type': 'text/plain',
'ETag': '1000d172764c9dbc3a5798a67ec5bb76',
'X-Object-Meta-1': 'One'})
req.body = 'VERIFY SYSMETA'
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 201)
timestamp2 = normalize_timestamp(time())
req = Request.blank(
'/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'POST'},
headers={'X-Timestamp': timestamp2,
'X-Object-Meta-1': 'Not One',
'Content-Type': 'text/html'})
resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 202)
# original .data file metadata should be unchanged
objfile = os.path.join(
self.testdir, 'sda1',
storage_directory(diskfile.get_data_dir(0), 'p',
hash_path('a', 'c', 'o')),
timestamp1 + '.data')
self.assertTrue(os.path.isfile(objfile))
self.assertEqual(open(objfile).read(), 'VERIFY SYSMETA')
self.assertEqual(diskfile.read_metadata(objfile),
{'X-Timestamp': timestamp1,
'Content-Length': '14',
'Content-Type': 'text/plain',
'ETag': '1000d172764c9dbc3a5798a67ec5bb76',
'name': '/a/c/o',
'X-Object-Meta-1': 'One'})
# .meta file metadata should have updated content-type
metafile_name = encode_timestamps(Timestamp(timestamp2),
Timestamp(timestamp2),
explicit=True)
metafile = os.path.join(
self.testdir, 'sda1',
storage_directory(diskfile.get_data_dir(0), 'p',
hash_path('a', 'c', 'o')),
metafile_name + '.meta')
self.assertTrue(os.path.isfile(metafile))
self.assertEqual(diskfile.read_metadata(metafile),
{'X-Timestamp': timestamp2,
'name': '/a/c/o',
'Content-Type': 'text/html',
'Content-Type-Timestamp': timestamp2,
'X-Object-Meta-1': 'Not One'})
def check_response(resp):
self.assertEqual(resp.status_int, 200)
self.assertEqual(resp.content_length, 14)
self.assertEqual(resp.content_type, 'text/html')
self.assertEqual(resp.headers['content-type'], 'text/html')
self.assertEqual(
resp.headers['last-modified'],
strftime('%a, %d %b %Y %H:%M:%S GMT',
gmtime(math.ceil(float(timestamp2)))))
self.assertEqual(resp.headers['etag'],
'"1000d172764c9dbc3a5798a67ec5bb76"')
self.assertEqual(resp.headers['x-object-meta-1'], 'Not One')
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'HEAD'})
resp = req.get_response(self.object_controller)
check_response(resp)
req = Request.blank('/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'GET'})
resp = req.get_response(self.object_controller)
check_response(resp)
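# Editorial sketch (not part of this patch): the two-timestamp .meta name
# built via encode_timestamps() above is, conceptually, one timestamp plus a
# hex-encoded offset to the other, so a single file name can record both the
# metadata timestamp and the content-type timestamp. The helper below is a
# rough illustration only; the real format lives in
# swift.common.utils.encode_timestamps.
def _sketch_encode_two_timestamps(t_meta, t_ctype, explicit=False):
    # offsets are in Timestamp.raw units (1e-5 seconds)
    delta = t_meta.raw - t_ctype.raw
    if delta or explicit:
        # explicit=True (as used in the test above) keeps the offset part
        # even when the two timestamps are equal
        return '%s-%x' % (t_meta.internal, delta)
    return t_meta.internal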
def test_PUT_then_fetch_system_metadata(self):
timestamp = normalize_timestamp(time())
req = Request.blank(


@ -904,6 +904,165 @@ class TestSsyncReplication(TestBaseSsync):
# tx meta file should not have been sync'd to rx data file
self.assertNotIn('X-Object-Meta-Test', rx_obj.get_metadata())
def test_content_type_sync(self):
policy = POLICIES.default
rx_node_index = 0
# create diskfiles...
tx_objs = {}
rx_objs = {}
tx_df_mgr = self.daemon._diskfile_router[policy]
rx_df_mgr = self.rx_controller._diskfile_router[policy]
expected_subreqs = defaultdict(list)
# o1 on tx only with two meta files
name = 'o1'
t1 = self.ts_iter.next()
tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t1)
t1_type = self.ts_iter.next()
metadata_1 = {'X-Timestamp': t1_type.internal,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t1_type.internal}
tx_objs[name][0].write_metadata(metadata_1)
t1_meta = self.ts_iter.next()
metadata_2 = {'X-Timestamp': t1_meta.internal,
'X-Object-Meta-Test': name}
tx_objs[name][0].write_metadata(metadata_2)
expected_subreqs['PUT'].append(name)
expected_subreqs['POST'].append(name)
# o2 on tx with two meta files, rx has .data and newest .meta but is
# missing latest content-type
name = 'o2'
t2 = self.ts_iter.next()
tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t2)
t2_type = self.ts_iter.next()
metadata_1 = {'X-Timestamp': t2_type.internal,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t2_type.internal}
tx_objs[name][0].write_metadata(metadata_1)
t2_meta = self.ts_iter.next()
metadata_2 = {'X-Timestamp': t2_meta.internal,
'X-Object-Meta-Test': name}
tx_objs[name][0].write_metadata(metadata_2)
rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t2)
rx_objs[name][0].write_metadata(metadata_2)
expected_subreqs['POST'].append(name)
# o3 on tx with two meta files; rx has .data and only one .meta but it
# does have the latest content-type, so there is nothing to sync
name = 'o3'
t3 = self.ts_iter.next()
tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t3)
t3_type = self.ts_iter.next()
metadata_1 = {'X-Timestamp': t3_type.internal,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t3_type.internal}
tx_objs[name][0].write_metadata(metadata_1)
t3_meta = self.ts_iter.next()
metadata_2 = {'X-Timestamp': t3_meta.internal,
'X-Object-Meta-Test': name}
tx_objs[name][0].write_metadata(metadata_2)
rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t3)
metadata_2b = {'X-Timestamp': t3_meta.internal,
'X-Object-Meta-Test': name,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t3_type.internal}
rx_objs[name][0].write_metadata(metadata_2b)
# o4 on tx with one meta file having the latest content-type; rx has
# .data and two .meta files which between them have the latest
# content-type, so there is nothing to sync,
# i.e. o4 is the reverse of the o3 scenario
name = 'o4'
t4 = self.ts_iter.next()
tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t4)
t4_type = self.ts_iter.next()
t4_meta = self.ts_iter.next()
metadata_2b = {'X-Timestamp': t4_meta.internal,
'X-Object-Meta-Test': name,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t4_type.internal}
tx_objs[name][0].write_metadata(metadata_2b)
rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t4)
metadata_1 = {'X-Timestamp': t4_type.internal,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t4_type.internal}
rx_objs[name][0].write_metadata(metadata_1)
metadata_2 = {'X-Timestamp': t4_meta.internal,
'X-Object-Meta-Test': name}
rx_objs[name][0].write_metadata(metadata_2)
# o5 on tx with one meta file having latest content-type, rx has
# .data and no .meta
name = 'o5'
t5 = self.ts_iter.next()
tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t5)
t5_type = self.ts_iter.next()
t5_meta = self.ts_iter.next()
metadata = {'X-Timestamp': t5_meta.internal,
'X-Object-Meta-Test': name,
'Content-Type': 'text/test',
'Content-Type-Timestamp': t5_type.internal}
tx_objs[name][0].write_metadata(metadata)
rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t5)
expected_subreqs['POST'].append(name)
suffixes = set()
for diskfiles in tx_objs.values():
for df in diskfiles:
suffixes.add(os.path.basename(os.path.dirname(df._datadir)))
# create ssync sender instance...
job = {'device': self.device,
'partition': self.partition,
'policy': policy}
node = dict(self.rx_node)
node.update({'index': rx_node_index})
sender = ssync_sender.Sender(self.daemon, node, job, suffixes)
# wrap connection from tx to rx to capture ssync messages...
sender.connect, trace = self.make_connect_wrapper(sender)
# run the sync protocol...
success, in_sync_objs = sender()
self.assertEqual(5, len(in_sync_objs), trace['messages'])
self.assertTrue(success)
# verify protocol
results = self._analyze_trace(trace)
self.assertEqual(5, len(results['tx_missing']))
self.assertEqual(3, len(results['rx_missing']))
for subreq in results.get('tx_updates'):
obj = subreq['path'].split('/')[3]
method = subreq['method']
self.assertTrue(obj in expected_subreqs[method],
'Unexpected %s subreq for object %s, expected %s'
% (method, obj, expected_subreqs[method]))
expected_subreqs[method].remove(obj)
if method == 'PUT':
expected_body = '%s___None' % subreq['path']
self.assertEqual(expected_body, subreq['body'])
# verify all expected subreqs consumed
for _method, expected in expected_subreqs.items():
self.assertFalse(expected,
'Expected subreqs not seen for %s for objects %s'
% (_method, expected))
self.assertFalse(results['rx_updates'])
# verify on disk files...
self._verify_ondisk_files(tx_objs, policy)
for oname, rx_obj in rx_objs.items():
df = rx_obj[0].open()
metadata = df.get_metadata()
self.assertEqual(metadata['X-Object-Meta-Test'], oname)
self.assertEqual(metadata['Content-Type'], 'text/test')
# verify that tx and rx both generate the same suffix hashes...
tx_hashes = tx_df_mgr.get_hashes(
self.device, self.partition, suffixes, policy)
rx_hashes = rx_df_mgr.get_hashes(
self.device, self.partition, suffixes, policy)
self.assertEqual(tx_hashes, rx_hashes)
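# Editorial sketch (not the actual ssync code): an illustrative summary of
# the per-object decisions the five scenarios above are designed to exercise.
# 'local' and 'remote' stand for the tx and rx timestamps of an object.
def _sketch_sync_actions(local, remote):
    # local/remote map 'ts_data', 'ts_ctype', 'ts_meta' to Timestamps;
    # remote is None when the receiver has no copy at all (the o1 case)
    actions = []
    if remote is None or local['ts_data'] > remote['ts_data']:
        actions.append('PUT')    # ship the .data file
    if remote is None or (local['ts_ctype'] > remote['ts_ctype'] or
                          local['ts_meta'] > remote['ts_meta']):
        actions.append('POST')   # ship a metadata-only update
    return actions               # [] -> nothing to sync (o3 and o4)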
if __name__ == '__main__':
unittest.main()


@ -1927,27 +1927,57 @@ class TestModuleMethods(unittest.TestCase):
ts_iter = make_timestamp_iter()
t_data = next(ts_iter)
t_meta = next(ts_iter)
t_ctype = next(ts_iter)
d_meta_data = t_meta.raw - t_data.raw
d_ctype_data = t_ctype.raw - t_data.raw
# legacy single timestamp string
msg = '%s %s' % (object_hash, t_data.internal)
expected = dict(object_hash=object_hash,
ts_meta=t_data,
ts_data=t_data,
ts_ctype=t_data)
self.assertEqual(expected, ssync_receiver.decode_missing(msg))
# hex meta delta encoded as extra message part
msg = '%s %s m:%x' % (object_hash, t_data.internal, d_meta_data)
expected = dict(object_hash=object_hash,
ts_data=t_data,
ts_meta=t_meta,
ts_ctype=t_data)
self.assertEqual(expected, ssync_receiver.decode_missing(msg))
# hex content type delta encoded in extra message part
msg = '%s %s t:%x,m:%x' % (object_hash, t_data.internal,
d_ctype_data, d_meta_data)
expected = dict(object_hash=object_hash,
ts_data=t_data,
ts_meta=t_meta,
ts_ctype=t_ctype)
self.assertEqual(
expected, ssync_receiver.decode_missing(msg))
# order of subparts does not matter
msg = '%s %s m:%x,t:%x' % (object_hash, t_data.internal,
d_meta_data, d_ctype_data)
self.assertEqual(
expected, ssync_receiver.decode_missing(msg))
# hex content type delta may be zero
msg = '%s %s t:0,m:%x' % (object_hash, t_data.internal, d_meta_data)
expected = dict(object_hash=object_hash,
ts_data=t_data,
ts_meta=t_meta,
ts_ctype=t_data)
self.assertEqual(
expected, ssync_receiver.decode_missing(msg))
# unexpected zero delta is tolerated
msg = '%s %s m:0' % (object_hash, t_data.internal)
expected = dict(object_hash=object_hash,
ts_meta=t_data,
ts_data=t_data,
ts_ctype=t_data)
self.assertEqual(expected, ssync_receiver.decode_missing(msg))
# unexpected subparts in timestamp delta part are tolerated
@ -1956,7 +1986,8 @@ class TestModuleMethods(unittest.TestCase):
d_meta_data)
expected = dict(object_hash=object_hash,
ts_meta=t_meta,
ts_data=t_data,
ts_ctype=t_data)
self.assertEqual(
expected, ssync_receiver.decode_missing(msg))
@ -1966,7 +1997,8 @@ class TestModuleMethods(unittest.TestCase):
d_meta_data)
expected = dict(object_hash=object_hash,
ts_meta=t_meta,
ts_data=t_data,
ts_ctype=t_data)
self.assertEqual(expected, ssync_receiver.decode_missing(msg))
def test_encode_wanted(self):


@ -763,12 +763,14 @@ class TestSender(BaseTest):
'/srv/node/dev/objects/9/def/'
'9d41d8cd98f00b204e9800998ecf0def',
'9d41d8cd98f00b204e9800998ecf0def',
{'ts_data': Timestamp(1380144472.22222),
'ts_meta': Timestamp(1380144473.22222)})
yield (
'/srv/node/dev/objects/9/def/'
'9d41d8cd98f00b204e9800998ecf1def',
'9d41d8cd98f00b204e9800998ecf1def',
{'ts_data': Timestamp(1380144474.44444),
'ts_ctype': Timestamp(1380144474.44448),
'ts_meta': Timestamp(1380144475.44444)})
else:
raise Exception(
@ -792,18 +794,21 @@ class TestSender(BaseTest):
''.join(self.sender.connection.sent),
'17\r\n:MISSING_CHECK: START\r\n\r\n'
'33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n'
'3b\r\n9d41d8cd98f00b204e9800998ecf0def 1380144472.22222 '
'm:186a0\r\n\r\n'
'3f\r\n9d41d8cd98f00b204e9800998ecf1def 1380144474.44444 '
'm:186a0,t:4\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
self.assertEqual(self.sender.send_map, {})
candidates = [('9d41d8cd98f00b204e9800998ecf0abc',
dict(ts_data=Timestamp(1380144470.00000))),
('9d41d8cd98f00b204e9800998ecf0def',
dict(ts_data=Timestamp(1380144472.22222),
ts_meta=Timestamp(1380144473.22222))),
('9d41d8cd98f00b204e9800998ecf1def',
dict(ts_data=Timestamp(1380144474.44444),
ts_meta=Timestamp(1380144475.44444),
ts_ctype=Timestamp(1380144474.44448)))]
self.assertEqual(self.sender.available_map, dict(candidates))
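# Editorial worked example (not part of this patch): each missing_check line
# above is sent as an HTTP chunk, '<hex length>\r\n<payload>\r\n', where the
# payload is '<hash> <ts_data>[ m:<hex delta>[,t:<hex delta>]]\r\n'.
payload = '9d41d8cd98f00b204e9800998ecf1def 1380144474.44444 m:186a0,t:4\r\n'
assert len(payload) == 0x3f  # hence the '3f' prefix in the assertion above
# m:186a0 == 100000 raw units == the 1.0s gap between ts_meta and ts_data;
# t:4 == the 0.00004s gap between ts_ctype and ts_data. The plain legacy line
# for ...ecf0abc is 0x33 bytes and the 'm:186a0'-only line is 0x3b bytes.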
def test_missing_check_far_end_disconnect(self):
@ -1545,8 +1550,10 @@ class TestModuleMethods(unittest.TestCase):
object_hash = '9d41d8cd98f00b204e9800998ecf0abc'
ts_iter = make_timestamp_iter()
t_data = next(ts_iter)
t_type = next(ts_iter)
t_meta = next(ts_iter)
d_meta_data = t_meta.raw - t_data.raw
d_type_data = t_type.raw - t_data.raw
# equal data and meta timestamps -> legacy single timestamp string
expected = '%s %s' % (object_hash, t_data.internal)
@ -1560,9 +1567,36 @@ class TestModuleMethods(unittest.TestCase):
expected,
ssync_sender.encode_missing(object_hash, t_data, ts_meta=t_meta))
# newer meta timestamp -> hex data delta encoded as extra message part
# content type timestamp equals data timestamp -> no delta
expected = '%s %s m:%x' % (object_hash, t_data.internal, d_meta_data)
self.assertEqual(
expected,
ssync_sender.encode_missing(object_hash, t_data, t_meta, t_data))
# content type timestamp newer than data timestamp -> delta encoded
expected = ('%s %s m:%x,t:%x'
% (object_hash, t_data.internal, d_meta_data, d_type_data))
self.assertEqual(
expected,
ssync_sender.encode_missing(object_hash, t_data, t_meta, t_type))
# content type timestamp equal to meta timestamp -> delta encoded
expected = ('%s %s m:%x,t:%x'
% (object_hash, t_data.internal, d_meta_data, d_meta_data))
self.assertEqual(
expected,
ssync_sender.encode_missing(object_hash, t_data, t_meta, t_meta))
# test encode and decode functions invert
expected = {'object_hash': object_hash, 'ts_meta': t_meta,
'ts_data': t_data, 'ts_ctype': t_type}
msg = ssync_sender.encode_missing(**expected)
actual = ssync_receiver.decode_missing(msg)
self.assertEqual(expected, actual)
expected = {'object_hash': object_hash, 'ts_meta': t_meta,
'ts_data': t_meta, 'ts_ctype': t_meta}
msg = ssync_sender.encode_missing(**expected)
actual = ssync_receiver.decode_missing(msg)
self.assertEqual(expected, actual)
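# Editorial worked example (not part of this patch): the m:/t: sub-parts are
# hex-encoded offsets from ts_data in Timestamp.raw units (1e-5 seconds).
# The raw values below are hypothetical, chosen only for illustration.
t_data_raw = 138014447000000            # i.e. 1380144470.00000
t_meta_raw = t_data_raw + 100000        # one second later
t_ctype_raw = t_data_raw + 4            # 40 microseconds later
suffix = 'm:%x,t:%x' % (t_meta_raw - t_data_raw, t_ctype_raw - t_data_raw)
assert suffix == 'm:186a0,t:4'
# decode_missing() inverts this by adding the hex deltas back onto ts_data;
# an absent sub-part means the corresponding timestamp equals ts_data.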


@ -3210,7 +3210,8 @@ class TestObjectController(unittest.TestCase):
backend_requests.append((method, path, headers))
req = Request.blank('/v1/a/c/o', {}, method='POST',
headers={'X-Object-Meta-Color': 'Blue',
'Content-Type': 'text/plain'})
# we want the container_info response to say a policy index of 1
resp_headers = {'X-Backend-Storage-Policy-Index': 1}
@ -3271,6 +3272,7 @@ class TestObjectController(unittest.TestCase):
backend_requests = []
req = Request.blank('/v1/a/c/o', {}, method='POST',
headers={'X-Object-Meta-Color': 'Blue',
'Content-Type': 'text/plain',
'X-Backend-Storage-Policy-Index': 0})
with mocked_http_conn(
200, 200, 202, 202, 202,