diff --git a/swift/obj/auditor.py b/swift/obj/auditor.py index ffb358da63..611eedd4da 100644 --- a/swift/obj/auditor.py +++ b/swift/obj/auditor.py @@ -240,7 +240,7 @@ class AuditorWorker(object): df = diskfile_mgr.get_diskfile_from_audit_location(location) reader = None try: - with df.open(): + with df.open(modernize=True): metadata = df.get_metadata() obj_size = int(metadata['Content-Length']) if self.stats_sizes: diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 7f6456e7dc..96f45954f4 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -131,11 +131,12 @@ def _encode_metadata(metadata): return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items())) -def read_metadata(fd): +def read_metadata(fd, add_missing_checksum=False): """ Helper function to read the pickled metadata from an object file. :param fd: file descriptor or filename to load the metadata from + :param add_missing_checksum: if set and checksum is missing, add it :returns: dictionary of metadata """ @@ -159,12 +160,17 @@ def read_metadata(fd): metadata_checksum = None try: metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY) - except (IOError, OSError) as e: + except (IOError, OSError): # All the interesting errors were handled above; the only thing left # here is ENODATA / ENOATTR to indicate that this attribute doesn't # exist. This is fine; it just means that this object predates the # introduction of metadata checksums. - pass + if add_missing_checksum: + new_checksum = hashlib.md5(metadata).hexdigest() + try: + xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum) + except (IOError, OSError) as e: + logging.error("Error adding metadata: %s" % e) if metadata_checksum: computed_checksum = hashlib.md5(metadata).hexdigest() @@ -2182,7 +2188,7 @@ class BaseDiskFile(object): return cls(mgr, device_path, None, partition, _datadir=hash_dir_path, policy=policy) - def open(self): + def open(self, modernize=False): """ Open the object. @@ -2190,6 +2196,10 @@ class BaseDiskFile(object): the associated metadata in the extended attributes, additionally combining metadata from fast-POST `.meta` files. + :param modernize: if set, update this diskfile to the latest format. + Currently, this means adding metadata checksums if none are + present. + .. note:: An implementation is allowed to raise any of the following @@ -2228,7 +2238,8 @@ class BaseDiskFile(object): self._data_file = file_info.get('data_file') if not self._data_file: raise self._construct_exception_from_ts_file(**file_info) - self._fp = self._construct_from_data_file(**file_info) + self._fp = self._construct_from_data_file( + modernize=modernize, **file_info) # This method must populate the internal _metadata attribute. self._metadata = self._metadata or {} return self @@ -2395,7 +2406,8 @@ class BaseDiskFile(object): self._content_length = obj_size return obj_size - def _failsafe_read_metadata(self, source, quarantine_filename=None): + def _failsafe_read_metadata(self, source, quarantine_filename=None, + add_missing_checksum=False): """ Read metadata from source object file. In case of failure, quarantine the file. @@ -2405,9 +2417,11 @@ class BaseDiskFile(object): :param source: file descriptor or filename to load the metadata from :param quarantine_filename: full path of file to load the metadata from + :param add_missing_checksum: if True and no metadata checksum is + present, generate one and write it down """ try: - return read_metadata(source) + return read_metadata(source, add_missing_checksum) except (DiskFileXattrNotSupported, DiskFileNotExist): raise except DiskFileBadMetadataChecksum as err: @@ -2437,6 +2451,7 @@ class BaseDiskFile(object): ctypefile_metadata.get('Content-Type-Timestamp') def _construct_from_data_file(self, data_file, meta_file, ctype_file, + modernize=False, **kwargs): """ Open the `.data` file to fetch its metadata, and fetch the metadata @@ -2447,16 +2462,21 @@ class BaseDiskFile(object): :param meta_file: on-disk fast-POST `.meta` file being considered :param ctype_file: on-disk fast-POST `.meta` file being considered that contains content-type and content-type timestamp + :param modernize: whether to update the on-disk files to the newest + format :returns: an opened data file pointer :raises DiskFileError: various exceptions from :func:`swift.obj.diskfile.DiskFile._verify_data_file` """ fp = open(data_file, 'rb') - self._datafile_metadata = self._failsafe_read_metadata(fp, data_file) + self._datafile_metadata = self._failsafe_read_metadata( + fp, data_file, + add_missing_checksum=modernize) self._metadata = {} if meta_file: self._metafile_metadata = self._failsafe_read_metadata( - meta_file, meta_file) + meta_file, meta_file, + add_missing_checksum=modernize) if ctype_file and ctype_file != meta_file: self._merge_content_type_metadata(ctype_file) sys_metadata = dict( diff --git a/swift/obj/mem_diskfile.py b/swift/obj/mem_diskfile.py index 83a7309447..eaa3a7d533 100644 --- a/swift/obj/mem_diskfile.py +++ b/swift/obj/mem_diskfile.py @@ -273,7 +273,7 @@ class DiskFile(object): self._filesystem = fs self.fragments = None - def open(self): + def open(self, modernize=False): """ Open the file and read the metadata. diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index c57f1531a4..32d4df4069 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -19,6 +19,7 @@ import mock import os import time import string +import xattr from shutil import rmtree from hashlib import md5 from tempfile import mkdtemp @@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase): run_tests(self.disk_file_p1) run_tests(self.disk_file_ec) + def test_object_audit_adds_metadata_checksums(self): + disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md', + policy=POLICIES.legacy) + + # simulate a PUT + now = time.time() + data = b'boots and cats and ' * 1024 + hasher = md5() + with disk_file.create() as writer: + writer.write(data) + hasher.update(data) + etag = hasher.hexdigest() + metadata = { + 'ETag': etag, + 'X-Timestamp': str(normalize_timestamp(now)), + 'Content-Length': len(data), + 'Content-Type': 'the old type', + } + writer.put(metadata) + writer.commit(Timestamp(now)) + + # simulate a subsequent POST + post_metadata = metadata.copy() + post_metadata['Content-Type'] = 'the new type' + post_metadata['X-Object-Meta-Biff'] = 'buff' + post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1)) + disk_file.write_metadata(post_metadata) + + file_paths = [os.path.join(disk_file._datadir, fname) + for fname in os.listdir(disk_file._datadir) + if fname not in ('.', '..')] + file_paths.sort() + + # sanity check: make sure we have a .data and a .meta file + self.assertEqual(len(file_paths), 2) + self.assertTrue(file_paths[0].endswith(".data")) + self.assertTrue(file_paths[1].endswith(".meta")) + + # Go remove the xattr "user.swift.metadata_checksum" as if this + # object were written before Swift supported metadata checksums. + for file_path in file_paths: + xattr.removexattr(file_path, "user.swift.metadata_checksum") + + # Run the auditor... + auditor_worker = auditor.AuditorWorker(self.conf, self.logger, + self.rcache, self.devices) + auditor_worker.object_audit( + AuditLocation(disk_file._datadir, 'sda', '0', + policy=disk_file.policy)) + self.assertEqual(auditor_worker.quarantines, 0) # sanity + + # ...and the checksums are back + for file_path in file_paths: + metadata = xattr.getxattr(file_path, "user.swift.metadata") + i = 1 + while True: + try: + metadata += xattr.getxattr( + file_path, "user.swift.metadata%d" % i) + i += 1 + except (IOError, OSError): + break + + checksum = xattr.getxattr( + file_path, "user.swift.metadata_checksum") + + self.assertEqual(checksum, md5(metadata).hexdigest()) + def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)