From 62d21eceb7ebd061d527339b85d5d9e24fa13a2b Mon Sep 17 00:00:00 2001 From: Samuel Merritt Date: Thu, 19 Oct 2017 17:13:00 -0700 Subject: [PATCH] Add metadata checksums to old objects in auditor. When the object auditor examines an object, it will now add any missing metadata checksums. This goes for both .data and .meta files, but not .ts files, as tombstones don't live very long anyway. Change-Id: I9417a8b0cc5099470845c0504c834746188d89e8 --- swift/obj/auditor.py | 2 +- swift/obj/diskfile.py | 38 ++++++++++++++----- swift/obj/mem_diskfile.py | 2 +- test/unit/obj/test_auditor.py | 69 +++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 11 deletions(-) diff --git a/swift/obj/auditor.py b/swift/obj/auditor.py index ffb358da63..611eedd4da 100644 --- a/swift/obj/auditor.py +++ b/swift/obj/auditor.py @@ -240,7 +240,7 @@ class AuditorWorker(object): df = diskfile_mgr.get_diskfile_from_audit_location(location) reader = None try: - with df.open(): + with df.open(modernize=True): metadata = df.get_metadata() obj_size = int(metadata['Content-Length']) if self.stats_sizes: diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 7f6456e7dc..96f45954f4 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -131,11 +131,12 @@ def _encode_metadata(metadata): return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items())) -def read_metadata(fd): +def read_metadata(fd, add_missing_checksum=False): """ Helper function to read the pickled metadata from an object file. :param fd: file descriptor or filename to load the metadata from + :param add_missing_checksum: if set and checksum is missing, add it :returns: dictionary of metadata """ @@ -159,12 +160,17 @@ def read_metadata(fd): metadata_checksum = None try: metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY) - except (IOError, OSError) as e: + except (IOError, OSError): # All the interesting errors were handled above; the only thing left # here is ENODATA / ENOATTR to indicate that this attribute doesn't # exist. This is fine; it just means that this object predates the # introduction of metadata checksums. - pass + if add_missing_checksum: + new_checksum = hashlib.md5(metadata).hexdigest() + try: + xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum) + except (IOError, OSError) as e: + logging.error("Error adding metadata: %s" % e) if metadata_checksum: computed_checksum = hashlib.md5(metadata).hexdigest() @@ -2182,7 +2188,7 @@ class BaseDiskFile(object): return cls(mgr, device_path, None, partition, _datadir=hash_dir_path, policy=policy) - def open(self): + def open(self, modernize=False): """ Open the object. @@ -2190,6 +2196,10 @@ class BaseDiskFile(object): the associated metadata in the extended attributes, additionally combining metadata from fast-POST `.meta` files. + :param modernize: if set, update this diskfile to the latest format. + Currently, this means adding metadata checksums if none are + present. + .. note:: An implementation is allowed to raise any of the following @@ -2228,7 +2238,8 @@ class BaseDiskFile(object): self._data_file = file_info.get('data_file') if not self._data_file: raise self._construct_exception_from_ts_file(**file_info) - self._fp = self._construct_from_data_file(**file_info) + self._fp = self._construct_from_data_file( + modernize=modernize, **file_info) # This method must populate the internal _metadata attribute. self._metadata = self._metadata or {} return self @@ -2395,7 +2406,8 @@ class BaseDiskFile(object): self._content_length = obj_size return obj_size - def _failsafe_read_metadata(self, source, quarantine_filename=None): + def _failsafe_read_metadata(self, source, quarantine_filename=None, + add_missing_checksum=False): """ Read metadata from source object file. In case of failure, quarantine the file. @@ -2405,9 +2417,11 @@ class BaseDiskFile(object): :param source: file descriptor or filename to load the metadata from :param quarantine_filename: full path of file to load the metadata from + :param add_missing_checksum: if True and no metadata checksum is + present, generate one and write it down """ try: - return read_metadata(source) + return read_metadata(source, add_missing_checksum) except (DiskFileXattrNotSupported, DiskFileNotExist): raise except DiskFileBadMetadataChecksum as err: @@ -2437,6 +2451,7 @@ class BaseDiskFile(object): ctypefile_metadata.get('Content-Type-Timestamp') def _construct_from_data_file(self, data_file, meta_file, ctype_file, + modernize=False, **kwargs): """ Open the `.data` file to fetch its metadata, and fetch the metadata @@ -2447,16 +2462,21 @@ class BaseDiskFile(object): :param meta_file: on-disk fast-POST `.meta` file being considered :param ctype_file: on-disk fast-POST `.meta` file being considered that contains content-type and content-type timestamp + :param modernize: whether to update the on-disk files to the newest + format :returns: an opened data file pointer :raises DiskFileError: various exceptions from :func:`swift.obj.diskfile.DiskFile._verify_data_file` """ fp = open(data_file, 'rb') - self._datafile_metadata = self._failsafe_read_metadata(fp, data_file) + self._datafile_metadata = self._failsafe_read_metadata( + fp, data_file, + add_missing_checksum=modernize) self._metadata = {} if meta_file: self._metafile_metadata = self._failsafe_read_metadata( - meta_file, meta_file) + meta_file, meta_file, + add_missing_checksum=modernize) if ctype_file and ctype_file != meta_file: self._merge_content_type_metadata(ctype_file) sys_metadata = dict( diff --git a/swift/obj/mem_diskfile.py b/swift/obj/mem_diskfile.py index 83a7309447..eaa3a7d533 100644 --- a/swift/obj/mem_diskfile.py +++ b/swift/obj/mem_diskfile.py @@ -273,7 +273,7 @@ class DiskFile(object): self._filesystem = fs self.fragments = None - def open(self): + def open(self, modernize=False): """ Open the file and read the metadata. diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index c57f1531a4..32d4df4069 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -19,6 +19,7 @@ import mock import os import time import string +import xattr from shutil import rmtree from hashlib import md5 from tempfile import mkdtemp @@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase): run_tests(self.disk_file_p1) run_tests(self.disk_file_ec) + def test_object_audit_adds_metadata_checksums(self): + disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md', + policy=POLICIES.legacy) + + # simulate a PUT + now = time.time() + data = b'boots and cats and ' * 1024 + hasher = md5() + with disk_file.create() as writer: + writer.write(data) + hasher.update(data) + etag = hasher.hexdigest() + metadata = { + 'ETag': etag, + 'X-Timestamp': str(normalize_timestamp(now)), + 'Content-Length': len(data), + 'Content-Type': 'the old type', + } + writer.put(metadata) + writer.commit(Timestamp(now)) + + # simulate a subsequent POST + post_metadata = metadata.copy() + post_metadata['Content-Type'] = 'the new type' + post_metadata['X-Object-Meta-Biff'] = 'buff' + post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1)) + disk_file.write_metadata(post_metadata) + + file_paths = [os.path.join(disk_file._datadir, fname) + for fname in os.listdir(disk_file._datadir) + if fname not in ('.', '..')] + file_paths.sort() + + # sanity check: make sure we have a .data and a .meta file + self.assertEqual(len(file_paths), 2) + self.assertTrue(file_paths[0].endswith(".data")) + self.assertTrue(file_paths[1].endswith(".meta")) + + # Go remove the xattr "user.swift.metadata_checksum" as if this + # object were written before Swift supported metadata checksums. + for file_path in file_paths: + xattr.removexattr(file_path, "user.swift.metadata_checksum") + + # Run the auditor... + auditor_worker = auditor.AuditorWorker(self.conf, self.logger, + self.rcache, self.devices) + auditor_worker.object_audit( + AuditLocation(disk_file._datadir, 'sda', '0', + policy=disk_file.policy)) + self.assertEqual(auditor_worker.quarantines, 0) # sanity + + # ...and the checksums are back + for file_path in file_paths: + metadata = xattr.getxattr(file_path, "user.swift.metadata") + i = 1 + while True: + try: + metadata += xattr.getxattr( + file_path, "user.swift.metadata%d" % i) + i += 1 + except (IOError, OSError): + break + + checksum = xattr.getxattr( + file_path, "user.swift.metadata_checksum") + + self.assertEqual(checksum, md5(metadata).hexdigest()) + def test_object_audit_diff_data(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices)