Merge "Add metadata checksums to old objects in auditor."
This commit is contained in:
commit
5731fab90b
@ -240,7 +240,7 @@ class AuditorWorker(object):
|
|||||||
df = diskfile_mgr.get_diskfile_from_audit_location(location)
|
df = diskfile_mgr.get_diskfile_from_audit_location(location)
|
||||||
reader = None
|
reader = None
|
||||||
try:
|
try:
|
||||||
with df.open():
|
with df.open(modernize=True):
|
||||||
metadata = df.get_metadata()
|
metadata = df.get_metadata()
|
||||||
obj_size = int(metadata['Content-Length'])
|
obj_size = int(metadata['Content-Length'])
|
||||||
if self.stats_sizes:
|
if self.stats_sizes:
|
||||||
|
@ -131,11 +131,12 @@ def _encode_metadata(metadata):
|
|||||||
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
||||||
|
|
||||||
|
|
||||||
def read_metadata(fd):
|
def read_metadata(fd, add_missing_checksum=False):
|
||||||
"""
|
"""
|
||||||
Helper function to read the pickled metadata from an object file.
|
Helper function to read the pickled metadata from an object file.
|
||||||
|
|
||||||
:param fd: file descriptor or filename to load the metadata from
|
:param fd: file descriptor or filename to load the metadata from
|
||||||
|
:param add_missing_checksum: if set and checksum is missing, add it
|
||||||
|
|
||||||
:returns: dictionary of metadata
|
:returns: dictionary of metadata
|
||||||
"""
|
"""
|
||||||
@ -159,12 +160,17 @@ def read_metadata(fd):
|
|||||||
metadata_checksum = None
|
metadata_checksum = None
|
||||||
try:
|
try:
|
||||||
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
|
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
|
||||||
except (IOError, OSError) as e:
|
except (IOError, OSError):
|
||||||
# All the interesting errors were handled above; the only thing left
|
# All the interesting errors were handled above; the only thing left
|
||||||
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
|
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
|
||||||
# exist. This is fine; it just means that this object predates the
|
# exist. This is fine; it just means that this object predates the
|
||||||
# introduction of metadata checksums.
|
# introduction of metadata checksums.
|
||||||
pass
|
if add_missing_checksum:
|
||||||
|
new_checksum = hashlib.md5(metadata).hexdigest()
|
||||||
|
try:
|
||||||
|
xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum)
|
||||||
|
except (IOError, OSError) as e:
|
||||||
|
logging.error("Error adding metadata: %s" % e)
|
||||||
|
|
||||||
if metadata_checksum:
|
if metadata_checksum:
|
||||||
computed_checksum = hashlib.md5(metadata).hexdigest()
|
computed_checksum = hashlib.md5(metadata).hexdigest()
|
||||||
@ -2182,7 +2188,7 @@ class BaseDiskFile(object):
|
|||||||
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
|
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
|
||||||
policy=policy)
|
policy=policy)
|
||||||
|
|
||||||
def open(self):
|
def open(self, modernize=False):
|
||||||
"""
|
"""
|
||||||
Open the object.
|
Open the object.
|
||||||
|
|
||||||
@ -2190,6 +2196,10 @@ class BaseDiskFile(object):
|
|||||||
the associated metadata in the extended attributes, additionally
|
the associated metadata in the extended attributes, additionally
|
||||||
combining metadata from fast-POST `.meta` files.
|
combining metadata from fast-POST `.meta` files.
|
||||||
|
|
||||||
|
:param modernize: if set, update this diskfile to the latest format.
|
||||||
|
Currently, this means adding metadata checksums if none are
|
||||||
|
present.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
An implementation is allowed to raise any of the following
|
An implementation is allowed to raise any of the following
|
||||||
@ -2228,7 +2238,8 @@ class BaseDiskFile(object):
|
|||||||
self._data_file = file_info.get('data_file')
|
self._data_file = file_info.get('data_file')
|
||||||
if not self._data_file:
|
if not self._data_file:
|
||||||
raise self._construct_exception_from_ts_file(**file_info)
|
raise self._construct_exception_from_ts_file(**file_info)
|
||||||
self._fp = self._construct_from_data_file(**file_info)
|
self._fp = self._construct_from_data_file(
|
||||||
|
modernize=modernize, **file_info)
|
||||||
# This method must populate the internal _metadata attribute.
|
# This method must populate the internal _metadata attribute.
|
||||||
self._metadata = self._metadata or {}
|
self._metadata = self._metadata or {}
|
||||||
return self
|
return self
|
||||||
@ -2395,7 +2406,8 @@ class BaseDiskFile(object):
|
|||||||
self._content_length = obj_size
|
self._content_length = obj_size
|
||||||
return obj_size
|
return obj_size
|
||||||
|
|
||||||
def _failsafe_read_metadata(self, source, quarantine_filename=None):
|
def _failsafe_read_metadata(self, source, quarantine_filename=None,
|
||||||
|
add_missing_checksum=False):
|
||||||
"""
|
"""
|
||||||
Read metadata from source object file. In case of failure, quarantine
|
Read metadata from source object file. In case of failure, quarantine
|
||||||
the file.
|
the file.
|
||||||
@ -2405,9 +2417,11 @@ class BaseDiskFile(object):
|
|||||||
|
|
||||||
:param source: file descriptor or filename to load the metadata from
|
:param source: file descriptor or filename to load the metadata from
|
||||||
:param quarantine_filename: full path of file to load the metadata from
|
:param quarantine_filename: full path of file to load the metadata from
|
||||||
|
:param add_missing_checksum: if True and no metadata checksum is
|
||||||
|
present, generate one and write it down
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return read_metadata(source)
|
return read_metadata(source, add_missing_checksum)
|
||||||
except (DiskFileXattrNotSupported, DiskFileNotExist):
|
except (DiskFileXattrNotSupported, DiskFileNotExist):
|
||||||
raise
|
raise
|
||||||
except DiskFileBadMetadataChecksum as err:
|
except DiskFileBadMetadataChecksum as err:
|
||||||
@ -2437,6 +2451,7 @@ class BaseDiskFile(object):
|
|||||||
ctypefile_metadata.get('Content-Type-Timestamp')
|
ctypefile_metadata.get('Content-Type-Timestamp')
|
||||||
|
|
||||||
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
|
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
|
||||||
|
modernize=False,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
"""
|
"""
|
||||||
Open the `.data` file to fetch its metadata, and fetch the metadata
|
Open the `.data` file to fetch its metadata, and fetch the metadata
|
||||||
@ -2447,16 +2462,21 @@ class BaseDiskFile(object):
|
|||||||
:param meta_file: on-disk fast-POST `.meta` file being considered
|
:param meta_file: on-disk fast-POST `.meta` file being considered
|
||||||
:param ctype_file: on-disk fast-POST `.meta` file being considered that
|
:param ctype_file: on-disk fast-POST `.meta` file being considered that
|
||||||
contains content-type and content-type timestamp
|
contains content-type and content-type timestamp
|
||||||
|
:param modernize: whether to update the on-disk files to the newest
|
||||||
|
format
|
||||||
:returns: an opened data file pointer
|
:returns: an opened data file pointer
|
||||||
:raises DiskFileError: various exceptions from
|
:raises DiskFileError: various exceptions from
|
||||||
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
|
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
|
||||||
"""
|
"""
|
||||||
fp = open(data_file, 'rb')
|
fp = open(data_file, 'rb')
|
||||||
self._datafile_metadata = self._failsafe_read_metadata(fp, data_file)
|
self._datafile_metadata = self._failsafe_read_metadata(
|
||||||
|
fp, data_file,
|
||||||
|
add_missing_checksum=modernize)
|
||||||
self._metadata = {}
|
self._metadata = {}
|
||||||
if meta_file:
|
if meta_file:
|
||||||
self._metafile_metadata = self._failsafe_read_metadata(
|
self._metafile_metadata = self._failsafe_read_metadata(
|
||||||
meta_file, meta_file)
|
meta_file, meta_file,
|
||||||
|
add_missing_checksum=modernize)
|
||||||
if ctype_file and ctype_file != meta_file:
|
if ctype_file and ctype_file != meta_file:
|
||||||
self._merge_content_type_metadata(ctype_file)
|
self._merge_content_type_metadata(ctype_file)
|
||||||
sys_metadata = dict(
|
sys_metadata = dict(
|
||||||
|
@ -273,7 +273,7 @@ class DiskFile(object):
|
|||||||
self._filesystem = fs
|
self._filesystem = fs
|
||||||
self.fragments = None
|
self.fragments = None
|
||||||
|
|
||||||
def open(self):
|
def open(self, modernize=False):
|
||||||
"""
|
"""
|
||||||
Open the file and read the metadata.
|
Open the file and read the metadata.
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ import mock
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import string
|
import string
|
||||||
|
import xattr
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from tempfile import mkdtemp
|
from tempfile import mkdtemp
|
||||||
@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase):
|
|||||||
run_tests(self.disk_file_p1)
|
run_tests(self.disk_file_p1)
|
||||||
run_tests(self.disk_file_ec)
|
run_tests(self.disk_file_ec)
|
||||||
|
|
||||||
|
def test_object_audit_adds_metadata_checksums(self):
|
||||||
|
disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md',
|
||||||
|
policy=POLICIES.legacy)
|
||||||
|
|
||||||
|
# simulate a PUT
|
||||||
|
now = time.time()
|
||||||
|
data = b'boots and cats and ' * 1024
|
||||||
|
hasher = md5()
|
||||||
|
with disk_file.create() as writer:
|
||||||
|
writer.write(data)
|
||||||
|
hasher.update(data)
|
||||||
|
etag = hasher.hexdigest()
|
||||||
|
metadata = {
|
||||||
|
'ETag': etag,
|
||||||
|
'X-Timestamp': str(normalize_timestamp(now)),
|
||||||
|
'Content-Length': len(data),
|
||||||
|
'Content-Type': 'the old type',
|
||||||
|
}
|
||||||
|
writer.put(metadata)
|
||||||
|
writer.commit(Timestamp(now))
|
||||||
|
|
||||||
|
# simulate a subsequent POST
|
||||||
|
post_metadata = metadata.copy()
|
||||||
|
post_metadata['Content-Type'] = 'the new type'
|
||||||
|
post_metadata['X-Object-Meta-Biff'] = 'buff'
|
||||||
|
post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1))
|
||||||
|
disk_file.write_metadata(post_metadata)
|
||||||
|
|
||||||
|
file_paths = [os.path.join(disk_file._datadir, fname)
|
||||||
|
for fname in os.listdir(disk_file._datadir)
|
||||||
|
if fname not in ('.', '..')]
|
||||||
|
file_paths.sort()
|
||||||
|
|
||||||
|
# sanity check: make sure we have a .data and a .meta file
|
||||||
|
self.assertEqual(len(file_paths), 2)
|
||||||
|
self.assertTrue(file_paths[0].endswith(".data"))
|
||||||
|
self.assertTrue(file_paths[1].endswith(".meta"))
|
||||||
|
|
||||||
|
# Go remove the xattr "user.swift.metadata_checksum" as if this
|
||||||
|
# object were written before Swift supported metadata checksums.
|
||||||
|
for file_path in file_paths:
|
||||||
|
xattr.removexattr(file_path, "user.swift.metadata_checksum")
|
||||||
|
|
||||||
|
# Run the auditor...
|
||||||
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||||
|
self.rcache, self.devices)
|
||||||
|
auditor_worker.object_audit(
|
||||||
|
AuditLocation(disk_file._datadir, 'sda', '0',
|
||||||
|
policy=disk_file.policy))
|
||||||
|
self.assertEqual(auditor_worker.quarantines, 0) # sanity
|
||||||
|
|
||||||
|
# ...and the checksums are back
|
||||||
|
for file_path in file_paths:
|
||||||
|
metadata = xattr.getxattr(file_path, "user.swift.metadata")
|
||||||
|
i = 1
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
metadata += xattr.getxattr(
|
||||||
|
file_path, "user.swift.metadata%d" % i)
|
||||||
|
i += 1
|
||||||
|
except (IOError, OSError):
|
||||||
|
break
|
||||||
|
|
||||||
|
checksum = xattr.getxattr(
|
||||||
|
file_path, "user.swift.metadata_checksum")
|
||||||
|
|
||||||
|
self.assertEqual(checksum, md5(metadata).hexdigest())
|
||||||
|
|
||||||
def test_object_audit_diff_data(self):
|
def test_object_audit_diff_data(self):
|
||||||
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||||
self.rcache, self.devices)
|
self.rcache, self.devices)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user