From 745a7f04fe5456dd1b8d0c7a88cd3d564738fb56 Mon Sep 17 00:00:00 2001 From: indianwhocodes Date: Tue, 16 Aug 2022 09:19:55 -0700 Subject: [PATCH] Add validation method for metadata in ECDiskfile Historically, we've allowed objects to get on disk that may not have had all of their required EC metadata. Add a new method to sanity-check metadata in the auditor, and quarantine such invalid data. Additionally, call validation early in the reconstructor's reconstruct_fa() method so we do not have to attempt reconstruction for invalid EC fragments. Change-Id: I73551007843d27041f594923c59e6fd89caf17e5 --- swift/obj/auditor.py | 4 ++++ swift/obj/diskfile.py | 14 ++++++++++++++ swift/obj/reconstructor.py | 3 +++ test/unit/obj/common.py | 1 + test/unit/obj/test_auditor.py | 9 +++++++++ test/unit/obj/test_diskfile.py | 1 + test/unit/obj/test_ssync.py | 1 + 7 files changed, 33 insertions(+) diff --git a/swift/obj/auditor.py b/swift/obj/auditor.py index f9013748a9..6f3e7f60d5 100644 --- a/swift/obj/auditor.py +++ b/swift/obj/auditor.py @@ -258,6 +258,10 @@ class AuditorWorker(object): try: with df.open(modernize=True): metadata = df.get_metadata() + if not df.validate_metadata(): + df._quarantine( + df._data_file, + "Metadata failed validation") obj_size = int(metadata['Content-Length']) if self.stats_sizes: self.record_stats(obj_size) diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 0f3d7ef76d..efd8979077 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -2689,6 +2689,9 @@ class BaseDiskFile(object): exc = DiskFileDeleted(metadata=metadata) return exc + def validate_metadata(self): + return ('Content-Length' in self._datafile_metadata) + def _verify_name_matches_hash(self, data_file): """ @@ -3357,6 +3360,17 @@ class ECDiskFile(BaseDiskFile): raise DiskFileError( 'Bad frag_prefs: %r: %s' % (frag_prefs, e)) + def validate_metadata(self): + required_metadata = [ + 'Content-Length', + 'X-Object-Sysmeta-Ec-Frag-Index', + 'X-Object-Sysmeta-Ec-Etag', + ] + for header in required_metadata: + if not self._datafile_metadata.get(header): + return False + return True + @property def durable_timestamp(self): """ diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py index c3f35615a6..c98e1ff300 100644 --- a/swift/obj/reconstructor.py +++ b/swift/obj/reconstructor.py @@ -653,6 +653,9 @@ class ObjectReconstructor(Daemon): # of the node we're rebuilding to within the primary part list fi_to_rebuild = node['backend_index'] datafile_metadata = df.get_datafile_metadata() + if not df.validate_metadata(): + raise df._quarantine( + df._data_file, "Invalid fragment #%s" % df._frag_index) local_timestamp = Timestamp(datafile_metadata['X-Timestamp']) path = datafile_metadata['name'] diff --git a/test/unit/obj/common.py b/test/unit/obj/common.py index 90c6922be3..41cc08fa3b 100644 --- a/test/unit/obj/common.py +++ b/test/unit/obj/common.py @@ -38,6 +38,7 @@ def write_diskfile(df, timestamp, data=b'test data', frag_index=None, metadata.update(extra_metadata) if frag_index is not None: metadata['X-Object-Sysmeta-Ec-Frag-Index'] = str(frag_index) + metadata['X-Object-Sysmeta-Ec-Etag'] = 'fake-etag' writer.put(metadata) if commit and legacy_durable: # simulate legacy .durable file creation diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index 20f2b86853..8bb3780124 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -224,6 +224,11 @@ class TestAuditor(TestAuditorBase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(writer._fd).st_size), } + if disk_file.policy.policy_type == EC_POLICY: + metadata.update({ + 'X-Object-Sysmeta-Ec-Frag-Index': '1', + 'X-Object-Sysmeta-Ec-Etag': 'fake-etag', + }) writer.put(metadata) writer.commit(Timestamp(timestamp)) pre_quarantines = auditor_worker.quarantines @@ -368,6 +373,8 @@ class TestAuditor(TestAuditorBase): 'ETag': etag, 'X-Timestamp': timestamp, 'Content-Length': len(data), + 'X-Object-Sysmeta-Ec-Frag-Index': '1', + 'X-Object-Sysmeta-Ec-Etag': 'fake-etag', } writer.put(metadata) writer.commit(Timestamp(timestamp)) @@ -1639,6 +1646,8 @@ class TestAuditWatchers(TestAuditorBase): 'X-Timestamp': timestamp.internal, 'Content-Length': str(len(frag_0)), 'X-Object-Meta-Flavor': 'peach', + 'X-Object-Sysmeta-Ec-Frag-Index': '1', + 'X-Object-Sysmeta-Ec-Etag': 'fake-etag', } writer.put(metadata) writer.commit(timestamp) diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 37dbb6be95..865985e98d 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -6381,6 +6381,7 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): 'ETag': md5('test data').hexdigest(), 'X-Timestamp': ts.internal, 'Content-Length': str(len('test data')), + 'X-Object-Sysmeta-Ec-Etag': 'fake-etag', 'X-Object-Sysmeta-Ec-Frag-Index': '3', } diff --git a/test/unit/obj/test_ssync.py b/test/unit/obj/test_ssync.py index 9d5af919ca..5db107461f 100644 --- a/test/unit/obj/test_ssync.py +++ b/test/unit/obj/test_ssync.py @@ -129,6 +129,7 @@ class TestBaseSsync(BaseTest): frag_index=frag_index) if policy.policy_type == EC_POLICY: metadata['X-Object-Sysmeta-Ec-Frag-Index'] = str(frag_index) + metadata['X-Object-Sysmeta-Ec-Etag'] = 'fake-etag' df = self._make_diskfile( device=self.device, partition=self.partition, account='a', container='c', obj=obj_name, body=object_data,