From 404e1f273260cde0040fb6cecd2566aa295b4fec Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Wed, 2 Apr 2025 10:24:04 +0100 Subject: [PATCH] s3api: Add support for crc64nvme checksum calculation Add anycrc as a soft dependency in case ISA-L isn't available. Plus we'll want it later: when we start writing down checksums, we'll need it to combine per-part checksums for MPUs. Like with crc32c, we won't provide any pure-python version as the CPU-intensiveness could present a DoS vector. Worst case, we 501 as before. Co-Authored-By: Tim Burke Signed-off-by: Tim Burke Change-Id: Ia05e5677a8ca89a62b142078abfb7371b1badd3f Signed-off-by: Alistair Coles --- swift/common/utils/checksum.py | 64 ++++++++- test/s3api/test_object_checksums.py | 24 +++- test/unit/__init__.py | 11 ++ .../common/middleware/s3api/test_s3api.py | 8 +- .../common/middleware/s3api/test_s3request.py | 40 +++--- test/unit/common/utils/test_checksum.py | 126 ++++++++++++++++-- 6 files changed, 239 insertions(+), 34 deletions(-) diff --git a/swift/common/utils/checksum.py b/swift/common/utils/checksum.py index 14e3a158b6..d62b284a57 100644 --- a/swift/common/utils/checksum.py +++ b/swift/common/utils/checksum.py @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +try: + import anycrc +except ImportError: + anycrc = None import binascii import ctypes import ctypes.util @@ -28,6 +32,15 @@ except ImportError: pkg_files = None +# See if anycrc is available... +if anycrc: + crc32c_anycrc = anycrc.Model('CRC32C').calc + crc64nvme_anycrc = anycrc.Model('CRC64-NVME').calc +else: + crc32c_anycrc = None + crc64nvme_anycrc = None + + # If isal is available system-wide, great! isal_lib = ctypes.util.find_library('isal') if isal_lib is None and pkg_files is not None: @@ -39,6 +52,7 @@ if isal_lib is None and pkg_files is not None: isal_lib = isal_libs[0].locate() isal = ctypes.CDLL(isal_lib) if isal_lib else None + if hasattr(isal, 'crc32_iscsi'): # isa-l >= 2.16 isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint] isal.crc32_iscsi.restype = ctypes.c_uint @@ -55,7 +69,22 @@ if hasattr(isal, 'crc32_iscsi'): # isa-l >= 2.16 else: crc32c_isal = None +if hasattr(isal, 'crc64_rocksoft_refl'): # isa-l >= 2.31.0 + isal.crc64_rocksoft_refl.argtypes = [ + ctypes.c_uint64, ctypes.c_char_p, ctypes.c_uint64] + isal.crc64_rocksoft_refl.restype = ctypes.c_uint64 + def crc64nvme_isal(data, value=0): + return isal.crc64_rocksoft_refl( + value, + data, + len(data), + ) +else: + crc64nvme_isal = None + + +# The kernel may also provide crc32c AF_ALG = getattr(socket, 'AF_ALG', 38) try: _sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET) @@ -98,9 +127,18 @@ def _select_crc32c_impl(): # AMD 3900XT | ~20GB/s | ~5GB/s # # i.e., ISA-L is consistently 3-5x faster than kernel sockets - selected = crc32c_isal or crc32c_kern or None + selected = crc32c_isal or crc32c_kern or crc32c_anycrc or None if not selected: - raise NotImplementedError('no crc32c implementation, install isal') + raise NotImplementedError( + 'no crc32c implementation, install isal or anycrc') + return selected + + +def _select_crc64nvme_impl(): + selected = crc64nvme_isal or crc64nvme_anycrc or None + if not selected: + raise NotImplementedError( + 'no crc64nvme implementation, install isal or anycrc') return selected @@ -145,7 +183,8 @@ class CRCHasher(object): def digest(self): """ - Return the current CRC value as a 4-byte big-endian integer. + Return the current CRC value as a big-endian integer of length + ``width / 8`` bytes. :returns: Packed CRC value. (bytes) """ @@ -187,8 +226,11 @@ def crc32c(data=None, initial_value=0): def crc64nvme(data=None, initial_value=0): - '''Stub for s3api''' - raise NotImplementedError + return CRCHasher('crc64nvme', + _select_crc64nvme_impl(), + data=data, + initial_value=initial_value, + width=64) def log_selected_implementation(logger): @@ -196,6 +238,16 @@ def log_selected_implementation(logger): impl = _select_crc32c_impl() except NotImplementedError: logger.warning( - 'No implementation found for CRC32C; install ISA-L for support.') + 'No implementation found for CRC32C; ' + 'install ISA-L or anycrc for support.') else: logger.info('Using %s implementation for CRC32C.' % impl.__name__) + + try: + impl = _select_crc64nvme_impl() + except NotImplementedError: + logger.warning( + 'No implementation found for CRC64NVME; ' + 'install ISA-L or anycrc for support.') + else: + logger.info('Using %s implementation for CRC64NVME.' % impl.__name__) diff --git a/test/s3api/test_object_checksums.py b/test/s3api/test_object_checksums.py index dc7661cdf2..783209a38e 100644 --- a/test/s3api/test_object_checksums.py +++ b/test/s3api/test_object_checksums.py @@ -191,7 +191,11 @@ class ObjectChecksumMixin(object): 'ChecksumAlgorithm': self.ALGORITHM, } if boto_at_least(1, 36): - checksum_kwargs['ChecksumType'] = 'COMPOSITE' + if self.ALGORITHM == 'CRC64NVME': + # crc64nvme only allows full-object + checksum_kwargs['ChecksumType'] = 'FULL_OBJECT' + else: + checksum_kwargs['ChecksumType'] = 'COMPOSITE' obj_name = self.create_name(self.ALGORITHM + '-mpu-complete-good') create_mpu_resp = self.client.create_multipart_upload( @@ -247,6 +251,24 @@ class TestObjectChecksumCRC32C(ObjectChecksumMixin, BaseS3TestCaseWithBucket): super().setUpClass() +class TestObjectChecksumCRC64NVME(ObjectChecksumMixin, + BaseS3TestCaseWithBucket): + ALGORITHM = 'CRC64NVME' + EXPECTED = 'rosUhgp5mIg=' + INVALID = 'rosUhgp5mIh=' + BAD = 'sosUhgp5mIg=' + + @classmethod + def setUpClass(cls): + if [int(x) for x in botocore.__version__.split('.')] < [1, 36]: + raise SkipTest('botocore cannot crc64nvme (run ' + '`pip install -U boto3 botocore`)') + if not botocore.httpchecksum.HAS_CRT: + raise SkipTest('botocore cannot crc64nvme (run ' + '`pip install awscrt`)') + super().setUpClass() + + class TestObjectChecksumSHA1(ObjectChecksumMixin, BaseS3TestCaseWithBucket): ALGORITHM = 'SHA1' EXPECTED = '98O8HYCOBHMq32eZZczDTKeuNEE=' diff --git a/test/unit/__init__.py b/test/unit/__init__.py index a7f966c299..b37d1dbd21 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1096,6 +1096,17 @@ def requires_crc32c(func): return wrapper +def requires_crc64nvme(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + checksum.crc64nvme() + except NotImplementedError as e: + raise SkipTest(str(e)) + return func(*args, **kwargs) + return wrapper + + class StubResponse(object): def __init__(self, status, body=b'', headers=None, frag_index=None, diff --git a/test/unit/common/middleware/s3api/test_s3api.py b/test/unit/common/middleware/s3api/test_s3api.py index 6b6ac082f9..f996326562 100644 --- a/test/unit/common/middleware/s3api/test_s3api.py +++ b/test/unit/common/middleware/s3api/test_s3api.py @@ -248,11 +248,15 @@ class TestS3ApiMiddleware(S3ApiTestCase): with mock.patch('swift.common.middleware.s3api.s3api.get_logger', return_value=self.logger), \ mock.patch('swift.common.utils.checksum.crc32c_isal') \ - as mock_crc32c: + as mock_crc32c, \ + mock.patch('swift.common.utils.checksum.crc64nvme_isal') \ + as mock_crc64nvme: mock_crc32c.__name__ = 'crc32c_isal' + mock_crc64nvme.__name__ = 'crc64nvme_isal' S3ApiMiddleware(None, {}) self.assertEqual( - {'info': ['Using crc32c_isal implementation for CRC32C.']}, + {'info': ['Using crc32c_isal implementation for CRC32C.', + 'Using crc64nvme_isal implementation for CRC64NVME.']}, self.logger.all_log_lines()) def test_non_s3_request_passthrough(self): diff --git a/test/unit/common/middleware/s3api/test_s3request.py b/test/unit/common/middleware/s3api/test_s3request.py index 6d03e17655..7db0a33f8b 100644 --- a/test/unit/common/middleware/s3api/test_s3request.py +++ b/test/unit/common/middleware/s3api/test_s3request.py @@ -41,7 +41,7 @@ from swift.common.middleware.s3api.s3response import InvalidArgument, \ XAmzContentSHA256Mismatch, ErrorResponse, S3NotImplemented from swift.common.utils import checksum from test.debug_logger import debug_logger -from test.unit import requires_crc32c +from test.unit import requires_crc32c, requires_crc64nvme from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase Fake_ACL_MAP = { @@ -2001,40 +2001,38 @@ class TestRequest(S3ApiTestCase): with self.assertRaises(S3InputChecksumMismatch): sigv4_req.environ['wsgi.input'].read() + @requires_crc64nvme @patch.object(S3Request, '_validate_dates', lambda *a: None) - def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_valid(self): - # apparently valid value provokes the not implemented error + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_ok(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' - crc = base64.b64encode(b'12345678') + crc = base64.b64encode( + checksum.crc64nvme(b'abcdefghijklmnopqrstuvwxyz\n').digest()) req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( body=body, extra_headers={'x-amz-checksum-crc64nvme': crc} ) - with self.assertRaises(S3NotImplemented) as cm: - SigV4Request(req.environ) - self.assertIn( - b'The x-amz-checksum-crc64nvme algorithm is not supported.', - cm.exception.body) + sigv4_req = SigV4Request(req.environ) + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + sigv4_req.environ['wsgi.input'].read()) + @requires_crc64nvme @patch.object(S3Request, '_validate_dates', lambda *a: None) def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_invalid(self): - # the not implemented error is raised before the value is validated body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' + crc = base64.b64encode(checksum.crc64nvme(b'not-the-body').digest()) req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( body=body, - extra_headers={'x-amz-checksum-crc64nvme': 'not-a-valid-crc'} + extra_headers={'x-amz-checksum-crc64nvme': crc} ) - with self.assertRaises(S3NotImplemented) as cm: - SigV4Request(req.environ) - self.assertIn( - b'The x-amz-checksum-crc64nvme algorithm is not supported.', - cm.exception.body) + sigv4_req = SigV4Request(req.environ) + with self.assertRaises(S3InputChecksumMismatch): + sigv4_req.environ['wsgi.input'].read() @patch.object(S3Request, '_validate_dates', lambda *a: None) def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha1_ok(self): @@ -2896,13 +2894,21 @@ class TestModuleFunctions(unittest.TestCase): do_test('crc32c') do_test('sha1') do_test('sha256') + try: + checksum._select_crc64nvme_impl() + except NotImplementedError: + pass + else: + do_test('crc64nvme') def test_get_checksum_hasher_invalid(self): def do_test(crc): with self.assertRaises(s3response.S3NotImplemented): _get_checksum_hasher('x-amz-checksum-%s' % crc) - do_test('crc64nvme') + with mock.patch.object(checksum, '_select_crc64nvme_impl', + side_effect=NotImplementedError): + do_test('crc64nvme') do_test('nonsense') do_test('') diff --git a/test/unit/common/utils/test_checksum.py b/test/unit/common/utils/test_checksum.py index 862d8709ca..144f90bba5 100644 --- a/test/unit/common/utils/test_checksum.py +++ b/test/unit/common/utils/test_checksum.py @@ -20,7 +20,7 @@ import zlib from swift.common.utils import checksum from test.debug_logger import debug_logger -from test.unit import requires_crc32c +from test.unit import requires_crc32c, requires_crc64nvme # If you're curious about the 0xe3069283, see "check" at @@ -32,10 +32,18 @@ class TestCRC32C(unittest.TestCase): partial = impl(b"12345") self.assertEqual(impl(b"6789", partial), 0xe3069283) + @unittest.skipIf(checksum.crc32c_anycrc is None, 'No anycrc CRC32C') + def test_anycrc(self): + self.check_crc_func(checksum.crc32c_anycrc) + # Check preferences -- beats out reference, but not kernel or ISA-L + if checksum.crc32c_isal is None and checksum.crc32c_kern is None: + self.assertIs(checksum._select_crc32c_impl(), + checksum.crc32c_anycrc) + @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C') def test_kern(self): self.check_crc_func(checksum.crc32c_kern) - # Check preferences -- beats out reference, but not ISA-L + # Check preferences -- beats out reference and anycrc, but not ISA-L if checksum.crc32c_isal is None: self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern) @@ -128,6 +136,28 @@ class TestCRC32C(unittest.TestCase): self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal) +class TestCRC64NVME(unittest.TestCase): + def check_crc_func(self, impl): + self.assertEqual(impl(b"123456789"), 0xae8b14860a799888) + # Check that we can save/continue + partial = impl(b"12345") + self.assertEqual(impl(b"6789", partial), 0xae8b14860a799888) + + @unittest.skipIf(checksum.crc64nvme_anycrc is None, 'No anycrc CRC64NVME') + def test_anycrc(self): + self.check_crc_func(checksum.crc64nvme_anycrc) + if checksum.crc64nvme_isal is None: + self.assertIs(checksum._select_crc64nvme_impl(), + checksum.crc64nvme_anycrc) + + @unittest.skipIf(checksum.crc64nvme_isal is None, 'No ISA-L CRC64NVME') + def test_isal(self): + self.check_crc_func(checksum.crc64nvme_isal) + # Check preferences -- ISA-L always wins + self.assertIs(checksum._select_crc64nvme_impl(), + checksum.crc64nvme_isal) + + class TestCRCHasher(unittest.TestCase): def setUp(self): self.logger = debug_logger() @@ -238,21 +268,101 @@ class TestCRCHasher(unittest.TestCase): self.assertEqual('6b2fc5b0', hasher_copy.hexdigest()) def test_crc32c_hasher_selects_kern_impl(self): - with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \ - mock.patch( - 'swift.common.utils.checksum.crc32c_kern') as mock_kern: + scuc = 'swift.common.utils.checksum' + with mock.patch(scuc + '.crc32c_isal', None), \ + mock.patch(scuc + '.crc32c_kern') as mock_kern, \ + mock.patch(scuc + '.crc32c_anycrc', None): mock_kern.__name__ = 'crc32c_kern' self.assertIs(mock_kern, checksum.crc32c().crc_func) checksum.log_selected_implementation(self.logger) self.assertIn('Using crc32c_kern implementation for CRC32C.', self.logger.get_lines_for_level('info')) + def test_crc32c_hasher_selects_anycrc_impl(self): + scuc = 'swift.common.utils.checksum' + with mock.patch(scuc + '.crc32c_isal', None), \ + mock.patch(scuc + '.crc32c_kern', None), \ + mock.patch(scuc + '.crc32c_anycrc') as mock_anycrc: + mock_anycrc.__name__ = 'crc32c_anycrc' + self.assertIs(mock_anycrc, checksum.crc32c().crc_func) + checksum.log_selected_implementation(self.logger) + self.assertIn('Using crc32c_anycrc implementation for CRC32C.', + self.logger.get_lines_for_level('info')) + def test_crc32c_hasher_selects_isal_impl(self): - with mock.patch( - 'swift.common.utils.checksum.crc32c_isal') as mock_isal, \ - mock.patch('swift.common.utils.checksum.crc32c_kern'): + scuc = 'swift.common.utils.checksum' + with mock.patch(scuc + '.crc32c_isal') as mock_isal, \ + mock.patch(scuc + '.crc32c_kern'), \ + mock.patch(scuc + '.crc32c_anycrc'): mock_isal.__name__ = 'crc32c_isal' self.assertIs(mock_isal, checksum.crc32c().crc_func) checksum.log_selected_implementation(self.logger) self.assertIn('Using crc32c_isal implementation for CRC32C.', self.logger.get_lines_for_level('info')) + + @requires_crc64nvme + def test_crc64nvme_hasher(self): + # See CRC-64/NVME at + # https://reveng.sourceforge.io/crc-catalogue/17plus.htm + hasher = checksum.crc64nvme() + self.assertEqual('crc64nvme', hasher.name) + self.assertEqual(8, hasher.digest_size) + self.assertEqual(64, hasher.width) + self.assertEqual(0, hasher.crc) + self.assertEqual(b'\x00\x00\x00\x00\x00\x00\x00\x00', hasher.digest()) + self.assertEqual('0000000000000000', hasher.hexdigest()) + + hasher.update(b'123456789') + self.assertEqual(0xae8b14860a799888, hasher.crc) + self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest()) + self.assertEqual('ae8b14860a799888', hasher.hexdigest()) + + @requires_crc64nvme + def test_crc64nvme_hasher_constructed_with_data(self): + hasher = checksum.crc64nvme(b'123456789') + self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest()) + self.assertEqual('ae8b14860a799888', hasher.hexdigest()) + + @requires_crc64nvme + def test_crc64nvme_hasher_initial_value(self): + hasher = checksum.crc64nvme(initial_value=0xae8b14860a799888) + self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest()) + self.assertEqual('ae8b14860a799888', hasher.hexdigest()) + + @requires_crc64nvme + def test_crc64nvme_hasher_copy(self): + hasher = checksum.crc64nvme(b'123456789') + self.assertEqual('ae8b14860a799888', hasher.hexdigest()) + hasher_copy = hasher.copy() + self.assertEqual('crc64nvme', hasher_copy.name) + self.assertIs(hasher.crc_func, hasher_copy.crc_func) + self.assertEqual('ae8b14860a799888', hasher_copy.hexdigest()) + hasher_copy.update(b'foo') + self.assertEqual('ae8b14860a799888', hasher.hexdigest()) + self.assertEqual('673ece0d56523f46', hasher_copy.hexdigest()) + hasher.update(b'bar') + self.assertEqual('0991d5edf1b0062e', hasher.hexdigest()) + self.assertEqual('673ece0d56523f46', hasher_copy.hexdigest()) + + def test_crc64nvme_hasher_selects_anycrc_impl(self): + scuc = 'swift.common.utils.checksum' + with mock.patch(scuc + '.crc64nvme_isal', None), \ + mock.patch(scuc + '.crc64nvme_anycrc') as mock_anycrc: + mock_anycrc.__name__ = 'crc64nvme_anycrc' + self.assertIs(mock_anycrc, + checksum.crc64nvme().crc_func) + checksum.log_selected_implementation(self.logger) + self.assertIn( + 'Using crc64nvme_anycrc implementation for CRC64NVME.', + self.logger.get_lines_for_level('info')) + + def test_crc64nvme_hasher_selects_isal_impl(self): + scuc = 'swift.common.utils.checksum' + with mock.patch(scuc + '.crc64nvme_isal') as mock_isal, \ + mock.patch(scuc + '.crc64nvme_anycrc'): + mock_isal.__name__ = 'crc64nvme_isal' + self.assertIs(mock_isal, checksum.crc64nvme().crc_func) + checksum.log_selected_implementation(self.logger) + self.assertIn( + 'Using crc64nvme_isal implementation for CRC64NVME.', + self.logger.get_lines_for_level('info'))