s3api: Add support for crc64nvme checksum calculation

Add anycrc as a soft dependency in case ISA-L isn't available.
Plus we'll want it later: when we start writing down checksums,
we'll need it to combine per-part checksums for MPUs.

Like with crc32c, we won't provide any pure-python version as the
CPU-intensiveness could present a DoS vector. Worst case, we 501
as before.

Co-Authored-By: Tim Burke <tim.burke@gmail.com>
Signed-off-by: Tim Burke <tim.burke@gmail.com>
Change-Id: Ia05e5677a8ca89a62b142078abfb7371b1badd3f
Signed-off-by: Alistair Coles <alistairncoles@gmail.com>
This commit is contained in:
Alistair Coles
2025-04-02 10:24:04 +01:00
parent be56c1e258
commit 404e1f2732
6 changed files with 239 additions and 34 deletions

View File

@@ -13,6 +13,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
import anycrc
except ImportError:
anycrc = None
import binascii import binascii
import ctypes import ctypes
import ctypes.util import ctypes.util
@@ -28,6 +32,15 @@ except ImportError:
pkg_files = None pkg_files = None
# See if anycrc is available...
if anycrc:
crc32c_anycrc = anycrc.Model('CRC32C').calc
crc64nvme_anycrc = anycrc.Model('CRC64-NVME').calc
else:
crc32c_anycrc = None
crc64nvme_anycrc = None
# If isal is available system-wide, great! # If isal is available system-wide, great!
isal_lib = ctypes.util.find_library('isal') isal_lib = ctypes.util.find_library('isal')
if isal_lib is None and pkg_files is not None: if isal_lib is None and pkg_files is not None:
@@ -39,6 +52,7 @@ if isal_lib is None and pkg_files is not None:
isal_lib = isal_libs[0].locate() isal_lib = isal_libs[0].locate()
isal = ctypes.CDLL(isal_lib) if isal_lib else None isal = ctypes.CDLL(isal_lib) if isal_lib else None
if hasattr(isal, 'crc32_iscsi'): # isa-l >= 2.16 if hasattr(isal, 'crc32_iscsi'): # isa-l >= 2.16
isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint] isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint]
isal.crc32_iscsi.restype = ctypes.c_uint isal.crc32_iscsi.restype = ctypes.c_uint
@@ -55,7 +69,22 @@ if hasattr(isal, 'crc32_iscsi'): # isa-l >= 2.16
else: else:
crc32c_isal = None crc32c_isal = None
if hasattr(isal, 'crc64_rocksoft_refl'): # isa-l >= 2.31.0
isal.crc64_rocksoft_refl.argtypes = [
ctypes.c_uint64, ctypes.c_char_p, ctypes.c_uint64]
isal.crc64_rocksoft_refl.restype = ctypes.c_uint64
def crc64nvme_isal(data, value=0):
return isal.crc64_rocksoft_refl(
value,
data,
len(data),
)
else:
crc64nvme_isal = None
# The kernel may also provide crc32c
AF_ALG = getattr(socket, 'AF_ALG', 38) AF_ALG = getattr(socket, 'AF_ALG', 38)
try: try:
_sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET) _sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
@@ -98,9 +127,18 @@ def _select_crc32c_impl():
# AMD 3900XT | ~20GB/s | ~5GB/s # AMD 3900XT | ~20GB/s | ~5GB/s
# #
# i.e., ISA-L is consistently 3-5x faster than kernel sockets # i.e., ISA-L is consistently 3-5x faster than kernel sockets
selected = crc32c_isal or crc32c_kern or None selected = crc32c_isal or crc32c_kern or crc32c_anycrc or None
if not selected: if not selected:
raise NotImplementedError('no crc32c implementation, install isal') raise NotImplementedError(
'no crc32c implementation, install isal or anycrc')
return selected
def _select_crc64nvme_impl():
selected = crc64nvme_isal or crc64nvme_anycrc or None
if not selected:
raise NotImplementedError(
'no crc64nvme implementation, install isal or anycrc')
return selected return selected
@@ -145,7 +183,8 @@ class CRCHasher(object):
def digest(self): def digest(self):
""" """
Return the current CRC value as a 4-byte big-endian integer. Return the current CRC value as a big-endian integer of length
``width / 8`` bytes.
:returns: Packed CRC value. (bytes) :returns: Packed CRC value. (bytes)
""" """
@@ -187,8 +226,11 @@ def crc32c(data=None, initial_value=0):
def crc64nvme(data=None, initial_value=0): def crc64nvme(data=None, initial_value=0):
'''Stub for s3api''' return CRCHasher('crc64nvme',
raise NotImplementedError _select_crc64nvme_impl(),
data=data,
initial_value=initial_value,
width=64)
def log_selected_implementation(logger): def log_selected_implementation(logger):
@@ -196,6 +238,16 @@ def log_selected_implementation(logger):
impl = _select_crc32c_impl() impl = _select_crc32c_impl()
except NotImplementedError: except NotImplementedError:
logger.warning( logger.warning(
'No implementation found for CRC32C; install ISA-L for support.') 'No implementation found for CRC32C; '
'install ISA-L or anycrc for support.')
else: else:
logger.info('Using %s implementation for CRC32C.' % impl.__name__) logger.info('Using %s implementation for CRC32C.' % impl.__name__)
try:
impl = _select_crc64nvme_impl()
except NotImplementedError:
logger.warning(
'No implementation found for CRC64NVME; '
'install ISA-L or anycrc for support.')
else:
logger.info('Using %s implementation for CRC64NVME.' % impl.__name__)

View File

@@ -191,7 +191,11 @@ class ObjectChecksumMixin(object):
'ChecksumAlgorithm': self.ALGORITHM, 'ChecksumAlgorithm': self.ALGORITHM,
} }
if boto_at_least(1, 36): if boto_at_least(1, 36):
checksum_kwargs['ChecksumType'] = 'COMPOSITE' if self.ALGORITHM == 'CRC64NVME':
# crc64nvme only allows full-object
checksum_kwargs['ChecksumType'] = 'FULL_OBJECT'
else:
checksum_kwargs['ChecksumType'] = 'COMPOSITE'
obj_name = self.create_name(self.ALGORITHM + '-mpu-complete-good') obj_name = self.create_name(self.ALGORITHM + '-mpu-complete-good')
create_mpu_resp = self.client.create_multipart_upload( create_mpu_resp = self.client.create_multipart_upload(
@@ -247,6 +251,24 @@ class TestObjectChecksumCRC32C(ObjectChecksumMixin, BaseS3TestCaseWithBucket):
super().setUpClass() super().setUpClass()
class TestObjectChecksumCRC64NVME(ObjectChecksumMixin,
BaseS3TestCaseWithBucket):
ALGORITHM = 'CRC64NVME'
EXPECTED = 'rosUhgp5mIg='
INVALID = 'rosUhgp5mIh='
BAD = 'sosUhgp5mIg='
@classmethod
def setUpClass(cls):
if [int(x) for x in botocore.__version__.split('.')] < [1, 36]:
raise SkipTest('botocore cannot crc64nvme (run '
'`pip install -U boto3 botocore`)')
if not botocore.httpchecksum.HAS_CRT:
raise SkipTest('botocore cannot crc64nvme (run '
'`pip install awscrt`)')
super().setUpClass()
class TestObjectChecksumSHA1(ObjectChecksumMixin, BaseS3TestCaseWithBucket): class TestObjectChecksumSHA1(ObjectChecksumMixin, BaseS3TestCaseWithBucket):
ALGORITHM = 'SHA1' ALGORITHM = 'SHA1'
EXPECTED = '98O8HYCOBHMq32eZZczDTKeuNEE=' EXPECTED = '98O8HYCOBHMq32eZZczDTKeuNEE='

View File

@@ -1096,6 +1096,17 @@ def requires_crc32c(func):
return wrapper return wrapper
def requires_crc64nvme(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
checksum.crc64nvme()
except NotImplementedError as e:
raise SkipTest(str(e))
return func(*args, **kwargs)
return wrapper
class StubResponse(object): class StubResponse(object):
def __init__(self, status, body=b'', headers=None, frag_index=None, def __init__(self, status, body=b'', headers=None, frag_index=None,

View File

@@ -248,11 +248,15 @@ class TestS3ApiMiddleware(S3ApiTestCase):
with mock.patch('swift.common.middleware.s3api.s3api.get_logger', with mock.patch('swift.common.middleware.s3api.s3api.get_logger',
return_value=self.logger), \ return_value=self.logger), \
mock.patch('swift.common.utils.checksum.crc32c_isal') \ mock.patch('swift.common.utils.checksum.crc32c_isal') \
as mock_crc32c: as mock_crc32c, \
mock.patch('swift.common.utils.checksum.crc64nvme_isal') \
as mock_crc64nvme:
mock_crc32c.__name__ = 'crc32c_isal' mock_crc32c.__name__ = 'crc32c_isal'
mock_crc64nvme.__name__ = 'crc64nvme_isal'
S3ApiMiddleware(None, {}) S3ApiMiddleware(None, {})
self.assertEqual( self.assertEqual(
{'info': ['Using crc32c_isal implementation for CRC32C.']}, {'info': ['Using crc32c_isal implementation for CRC32C.',
'Using crc64nvme_isal implementation for CRC64NVME.']},
self.logger.all_log_lines()) self.logger.all_log_lines())
def test_non_s3_request_passthrough(self): def test_non_s3_request_passthrough(self):

View File

@@ -41,7 +41,7 @@ from swift.common.middleware.s3api.s3response import InvalidArgument, \
XAmzContentSHA256Mismatch, ErrorResponse, S3NotImplemented XAmzContentSHA256Mismatch, ErrorResponse, S3NotImplemented
from swift.common.utils import checksum from swift.common.utils import checksum
from test.debug_logger import debug_logger from test.debug_logger import debug_logger
from test.unit import requires_crc32c from test.unit import requires_crc32c, requires_crc64nvme
from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase
Fake_ACL_MAP = { Fake_ACL_MAP = {
@@ -2001,40 +2001,38 @@ class TestRequest(S3ApiTestCase):
with self.assertRaises(S3InputChecksumMismatch): with self.assertRaises(S3InputChecksumMismatch):
sigv4_req.environ['wsgi.input'].read() sigv4_req.environ['wsgi.input'].read()
@requires_crc64nvme
@patch.object(S3Request, '_validate_dates', lambda *a: None) @patch.object(S3Request, '_validate_dates', lambda *a: None)
def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_valid(self): def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_ok(self):
# apparently valid value provokes the not implemented error
body = 'a\r\nabcdefghij\r\n' \ body = 'a\r\nabcdefghij\r\n' \
'a\r\nklmnopqrst\r\n' \ 'a\r\nklmnopqrst\r\n' \
'7\r\nuvwxyz\n\r\n' \ '7\r\nuvwxyz\n\r\n' \
'0\r\n' '0\r\n'
crc = base64.b64encode(b'12345678') crc = base64.b64encode(
checksum.crc64nvme(b'abcdefghijklmnopqrstuvwxyz\n').digest())
req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( req = self._make_sig_v4_streaming_unsigned_payload_trailer_req(
body=body, body=body,
extra_headers={'x-amz-checksum-crc64nvme': crc} extra_headers={'x-amz-checksum-crc64nvme': crc}
) )
with self.assertRaises(S3NotImplemented) as cm: sigv4_req = SigV4Request(req.environ)
SigV4Request(req.environ) self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n',
self.assertIn( sigv4_req.environ['wsgi.input'].read())
b'The x-amz-checksum-crc64nvme algorithm is not supported.',
cm.exception.body)
@requires_crc64nvme
@patch.object(S3Request, '_validate_dates', lambda *a: None) @patch.object(S3Request, '_validate_dates', lambda *a: None)
def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_invalid(self): def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_invalid(self):
# the not implemented error is raised before the value is validated
body = 'a\r\nabcdefghij\r\n' \ body = 'a\r\nabcdefghij\r\n' \
'a\r\nklmnopqrst\r\n' \ 'a\r\nklmnopqrst\r\n' \
'7\r\nuvwxyz\n\r\n' \ '7\r\nuvwxyz\n\r\n' \
'0\r\n' '0\r\n'
crc = base64.b64encode(checksum.crc64nvme(b'not-the-body').digest())
req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( req = self._make_sig_v4_streaming_unsigned_payload_trailer_req(
body=body, body=body,
extra_headers={'x-amz-checksum-crc64nvme': 'not-a-valid-crc'} extra_headers={'x-amz-checksum-crc64nvme': crc}
) )
with self.assertRaises(S3NotImplemented) as cm: sigv4_req = SigV4Request(req.environ)
SigV4Request(req.environ) with self.assertRaises(S3InputChecksumMismatch):
self.assertIn( sigv4_req.environ['wsgi.input'].read()
b'The x-amz-checksum-crc64nvme algorithm is not supported.',
cm.exception.body)
@patch.object(S3Request, '_validate_dates', lambda *a: None) @patch.object(S3Request, '_validate_dates', lambda *a: None)
def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha1_ok(self): def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha1_ok(self):
@@ -2896,13 +2894,21 @@ class TestModuleFunctions(unittest.TestCase):
do_test('crc32c') do_test('crc32c')
do_test('sha1') do_test('sha1')
do_test('sha256') do_test('sha256')
try:
checksum._select_crc64nvme_impl()
except NotImplementedError:
pass
else:
do_test('crc64nvme')
def test_get_checksum_hasher_invalid(self): def test_get_checksum_hasher_invalid(self):
def do_test(crc): def do_test(crc):
with self.assertRaises(s3response.S3NotImplemented): with self.assertRaises(s3response.S3NotImplemented):
_get_checksum_hasher('x-amz-checksum-%s' % crc) _get_checksum_hasher('x-amz-checksum-%s' % crc)
do_test('crc64nvme') with mock.patch.object(checksum, '_select_crc64nvme_impl',
side_effect=NotImplementedError):
do_test('crc64nvme')
do_test('nonsense') do_test('nonsense')
do_test('') do_test('')

View File

@@ -20,7 +20,7 @@ import zlib
from swift.common.utils import checksum from swift.common.utils import checksum
from test.debug_logger import debug_logger from test.debug_logger import debug_logger
from test.unit import requires_crc32c from test.unit import requires_crc32c, requires_crc64nvme
# If you're curious about the 0xe3069283, see "check" at # If you're curious about the 0xe3069283, see "check" at
@@ -32,10 +32,18 @@ class TestCRC32C(unittest.TestCase):
partial = impl(b"12345") partial = impl(b"12345")
self.assertEqual(impl(b"6789", partial), 0xe3069283) self.assertEqual(impl(b"6789", partial), 0xe3069283)
@unittest.skipIf(checksum.crc32c_anycrc is None, 'No anycrc CRC32C')
def test_anycrc(self):
self.check_crc_func(checksum.crc32c_anycrc)
# Check preferences -- beats out reference, but not kernel or ISA-L
if checksum.crc32c_isal is None and checksum.crc32c_kern is None:
self.assertIs(checksum._select_crc32c_impl(),
checksum.crc32c_anycrc)
@unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C') @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
def test_kern(self): def test_kern(self):
self.check_crc_func(checksum.crc32c_kern) self.check_crc_func(checksum.crc32c_kern)
# Check preferences -- beats out reference, but not ISA-L # Check preferences -- beats out reference and anycrc, but not ISA-L
if checksum.crc32c_isal is None: if checksum.crc32c_isal is None:
self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern) self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern)
@@ -128,6 +136,28 @@ class TestCRC32C(unittest.TestCase):
self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal) self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal)
class TestCRC64NVME(unittest.TestCase):
def check_crc_func(self, impl):
self.assertEqual(impl(b"123456789"), 0xae8b14860a799888)
# Check that we can save/continue
partial = impl(b"12345")
self.assertEqual(impl(b"6789", partial), 0xae8b14860a799888)
@unittest.skipIf(checksum.crc64nvme_anycrc is None, 'No anycrc CRC64NVME')
def test_anycrc(self):
self.check_crc_func(checksum.crc64nvme_anycrc)
if checksum.crc64nvme_isal is None:
self.assertIs(checksum._select_crc64nvme_impl(),
checksum.crc64nvme_anycrc)
@unittest.skipIf(checksum.crc64nvme_isal is None, 'No ISA-L CRC64NVME')
def test_isal(self):
self.check_crc_func(checksum.crc64nvme_isal)
# Check preferences -- ISA-L always wins
self.assertIs(checksum._select_crc64nvme_impl(),
checksum.crc64nvme_isal)
class TestCRCHasher(unittest.TestCase): class TestCRCHasher(unittest.TestCase):
def setUp(self): def setUp(self):
self.logger = debug_logger() self.logger = debug_logger()
@@ -238,21 +268,101 @@ class TestCRCHasher(unittest.TestCase):
self.assertEqual('6b2fc5b0', hasher_copy.hexdigest()) self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())
def test_crc32c_hasher_selects_kern_impl(self): def test_crc32c_hasher_selects_kern_impl(self):
with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \ scuc = 'swift.common.utils.checksum'
mock.patch( with mock.patch(scuc + '.crc32c_isal', None), \
'swift.common.utils.checksum.crc32c_kern') as mock_kern: mock.patch(scuc + '.crc32c_kern') as mock_kern, \
mock.patch(scuc + '.crc32c_anycrc', None):
mock_kern.__name__ = 'crc32c_kern' mock_kern.__name__ = 'crc32c_kern'
self.assertIs(mock_kern, checksum.crc32c().crc_func) self.assertIs(mock_kern, checksum.crc32c().crc_func)
checksum.log_selected_implementation(self.logger) checksum.log_selected_implementation(self.logger)
self.assertIn('Using crc32c_kern implementation for CRC32C.', self.assertIn('Using crc32c_kern implementation for CRC32C.',
self.logger.get_lines_for_level('info')) self.logger.get_lines_for_level('info'))
def test_crc32c_hasher_selects_anycrc_impl(self):
scuc = 'swift.common.utils.checksum'
with mock.patch(scuc + '.crc32c_isal', None), \
mock.patch(scuc + '.crc32c_kern', None), \
mock.patch(scuc + '.crc32c_anycrc') as mock_anycrc:
mock_anycrc.__name__ = 'crc32c_anycrc'
self.assertIs(mock_anycrc, checksum.crc32c().crc_func)
checksum.log_selected_implementation(self.logger)
self.assertIn('Using crc32c_anycrc implementation for CRC32C.',
self.logger.get_lines_for_level('info'))
def test_crc32c_hasher_selects_isal_impl(self): def test_crc32c_hasher_selects_isal_impl(self):
with mock.patch( scuc = 'swift.common.utils.checksum'
'swift.common.utils.checksum.crc32c_isal') as mock_isal, \ with mock.patch(scuc + '.crc32c_isal') as mock_isal, \
mock.patch('swift.common.utils.checksum.crc32c_kern'): mock.patch(scuc + '.crc32c_kern'), \
mock.patch(scuc + '.crc32c_anycrc'):
mock_isal.__name__ = 'crc32c_isal' mock_isal.__name__ = 'crc32c_isal'
self.assertIs(mock_isal, checksum.crc32c().crc_func) self.assertIs(mock_isal, checksum.crc32c().crc_func)
checksum.log_selected_implementation(self.logger) checksum.log_selected_implementation(self.logger)
self.assertIn('Using crc32c_isal implementation for CRC32C.', self.assertIn('Using crc32c_isal implementation for CRC32C.',
self.logger.get_lines_for_level('info')) self.logger.get_lines_for_level('info'))
@requires_crc64nvme
def test_crc64nvme_hasher(self):
# See CRC-64/NVME at
# https://reveng.sourceforge.io/crc-catalogue/17plus.htm
hasher = checksum.crc64nvme()
self.assertEqual('crc64nvme', hasher.name)
self.assertEqual(8, hasher.digest_size)
self.assertEqual(64, hasher.width)
self.assertEqual(0, hasher.crc)
self.assertEqual(b'\x00\x00\x00\x00\x00\x00\x00\x00', hasher.digest())
self.assertEqual('0000000000000000', hasher.hexdigest())
hasher.update(b'123456789')
self.assertEqual(0xae8b14860a799888, hasher.crc)
self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest())
self.assertEqual('ae8b14860a799888', hasher.hexdigest())
@requires_crc64nvme
def test_crc64nvme_hasher_constructed_with_data(self):
hasher = checksum.crc64nvme(b'123456789')
self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest())
self.assertEqual('ae8b14860a799888', hasher.hexdigest())
@requires_crc64nvme
def test_crc64nvme_hasher_initial_value(self):
hasher = checksum.crc64nvme(initial_value=0xae8b14860a799888)
self.assertEqual(b'\xae\x8b\x14\x86\x0a\x79\x98\x88', hasher.digest())
self.assertEqual('ae8b14860a799888', hasher.hexdigest())
@requires_crc64nvme
def test_crc64nvme_hasher_copy(self):
hasher = checksum.crc64nvme(b'123456789')
self.assertEqual('ae8b14860a799888', hasher.hexdigest())
hasher_copy = hasher.copy()
self.assertEqual('crc64nvme', hasher_copy.name)
self.assertIs(hasher.crc_func, hasher_copy.crc_func)
self.assertEqual('ae8b14860a799888', hasher_copy.hexdigest())
hasher_copy.update(b'foo')
self.assertEqual('ae8b14860a799888', hasher.hexdigest())
self.assertEqual('673ece0d56523f46', hasher_copy.hexdigest())
hasher.update(b'bar')
self.assertEqual('0991d5edf1b0062e', hasher.hexdigest())
self.assertEqual('673ece0d56523f46', hasher_copy.hexdigest())
def test_crc64nvme_hasher_selects_anycrc_impl(self):
scuc = 'swift.common.utils.checksum'
with mock.patch(scuc + '.crc64nvme_isal', None), \
mock.patch(scuc + '.crc64nvme_anycrc') as mock_anycrc:
mock_anycrc.__name__ = 'crc64nvme_anycrc'
self.assertIs(mock_anycrc,
checksum.crc64nvme().crc_func)
checksum.log_selected_implementation(self.logger)
self.assertIn(
'Using crc64nvme_anycrc implementation for CRC64NVME.',
self.logger.get_lines_for_level('info'))
def test_crc64nvme_hasher_selects_isal_impl(self):
scuc = 'swift.common.utils.checksum'
with mock.patch(scuc + '.crc64nvme_isal') as mock_isal, \
mock.patch(scuc + '.crc64nvme_anycrc'):
mock_isal.__name__ = 'crc64nvme_isal'
self.assertIs(mock_isal, checksum.crc64nvme().crc_func)
checksum.log_selected_implementation(self.logger)
self.assertIn(
'Using crc64nvme_isal implementation for CRC64NVME.',
self.logger.get_lines_for_level('info'))