utils: Add CRCHasher and crc32c implementation
A new CRCHasher class wraps crc functions to mimic the hashlib hasher interface. For crc32, use the zlib implementation. For crc32c, use ISA-L if we can find it; on py38+ this includes the ISA-L bundled in pyeclib's binary wheels. If ISA-L is not available, check for kernel crypto API support for crc32c; use it if available. Otherwise, raise a NotImplementedError. No callers yet, but these will be used by s3api in a later patch. Change-Id: Ic0c55e307ce10b56b569c9fee728c445a2300cbd
This commit is contained in:
parent
f9ac22971f
commit
b65855d715
196
swift/common/utils/checksum.py
Normal file
196
swift/common/utils/checksum.py
Normal file
@ -0,0 +1,196 @@
|
||||
# Copyright (c) 2024 NVIDIA
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import binascii
|
||||
import ctypes
|
||||
import ctypes.util
|
||||
import errno
|
||||
import socket
|
||||
import struct
|
||||
import zlib
|
||||
|
||||
try:
|
||||
import pyeclib # noqa
|
||||
from importlib.metadata import files as pkg_files # py38+
|
||||
except ImportError:
|
||||
pkg_files = None
|
||||
|
||||
|
||||
# If isal is available system-wide, great!
isal_lib = ctypes.util.find_library('isal')
if isal_lib is None and pkg_files is not None:
    # py38+: Hopefully pyeclib was installed from a manylinux wheel
    # with isal baked in?
    try:
        # files() returns None when the distribution carries no file
        # listing, and raises PackageNotFoundError (an ImportError
        # subclass) when pyeclib is importable but has no distribution
        # metadata. Neither case should stop this module from importing.
        _pyeclib_files = pkg_files('pyeclib') or []
    except ImportError:
        _pyeclib_files = []
    isal_libs = [f for f in _pyeclib_files
                 if f.name.startswith("libisal")]
    if len(isal_libs) == 1:
        # locate() yields a path object; hand ctypes a plain string.
        isal_lib = str(isal_libs[0].locate())

isal = ctypes.CDLL(isal_lib) if isal_lib else None
if hasattr(isal, 'crc32_iscsi'):  # isa-l >= 2.16
    isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint]
    isal.crc32_iscsi.restype = ctypes.c_uint

    def crc32c_isal(data, value=0):
        """
        Compute a CRC32C with ISA-L's ``crc32_iscsi``.

        ``value`` is inverted before being handed to the library and the
        library's result is inverted again, so a value returned by one call
        can be passed as ``value`` to the next call to continue the CRC.

        :param data: bytes to checksum.
        :param value: CRC of any preceding data (0 to start afresh).
        :returns: the updated CRC as a non-negative int.
        """
        result = isal.crc32_iscsi(
            data,
            len(data),
            value ^ 0xffff_ffff,
        )
        # for some reason, despite us specifying that restype is uint,
        # it can come back signed??
        return (result & 0xffff_ffff) ^ 0xffff_ffff
else:
    # No usable ISA-L; callers treat None as "implementation unavailable".
    crc32c_isal = None
|
||||
|
||||
|
||||
AF_ALG = getattr(socket, 'AF_ALG', 38)
# Probe once at import time: can we open an AF_ALG socket and bind it to
# the crc32c hash? The probe socket is only a capability check, so it is
# always closed afterwards, whatever the outcome.
_sock = None
try:
    _sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
    _sock.bind(("hash", "crc32c"))
except OSError as e:
    # ENOENT: could create socket, but crc32c is unknown
    # EAFNOSUPPORT: AF_ALG sockets are not supported here
    # Anything else is unexpected and is propagated.
    if e.errno not in (errno.ENOENT, errno.EAFNOSUPPORT):
        raise
    crc32c_kern = None
else:
    def crc32c_kern(data, value=0):
        """
        Compute a CRC32C through an AF_ALG socket bound to "crc32c".

        A fresh pair of sockets is opened for every call and both are
        closed before returning, including when an error is raised.

        :param data: bytes to checksum.
        :param value: CRC of any preceding data (0 to start afresh); it is
                      inverted and installed as the algorithm key.
        :returns: the updated CRC as an int.
        :raises OSError: if any socket operation fails.
        """
        crc32c_sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
        try:
            crc32c_sock.bind(("hash", "crc32c"))
            crc32c_sock.setsockopt(
                socket.SOL_ALG,
                socket.ALG_SET_KEY,
                struct.pack("I", value ^ 0xffff_ffff))
            sock, _ = crc32c_sock.accept()
            try:
                sock.sendall(data)
                return struct.unpack("I", sock.recv(4))[0]
            finally:
                sock.close()
        finally:
            crc32c_sock.close()
finally:
    if _sock is not None:
        _sock.close()
|
||||
|
||||
|
||||
def _select_crc32c_impl():
    """
    Pick the preferred crc32c function that is available.

    :returns: ``crc32c_isal`` if set, otherwise ``crc32c_kern``.
    :raises NotImplementedError: if neither implementation is available.
    """
    # Candidates are tried fastest-first. On various hardware we've seen
    #
    #   CPU            |   ISA-L   |  Kernel
    #   ---------------+-----------+----------
    #   Intel N100     |  ~9GB/s   | ~3.5GB/s
    #   ARM Cortex-A55 |  ~2.5GB/s | ~0.4GB/s
    #   Intel 11850H   |  ~7GB/s   | ~2.6GB/s
    #   AMD 3900XT     | ~20GB/s   | ~5GB/s
    #
    # i.e., ISA-L is consistently 3-5x faster than kernel sockets
    for candidate in (crc32c_isal, crc32c_kern):
        if candidate:
            return candidate
    raise NotImplementedError('no crc32c implementation, install isal')
|
||||
|
||||
|
||||
class CRCHasher(object):
    """
    Helper that works like a hashlib hasher, but with a CRC.

    The running CRC is held as an integer in ``crc``. ``crc_func`` is called
    as ``crc_func(data, current_crc)`` and its return value becomes the new
    ``crc``.
    """
    def __init__(self, name, crc_func, data=None, initial_value=0, width=32):
        """
        Initialize the CRCHasher.

        :param name: Name of the hasher
        :param crc_func: Function to compute the CRC.
        :param data: Data to update the hasher.
        :param initial_value: Initial CRC value.
        :param width: Width (in bits) of CRC values.
        :raises ValueError: if ``width`` is neither 32 nor 64.
        """
        if width not in (32, 64):
            raise ValueError("CRCHasher only supports 32- or 64-bit CRCs")
        self.name = name
        self.crc_func = crc_func
        self.crc = initial_value
        self.width = width
        if data is not None:
            self.update(data)

    @property
    def digest_size(self):
        """Size of the digest in bytes, as an int (like hashlib)."""
        # Floor division: "/" would produce a float such as 4.0.
        return self.width // 8

    @property
    def digest_fmt(self):
        """struct format for the digest: network order, 4 or 8 bytes."""
        return "!I" if self.width == 32 else "!Q"

    def update(self, data):
        """
        Update the CRC with new data.

        :param data: Data to update the CRC with.
        """
        self.crc = self.crc_func(data, self.crc)

    def digest(self):
        """
        Return the current CRC value packed as a big-endian unsigned
        integer: 4 bytes for a 32-bit CRC, 8 bytes for a 64-bit CRC.

        :returns: Packed CRC value. (bytes)
        """
        return struct.pack(self.digest_fmt, self.crc)

    def hexdigest(self):
        """
        Return the hexadecimal representation of the current CRC value.

        :returns: Hexadecimal CRC value. (str)
        """
        return binascii.hexlify(self.digest()).decode("ascii")

    def copy(self):
        """
        Copy the current state of this CRCHasher to a new one.

        :returns: a new CRCHasher with the same name, crc function, width
                  and current CRC value; updating either hasher afterwards
                  does not affect the other.
        """
        return CRCHasher(self.name,
                         self.crc_func,
                         initial_value=self.crc,
                         width=self.width)
|
||||
|
||||
|
||||
def crc32(data=None, initial_value=0):
    """
    Build a CRCHasher named 'crc32' that uses ``zlib.crc32``.

    :param data: optional data to feed to the hasher straight away.
    :param initial_value: CRC value to start from.
    :returns: a CRCHasher.
    """
    hasher = CRCHasher(
        'crc32', zlib.crc32, data=data, initial_value=initial_value)
    return hasher
|
||||
|
||||
|
||||
def crc32c(data=None, initial_value=0):
    """
    Build a CRCHasher named 'crc32c' using the selected implementation.

    :param data: optional data to feed to the hasher straight away.
    :param initial_value: CRC value to start from.
    :returns: a CRCHasher.
    :raises NotImplementedError: if no crc32c implementation is available.
    """
    crc_func = _select_crc32c_impl()
    hasher = CRCHasher(
        'crc32c', crc_func, data=data, initial_value=initial_value)
    return hasher
|
||||
|
||||
|
||||
def log_selected_implementation(logger):
    """
    Report which crc32c implementation would be used.

    Logs the implementation's ``__name__`` at info level, or a warning if
    no implementation is available.

    :param logger: logger providing ``info`` and ``warning`` methods.
    """
    try:
        chosen = _select_crc32c_impl()
    except NotImplementedError:
        logger.warning(
            'No implementation found for CRC32C; install ISA-L for support.')
        return
    logger.info('Using %s implementation for CRC32C.' % chosen.__name__)
|
@ -46,7 +46,7 @@ from swift.common import storage_policy, swob, utils, exceptions
|
||||
from swift.common.memcached import MemcacheConnectionError
|
||||
from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy,
|
||||
VALID_EC_TYPES)
|
||||
from swift.common.utils import Timestamp, md5, close_if_possible
|
||||
from swift.common.utils import Timestamp, md5, close_if_possible, checksum
|
||||
from test import get_config
|
||||
from test.debug_logger import FakeLogger
|
||||
from swift.common.header_key_dict import HeaderKeyDict
|
||||
@ -1085,6 +1085,17 @@ def requires_o_tmpfile_support_in_tmp(func):
|
||||
return wrapper
|
||||
|
||||
|
||||
def requires_crc32c(func):
    """
    Decorator: skip the wrapped test when no crc32c implementation exists.

    Availability is checked on every call by building a crc32c hasher; a
    NotImplementedError is turned into a SkipTest carrying the same message.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            checksum.crc32c()
        except NotImplementedError as err:
            raise SkipTest(str(err))
        else:
            return func(*args, **kwargs)
    return wrapper
|
||||
|
||||
|
||||
class StubResponse(object):
|
||||
|
||||
def __init__(self, status, body=b'', headers=None, frag_index=None,
|
||||
|
258
test/unit/common/utils/test_checksum.py
Normal file
258
test/unit/common/utils/test_checksum.py
Normal file
@ -0,0 +1,258 @@
|
||||
# Copyright (c) 2024 NVIDIA
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
import zlib
|
||||
|
||||
from swift.common.utils import checksum
|
||||
from test.debug_logger import debug_logger
|
||||
from test.unit import requires_crc32c
|
||||
|
||||
|
||||
# If you're curious about the 0xe3069283, see "check" at
|
||||
# https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32-iscsi
|
||||
class TestCRC32C(unittest.TestCase):
    """Tests for the individual crc32c functions in ``checksum``."""

    # Where crc32c_kern looks up its socket constructor.
    SOCKET_TARGET = 'swift.common.utils.checksum.socket.socket'

    _needs_kern = unittest.skipIf(
        checksum.crc32c_kern is None, 'No kernel CRC32C')
    _needs_isal = unittest.skipIf(
        checksum.crc32c_isal is None, 'No ISA-L CRC32C')

    def check_crc_func(self, impl):
        """Check ``impl`` against the known value, whole and in two parts."""
        self.assertEqual(impl(b"123456789"), 0xe3069283)
        # Check that we can save/continue
        partial = impl(b"12345")
        self.assertEqual(impl(b"6789", partial), 0xe3069283)

    def _call_kern(self, listener):
        """Run crc32c_kern with socket.socket() returning ``listener``."""
        with mock.patch(self.SOCKET_TARGET, return_value=listener):
            checksum.crc32c_kern(b'x')

    def _call_kern_expecting_boom(self, listener):
        """Like _call_kern, but require an OSError('boom') to escape."""
        with mock.patch(self.SOCKET_TARGET, return_value=listener):
            with self.assertRaises(OSError) as caught:
                checksum.crc32c_kern(b'x')
            self.assertEqual('boom', str(caught.exception))

    def _assert_closed_once(self, *socks):
        """Each given mock socket must have had close() called once."""
        for sock in socks:
            self.assertEqual([mock.call()], sock.close.call_args_list)

    @_needs_kern
    def test_kern(self):
        self.check_crc_func(checksum.crc32c_kern)
        # Check preferences -- beats out reference, but not ISA-L
        if checksum.crc32c_isal is None:
            self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern)

    @_needs_kern
    def test_kern_socket_close_happy_path(self):
        listener = mock.MagicMock()
        op_sock = mock.MagicMock()
        op_sock.recv.return_value = b'1234'
        listener.accept.return_value = (op_sock, None)
        self._call_kern(listener)
        self._assert_closed_once(op_sock, listener)

    @_needs_kern
    def test_kern_socket_close_after_bind_error(self):
        listener = mock.MagicMock()
        listener.bind.side_effect = OSError('boom')
        self._call_kern_expecting_boom(listener)
        self._assert_closed_once(listener)

    @_needs_kern
    def test_kern_socket_close_after_setsockopt_error(self):
        listener = mock.MagicMock()
        listener.setsockopt.side_effect = OSError('boom')
        self._call_kern_expecting_boom(listener)
        self._assert_closed_once(listener)

    @_needs_kern
    def test_kern_socket_close_after_accept_error(self):
        listener = mock.MagicMock()
        listener.accept.side_effect = OSError('boom')
        self._call_kern_expecting_boom(listener)
        self._assert_closed_once(listener)

    @_needs_kern
    def test_kern_socket_after_sendall_error(self):
        listener = mock.MagicMock()
        op_sock = mock.MagicMock()
        op_sock.sendall.side_effect = OSError('boom')
        listener.accept.return_value = (op_sock, None)
        self._call_kern_expecting_boom(listener)
        self._assert_closed_once(op_sock, listener)

    @_needs_kern
    def test_kern_socket_after_recv_error(self):
        listener = mock.MagicMock()
        op_sock = mock.MagicMock()
        op_sock.recv.side_effect = OSError('boom')
        listener.accept.return_value = (op_sock, None)
        self._call_kern_expecting_boom(listener)
        self._assert_closed_once(op_sock, listener)

    @_needs_isal
    def test_isal(self):
        self.check_crc_func(checksum.crc32c_isal)
        # Check preferences -- ISA-L always wins
        self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal)
|
||||
|
||||
|
||||
class TestCRCHasher(unittest.TestCase):
    """Tests for CRCHasher and the crc32 / crc32c factory functions."""

    def setUp(self):
        self.logger = debug_logger()

    def test_base_crc_hasher(self):
        func = mock.MagicMock(return_value=0xbad1)
        hasher = checksum.CRCHasher('fake', func)
        self.assertEqual('fake', hasher.name)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xbad1, hasher.crc)
        self.assertEqual(b'\x00\x00\xba\xd1', hasher.digest())
        self.assertEqual('0000bad1', hasher.hexdigest())

    def test_crc32_hasher(self):
        # See CRC-32/ISO-HDLC at
        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
        hasher = checksum.crc32()
        self.assertEqual('crc32', hasher.name)
        self.assertEqual(4, hasher.digest_size)
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_constructed_with_data(self):
        hasher = checksum.crc32(b'123456789')
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_initial_value(self):
        hasher = checksum.crc32(initial_value=0xcbf43926)
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_copy(self):
        hasher = checksum.crc32(b'123456789')
        self.assertEqual(4, hasher.digest_size)
        self.assertEqual('cbf43926', hasher.hexdigest())
        hasher_copy = hasher.copy()
        # It is the copy's attributes that need checking here.
        self.assertEqual('crc32', hasher_copy.name)
        self.assertEqual(zlib.crc32, hasher_copy.crc_func)
        self.assertEqual('cbf43926', hasher_copy.hexdigest())
        # Updating one hasher must leave the other untouched.
        hasher_copy.update(b'foo')
        self.assertEqual('cbf43926', hasher.hexdigest())
        self.assertEqual('04e7e407', hasher_copy.hexdigest())
        hasher.update(b'bar')
        self.assertEqual('fe6b0d8c', hasher.hexdigest())
        self.assertEqual('04e7e407', hasher_copy.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher(self):
        # See CRC-32/ISCSI at
        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
        hasher = checksum.crc32c()
        self.assertEqual('crc32c', hasher.name)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_constructed_with_data(self):
        hasher = checksum.crc32c(b'123456789')
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_initial_value(self):
        hasher = checksum.crc32c(initial_value=0xe3069283)
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_copy(self):
        hasher = checksum.crc32c(b'123456789')
        self.assertEqual('e3069283', hasher.hexdigest())
        hasher_copy = hasher.copy()
        self.assertEqual('crc32c', hasher_copy.name)
        self.assertIs(hasher.crc_func, hasher_copy.crc_func)
        self.assertEqual('e3069283', hasher_copy.hexdigest())
        # Updating one hasher must leave the other untouched.
        hasher_copy.update(b'foo')
        self.assertEqual('e3069283', hasher.hexdigest())
        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())
        hasher.update(b'bar')
        self.assertEqual('ae5c789c', hasher.hexdigest())
        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())

    def test_crc32c_hasher_selects_kern_impl(self):
        with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \
                mock.patch(
                    'swift.common.utils.checksum.crc32c_kern') as mock_kern:
            mock_kern.__name__ = 'crc32c_kern'
            self.assertIs(mock_kern, checksum.crc32c().crc_func)
            checksum.log_selected_implementation(self.logger)
        self.assertIn('Using crc32c_kern implementation for CRC32C.',
                      self.logger.get_lines_for_level('info'))

    def test_crc32c_hasher_selects_isal_impl(self):
        with mock.patch(
                'swift.common.utils.checksum.crc32c_isal') as mock_isal, \
                mock.patch('swift.common.utils.checksum.crc32c_kern'):
            mock_isal.__name__ = 'crc32c_isal'
            self.assertIs(mock_isal, checksum.crc32c().crc_func)
            checksum.log_selected_implementation(self.logger)
        self.assertIn('Using crc32c_isal implementation for CRC32C.',
                      self.logger.get_lines_for_level('info'))

    def test_crc32c_hasher_no_impl(self):
        # With neither implementation available, the factory raises and the
        # logging helper warns instead of raising.
        with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \
                mock.patch('swift.common.utils.checksum.crc32c_kern', None):
            with self.assertRaises(NotImplementedError):
                checksum.crc32c()
            checksum.log_selected_implementation(self.logger)
        self.assertIn(
            'No implementation found for CRC32C; install ISA-L for support.',
            self.logger.get_lines_for_level('warning'))
|
Loading…
x
Reference in New Issue
Block a user