utils: Add CRCHasher and crc32c implementation

A new CRCHasher class wraps crc functions to mimic the hashlib
hasher interface.

For crc32, use the zlib implementation.

For crc32c, use ISA-L if we can find it; on py38+ this includes the
ISA-L bundled in pyeclib's binary wheels.

If ISA-L is not available, check for kernel crypto API support for crc32c;
use it if available. Otherwise, raise a NotImplementedError.

No callers yet, but these will be used by s3api in a later patch.

Change-Id: Ic0c55e307ce10b56b569c9fee728c445a2300cbd
This commit is contained in:
Tim Burke 2024-02-15 22:32:03 -08:00
parent f9ac22971f
commit b65855d715
3 changed files with 466 additions and 1 deletions

View File

@ -0,0 +1,196 @@
# Copyright (c) 2024 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import binascii
import ctypes
import ctypes.util
import errno
import socket
import struct
import zlib
try:
    import pyeclib  # noqa
    from importlib.metadata import files as pkg_files  # py38+
except ImportError:
    pkg_files = None

# If isal is available system-wide, great!
isal_lib = ctypes.util.find_library('isal')
if isal_lib is None and pkg_files is not None:
    # py38+: Hopefully pyeclib was installed from a manylinux wheel
    # with isal baked in?
    try:
        # files() returns None when the distribution has no RECORD /
        # SOURCES.txt file listing; treat that as "nothing bundled".
        pyeclib_files = pkg_files('pyeclib') or []
    except ImportError:
        # PackageNotFoundError (an ImportError subclass) is raised when
        # pyeclib is importable but has no distribution metadata.
        pyeclib_files = []
    isal_libs = [f for f in pyeclib_files
                 if f.name.startswith("libisal")]
    if len(isal_libs) == 1:
        isal_lib = isal_libs[0].locate()

isal = ctypes.CDLL(isal_lib) if isal_lib else None
if hasattr(isal, 'crc32_iscsi'):  # isa-l >= 2.16
    isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint]
    isal.crc32_iscsi.restype = ctypes.c_uint

    def crc32c_isal(data, value=0):
        """
        Compute a CRC32C using ISA-L's ``crc32_iscsi``.

        :param data: bytes to checksum.
        :param value: CRC of any preceding data; pass a previous result to
            continue a running checksum.
        :returns: the updated CRC as a non-negative 32-bit integer.
        """
        # The incoming CRC is inverted before the call and the result is
        # inverted again afterwards.
        result = isal.crc32_iscsi(
            data,
            len(data),
            value ^ 0xffff_ffff,
        )
        # for some reason, despite us specifying that restype is uint,
        # it can come back signed??
        return (result & 0xffff_ffff) ^ 0xffff_ffff
else:
    crc32c_isal = None
AF_ALG = getattr(socket, 'AF_ALG', 38)


def _kernel_supports_crc32c():
    """
    Probe whether a kernel crypto ("hash", "crc32c") socket can be bound.

    The probe socket is always closed before returning so that importing
    this module does not leave a file descriptor open.

    :returns: True if the socket could be created and bound; False if the
        attempt failed with ENOENT or EAFNOSUPPORT.
    :raises OSError: for any other socket error.
    """
    probe = None
    try:
        probe = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
        probe.bind(("hash", "crc32c"))
    except OSError as e:
        # ENOENT: could create socket, but crc32c is unknown
        # EAFNOSUPPORT: AF_ALG sockets are not available at all
        if e.errno not in (errno.ENOENT, errno.EAFNOSUPPORT):
            raise
        return False
    finally:
        if probe is not None:
            probe.close()
    return True


if _kernel_supports_crc32c():
    def crc32c_kern(data, value=0):
        """
        Compute a CRC32C using an AF_ALG "hash" socket.

        :param data: bytes to checksum.
        :param value: CRC of any preceding data; pass a previous result to
            continue a running checksum.
        :returns: the updated CRC as an unsigned 32-bit integer.
        :raises OSError: if any socket operation fails; both sockets are
            closed before the error propagates.
        """
        crc32c_sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
        try:
            crc32c_sock.bind(("hash", "crc32c"))
            # The (inverted) starting CRC is handed over as the "key".
            crc32c_sock.setsockopt(
                socket.SOL_ALG,
                socket.ALG_SET_KEY,
                struct.pack("I", value ^ 0xffff_ffff))
            sock, _ = crc32c_sock.accept()
            try:
                sock.sendall(data)
                return struct.unpack("I", sock.recv(4))[0]
            finally:
                sock.close()
        finally:
            crc32c_sock.close()
else:
    crc32c_kern = None
def _select_crc32c_impl():
    """
    Pick the best crc32c function available in this process.

    ISA-L is tried first, then the kernel crypto socket implementation.

    :returns: the selected crc32c callable.
    :raises NotImplementedError: if neither implementation is available.
    """
    # Preference order is based on throughput we've seen on various
    # hardware:
    #
    # CPU            | ISA-L     | Kernel
    # ---------------+-----------+----------
    # Intel N100     | ~9GB/s    | ~3.5GB/s
    # ARM Cortex-A55 | ~2.5GB/s  | ~0.4GB/s
    # Intel 11850H   | ~7GB/s    | ~2.6GB/s
    # AMD 3900XT     | ~20GB/s   | ~5GB/s
    #
    # i.e., ISA-L is consistently 3-5x faster than kernel sockets
    for candidate in (crc32c_isal, crc32c_kern):
        if candidate:
            return candidate
    raise NotImplementedError('no crc32c implementation, install isal')
class CRCHasher(object):
    """
    Helper that works like a hashlib hasher, but with a CRC.
    """

    def __init__(self, name, crc_func, data=None, initial_value=0, width=32):
        """
        Initialize the CRCHasher.

        :param name: Name of the hasher
        :param crc_func: Function to compute the CRC; it is called as
            ``crc_func(data, current_crc)`` and must return the new CRC.
        :param data: Data to update the hasher.
        :param initial_value: Initial CRC value.
        :param width: Width (in bits) of CRC values.
        :raises ValueError: if ``width`` is neither 32 nor 64.
        """
        self.name = name
        self.crc_func = crc_func
        self.crc = initial_value
        if width not in (32, 64):
            raise ValueError("CRCHasher only supports 32- or 64-bit CRCs")
        self.width = width
        if data is not None:
            self.update(data)

    @property
    def digest_size(self):
        """
        Size of the packed digest in bytes, as an int.
        """
        # Floor division: width is always 32 or 64, and hashlib-style
        # callers expect an integer byte count rather than a float.
        return self.width // 8

    @property
    def digest_fmt(self):
        """
        The ``struct`` format used to pack the CRC: big-endian, unsigned,
        4 bytes for 32-bit CRCs and 8 bytes for 64-bit CRCs.
        """
        return "!I" if self.width == 32 else "!Q"

    def update(self, data):
        """
        Update the CRC with new data.

        :param data: Data to update the CRC with.
        """
        self.crc = self.crc_func(data, self.crc)

    def digest(self):
        """
        Return the current CRC value packed as a big-endian unsigned
        integer of ``digest_size`` bytes.

        :returns: Packed CRC value. (bytes)
        """
        return struct.pack(self.digest_fmt, self.crc)

    def hexdigest(self):
        """
        Return the hexadecimal representation of the current CRC value.

        :returns: Hexadecimal CRC value. (str)
        """
        return binascii.hexlify(self.digest()).decode("ascii")

    def copy(self):
        """
        Copy the current state of this CRCHasher to a new one.

        :returns: a new CRCHasher with the same name, crc function, width
            and current CRC value; updating one does not affect the other.
        """
        return CRCHasher(self.name,
                         self.crc_func,
                         initial_value=self.crc,
                         width=self.width)
def crc32(data=None, initial_value=0):
    """
    Create a CRCHasher named 'crc32' that uses ``zlib.crc32``.

    :param data: optional data to feed to the hasher immediately.
    :param initial_value: CRC value to start from.
    :returns: a CRCHasher.
    """
    return CRCHasher('crc32', zlib.crc32, data, initial_value)
def crc32c(data=None, initial_value=0):
    """
    Create a CRCHasher named 'crc32c' using the selected implementation.

    :param data: optional data to feed to the hasher immediately.
    :param initial_value: CRC value to start from.
    :returns: a CRCHasher.
    :raises NotImplementedError: if no crc32c implementation is available.
    """
    crc_func = _select_crc32c_impl()
    return CRCHasher('crc32c', crc_func, data, initial_value)
def log_selected_implementation(logger):
    """
    Report which crc32c implementation would be used.

    Logs at info level when an implementation is found, or at warning
    level when none is available.

    :param logger: logger to write the message to.
    """
    try:
        chosen = _select_crc32c_impl()
    except NotImplementedError:
        logger.warning(
            'No implementation found for CRC32C; install ISA-L for support.')
        return
    logger.info('Using %s implementation for CRC32C.' % chosen.__name__)

View File

@ -46,7 +46,7 @@ from swift.common import storage_policy, swob, utils, exceptions
from swift.common.memcached import MemcacheConnectionError
from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy,
VALID_EC_TYPES)
from swift.common.utils import Timestamp, md5, close_if_possible
from swift.common.utils import Timestamp, md5, close_if_possible, checksum
from test import get_config
from test.debug_logger import FakeLogger
from swift.common.header_key_dict import HeaderKeyDict
@ -1085,6 +1085,17 @@ def requires_o_tmpfile_support_in_tmp(func):
return wrapper
def requires_crc32c(func):
    """
    Decorator that skips the wrapped test when no crc32c implementation
    is available.

    A crc32c hasher is constructed before each call; if that raises
    NotImplementedError the test is skipped with the error's message.
    """
    @functools.wraps(func)
    def skip_without_crc32c(*args, **kwargs):
        try:
            checksum.crc32c()
        except NotImplementedError as err:
            raise SkipTest(str(err))
        else:
            return func(*args, **kwargs)
    return skip_without_crc32c
class StubResponse(object):
def __init__(self, status, body=b'', headers=None, frag_index=None,

View File

@ -0,0 +1,258 @@
# Copyright (c) 2024 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest import mock
import zlib
from swift.common.utils import checksum
from test.debug_logger import debug_logger
from test.unit import requires_crc32c
# If you're curious about the 0xe3069283, see "check" at
# https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32-iscsi
class TestCRC32C(unittest.TestCase):
    """
    Tests for the individual crc32c functions in the checksum module.
    """

    def check_crc_func(self, impl):
        """
        Assert that ``impl`` yields the expected check value, both in one
        call and when continuing from a saved partial CRC.
        """
        expected = 0xe3069283
        self.assertEqual(impl(b"123456789"), expected)
        # Check that we can save/continue
        saved = impl(b"12345")
        self.assertEqual(impl(b"6789", saved), expected)

    def _call_kern_with_socket(self, listener):
        # Run crc32c_kern with socket.socket() patched to hand back the
        # given mock.
        with mock.patch('swift.common.utils.checksum.socket.socket',
                        return_value=listener):
            return checksum.crc32c_kern(b'x')

    def _assert_kern_raises_boom(self, listener):
        # The injected OSError('boom') must propagate to the caller.
        with self.assertRaises(OSError) as cm:
            self._call_kern_with_socket(listener)
        self.assertEqual('boom', str(cm.exception))

    def _assert_closed_once(self, *socks):
        # Each given mock socket must have been closed exactly once.
        for sock in socks:
            self.assertEqual([mock.call()], sock.close.call_args_list)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern(self):
        self.check_crc_func(checksum.crc32c_kern)
        # Check preferences -- kernel is only selected without ISA-L
        if checksum.crc32c_isal is not None:
            return
        self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_close_happy_path(self):
        conn = mock.MagicMock()
        conn.recv.return_value = b'1234'
        listener = mock.MagicMock()
        listener.accept.return_value = (conn, None)
        self._call_kern_with_socket(listener)
        self._assert_closed_once(conn, listener)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_close_after_bind_error(self):
        listener = mock.MagicMock()
        listener.bind.side_effect = OSError('boom')
        self._assert_kern_raises_boom(listener)
        self._assert_closed_once(listener)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_close_after_setsockopt_error(self):
        listener = mock.MagicMock()
        listener.setsockopt.side_effect = OSError('boom')
        self._assert_kern_raises_boom(listener)
        self._assert_closed_once(listener)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_close_after_accept_error(self):
        listener = mock.MagicMock()
        listener.accept.side_effect = OSError('boom')
        self._assert_kern_raises_boom(listener)
        self._assert_closed_once(listener)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_after_sendall_error(self):
        conn = mock.MagicMock()
        conn.sendall.side_effect = OSError('boom')
        listener = mock.MagicMock()
        listener.accept.return_value = (conn, None)
        self._assert_kern_raises_boom(listener)
        self._assert_closed_once(conn, listener)

    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
    def test_kern_socket_after_recv_error(self):
        conn = mock.MagicMock()
        conn.recv.side_effect = OSError('boom')
        listener = mock.MagicMock()
        listener.accept.return_value = (conn, None)
        self._assert_kern_raises_boom(listener)
        self._assert_closed_once(conn, listener)

    @unittest.skipIf(checksum.crc32c_isal is None, 'No ISA-L CRC32C')
    def test_isal(self):
        self.check_crc_func(checksum.crc32c_isal)
        # Check preferences -- ISA-L always wins
        self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal)
class TestCRCHasher(unittest.TestCase):
    """
    Tests for CRCHasher and the crc32 / crc32c factory functions.
    """

    def setUp(self):
        self.logger = debug_logger()

    def test_base_crc_hasher(self):
        func = mock.MagicMock(return_value=0xbad1)
        hasher = checksum.CRCHasher('fake', func)
        self.assertEqual('fake', hasher.name)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xbad1, hasher.crc)
        self.assertEqual(b'\x00\x00\xba\xd1', hasher.digest())
        self.assertEqual('0000bad1', hasher.hexdigest())

    def test_crc32_hasher(self):
        # See CRC-32/ISO-HDLC at
        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
        hasher = checksum.crc32()
        self.assertEqual('crc32', hasher.name)
        self.assertEqual(4, hasher.digest_size)
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_contructed_with_data(self):
        hasher = checksum.crc32(b'123456789')
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_initial_value(self):
        hasher = checksum.crc32(initial_value=0xcbf43926)
        self.assertEqual(zlib.crc32, hasher.crc_func)
        self.assertEqual(0xcbf43926, hasher.crc)
        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
        self.assertEqual('cbf43926', hasher.hexdigest())

    def test_crc32_hasher_copy(self):
        hasher = checksum.crc32(b'123456789')
        self.assertEqual(4, hasher.digest_size)
        self.assertEqual('cbf43926', hasher.hexdigest())

        hasher_copy = hasher.copy()
        # Verify the *copy* carries the name over, matching the crc32c
        # copy test.
        self.assertEqual('crc32', hasher_copy.name)
        self.assertEqual(zlib.crc32, hasher_copy.crc_func)
        self.assertEqual('cbf43926', hasher_copy.hexdigest())

        # Updating the copy must not affect the original...
        hasher_copy.update(b'foo')
        self.assertEqual('cbf43926', hasher.hexdigest())
        self.assertEqual('04e7e407', hasher_copy.hexdigest())

        # ...and vice versa.
        hasher.update(b'bar')
        self.assertEqual('fe6b0d8c', hasher.hexdigest())
        self.assertEqual('04e7e407', hasher_copy.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher(self):
        # See CRC-32/ISCSI at
        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
        hasher = checksum.crc32c()
        self.assertEqual('crc32c', hasher.name)
        self.assertEqual(32, hasher.width)
        self.assertEqual(0, hasher.crc)
        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
        self.assertEqual('00000000', hasher.hexdigest())

        hasher.update(b'123456789')
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_constructed_with_data(self):
        hasher = checksum.crc32c(b'123456789')
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_initial_value(self):
        hasher = checksum.crc32c(initial_value=0xe3069283)
        self.assertEqual(0xe3069283, hasher.crc)
        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
        self.assertEqual('e3069283', hasher.hexdigest())

    @requires_crc32c
    def test_crc32c_hasher_copy(self):
        hasher = checksum.crc32c(b'123456789')
        self.assertEqual('e3069283', hasher.hexdigest())

        hasher_copy = hasher.copy()
        self.assertEqual('crc32c', hasher_copy.name)
        self.assertIs(hasher.crc_func, hasher_copy.crc_func)
        self.assertEqual('e3069283', hasher_copy.hexdigest())

        # Updating the copy must not affect the original...
        hasher_copy.update(b'foo')
        self.assertEqual('e3069283', hasher.hexdigest())
        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())

        # ...and vice versa.
        hasher.update(b'bar')
        self.assertEqual('ae5c789c', hasher.hexdigest())
        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())

    def test_crc32c_hasher_selects_kern_impl(self):
        # With ISA-L patched out, the kernel implementation is used.
        with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \
                mock.patch(
                    'swift.common.utils.checksum.crc32c_kern') as mock_kern:
            # MagicMock has no __name__ by default; the log line needs it.
            mock_kern.__name__ = 'crc32c_kern'
            self.assertIs(mock_kern, checksum.crc32c().crc_func)
            checksum.log_selected_implementation(self.logger)
        self.assertIn('Using crc32c_kern implementation for CRC32C.',
                      self.logger.get_lines_for_level('info'))

    def test_crc32c_hasher_selects_isal_impl(self):
        # With both implementations present, ISA-L is used.
        with mock.patch(
                'swift.common.utils.checksum.crc32c_isal') as mock_isal, \
                mock.patch('swift.common.utils.checksum.crc32c_kern'):
            # MagicMock has no __name__ by default; the log line needs it.
            mock_isal.__name__ = 'crc32c_isal'
            self.assertIs(mock_isal, checksum.crc32c().crc_func)
            checksum.log_selected_implementation(self.logger)
        self.assertIn('Using crc32c_isal implementation for CRC32C.',
                      self.logger.get_lines_for_level('info'))