From b65855d7153cde393752c28c2516eeeb7d1a10e1 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Thu, 15 Feb 2024 22:32:03 -0800 Subject: [PATCH] utils: Add CRCHasher and crc32c implementation A new CRCHasher class wraps crc functions to mimic the hashlib hasher interface. For crc32, use the zlib implementation. For crc32c, use ISA-L if we can find it; on py38+ this includes the ISA-L bundled in pyeclib's binary wheels. If ISA-L is not available, check for kernel crypto API support for crc32c; use it if available. Otherwise, raise a NotImplementedError. No callers yet, but these will be used by s3api in a later patch. Change-Id: Ic0c55e307ce10b56b569c9fee728c445a2300cbd --- swift/common/utils/checksum.py | 196 ++++++++++++++++++ test/unit/__init__.py | 13 +- test/unit/common/utils/test_checksum.py | 258 ++++++++++++++++++++++++ 3 files changed, 466 insertions(+), 1 deletion(-) create mode 100644 swift/common/utils/checksum.py create mode 100644 test/unit/common/utils/test_checksum.py diff --git a/swift/common/utils/checksum.py b/swift/common/utils/checksum.py new file mode 100644 index 0000000000..7e1a434ed0 --- /dev/null +++ b/swift/common/utils/checksum.py @@ -0,0 +1,196 @@ +# Copyright (c) 2024 NVIDIA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import binascii
+import ctypes
+import ctypes.util
+import errno
+import socket
+import struct
+import zlib
+
+try:
+    import pyeclib  # noqa
+    from importlib.metadata import files as pkg_files  # py38+
+except ImportError:
+    pkg_files = None
+
+
+# If isal is available system-wide, great!
+isal_lib = ctypes.util.find_library('isal')
+if isal_lib is None and pkg_files is not None:
+    # py38+: Hopefully pyeclib was installed from a manylinux wheel
+    # with isal baked in?
+    try:
+        # files() returns None when the distribution has no file listing
+        isal_libs = [f for f in (pkg_files('pyeclib') or ())
+                     if f.name.startswith("libisal")]
+    except ImportError:
+        # PackageNotFoundError is an ImportError: pyeclib could be imported
+        # but has no distribution metadata to search
+        isal_libs = []
+    if len(isal_libs) == 1:
+        isal_lib = isal_libs[0].locate()
+
+isal = ctypes.CDLL(isal_lib) if isal_lib else None
+if hasattr(isal, 'crc32_iscsi'):  # isa-l >= 2.16
+    isal.crc32_iscsi.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_uint]
+    isal.crc32_iscsi.restype = ctypes.c_uint
+
+    def crc32c_isal(data, value=0):
+        """
+        Compute a CRC32C using ISA-L's ``crc32_iscsi``.
+
+        :param data: bytes to checksum
+        :param value: CRC of any preceding data; pass a previous result to
+                      continue a running checksum
+        :returns: the updated CRC (int)
+        """
+        # crc32_iscsi is handed, and hands back, the value XORed with
+        # 0xffffffff; flip on the way in and on the way out so callers can
+        # chain results directly.
+        result = isal.crc32_iscsi(
+            data,
+            len(data),
+            value ^ 0xffff_ffff,
+        )
+        # for some reason, despite us specifying that restype is uint,
+        # it can come back signed??
+        return (result & 0xffff_ffff) ^ 0xffff_ffff
+else:
+    crc32c_isal = None
+
+
+AF_ALG = getattr(socket, 'AF_ALG', 38)
+
+
+def _kernel_supports_crc32c():
+    """
+    Probe for a crc32c hash behind an AF_ALG socket.
+
+    :returns: True if an AF_ALG socket could be created and bound to
+              ("hash", "crc32c"); False if that failed with ENOENT or
+              EAFNOSUPPORT
+    :raises OSError: if the probe failed with any other errno
+    """
+    sock = None
+    try:
+        sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
+        sock.bind(("hash", "crc32c"))
+    except OSError as e:
+        # ENOENT: could create socket, but crc32c is unknown
+        # EAFNOSUPPORT: no AF_ALG sockets at all
+        if e.errno not in (errno.ENOENT, errno.EAFNOSUPPORT):
+            raise
+        return False
+    else:
+        return True
+    finally:
+        # The probe socket is never used for hashing; always release it
+        # rather than leaving it open for the life of the process.
+        if sock is not None:
+            sock.close()
+
+
+if _kernel_supports_crc32c():
+    def crc32c_kern(data, value=0):
+        """
+        Compute a CRC32C using an AF_ALG "hash"/"crc32c" socket.
+
+        :param data: bytes to checksum
+        :param value: CRC of any preceding data; pass a previous result to
+                      continue a running checksum
+        :returns: the updated CRC (int)
+        :raises OSError: if any socket operation fails; both sockets are
+                         closed before the error propagates
+        """
+        crc32c_sock = socket.socket(AF_ALG, socket.SOCK_SEQPACKET)
+        try:
+            crc32c_sock.bind(("hash", "crc32c"))
+            # the starting value is supplied as the "key", XORed with
+            # 0xffffffff
+            crc32c_sock.setsockopt(
+                socket.SOL_ALG,
+                socket.ALG_SET_KEY,
+                struct.pack("I", value ^ 0xffff_ffff))
+            sock, _ = crc32c_sock.accept()
+            try:
+                sock.sendall(data)
+                return struct.unpack("I", sock.recv(4))[0]
+            finally:
+                sock.close()
+        finally:
+            crc32c_sock.close()
+else:
+    crc32c_kern = None
+
+
+def _select_crc32c_impl():
+    """
+    Pick the CRC32C function to use: ISA-L if found, else the kernel.
+
+    :returns: ``crc32c_isal`` or ``crc32c_kern``
+    :raises NotImplementedError: if neither implementation is available
+    """
+    # Use the best implementation available.
+    # On various hardware we've seen
+    #
+    #  CPU            |   ISA-L   |  Kernel
+    #  ---------------+-----------+----------
+    #  Intel N100     |  ~9GB/s   | ~3.5GB/s
+    #  ARM Cortex-A55 |  ~2.5GB/s | ~0.4GB/s
+    #  Intel 11850H   |  ~7GB/s   | ~2.6GB/s
+    #  AMD 3900XT     | ~20GB/s   | ~5GB/s
+    #
+    # i.e., ISA-L is consistently 3-5x faster than kernel sockets
+    selected = crc32c_isal or crc32c_kern or None
+    if not selected:
+        raise NotImplementedError('no crc32c implementation, install isal')
+    return selected
+
+
+class CRCHasher(object):
+    """
+    Helper that works like a hashlib hasher, but with a CRC.
+    """
+    def __init__(self, name, crc_func, data=None, initial_value=0, width=32):
+        """
+        Initialize the CRCHasher.
+
+        :param name: Name of the hasher
+        :param crc_func: Function to compute the CRC; it is called as
+                         ``crc_func(data, current_crc)`` and must return
+                         the updated CRC.
+        :param data: Data to update the hasher.
+        :param initial_value: Initial CRC value.
+        :param width: Width (in bits) of CRC values.
+        :raises ValueError: if width is neither 32 nor 64
+        """
+        self.name = name
+        self.crc_func = crc_func
+        self.crc = initial_value
+        if width not in (32, 64):
+            raise ValueError("CRCHasher only supports 32- or 64-bit CRCs")
+        self.width = width
+        if data is not None:
+            self.update(data)
+
+    @property
+    def digest_size(self):
+        """
+        Size of the digest in bytes, as an int (like hashlib hashers).
+        """
+        # floor division: plain "/" would hand back a float such as 4.0
+        return self.width // 8
+
+    @property
+    def digest_fmt(self):
+        """
+        struct format used to pack the CRC: big-endian, 4 or 8 bytes.
+        """
+        return "!I" if self.width == 32 else "!Q"
+
+    def update(self, data):
+        """
+        Update the CRC with new data.
+
+        :param data: Data to update the CRC with.
+        """
+        self.crc = self.crc_func(data, self.crc)
+
+    def digest(self):
+        """
+        Return the current CRC value packed as a big-endian integer;
+        4 bytes for a 32-bit CRC, 8 bytes for a 64-bit CRC.
+
+        :returns: Packed CRC value. (bytes)
+        """
+        return struct.pack(self.digest_fmt, self.crc)
+
+    def hexdigest(self):
+        """
+        Return the hexadecimal representation of the current CRC value.
+
+        :returns: Hexadecimal CRC value. (str)
+        """
+        return binascii.hexlify(self.digest()).decode("ascii")
+
+    def copy(self):
+        """
+        Copy the current state of this CRCHasher to a new one.
+
+        :returns: a new CRCHasher with the same name, CRC function and
+                  width, whose CRC starts at this hasher's current value;
+                  updating either one does not affect the other.
+        """
+        return CRCHasher(self.name,
+                         self.crc_func,
+                         initial_value=self.crc,
+                         width=self.width)
+
+
+def crc32(data=None, initial_value=0):
+    """
+    Create a 32-bit CRCHasher named 'crc32' backed by ``zlib.crc32``.
+
+    :param data: optional initial data to hash
+    :param initial_value: CRC value to start from
+    :returns: a CRCHasher
+    """
+    return CRCHasher('crc32',
+                     zlib.crc32,
+                     data=data,
+                     initial_value=initial_value)
+
+
+def crc32c(data=None, initial_value=0):
+    """
+    Create a 32-bit CRCHasher named 'crc32c' backed by the best available
+    CRC32C implementation.
+
+    :param data: optional initial data to hash
+    :param initial_value: CRC value to start from
+    :returns: a CRCHasher
+    :raises NotImplementedError: if no CRC32C implementation is available
+    """
+    return CRCHasher('crc32c',
+                     _select_crc32c_impl(),
+                     data=data,
+                     initial_value=initial_value)
+
+
+def log_selected_implementation(logger):
+    """
+    Log which CRC32C implementation will be used, or warn if there is none.
+
+    :param logger: logger to write the info or warning message to
+    """
+    try:
+        impl = _select_crc32c_impl()
+    except NotImplementedError:
+        logger.warning(
+            'No implementation found for CRC32C; install ISA-L for support.')
+    else:
+        logger.info('Using %s implementation for CRC32C.', impl.__name__)
diff --git a/test/unit/__init__.py b/test/unit/__init__.py
index 97dac5eea3..a7f966c299 100644
--- a/test/unit/__init__.py
+++ b/test/unit/__init__.py
@@ -46,7 +46,7 @@ from swift.common import storage_policy, swob, utils, exceptions
 from swift.common.memcached import MemcacheConnectionError
 from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy,
                                          VALID_EC_TYPES)
-from swift.common.utils import Timestamp, md5, close_if_possible
+from swift.common.utils import Timestamp, md5, close_if_possible, checksum
 from test import get_config
 from test.debug_logger import FakeLogger
 from swift.common.header_key_dict import HeaderKeyDict
@@ -1085,6 +1085,17 @@ def requires_o_tmpfile_support_in_tmp(func):
     return wrapper
 
 
+def requires_crc32c(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            checksum.crc32c()
+        except NotImplementedError as e:
+            raise SkipTest(str(e))
+        return func(*args, **kwargs)
+    return wrapper
+
+
 class StubResponse(object):
     def __init__(self, status, body=b'', headers=None,
                  frag_index=None,
diff --git a/test/unit/common/utils/test_checksum.py b/test/unit/common/utils/test_checksum.py
new file mode 100644
index 0000000000..862d8709ca
--- /dev/null
+++ b/test/unit/common/utils/test_checksum.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2024 NVIDIA
+#
+# Licensed under the Apache
License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from unittest import mock
+
+import zlib
+
+from swift.common.utils import checksum
+from test.debug_logger import debug_logger
+from test.unit import requires_crc32c
+
+
+# If you're curious about the 0xe3069283, see "check" at
+# https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32-iscsi
+class TestCRC32C(unittest.TestCase):
+    def check_crc_func(self, impl):
+        self.assertEqual(impl(b"123456789"), 0xe3069283)
+        # Check that we can save/continue
+        partial = impl(b"12345")
+        self.assertEqual(impl(b"6789", partial), 0xe3069283)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern(self):
+        self.check_crc_func(checksum.crc32c_kern)
+        # Check preferences -- kernel is only selected when ISA-L is missing
+        if checksum.crc32c_isal is None:
+            self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_kern)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_happy_path(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_socket = mock.MagicMock()
+        mock_socket.recv.return_value = b'1234'
+        mock_crc32c_socket.accept.return_value = (mock_socket, None)
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            checksum.crc32c_kern(b'x')
+        self.assertEqual([mock.call()],
+                         mock_socket.close.call_args_list)
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_after_bind_error(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_crc32c_socket.bind.side_effect = OSError('boom')
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            with self.assertRaises(OSError) as cm:
+                checksum.crc32c_kern(b'x')
+        self.assertEqual('boom', str(cm.exception))
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_after_setsockopt_error(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_crc32c_socket.setsockopt.side_effect = OSError('boom')
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            with self.assertRaises(OSError) as cm:
+                checksum.crc32c_kern(b'x')
+        self.assertEqual('boom', str(cm.exception))
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_after_accept_error(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_crc32c_socket.accept.side_effect = OSError('boom')
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            with self.assertRaises(OSError) as cm:
+                checksum.crc32c_kern(b'x')
+        self.assertEqual('boom', str(cm.exception))
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_after_sendall_error(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_socket = mock.MagicMock()
+        mock_socket.sendall.side_effect = OSError('boom')
+        mock_crc32c_socket.accept.return_value = (mock_socket, None)
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            with self.assertRaises(OSError) as cm:
+                checksum.crc32c_kern(b'x')
+        self.assertEqual('boom', str(cm.exception))
+        self.assertEqual([mock.call()],
+                         mock_socket.close.call_args_list)
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_kern is None, 'No kernel CRC32C')
+    def test_kern_socket_close_after_recv_error(self):
+        mock_crc32c_socket = mock.MagicMock()
+        mock_socket = mock.MagicMock()
+        mock_socket.recv.side_effect = OSError('boom')
+        mock_crc32c_socket.accept.return_value = (mock_socket, None)
+        with mock.patch('swift.common.utils.checksum.socket.socket',
+                        return_value=mock_crc32c_socket):
+            with self.assertRaises(OSError) as cm:
+                checksum.crc32c_kern(b'x')
+        self.assertEqual('boom', str(cm.exception))
+        self.assertEqual([mock.call()],
+                         mock_socket.close.call_args_list)
+        self.assertEqual([mock.call()],
+                         mock_crc32c_socket.close.call_args_list)
+
+    @unittest.skipIf(checksum.crc32c_isal is None, 'No ISA-L CRC32C')
+    def test_isal(self):
+        self.check_crc_func(checksum.crc32c_isal)
+        # Check preferences -- ISA-L always wins
+        self.assertIs(checksum._select_crc32c_impl(), checksum.crc32c_isal)
+
+
+class TestCRCHasher(unittest.TestCase):
+    def setUp(self):
+        self.logger = debug_logger()
+
+    def test_base_crc_hasher(self):
+        func = mock.MagicMock(return_value=0xbad1)
+        hasher = checksum.CRCHasher('fake', func)
+        self.assertEqual('fake', hasher.name)
+        self.assertEqual(32, hasher.width)
+        self.assertEqual(0, hasher.crc)
+        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
+        self.assertEqual('00000000', hasher.hexdigest())
+
+        hasher.update(b'123456789')
+        self.assertEqual(0xbad1, hasher.crc)
+        self.assertEqual(b'\x00\x00\xba\xd1', hasher.digest())
+        self.assertEqual('0000bad1', hasher.hexdigest())
+
+    def test_crc_hasher_64_bit_width(self):
+        func = mock.MagicMock(return_value=0xbad1)
+        hasher = checksum.CRCHasher('fake64', func, width=64)
+        self.assertEqual(64, hasher.width)
+        self.assertEqual(8, hasher.digest_size)
+        self.assertEqual(b'\x00' * 8, hasher.digest())
+        self.assertEqual('0' * 16, hasher.hexdigest())
+
+        hasher.update(b'123456789')
+        self.assertEqual(b'\x00\x00\x00\x00\x00\x00\xba\xd1', hasher.digest())
+        self.assertEqual('000000000000bad1', hasher.hexdigest())
+
+    def test_crc_hasher_invalid_width(self):
+        func = mock.MagicMock(return_value=0xbad1)
+        with self.assertRaises(ValueError) as cm:
+            checksum.CRCHasher('fake', func, width=16)
+        self.assertEqual("CRCHasher only supports 32- or 64-bit CRCs",
+                         str(cm.exception))
+        self.assertEqual([], func.call_args_list)
+
+    def test_crc32_hasher(self):
+        # See CRC-32/ISO-HDLC at
+        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
+        hasher = checksum.crc32()
+        self.assertEqual('crc32', hasher.name)
+        self.assertEqual(4, hasher.digest_size)
+        self.assertEqual(zlib.crc32, hasher.crc_func)
+        self.assertEqual(32, hasher.width)
+        self.assertEqual(0, hasher.crc)
+        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
+        self.assertEqual('00000000', hasher.hexdigest())
+
+        hasher.update(b'123456789')
+        self.assertEqual(0xcbf43926, hasher.crc)
+        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
+        self.assertEqual('cbf43926', hasher.hexdigest())
+
+    def test_crc32_hasher_constructed_with_data(self):
+        hasher = checksum.crc32(b'123456789')
+        self.assertEqual(zlib.crc32, hasher.crc_func)
+        self.assertEqual(0xcbf43926, hasher.crc)
+        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
+        self.assertEqual('cbf43926', hasher.hexdigest())
+
+    def test_crc32_hasher_initial_value(self):
+        hasher = checksum.crc32(initial_value=0xcbf43926)
+        self.assertEqual(zlib.crc32, hasher.crc_func)
+        self.assertEqual(0xcbf43926, hasher.crc)
+        self.assertEqual(b'\xcb\xf4\x39\x26', hasher.digest())
+        self.assertEqual('cbf43926', hasher.hexdigest())
+
+    def test_crc32_hasher_copy(self):
+        hasher = checksum.crc32(b'123456789')
+        self.assertEqual(4, hasher.digest_size)
+        self.assertEqual('cbf43926', hasher.hexdigest())
+        hasher_copy = hasher.copy()
+        # the copy, not the original, is what's under test here
+        self.assertEqual('crc32', hasher_copy.name)
+        self.assertEqual(zlib.crc32, hasher_copy.crc_func)
+        self.assertEqual('cbf43926', hasher_copy.hexdigest())
+        hasher_copy.update(b'foo')
+        self.assertEqual('cbf43926', hasher.hexdigest())
+        self.assertEqual('04e7e407', hasher_copy.hexdigest())
+        hasher.update(b'bar')
+        self.assertEqual('fe6b0d8c', hasher.hexdigest())
+        self.assertEqual('04e7e407', hasher_copy.hexdigest())
+
+    @requires_crc32c
+    def test_crc32c_hasher(self):
+        # See CRC-32/ISCSI at
+        # https://reveng.sourceforge.io/crc-catalogue/17plus.htm
+        hasher = checksum.crc32c()
+        self.assertEqual('crc32c', hasher.name)
+        self.assertEqual(4, hasher.digest_size)
+        self.assertEqual(32, hasher.width)
+        self.assertEqual(0, hasher.crc)
+        self.assertEqual(b'\x00\x00\x00\x00', hasher.digest())
+        self.assertEqual('00000000', hasher.hexdigest())
+
+        hasher.update(b'123456789')
+        self.assertEqual(0xe3069283, hasher.crc)
+        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
+        self.assertEqual('e3069283', hasher.hexdigest())
+
+    @requires_crc32c
+    def test_crc32c_hasher_constructed_with_data(self):
+        hasher = checksum.crc32c(b'123456789')
+        self.assertEqual(0xe3069283, hasher.crc)
+        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
+        self.assertEqual('e3069283', hasher.hexdigest())
+
+    @requires_crc32c
+    def test_crc32c_hasher_initial_value(self):
+        hasher = checksum.crc32c(initial_value=0xe3069283)
+        self.assertEqual(0xe3069283, hasher.crc)
+        self.assertEqual(b'\xe3\x06\x92\x83', hasher.digest())
+        self.assertEqual('e3069283', hasher.hexdigest())
+
+    @requires_crc32c
+    def test_crc32c_hasher_copy(self):
+        hasher = checksum.crc32c(b'123456789')
+        self.assertEqual('e3069283', hasher.hexdigest())
+        hasher_copy = hasher.copy()
+        self.assertEqual('crc32c', hasher_copy.name)
+        self.assertIs(hasher.crc_func, hasher_copy.crc_func)
+        self.assertEqual('e3069283', hasher_copy.hexdigest())
+        hasher_copy.update(b'foo')
+        self.assertEqual('e3069283', hasher.hexdigest())
+        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())
+        hasher.update(b'bar')
+        self.assertEqual('ae5c789c', hasher.hexdigest())
+        self.assertEqual('6b2fc5b0', hasher_copy.hexdigest())
+
+    def test_crc32c_hasher_selects_kern_impl(self):
+        with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \
+                mock.patch(
+                    'swift.common.utils.checksum.crc32c_kern') as mock_kern:
+            mock_kern.__name__ = 'crc32c_kern'
+            self.assertIs(mock_kern, checksum.crc32c().crc_func)
+            checksum.log_selected_implementation(self.logger)
+        self.assertIn('Using crc32c_kern implementation for CRC32C.',
+                      self.logger.get_lines_for_level('info'))
+
+    def test_crc32c_hasher_selects_isal_impl(self):
+        with mock.patch(
+                'swift.common.utils.checksum.crc32c_isal') as mock_isal, \
+                mock.patch('swift.common.utils.checksum.crc32c_kern'):
+            mock_isal.__name__ = 'crc32c_isal'
+            self.assertIs(mock_isal, checksum.crc32c().crc_func)
+            checksum.log_selected_implementation(self.logger)
+        self.assertIn('Using crc32c_isal implementation for CRC32C.',
+                      self.logger.get_lines_for_level('info'))
+
+    def test_crc32c_hasher_no_implementation(self):
+        # Neither ISA-L nor the kernel is available: constructing a hasher
+        # fails loudly, while logging only warns
+        with mock.patch('swift.common.utils.checksum.crc32c_isal', None), \
+                mock.patch('swift.common.utils.checksum.crc32c_kern', None):
+            with self.assertRaises(NotImplementedError):
+                checksum.crc32c()
+            checksum.log_selected_implementation(self.logger)
+        self.assertIn(
+            'No implementation found for CRC32C; install ISA-L for support.',
+            self.logger.get_lines_for_level('warning'))
+        self.assertEqual([], self.logger.get_lines_for_level('info'))