diff --git a/LICENSE b/LICENSE index 7a7caa8..80afa9a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,7 @@ - Copyright(c) 2011-2015 Intel Corporation All rights reserved. + Copyright(c) 2011-2016 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions + modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. diff --git a/Makefile.am b/Makefile.am index 2ce1495..1793331 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,6 +27,7 @@ perf_tests32= include erasure_code/Makefile.am include raid/Makefile.am include crc/Makefile.am +include igzip/Makefile.am # LIB version info not necessarily the same as package version LIBISAL_CURRENT=2 diff --git a/Makefile.nmake b/Makefile.nmake index 4ef1df9..b520246 100644 --- a/Makefile.nmake +++ b/Makefile.nmake @@ -1,5 +1,5 @@ ######################################################################## -# Copyright(c) 2011-2015 Intel Corporation All rights reserved. +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -27,11 +27,91 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
######################################################################## +objs = \ + bin\ec_base.obj \ + bin\ec_highlevel_func.obj \ + bin\ec_multibinary.obj \ + bin\gf_2vect_dot_prod_avx.obj \ + bin\gf_2vect_dot_prod_avx2.obj \ + bin\gf_2vect_dot_prod_avx512.obj \ + bin\gf_2vect_dot_prod_sse.obj \ + bin\gf_2vect_mad_avx.obj \ + bin\gf_2vect_mad_avx2.obj \ + bin\gf_2vect_mad_avx512.obj \ + bin\gf_2vect_mad_sse.obj \ + bin\gf_3vect_dot_prod_avx.obj \ + bin\gf_3vect_dot_prod_avx2.obj \ + bin\gf_3vect_dot_prod_avx512.obj \ + bin\gf_3vect_dot_prod_sse.obj \ + bin\gf_3vect_mad_avx.obj \ + bin\gf_3vect_mad_avx2.obj \ + bin\gf_3vect_mad_avx512.obj \ + bin\gf_3vect_mad_sse.obj \ + bin\gf_4vect_dot_prod_avx.obj \ + bin\gf_4vect_dot_prod_avx2.obj \ + bin\gf_4vect_dot_prod_avx512.obj \ + bin\gf_4vect_dot_prod_sse.obj \ + bin\gf_4vect_mad_avx.obj \ + bin\gf_4vect_mad_avx2.obj \ + bin\gf_4vect_mad_avx512.obj \ + bin\gf_4vect_mad_sse.obj \ + bin\gf_5vect_dot_prod_avx.obj \ + bin\gf_5vect_dot_prod_avx2.obj \ + bin\gf_5vect_dot_prod_sse.obj \ + bin\gf_5vect_mad_avx.obj \ + bin\gf_5vect_mad_avx2.obj \ + bin\gf_5vect_mad_sse.obj \ + bin\gf_6vect_dot_prod_avx.obj \ + bin\gf_6vect_dot_prod_avx2.obj \ + bin\gf_6vect_dot_prod_sse.obj \ + bin\gf_6vect_mad_avx.obj \ + bin\gf_6vect_mad_avx2.obj \ + bin\gf_6vect_mad_sse.obj \ + bin\gf_vect_dot_prod_avx.obj \ + bin\gf_vect_dot_prod_avx2.obj \ + bin\gf_vect_dot_prod_avx512.obj \ + bin\gf_vect_dot_prod_sse.obj \ + bin\gf_vect_mad_avx.obj \ + bin\gf_vect_mad_avx2.obj \ + bin\gf_vect_mad_avx512.obj \ + bin\gf_vect_mad_sse.obj \ + bin\gf_vect_mul_avx.obj \ + bin\gf_vect_mul_sse.obj \ + bin\pq_check_sse.obj \ + bin\pq_gen_avx.obj \ + bin\pq_gen_avx2.obj \ + bin\pq_gen_sse.obj \ + bin\raid_base.obj \ + bin\raid_multibinary.obj \ + bin\xor_check_sse.obj \ + bin\xor_gen_avx.obj \ + bin\xor_gen_sse.obj \ + bin\crc16_t10dif_01.obj \ + bin\crc16_t10dif_by4.obj \ + bin\crc32_gzip.obj \ + bin\crc32_ieee_01.obj \ + bin\crc32_ieee_by4.obj \ + 
bin\crc32_iscsi_00.obj \ + bin\crc32_iscsi_01.obj \ + bin\crc_base.obj \ + bin\crc_data.obj \ + bin\crc_multibinary.obj \ + bin\huff_codes.obj \ + bin\hufftables_c.obj \ + bin\igzip.obj \ + bin\igzip_base.obj \ + bin\igzip_body_01.obj \ + bin\igzip_body_04.obj \ + bin\igzip_finish.obj \ + bin\igzip_multibinary.obj \ + bin\igzip_stateless_01.obj \ + bin\igzip_stateless_04.obj \ + bin\igzip_stateless_base.obj \ + bin\crc_utils_01.obj \ + bin\crc_utils_04.obj \ + bin\detect_repeated_char.obj -objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj bin\pq_check_sse.obj bin\pq_gen_avx.obj bin\pq_gen_avx2.obj bin\pq_gen_sse.obj bin\raid_base.obj bin\raid_multibinary.obj bin\xor_check_sse.obj 
bin\xor_gen_avx.obj bin\xor_gen_sse.obj bin/crc16_t10dif_01.obj bin/crc16_t10dif_by4.obj bin/crc32_ieee_01.obj bin/crc32_ieee_by4.obj bin/crc32_iscsi_01.obj bin/crc32_iscsi_00.obj bin/crc_multibinary.obj bin/crc_base.obj - - -INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iinclude/ +INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ LINKFLAGS = /nologo CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) AFLAGS = -f win64 $(INCLUDES) $(D) @@ -65,9 +145,14 @@ isa-l.dll: $(objs) {crc}.asm.obj: $(AS) $(AFLAGS) -o $@ $? +{igzip}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{igzip}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + # Examples -ex = xor_example.exe crc_simple_test.exe +ex = xor_example.exe crc_simple_test.exe igzip_example.exe igzip_sync_flush_example.exe ex: lib $(ex) $(ex): $(@B).obj @@ -76,9 +161,19 @@ $(ex): $(@B).obj link /out:$@ $(LINKFLAGS) isa-l.lib $? # Check tests -checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe \ - pq_check_test.exe pq_gen_test.exe xor_check_test.exe xor_gen_test.exe \ - crc16_t10dif_test.exe crc32_ieee_test.exe crc32_iscsi_test.exe +checks = \ + gf_vect_mul_test.exe \ + erasure_code_test.exe \ + gf_inverse_test.exe \ + erasure_code_update_test.exe \ + xor_gen_test.exe \ + pq_gen_test.exe \ + xor_check_test.exe \ + pq_check_test.exe \ + crc16_t10dif_test.exe \ + crc32_ieee_test.exe \ + crc32_iscsi_test.exe \ + igzip_check.exe checks: lib $(checks) $(checks): $(@B).obj @@ -86,13 +181,53 @@ check: $(checks) !$? 
# Unit tests -tests = erasure_code_base_test.exe erasure_code_sse_test.exe gf_2vect_dot_prod_sse_test.exe gf_3vect_dot_prod_sse_test.exe gf_4vect_dot_prod_sse_test.exe gf_5vect_dot_prod_sse_test.exe gf_6vect_dot_prod_sse_test.exe gf_vect_dot_prod_avx_test.exe gf_vect_dot_prod_base_test.exe gf_vect_dot_prod_sse_test.exe gf_vect_dot_prod_test.exe gf_vect_mad_test.exe gf_vect_mul_avx_test.exe gf_vect_mul_base_test.exe gf_vect_mul_sse_test.exe +tests = \ + gf_vect_mul_sse_test.exe \ + gf_vect_mul_avx_test.exe \ + gf_vect_mul_base_test.exe \ + gf_vect_dot_prod_sse_test.exe \ + gf_vect_dot_prod_avx_test.exe \ + gf_2vect_dot_prod_sse_test.exe \ + gf_3vect_dot_prod_sse_test.exe \ + gf_4vect_dot_prod_sse_test.exe \ + gf_5vect_dot_prod_sse_test.exe \ + gf_6vect_dot_prod_sse_test.exe \ + gf_vect_dot_prod_base_test.exe \ + gf_vect_dot_prod_test.exe \ + gf_vect_mad_test.exe \ + erasure_code_base_test.exe \ + erasure_code_sse_test.exe \ + igzip_rand_test.exe tests: lib $(tests) $(tests): $(@B).obj # Performance tests -perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe pq_gen_perf.exe xor_gen_perf.exe crc16_t10dif_perf.exe crc32_ieee_perf.exe crc32_iscsi_perf.exe +perfs = \ + gf_vect_mul_perf.exe \ + gf_vect_mul_sse_perf.exe \ + gf_vect_mul_avx_perf.exe \ + gf_vect_dot_prod_sse_perf.exe \ + gf_vect_dot_prod_avx_perf.exe \ + gf_2vect_dot_prod_sse_perf.exe \ + gf_3vect_dot_prod_sse_perf.exe \ + gf_4vect_dot_prod_sse_perf.exe \ + gf_5vect_dot_prod_sse_perf.exe \ + gf_6vect_dot_prod_sse_perf.exe \ + gf_vect_dot_prod_perf.exe \ + gf_vect_dot_prod_1tbl.exe \ + 
gf_vect_mad_perf.exe \ + erasure_code_perf.exe \ + erasure_code_base_perf.exe \ + erasure_code_sse_perf.exe \ + erasure_code_update_perf.exe \ + xor_gen_perf.exe \ + pq_gen_perf.exe \ + crc16_t10dif_perf.exe \ + crc32_ieee_perf.exe \ + crc32_iscsi_perf.exe \ + igzip_perf.exe \ + igzip_sync_flush_perf.exe perfs: lib $(perfs) $(perfs): $(@B).obj @@ -105,3 +240,10 @@ clean: -if exist isa-l.lib del isa-l.lib -if exist isa-l.dll del isa-l.dll +zlib.lib: +igzip_rand_test.exe: igzip_inflate_ref.obj +igzip_inflate_perf.exe: igzip_inflate_ref.obj +igzip_inflate_perf.exe: zlib.lib +igzip_inflate_test.exe: igzip_inflate_ref.obj +igzip_inflate_test.exe: zlib.lib +igzip_check.exe: igzip_inflate_ref.obj diff --git a/Makefile.unx b/Makefile.unx index 0fe1825..6c21094 100644 --- a/Makefile.unx +++ b/Makefile.unx @@ -27,7 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################## -units = erasure_code raid crc +units = erasure_code raid crc igzip default: lib diff --git a/igzip/Makefile.am b/igzip/Makefile.am new file mode 100644 index 0000000..5a94a94 --- /dev/null +++ b/igzip/Makefile.am @@ -0,0 +1,95 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +lsrc += igzip/igzip.c igzip/hufftables_c.c \ + igzip/crc_utils_01.asm \ + igzip/crc_utils_04.asm \ + igzip/igzip_body_01.asm igzip/igzip_body_04.asm igzip/igzip_finish.asm \ + igzip/igzip_stateless_01.asm igzip/igzip_stateless_04.asm \ + igzip/crc_data.asm \ + igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \ + igzip/igzip_multibinary.asm \ + igzip/igzip_stateless_base.c \ + igzip/igzip_base.c + +extern_hdrs += include/igzip_lib.h + +pkginclude_HEADERS += include/types.h + +unit_tests += igzip/igzip_rand_test + +check_tests += igzip/igzip_check + +perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf + +other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf + +other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \ + igzip/igzip_buffer_utils_01.asm \ + igzip/igzip_buffer_utils_04.asm \ + igzip/igzip_body.asm igzip/igzip_finish.asm \ + igzip/lz0a_const.asm 
igzip/options.asm igzip/stdmac.asm igzip/igzip_compare_types.asm \ + igzip/bitbuf2.h igzip/repeated_char_result.h \ + igzip/igzip_body.asm \ + igzip/igzip_stateless.asm \ + igzip/huffman.asm \ + include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + igzip/huffman.h + + +examples += igzip/igzip_example igzip/igzip_sync_flush_example + +igzip_rand_test: igzip_inflate_ref.o +igzip_igzip_rand_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la + +# Include tools to make custom Huffman tables based on sample data +other_tests += igzip/generate_custom_hufftables +other_tests += igzip/generate_constant_block_header +other_src += igzip/huff_codes.h +lsrc += igzip/huff_codes.c + +# Include tools and tests using the reference inflate +other_tests += igzip/igzip_inflate_perf +other_tests += igzip/igzip_inflate_test +other_src += igzip/igzip_inflate_ref.h +other_src += igzip/igzip_inflate_ref.c +other_src += igzip/crc_inflate.h + +igzip_inflate_perf: igzip_inflate_ref.o +igzip_inflate_perf: LDLIBS += -lz +igzip_igzip_inflate_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la +igzip_igzip_inflate_perf_LDFLAGS = -lz +igzip_inflate_test: igzip_inflate_ref.o +igzip_inflate_test: LDLIBS += -lz +igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la +igzip_igzip_inflate_test_LDFLAGS = -lz +igzip_check: igzip_inflate_ref.o +igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la diff --git a/igzip/bitbuf2.asm b/igzip/bitbuf2.asm new file mode 100644 index 0000000..42821c3 --- /dev/null +++ b/igzip/bitbuf2.asm @@ -0,0 +1,205 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" + +; Assumes m_out_buf is a register +; Clobbers RCX +; code is clobbered +; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1 +%macro write_bits_always 6 +%define %%m_bits %1 +%define %%m_bit_count %2 +%define %%code %3 +%define %%count %4 +%define %%m_out_buf %5 +%define %%tmp1 %6 + +%ifdef USE_HSWNI + shlx %%code, %%code, %%m_bit_count +%else + mov rcx, %%m_bit_count + shl %%code, cl +%endif + or %%m_bits, %%code + add %%m_bit_count, %%count + + movnti [%%m_out_buf], %%m_bits + mov rcx, %%m_bit_count + shr rcx, 3 ; rcx = bytes + add %%m_out_buf, rcx + shl rcx, 3 ; rcx = bits + sub %%m_bit_count, rcx +%ifdef USE_HSWNI + shrx %%m_bits, %%m_bits, rcx +%else + shr %%m_bits, cl +%endif +%endm + +; Assumes m_out_buf is a register +; Clobbers RCX +; tmp1 is clobbered; code is preserved +; write_bits_safe m_bits, m_bit_count, code, count, m_out_buf, tmp1 +%macro write_bits_safe 6 +%define %%m_bits %1 +%define %%m_bit_count %2 +%define %%code %3 +%define %%count %4 +%define %%m_out_buf %5 +%define %%tmp1 %6 + + mov %%tmp1, %%code +%ifdef USE_HSWNI + shlx %%tmp1, %%tmp1, %%m_bit_count +%else + mov rcx, %%m_bit_count + shl %%tmp1, cl +%endif + or %%m_bits, %%tmp1 + add %%m_bit_count, %%count + cmp %%m_bit_count, 64 + jb %%not_full + sub %%m_bit_count, 64 + movnti [%%m_out_buf], %%m_bits + add %%m_out_buf, 8 + mov rcx, %%count + sub rcx, %%m_bit_count + mov %%m_bits, %%code +%ifdef USE_HSWNI + shrx %%m_bits, %%m_bits, rcx +%else + shr %%m_bits, cl +%endif +%%not_full: +%endm + +; Assumes m_out_buf is a register +; Clobbers RCX and tmp1 +;; check_space num_bits, m_bits, m_bit_count, m_out_buf, tmp1 +%macro check_space 5 +%define %%num_bits %1 +%define %%m_bits %2 +%define %%m_bit_count %3 +%define %%m_out_buf %4 +%define %%tmp1 %5 + + mov %%tmp1, 63 + sub %%tmp1, %%m_bit_count + cmp %%tmp1, %%num_bits + jae %%space_ok + + ; if (63 - m_bit_count < num_bits) + movnti [%%m_out_buf], 
%%m_bits + mov rcx, %%m_bit_count + shr rcx, 3 ; rcx = bytes + add %%m_out_buf, rcx + shl rcx, 3 ; rcx = bits + sub %%m_bit_count, rcx +%ifdef USE_HSWNI + shrx %%m_bits, %%m_bits, rcx +%else + shr %%m_bits, cl +%endif +%%space_ok: +%endm + +; rcx is clobbered +; code is clobbered +; write_bits_unsafe m_bits, m_bit_count, code, count +%macro write_bits_unsafe 4 +%define %%m_bits %1 +%define %%m_bit_count %2 +%define %%code %3 +%define %%count %4 +%ifdef USE_HSWNI + shlx %%code, %%code, %%m_bit_count +%else + mov rcx, %%m_bit_count + shl %%code, cl +%endif + or %%m_bits, %%code + add %%m_bit_count, %%count +%endm + +; pad_to_byte m_bit_count, extra_bits +%macro pad_to_byte 2 +%define %%m_bit_count %1 +%define %%extra_bits %2 + + mov %%extra_bits, %%m_bit_count + neg %%extra_bits + and %%extra_bits, 7 + add %%m_bit_count, %%extra_bits +%endm + +; Assumes m_out_buf is a memory reference +; flush m_bits, m_bit_count, m_out_buf, tmp1 +%macro flush 4 +%define %%m_bits %1 +%define %%m_bit_count %2 +%define %%m_out_buf %3 +%define %%tmp1 %4 + + test %%m_bit_count, %%m_bit_count + jz %%bit_count_is_zero + + mov %%tmp1, %%m_out_buf + movnti [%%tmp1], %%m_bits + + add %%m_bit_count, 7 + shr %%m_bit_count, 3 ; bytes + add %%tmp1, %%m_bit_count + mov %%m_out_buf, %%tmp1 + +%%bit_count_is_zero: + xor %%m_bits, %%m_bits + xor %%m_bit_count, %%m_bit_count +%endm + +%macro write_bits 6 +%define %%m_bits %1 +%define %%m_bit_count %2 +%define %%code %3 +%define %%count %4 +%define %%m_out_buf %5 +%define %%tmp1 %6 + +%ifdef USE_BITBUF8 + write_bits_safe %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1 +%elifdef USE_BITBUFB + write_bits_always %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1 +%else + ; state->bitbuf.check_space(code_len2); + check_space %%count, %%m_bits, %%m_bit_count, %%m_out_buf, %%tmp1 + ; state->bitbuf.write_bits(code2, code_len2); + write_bits_unsafe %%m_bits, %%m_bit_count, %%code, %%count + ; code2 is clobbered, rcx is clobbered 
+%endif
+%endm
diff --git a/igzip/bitbuf2.h b/igzip/bitbuf2.h
new file mode 100644
index 0000000..84d521d
--- /dev/null
+++ b/igzip/bitbuf2.h
@@ -0,0 +1,180 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef BITBUF2_H
+#define BITBUF2_H
+
+#include "igzip_lib.h"
+
+/* Provide _mm_stream_si64x(): a plain 64-bit store on Unix-like and Apple
+ * targets, otherwise whatever <intrin.h> declares for the compiler in use. */
+#if defined (__unix__) || defined (__APPLE__)
+#define _mm_stream_si64x(dst, src) (*((uint64_t *)(dst)) = (src))
+#else
+#include <intrin.h>
+#endif
+
+#ifdef _WIN64
+#pragma warning(disable: 4996)
+#endif
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+
+/* MAX_BITBUF_BIT_WRITE is the maximum number of bits that can be safely written
+ * by consecutive calls of write_bits. Note this assumes the bitbuf is in a
+ * state that is possible at the exit of write_bits */
+#ifdef USE_BITBUF8 /*Write bits safe */
+# define MAX_BITBUF_BIT_WRITE 63
+#elif defined(USE_BITBUFB) /* Write bits always */
+# define MAX_BITBUF_BIT_WRITE 56
+#else /* USE_BITBUF_ELSE */
+# define MAX_BITBUF_BIT_WRITE 56
+#endif
+
+
+/* Clear the bit accumulator and set every output-buffer pointer to NULL. */
+static inline void construct(struct BitBuf2 *me)
+{
+	me->m_bits = 0;
+	me->m_bit_count = 0;
+	me->m_out_buf = me->m_out_start = me->m_out_end = NULL;
+}
+
+/* Clear only the bit accumulator; the output-buffer pointers are untouched. */
+static inline void init(struct BitBuf2 *me)
+{
+	me->m_bits = 0;
+	me->m_bit_count = 0;
+}
+
+/* Attach an output buffer of len bytes. m_out_end is set 8 bytes ("slop")
+ * before the end of the buffer, matching the 8-byte stores done by the
+ * writers below.
+ * NOTE(review): len < 8 places m_out_end before buf - presumably callers
+ * always pass len >= 8; confirm. */
+static inline void set_buf(struct BitBuf2 *me, unsigned char *buf, unsigned int len)
+{
+	unsigned int slop = 8;
+	me->m_out_buf = me->m_out_start = buf;
+	me->m_out_end = buf + len - slop;
+}
+
+/* Returns non-zero once the write pointer has moved past m_out_end. */
+static inline int is_full(struct BitBuf2 *me)
+{
+	return (me->m_out_buf > me->m_out_end);
+}
+
+/* Returns the current write position in the output buffer. */
+static inline uint8_t * buffer_ptr(struct BitBuf2 *me)
+{
+	return me->m_out_buf;
+}
+
+/* Returns the number of bytes between m_out_start and the write position. */
+static inline uint32_t buffer_used(struct BitBuf2 *me)
+{
+	return (uint32_t)(me->m_out_buf - me->m_out_start);
+}
+
+static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
+{
+	/* Checks if bitbuf has num_bits extra space and flushes the bytes in
+	 * the bitbuf if it doesn't. Only whole bytes are advanced; the
+	 * remaining (fewer than 8) bits stay in m_bits. */
+	uint32_t bytes;
+	if (63 - me->m_bit_count < num_bits) {
+		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
+		bytes = me->m_bit_count / 8;
+		me->m_out_buf += bytes;
+		bytes *= 8;
+		me->m_bit_count -= bytes;
+		me->m_bits >>= bytes;
+	}
+}
+
+/* ORs code into m_bits at bit offset m_bit_count and adds count to
+ * m_bit_count. No space check is done here; see check_space(). */
+static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
+{
+	me->m_bits |= code << me->m_bit_count;
+	me->m_bit_count += count;
+}
+
+/* Appends count bits of code using the strategy selected at compile time:
+ * USE_BITBUF8, USE_BITBUFB, or (default) check_space() followed by
+ * write_bits_unsafe(). */
+static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
+{
+#ifdef USE_BITBUF8 /*Write bits safe */
+	me->m_bits |= code << me->m_bit_count;
+	me->m_bit_count += count;
+	if (me->m_bit_count >= 64) {
+		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
+		me->m_out_buf += 8;
+		me->m_bit_count -= 64;
+		me->m_bits = code >> (count - me->m_bit_count);
+	}
+#elif defined(USE_BITBUFB) /* Write bits always */
+	/* Assumes there is space to fit code into m_bits. */
+	uint32_t bits;
+	me->m_bits |= code << me->m_bit_count;
+	me->m_bit_count += count;
+	if (me->m_bit_count >= 8) {
+		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
+		bits = me->m_bit_count & ~7;
+		me->m_bit_count -= bits;
+		me->m_out_buf += bits/8;
+		me->m_bits >>= bits;
+	}
+#else /* USE_BITBUF_ELSE */
+	check_space(me, count);
+	write_bits_unsafe(me, code, count);
+#endif
+
+}
+
+/* Stores any pending bits, advances the write pointer by the pending bit
+ * count rounded up to whole bytes, then clears the accumulator.
+ * Can write up to 8 bytes to output buffer */
+static inline void flush(struct BitBuf2 *me)
+{
+	uint32_t bytes;
+	if (me->m_bit_count) {
+		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
+		bytes = (me->m_bit_count + 7) / 8;
+		me->m_out_buf += bytes;
+	}
+	me->m_bits = 0;
+	me->m_bit_count = 0;
+}
+
+#endif //BITBUF2_H
diff --git a/igzip/crc32_gzip.asm b/igzip/crc32_gzip.asm
new file mode 100644
index 0000000..e0b0ba4
--- /dev/null
+++ b/igzip/crc32_gzip.asm
@@ -0,0 +1,617 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; UINT32 crc32_gzip( +; UINT32 init_crc, //initial CRC value, 32 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; +; +; sample yasm command line: +; yasm -f x64 -f elf64 -X gnu -g dwarf2 crc32_gzip +; +; As explained here: +; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html +; CRC-32 checksum is described in RFC 1952 +; Implementing RFC 1952 CRC: +; http://www.ietf.org/rfc/rfc1952.txt + +%include "reg_sizes.asm" + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + + %xdefine arg1_low32 edi +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +align 16 +global crc32_gzip +crc32_gzip: + + ; unsigned long c = crc ^ 0xffffffffL; + not arg1_low32 ; + + + sub rsp, VARIABLE_OFFSET +%ifidn __OUTPUT_FORMAT__, win64 + ; push the xmm registers into the stack to maintain + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + ; check if smaller than 256B + cmp arg3, 256 + + ; for sizes less than 256, we can't 
fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movd xmm10, arg1_low32 ; initial crc + + ; receive the initial 64B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + ; XOR the initial_crc value + pxor xmm0, xmm10 + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 + + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm1, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm3, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm5, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + movdqa 
xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm7, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 + je _128_done + +; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + + + movdqa xmm2, xmm7 + movdqu xmm1, [arg2 - 16 + arg3] + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table] + add rax, arg3 + movdqu xmm0, [rax] + + + pshufb xmm7, xmm0 + pxor xmm0, [mask3] + pshufb xmm2, xmm0 + + pblendvb xmm2, xmm1 ;xmm0 is implicit + ;;;;;;;;;; + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0 + psrldq xmm0, 8 + pxor xmm7, xmm0 + + ;32b fold + movdqa xmm0, xmm7 + pslldq xmm7, 4 + pclmulqdq xmm7, xmm10, 0x10 + + pxor xmm7, xmm0 + + + ;barrett reduction +_barrett: + pand xmm7, [mask2] + movdqa xmm1, xmm7 + movdqa xmm2, xmm7 + movdqa xmm10, [rk7] + + pclmulqdq xmm7, xmm10, 0 + pxor xmm7, xmm2 + pand xmm7, [mask] + movdqa xmm2, xmm7 + pclmulqdq xmm7, xmm10, 0x10 + pxor xmm7, xmm2 + pxor xmm7, xmm1 + pextrd eax, xmm7, 2 + +_cleanup: 
+ ; return c ^ 0xffffffffL; + not eax + + +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + movd xmm0, arg1_low32 ; get the initial crc value + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup + + movd xmm0, arg1_low32 ; get the initial crc value + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg3, 4 + jl _only_less_than_4 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + lea rax,[pshufb_shf_table] + movdqu xmm0, [rax + r9] + pshufb xmm7,xmm0 + + + + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp arg3, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 7 + + jmp _barrett + +section .data + +; precomputed constants +align 16 +rk1 : +DQ 0x00000000ccaa009e +rk2 : +DQ 0x00000001751997d0 +rk3 : +DQ 0x000000014a7fe880 +rk4 : +DQ 0x00000001e88ef372 +rk5 : +DQ 0x00000000ccaa009e +rk6 : +DQ 0x0000000163cd6124 +rk7 : +DQ 0x00000001f7011640 +rk8 : +DQ 0x00000001db710640 +rk9 : 
+DQ 0x00000001d7cfc6ac +rk10 : +DQ 0x00000001ea89367e +rk11 : +DQ 0x000000018cb44e58 +rk12 : +DQ 0x00000000df068dc2 +rk13 : +DQ 0x00000000ae0b5394 +rk14 : +DQ 0x00000001c7569e54 +rk15 : +DQ 0x00000001c6e41596 +rk16 : +DQ 0x0000000154442bd4 +rk17 : +DQ 0x0000000174359406 +rk18 : +DQ 0x000000003db1ecdc +rk19 : +DQ 0x000000015a546366 +rk20 : +DQ 0x00000000f1da05aa + + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; the 32 bytes defined below are read with an unaligned 16-byte load at byte offset n (the number of leftover bytes, 1..15); the mask obtained for each n is listed here: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + + +mask: +dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: +dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: +dq 0x8080808080808080, 0x8080808080808080 diff --git a/igzip/crc_data.asm b/igzip/crc_data.asm new file mode 100644 index 0000000..04779f8 --- /dev/null +++ b/igzip/crc_data.asm @@ -0,0 +1,120 @@ +%ifndef CRC_DATA + +%define CRC_DATA +; precomputed constants +section
.data + +align 32 + +global pshufb_shf_table:data internal +pshufb_shf_table: +dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 + +;; ; MAGIC value, which when folded 4 times gives FFFFFF00000...0000 +;; global crc_init_4 +;; crc_init_4: +;; dq 0x9db42487 +;; dq 0x0 +;; dq 0x0 +;; dq 0x0 + +; constant used to shift/fold one XMM reg down by 4 XMM widths +global fold_4:data internal +fold_4: +dq 0x00000001c6e41596 +dq 0x0000000154442bd4 + + +;value, which when xored with pshufb_shf_table entry gives shr value +global mask3:data internal +mask3: dq 0x8080808080808080, 0x8080808080808080 + +%ifndef CRC_TABLE +%define CRC_TABLE +; Place marker in library to avoid linker warning +align 4 +global CrcTable:data internal +CrcTable: + dd 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba + dd 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3 + dd 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988 + dd 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91 + dd 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de + dd 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7 + dd
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec + dd 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5 + dd 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172 + dd 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b + dd 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940 + dd 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59 + dd 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116 + dd 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f + dd 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924 + dd 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d + dd 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a + dd 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433 + dd 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818 + dd 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01 + dd 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e + dd 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457 + dd 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c + dd 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65 + dd 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2 + dd 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb + dd 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0 + dd 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9 + dd 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086 + dd 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f + dd 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4 + dd 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad + dd 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a + dd 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683 + dd 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8 + dd 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1 + dd 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe + dd 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7 + dd 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc + dd 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5 + dd 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252 + dd 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b + dd 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60 + dd 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79 + dd 0xcb61b38c, 0xbc66831a, 
0x256fd2a0, 0x5268e236 + dd 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f + dd 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04 + dd 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d + dd 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a + dd 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713 + dd 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38 + dd 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21 + dd 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e + dd 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777 + dd 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c + dd 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45 + dd 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2 + dd 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db + dd 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0 + dd 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9 + dd 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6 + dd 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf + dd 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94 + dd 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +End_CrcTable: + +%endif ;; CRC_TABLE + +%endif ;; CRC_DATA diff --git a/igzip/crc_inflate.h b/igzip/crc_inflate.h new file mode 100644 index 0000000..3683114 --- /dev/null +++ b/igzip/crc_inflate.h @@ -0,0 +1,81 @@ +#ifndef INFLATE_CRC_TABLE +#define INFLATE_CRC_TABLE + +uint32_t inflate_crc_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 
0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 
0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; + + +uint32_t find_crc(uint8_t * start, uint32_t length) /* Table-driven CRC of the length bytes at start: the running value begins as all ones, is updated one input byte at a time through inflate_crc_table, and its bitwise complement is returned (so length == 0 yields 0). NOTE(review): this function and inflate_crc_table are non-static definitions inside a header; including the header from more than one translation unit would define both symbols twice at link time -- confirm it is included only once, or consider giving them internal linkage. */ +{ + uint32_t crc = ~0; /* ~0 converts to 0xFFFFFFFF, the initial CRC value */ + uint8_t *end = start + length; + + while (start < end) + crc = (crc >> 8) ^ inflate_crc_table[(crc & 0x000000FF) ^ *start++]; /* low CRC byte xor input byte selects the table entry */ + return ~crc; +} + +#endif diff --git a/igzip/crc_utils_01.asm b/igzip/crc_utils_01.asm new file mode 100644 index 0000000..ba81114 --- /dev/null +++ b/igzip/crc_utils_01.asm @@ -0,0 +1,195 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "reg_sizes.asm" + +; Functional versions of CRC macros + +%include "igzip_buffer_utils_01.asm" + +extern fold_4 + +%define crc_0 xmm0 ; in/out: crc state +%define crc_1 xmm1 ; in/out: crc state +%define crc_2 xmm2 ; in/out: crc state +%define crc_3 xmm3 ; in/out: crc state +%define crc_fold xmm4 ; in: (loaded from fold_4) +%define crc_tmp0 xmm5 ; tmp +%define crc_tmp1 xmm6 ; tmp +%define crc_tmp2 xmm7 ; tmp +%define crc_tmp3 xmm8 ; tmp +%define crc_tmp4 xmm9 ; tmp +%define tmp4 rax + +; copy x bytes (rounded up to 16 bytes) from src to dst with crc +; src & dst are unaligned +; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc) +; arg 1: rcx: pointer to dst +; arg 2: rdx: pointer to src +; arg 3: r8: size (in bytes) +; arg 4: r9: pointer to CRC +;; %if 0 +global copy_in_crc_01 +copy_in_crc_01: +%ifidn __OUTPUT_FORMAT__, elf64 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%endif + + ; Save xmm registers that need to be preserved. 
+ sub rsp, 8 + 4*16 ; four 16-byte save slots; the extra 8 bytes presumably bring rsp back to 16-byte alignment for movdqa (assumes the usual ABI stack alignment at entry) + movdqa [rsp+0*16], xmm6 + movdqa [rsp+1*16], xmm7 + movdqa [rsp+2*16], xmm8 + movdqa [rsp+3*16], xmm9 + + movdqa crc_0, [r9 + 0*16] ; load the four 16-byte CRC state words; movdqa requires r9 to be 16-byte aligned + movdqa crc_1, [r9 + 1*16] + movdqa crc_2, [r9 + 2*16] + movdqa crc_3, [r9 + 3*16] + + movdqa crc_fold, [fold_4 WRT_OPT] + COPY_IN_CRC rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \ + crc_fold, \ + crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4 + + movdqa [r9 + 0*16], crc_0 ; write the updated CRC state back to the caller's buffer + movdqa [r9 + 1*16], crc_1 + movdqa [r9 + 2*16], crc_2 + movdqa [r9 + 3*16], crc_3 + + movdqa xmm9, [rsp+3*16] ; restore the xmm6-xmm9 values saved on entry + movdqa xmm8, [rsp+2*16] + movdqa xmm7, [rsp+1*16] + movdqa xmm6, [rsp+0*16] + add rsp, 8 + 4*16 +ret + +; Convert 512-bit CRC data to real 32-bit value +; uint32_t crc_512to32(uint32_t *crc) +; arg 1: rcx: pointer to CRC (four 16-byte words read with movdqa, so it must be 16-byte aligned; rdi is copied to rcx for elf64) +; returns: eax: 32 bit crc +global crc_512to32_01 +crc_512to32_01: +%ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi +%endif + + movdqa crc_0, [rcx + 0*16] + movdqa crc_1, [rcx + 1*16] + movdqa crc_2, [rcx + 2*16] + movdqa crc_3, [rcx + 3*16] + + movdqa crc_fold, [rk1 WRT_OPT] ; 16-byte load: rk1 in the low qword, rk2 in the high qword + + ; fold the 4 xmm registers down to 1: each register is multiplied by the same rk1/rk2 pair (imm 0x1: high qword * rk1, imm 0x10: low qword * rk2) and xored into the next one, crc_0 -> crc_1 -> crc_2 -> crc_3 + movdqa crc_tmp0, crc_0 + pclmulqdq crc_0, crc_fold, 0x1 + pclmulqdq crc_tmp0, crc_fold, 0x10 + pxor crc_1, crc_tmp0 + pxor crc_1, crc_0 + + movdqa crc_tmp0, crc_1 + pclmulqdq crc_1, crc_fold, 0x1 + pclmulqdq crc_tmp0, crc_fold, 0x10 + pxor crc_2, crc_tmp0 + pxor crc_2, crc_1 + + movdqa crc_tmp0, crc_2 + pclmulqdq crc_2, crc_fold, 0x1 + pclmulqdq crc_tmp0, crc_fold, 0x10 + pxor crc_3, crc_tmp0 + pxor crc_3, crc_2 + + + movdqa crc_fold, [rk5 WRT_OPT] ; 16-byte load: rk5 (low qword) and rk6 (high qword) for reducing the 128-bit value in crc_3 + movdqa crc_0, crc_3 + + pclmulqdq crc_3, crc_fold, 0 + + psrldq crc_0, 8 + + pxor crc_3, crc_0 + + movdqa crc_0, crc_3 + + + pslldq crc_3, 4 + + pclmulqdq crc_3, crc_fold, 0x10 + + + pxor crc_3, crc_0 + + pand crc_3, [mask2 WRT_OPT] + + movdqa crc_1, crc_3 + + movdqa crc_2, crc_3 + + movdqa crc_fold, [rk7 WRT_OPT] ; 16-byte load: rk7 (low qword) and rk8 (high qword) for the final reduction steps + + + pclmulqdq crc_3, crc_fold, 0 + pxor crc_3, crc_2 + + pand crc_3, [mask WRT_OPT] + + movdqa
crc_2, crc_3 + + pclmulqdq crc_3, crc_fold, 0x10 + + pxor crc_3, crc_2 + + pxor crc_3, crc_1 + + pextrd eax, crc_3, 2 + + not eax + + ret +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +section .data + +align 16 + +rk1: dq 0x00000000ccaa009e +rk2: dq 0x00000001751997d0 +rk5: dq 0x00000000ccaa009e +rk6: dq 0x0000000163cd6124 +rk7: dq 0x00000001f7011640 +rk8: dq 0x00000001db710640 + +mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF diff --git a/igzip/crc_utils_04.asm b/igzip/crc_utils_04.asm new file mode 100644 index 0000000..8cb8c3b --- /dev/null +++ b/igzip/crc_utils_04.asm @@ -0,0 +1,194 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "reg_sizes.asm" + +; Functional versions of CRC macros + +%include "igzip_buffer_utils_04.asm" + +extern fold_4 + +%define crc_0 xmm0 ; in/out: crc state +%define crc_1 xmm1 ; in/out: crc state +%define crc_2 xmm2 ; in/out: crc state +%define crc_3 xmm3 ; in/out: crc state +%define crc_fold xmm4 ; in: (loaded from fold_4) +%define crc_tmp0 xmm5 ; tmp +%define crc_tmp1 xmm6 ; tmp +%define crc_tmp2 xmm7 ; tmp +%define crc_tmp3 xmm8 ; tmp +%define crc_tmp4 xmm9 ; tmp +%define tmp4 rax + +; copy x bytes (rounded up to 16 bytes) from src to dst with crc +; src & dst are unaligned +; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc) +; arg 1: rcx: pointer to dst +; arg 2: rdx: pointer to src +; arg 3: r8: size (in bytes) +; arg 4: r9: pointer to CRC +;; %if 0 +global copy_in_crc_04 +copy_in_crc_04: +%ifidn __OUTPUT_FORMAT__, elf64 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%endif + + ; Save xmm registers that need to be preserved. 
+ sub rsp, 8 + 4*16 ; four 16-byte save slots; the extra 8 bytes presumably bring rsp back to 16-byte alignment for vmovdqa (assumes the usual ABI stack alignment at entry) + vmovdqa [rsp+0*16], xmm6 + vmovdqa [rsp+1*16], xmm7 + vmovdqa [rsp+2*16], xmm8 + vmovdqa [rsp+3*16], xmm9 + + vmovdqa crc_0, [r9 + 0*16] ; load the four 16-byte CRC state words; vmovdqa requires r9 to be 16-byte aligned + vmovdqa crc_1, [r9 + 1*16] + vmovdqa crc_2, [r9 + 2*16] + vmovdqa crc_3, [r9 + 3*16] + + vmovdqa crc_fold, [fold_4 WRT_OPT] + COPY_IN_CRC rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \ + crc_fold, \ + crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4 + + vmovdqa [r9 + 0*16], crc_0 ; write the updated CRC state back to the caller's buffer + vmovdqa [r9 + 1*16], crc_1 + vmovdqa [r9 + 2*16], crc_2 + vmovdqa [r9 + 3*16], crc_3 + + vmovdqa xmm9, [rsp+3*16] ; restore the xmm6-xmm9 values saved on entry + vmovdqa xmm8, [rsp+2*16] + vmovdqa xmm7, [rsp+1*16] + vmovdqa xmm6, [rsp+0*16] + add rsp, 8 + 4*16 +ret + +; Convert 512-bit CRC data to real 32-bit value +; uint32_t crc_512to32(uint32_t *crc) +; arg 1: rcx: pointer to CRC (four 16-byte words read with vmovdqa, so it must be 16-byte aligned; rdi is copied to rcx for elf64) +; returns: eax: 32 bit crc +global crc_512to32_04 +crc_512to32_04: +%ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi +%endif + + vmovdqa crc_0, [rcx + 0*16] + vmovdqa crc_1, [rcx + 1*16] + vmovdqa crc_2, [rcx + 2*16] + vmovdqa crc_3, [rcx + 3*16] + + vmovdqa crc_fold, [rk1 WRT_OPT] ; 16-byte load: rk1 in the low qword, rk2 in the high qword + + ; fold the 4 xmm registers down to 1: each register is multiplied by the same rk1/rk2 pair (imm 0x1: high qword * rk1, imm 0x10: low qword * rk2) and xored into the next one, crc_0 -> crc_1 -> crc_2 -> crc_3 + vmovdqa crc_tmp0, crc_0 + vpclmulqdq crc_0, crc_fold, 0x1 + vpclmulqdq crc_tmp0, crc_fold, 0x10 + vpxor crc_1, crc_tmp0 + vpxor crc_1, crc_0 + + vmovdqa crc_tmp0, crc_1 + vpclmulqdq crc_1, crc_fold, 0x1 + vpclmulqdq crc_tmp0, crc_fold, 0x10 + vpxor crc_2, crc_tmp0 + vpxor crc_2, crc_1 + + vmovdqa crc_tmp0, crc_2 + vpclmulqdq crc_2, crc_fold, 0x1 + vpclmulqdq crc_tmp0, crc_fold, 0x10 + vpxor crc_3, crc_tmp0 + vpxor crc_3, crc_2 + + + vmovdqa crc_fold, [rk5 WRT_OPT] ; 16-byte load: rk5 (low qword) and rk6 (high qword) for reducing the 128-bit value in crc_3 + vmovdqa crc_0, crc_3 + + vpclmulqdq crc_3, crc_fold, 0 + + vpsrldq crc_0, 8 + + vpxor crc_3, crc_0 + + vmovdqa crc_0, crc_3 + + + vpslldq crc_3, 4 + + vpclmulqdq crc_3, crc_fold, 0x10 + + + vpxor crc_3, crc_0 + + vpand crc_3, [mask2 WRT_OPT] + + vmovdqa crc_1, crc_3 + + vmovdqa crc_2, crc_3 + + vmovdqa crc_fold, [rk7 WRT_OPT] ; 16-byte load: rk7 (low qword) and rk8 (high qword) for the final reduction steps + + vpclmulqdq crc_3, crc_fold, 0 + vpxor crc_3,
crc_2 + + vpand crc_3, [mask WRT_OPT] + + vmovdqa crc_2, crc_3 + + vpclmulqdq crc_3, crc_fold, 0x10 + + vpxor crc_3, crc_2 + + vpxor crc_3, crc_1 + + vpextrd eax, crc_3, 2 + + not eax + + ret +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +section .data + +align 16 + +rk1: dq 0x00000000ccaa009e +rk2: dq 0x00000001751997d0 +rk5: dq 0x00000000ccaa009e +rk6: dq 0x0000000163cd6124 +rk7: dq 0x00000001f7011640 +rk8: dq 0x00000001db710640 + +mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF diff --git a/igzip/data_struct2.asm b/igzip/data_struct2.asm new file mode 100644 index 0000000..e099071 --- /dev/null +++ b/igzip/data_struct2.asm @@ -0,0 +1,165 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; START_FIELDS -- begin a new structure layout: resets the running offset (_FIELD_OFFSET) and the largest alignment seen (_STRUCT_ALIGN) to 0 +%macro START_FIELDS 0 +%assign _FIELD_OFFSET 0 +%assign _STRUCT_ALIGN 0 +%endm + +;; FIELD name size align -- rounds _FIELD_OFFSET up to a multiple of align (the mask arithmetic requires align to be a power of two), defines name as that offset, advances _FIELD_OFFSET by size, and raises _STRUCT_ALIGN to align if it is larger. Note that a structure size taken from _FIELD_OFFSET afterwards is the end of the last field and is not rounded up to _STRUCT_ALIGN. +%macro FIELD 3 +%define %%name %1 +%define %%size %2 +%define %%align %3 + +%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1)) +%%name equ _FIELD_OFFSET +%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size) +%if (%%align > _STRUCT_ALIGN) +%assign _STRUCT_ALIGN %%align +%endif +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +START_FIELDS ;; BitBuf2 + +;; name size align +FIELD _m_bits, 8, 8 +FIELD _m_bit_count, 4, 4 +FIELD _m_out_buf, 8, 8 +FIELD _m_out_end, 8, 8 +FIELD _m_out_start, 8, 8 + +%assign _BitBuf2_size _FIELD_OFFSET +%assign _BitBuf2_align _STRUCT_ALIGN + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +START_FIELDS ;; isal_zstate + +;; name size align +FIELD _b_bytes_valid, 4, 4 +FIELD _b_bytes_processed, 4, 4 +FIELD _file_start, 8, 8 +FIELD _crc, 64, 16 +FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align +FIELD _state, 4, 4 +FIELD _count, 4, 4 +FIELD _tmp_out_buff,
16, 1 +FIELD _tmp_out_start, 4, 4 +FIELD _tmp_out_end, 4, 4 +FIELD _last_flush, 4, 4 +FIELD _has_gzip_hdr, 4, 4 +FIELD _has_eob, 4, 4 +FIELD _has_eob_hdr, 4, 4 +FIELD _left_over, 4, 4 +FIELD _buffer, BSIZE+16, 32 +FIELD _head, HASH_SIZE*2, 16 + +%assign _isal_zstate_size _FIELD_OFFSET +%assign _isal_zstate_align _STRUCT_ALIGN + +_bitbuf_m_bits equ _bitbuf+_m_bits +_bitbuf_m_bit_count equ _bitbuf+_m_bit_count +_bitbuf_m_out_buf equ _bitbuf+_m_out_buf +_bitbuf_m_out_end equ _bitbuf+_m_out_end +_bitbuf_m_out_start equ _bitbuf+_m_out_start + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +START_FIELDS ;; isal_zstream + +;; name size align +FIELD _next_in, 8, 8 +FIELD _avail_in, 4, 4 +FIELD _total_in, 4, 4 +FIELD _next_out, 8, 8 +FIELD _avail_out, 4, 4 +FIELD _total_out, 4, 4 +FIELD _hufftables, 8, 8 +FIELD _end_of_stream, 4, 4 +FIELD _flush, 4, 4 +FIELD _internal_state, _isal_zstate_size, _isal_zstate_align + +%assign _isal_zstream_size _FIELD_OFFSET +%assign _isal_zstream_align _STRUCT_ALIGN + +_internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid +_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed +_internal_state_file_start equ _internal_state+_file_start +_internal_state_crc equ _internal_state+_crc +_internal_state_bitbuf equ _internal_state+_bitbuf +_internal_state_state equ _internal_state+_state +_internal_state_count equ _internal_state+_count +_internal_state_tmp_out_buff equ _internal_state+_tmp_out_buff +_internal_state_tmp_out_start equ _internal_state+_tmp_out_start +_internal_state_tmp_out_end equ _internal_state+_tmp_out_end +_internal_state_last_flush equ _internal_state+_last_flush +_internal_state_has_gzip_hdr equ _internal_state+_has_gzip_hdr +_internal_state_has_eob equ _internal_state+_has_eob +_internal_state_has_eob_hdr equ 
_internal_state+_has_eob_hdr +_internal_state_left_over equ _internal_state+_left_over +_internal_state_buffer equ _internal_state+_buffer +_internal_state_head equ _internal_state+_head +_internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits +_internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count +_internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf +_internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end +_internal_state_bitbuf_m_out_start equ _internal_state+_bitbuf_m_out_start + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ZSTATE_HDR equ 1 +ZSTATE_BODY equ 2 +ZSTATE_FLUSH_READ_BUFFER equ 3 +ZSTATE_SYNC_FLUSH equ 4 +ZSTATE_TRL equ 6 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_NO_FLUSH equ 0 +_SYNC_FLUSH equ 1 +_FULL_FLUSH equ 2 +_STORED_BLK equ 0 +%assign _STORED_BLK_END 65535 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + diff --git a/igzip/detect_repeated_char.asm b/igzip/detect_repeated_char.asm new file mode 100644 index 0000000..2f6c07a --- /dev/null +++ b/igzip/detect_repeated_char.asm @@ -0,0 +1,81 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; detect_repeated_char buf, size
+;;
+;; Compares the buffer, eight bytes at a time, against its own first byte
+;; replicated into all eight byte lanes.
+;; Returns (rax): the repeated byte value (0..255) when every compared
+;; 8-byte word matches, otherwise -1.
+;; The loop tests its bound after the first compare, so at least one
+;; 8-byte word is read even when size is 0, and reads always cover whole
+;; 8-byte words starting at buf.
+;; buf and size are only defined for the elf64 and win64 output formats.
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define buf rdi
+ %define size rsi
+%elifidn __OUTPUT_FORMAT__, win64
+ %define buf rcx
+ %define size rdx
+%endif ; output formats
+
+%define tmp r10
+
+global detect_repeated_char
+detect_repeated_char:
+
+;; replicate the 1st byte to 8 bytes
+	xor	tmp, tmp
+	xor	rax, rax
+
+	mov	al, [buf]
+	mov	ah, al			; ax  = first byte x2
+	mov	tmp %+ w, ax		; presumably r10w via reg_sizes.asm
+	shl	tmp, 16
+	or	eax, tmp %+ d		; eax = first byte x4
+	mov	tmp %+ d, eax
+	shl	tmp, 32
+	or	rax, tmp		; rax = first byte x8
+
+;; detect the 8K input
+	lea	tmp, [buf + size]	; tmp = one past the end of the buffer
+_loop:
+	cmp	rax, [buf]
+	jne	_fail
+	add	buf, 8
+	cmp	buf, tmp
+	jb	_loop
+	shr	rax, 56			; keep one copy of the byte as the result
+	jmp	_end
+
+_fail:
+	mov	rax, -1
+
+_end:
+	ret
+
+%undef buf
+%undef size
+%undef tmp
diff --git a/igzip/generate_constant_block_header.c b/igzip/generate_constant_block_header.c
new file mode 100644
index 0000000..99bcf48
--- /dev/null
+++ b/igzip/generate_constant_block_header.c
@@ -0,0 +1,118 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "huff_codes.h"
+#include "bitbuf2.h"
+
+#define MAX_HEADER_SIZE 350
+#define BLOCK_SIZE (16 * 1024)
+
+/**
+ * @brief Prints a header as a C array initializer named "data".
+ * @param outfile: the file the array is printed to.
+ * @param header: the header bytes to print.
+ * @param bit_count: number of valid bits in header; the byte holding the
+ *        bits past the last complete byte is always printed as well, so
+ *        header must hold at least bit_count / 8 + 1 bytes.
+ */
+void fprint_header(FILE * outfile, uint8_t * header, uint64_t bit_count)
+{
+	uint64_t i;
+
+	fprintf(outfile, "unsigned char data[] = {");
+	/* Complete bytes, eight per line, each followed by a comma. */
+	for (i = 0; i < bit_count / 8; i++) {
+		if ((i & 7) == 0)
+			fprintf(outfile, "\n\t");
+		else
+			fprintf(outfile, " ");
+		fprintf(outfile, "0x%02x,", header[i]);
+	}
+
+	/* Trailing (possibly partial) byte, printed without a comma. */
+	if ((i & 7) == 0)
+		fprintf(outfile, "\n\t");
+	else
+		fprintf(outfile, " ");
+	fprintf(outfile, "0x%02x", header[i]);
+	fprintf(outfile, "\t};\n\n");
+
+}
+
+int main(int argc, char **argv)
+{
+	/* Generates a header for a constant block, along with some manual
+	 * twiddling to create a header with the desired properties*/
+	uint8_t stream[BLOCK_SIZE];
+	struct isal_huff_histogram histogram;
+	uint64_t *lit_histogram = histogram.lit_len_histogram;
+	uint64_t *dist_histogram = histogram.dist_histogram;
+	uint8_t header[MAX_HEADER_SIZE];
+	struct huff_tree lit_tree, dist_tree;
+	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
+	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
+	uint64_t bit_count;
+
+	uint8_t repeated_char = 0x00;
+
+	memset(header, 0, sizeof(header));
+	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
+	memset(stream, repeated_char, sizeof(stream));
+	memset(lit_tree_array, 0, sizeof(lit_tree_array));
+	memset(dist_tree_array, 0, sizeof(dist_tree_array));
+	memset(lit_huff_table, 0, sizeof(lit_huff_table));
+	memset(dist_huff_table, 0, sizeof(dist_huff_table));
+
+	isal_update_histogram(stream, sizeof(stream), &histogram);
+
+	/* These are set to manually change the histogram to create a header with the
+	 * desired properties. In this case, the header is modified so that it is byte
+	 * unaligned by 6 bits, so that 0 is a 2 bit code, so that the header plus the
+	 * encoding of one 0 is byte aligned*/
+	lit_histogram[repeated_char] = 20;
+	lit_histogram[280] = 2;
+	lit_histogram[264] = 5;
+	lit_histogram[282] = 0;
+
+	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
+	dist_tree = create_symbol_subset_huff_tree(dist_tree_array, dist_histogram, DIST_LEN);
+	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, 15) > 0) {
+		printf("Error, code with invalid length for Deflate standard.\n");
+		return 1;
+	}
+
+	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, 15) > 0) {
+		printf("Error, code with invalid length for Deflate standard.\n");
+		return 1;
+	}
+
+	/* Remove the symbol corresponding to the suboptimal look back
+	 * distance of 258 found by gen_histogram*/
+	dist_huff_table[16].length = 0;
+
+	bit_count = create_header(header, sizeof(header), lit_huff_table, dist_huff_table, 1);
+	printf("Header for %x\n", repeated_char);
+	/* bit_count is uint64_t: PRIu64 is correct on every data model,
+	 * whereas %lu is only right where long is 64 bits wide. */
+	fprintf(stdout, "Complete Bytes: %" PRIu64 "\n", bit_count / 8);
+	fprintf(stdout, "Byte Offset: %" PRIu64 "\n\n", (bit_count) & 7);
+	fprint_header(stdout, header, bit_count);
+	printf("\n");
+
+	return 0;
+}
diff --git a/igzip/generate_custom_hufftables.c b/igzip/generate_custom_hufftables.c
new file mode 100644
index 0000000..9e7ee5f
--- /dev/null
+++ b/igzip/generate_custom_hufftables.c
@@ -0,0 +1,425 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/* This program can be used to generate a custom huffman encoding to get
+ * better data compression. This is most useful when the type of data being
+ * compressed is well known.
+ *
+ * To use generate_custom_hufftables, pass a sequence of files to the program
+ * that together form an accurate representation of the data that is being
+ * compressed. Generate_custom_hufftables will then produce the file
+ * hufftables_c.c, which should be moved to replace its counterpart in the igzip
+ * source folder. After recompiling the Isa-l library, the igzip compression
+ * functions will use the new hufftables.
+ *
+ * Generate_custom_hufftables should be compiled with the same compile time
+ * parameters as the igzip source code. Generating custom hufftables with
+ * different compile time parameters may cause igzip to produce invalid output
+ * for the reasons described below. The default parameters used by
+ * generate_custom_hufftables are the same as the default parameters used by
+ * igzip.
+ *
+ * *WARNING* generate custom hufftables must be compiled with a HIST_SIZE that
+ * is at least as large as the HIST_SIZE used by igzip. By default HIST_SIZE is
+ * 8, the maximum usable HIST_SIZE is 32. The reason for this is to generate
+ * better compression. Igzip cannot produce look back distances with sizes
+ * larger than the HIST_SIZE * 1024 igzip was compiled with, so look back
+ * distances with sizes larger than HIST_SIZE * 1024 are not assigned a huffman
+ * code.
+ *
+ * To improve compression ratio, the compile time option LIT_SUB is provided to
+ * allow generating custom hufftables which only use a subset of all possible
+ * literals. This can be useful for getting better compression when it is known
+ * that the data being compressed will never contain certain symbols, for
+ * example text files. If this option is used, it needs to be checked that every
+ * possible literal is in fact given a valid code in the output hufftable. This
+ * can be done by checking that every required literal has a positive value for
+ * the length of the code associated with that literal. Literals which have not
+ * been given codes will have a code length of zero. The compile time option
+ * PRINT_CODES (described below) can be used to help manually perform this
+ * check.
+ *
+ * The compile time parameter PRINT_CODES causes the literal/length huffman code
+ * and the distance huffman code created by generate_custom_hufftables to be
+ * printed out. This is printed out where each line corresponds to a different
+ * symbol. The first column is the symbol used to represent each literal (Lit),
+ * end of block symbol (EOB), length (Len) or distance (Dist), the second column
+ * is the associated code value, and the third column is the length in bits of
+ * that code.
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "huff_codes.h"
+#include "bitbuf2.h"
+
+/*These max code lengths are limited by how the data is stored in
+ * hufftables.asm. The deflate standard max is 15.*/
+
+#define LONG_DCODE_OFFSET 26
+#define SHORT_DCODE_OFFSET 20
+
+#define MAX_HEADER_SIZE IGZIP_MAX_DEF_HDR_SIZE
+
+#define GZIP_HEADER_SIZE 10
+#define GZIP_TRAILER_SIZE 8
+
+/**
+ * @brief Prints a table of uint8_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
+			char *footer, char *begin_line)
+{
+	uint64_t i;
+
+	fprintf(outfile, "%s", header);
+
+	/* An empty table has no final element to print; without this guard
+	 * "length - 1" below would wrap around to UINT64_MAX. */
+	if (length == 0) {
+		fprintf(outfile, "%s", footer);
+		return;
+	}
+
+	/* Every element but the last is followed by a comma, eight per line. */
+	for (i = 0; i < length - 1; i++) {
+		if ((i & 7) == 0)
+			fprintf(outfile, "\n%s", begin_line);
+		else
+			fprintf(outfile, " ");
+		fprintf(outfile, "0x%02x,", table[i]);
+	}
+
+	/* Last element: same layout, no trailing comma. */
+	if ((i & 7) == 0)
+		fprintf(outfile, "\n%s", begin_line);
+	else
+		fprintf(outfile, " ");
+	fprintf(outfile, "0x%02x", table[i]);
+	fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint16_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed; when 0 only the header
+ *        and footer are printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
+			 char *footer, char *begin_line)
+{
+	uint64_t i;
+
+	fprintf(outfile, "%s", header);
+
+	/* Guard against "length - 1" wrapping around for an empty table. */
+	if (length == 0) {
+		fprintf(outfile, "%s", footer);
+		return;
+	}
+
+	/* Every element but the last is followed by a comma, eight per line. */
+	for (i = 0; i < length - 1; i++) {
+		if ((i & 7) == 0)
+			fprintf(outfile, "\n%s", begin_line);
+		else
+			fprintf(outfile, " ");
+		fprintf(outfile, "0x%04x,", table[i]);
+	}
+
+	/* Last element: same layout, no trailing comma. */
+	if ((i & 7) == 0)
+		fprintf(outfile, "\n%s", begin_line);
+	else
+		fprintf(outfile, " ");
+	fprintf(outfile, "0x%04x", table[i]);
+	fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint32_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
+			 char *footer, char *begin_line)
+{
+	uint64_t i;
+
+	fprintf(outfile, "%s", header);
+
+	/* An empty table has no final element to print; without this guard
+	 * "length - 1" below would wrap around to UINT64_MAX. */
+	if (length == 0) {
+		fprintf(outfile, "%s", footer);
+		return;
+	}
+
+	/* Every element but the last is followed by a comma, four per line. */
+	for (i = 0; i < length - 1; i++) {
+		if ((i & 3) == 0)
+			fprintf(outfile, "\n%s", begin_line);
+		else
+			fprintf(outfile, " ");
+		fprintf(outfile, "0x%08x,", table[i]);
+	}
+
+	/* Last element: start a new line when it is the first of a row, as
+	 * the loop above and the other table printers do. */
+	if ((i & 3) == 0)
+		fprintf(outfile, "\n%s", begin_line);
+	else
+		fprintf(outfile, " ");
+	fprintf(outfile, "0x%08x", table[i]);
+	fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint64_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed; when 0 only the header
+ *        and footer are printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ */
+void fprint_uint64_table(FILE * outfile, uint64_t * table, uint64_t length, char *header,
+			 char *footer)
+{
+	uint64_t i;
+
+	fprintf(outfile, "%s\n", header);
+
+	/* Guard against "length - 1" wrapping around for an empty table. */
+	if (length == 0) {
+		fprintf(outfile, "%s", footer);
+		return;
+	}
+
+	for (i = 0; i < length - 1; i++)
+		fprintf(outfile, "\t0x%016" PRIx64 ",\n", table[i]);
+	fprintf(outfile, "\t0x%016" PRIx64, table[i]);
+	fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints the definition of hufftables_default as C source.
+ * @param output_file: the file the definition is printed to.
+ * @param header: deflate header bytes.
+ * @param bit_count: number of valid bits in header.
+ * @param lit_code_table: literal codes (LIT_TABLE_SIZE entries are printed).
+ * @param lit_code_size_table: literal code lengths.
+ * @param dcodes_code_table: distance codes, indexed from 0 (DIST_LEN entries).
+ * @param dcodes_code_size_table: distance code lengths.
+ * @param packed_len_table: packed length table (LEN_TABLE_SIZE entries).
+ * @param packed_dist_table: packed distance table (LONG_DIST_TABLE_SIZE entries).
+ */
+void fprint_hufftables(FILE * output_file, uint8_t * header, uint32_t bit_count,
+		       uint16_t * lit_code_table, uint8_t * lit_code_size_table,
+		       uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
+		       uint32_t * packed_len_table, uint32_t * packed_dist_table)
+{
+	fprintf(output_file, "struct isal_hufftables hufftables_default = {\n\n");
+
+	fprint_uint8_table(output_file, header, (bit_count + 7) / 8,
+			   "\t.deflate_hdr = {", "\t},\n\n", "\t\t");
+	/* bit_count is unsigned, so print it with %u. */
+	fprintf(output_file, "\t.deflate_hdr_count = %u,\n", bit_count / 8);
+	fprintf(output_file, "\t.deflate_hdr_extra_bits = %u,\n\n", bit_count & 7);
+
+	/* The distance table is emitted in two parts so that the tail is only
+	 * compiled in when LONGER_HUFFTABLE is defined. */
+	fprint_uint32_table(output_file, packed_dist_table, SHORT_DIST_TABLE_SIZE,
+			    "\t.dist_table = {", ",\n", "\t\t");
+	fprint_uint32_table(output_file, &packed_dist_table[SHORT_DIST_TABLE_SIZE],
+			    LONG_DIST_TABLE_SIZE - SHORT_DIST_TABLE_SIZE,
+			    "#ifdef LONGER_HUFFTABLE",
+			    "\n#endif /* LONGER_HUFFTABLE */\n\t},\n\n", "\t\t");
+
+	fprint_uint32_table(output_file, packed_len_table, LEN_TABLE_SIZE, "\t.len_table = {",
+			    "\t},\n\n", "\t\t");
+	fprint_uint16_table(output_file, lit_code_table, LIT_TABLE_SIZE, "\t.lit_table = {",
+			    "\t},\n\n", "\t\t");
+	fprint_uint8_table(output_file, lit_code_size_table, LIT_TABLE_SIZE,
+			   "\t.lit_table_sizes = {", "\t},\n\n", "\t\t");
+
+	fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n");
+	fprint_uint16_table(output_file, dcodes_code_table + SHORT_DCODE_OFFSET,
+			    DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n",
+			    "\t\t");
+	fprint_uint8_table(output_file, dcodes_code_size_table + SHORT_DCODE_OFFSET,
+			   DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
+			   "\t\t");
+	fprintf(output_file, "#else\n");
+	fprint_uint16_table(output_file, dcodes_code_table + LONG_DCODE_OFFSET,
+			    DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n", "\t\t");
+	fprint_uint8_table(output_file, dcodes_code_size_table + LONG_DCODE_OFFSET,
+			   DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
+			   "\t\t");
+	fprintf(output_file, "#endif\n");
+	fprintf(output_file, "};\n");
+}
+
+/**
+ * @brief Prints the complete generated source file: includes, gzip header
+ *        constants and the hufftables_default definition.
+ *
+ * The table parameters are forwarded unchanged to fprint_hufftables().
+ */
+void fprint_header(FILE * output_file, uint8_t * header, uint32_t bit_count,
+		   uint16_t * lit_code_table, uint8_t * lit_code_size_table,
+		   uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
+		   uint32_t * packed_len_table, uint32_t * packed_dist_table)
+{
+	/* The generated file uses fixed-width integer types and
+	 * struct isal_hufftables, so it needs both headers.
+	 * NOTE(review): the header names were missing from these strings;
+	 * confirm igzip_lib.h is the header that declares isal_hufftables. */
+	fprintf(output_file, "#include <stdint.h>\n");
+	fprintf(output_file, "#include <igzip_lib.h>\n\n");
+
+	fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
+		"\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
+
+	fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
+	fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
+
+	fprint_hufftables(output_file, header, bit_count, lit_code_table, lit_code_size_table,
+			  dcodes_code_table, dcodes_code_size_table, packed_len_table,
+			  packed_dist_table);
+}
+
+/**
+ * @brief Builds huffman tables from the files named on the command line and
+ *        writes them to hufftables_c.c in the current directory.
+ * @returns 0 on success, 1 on any error.
+ */
+int main(int argc, char *argv[])
+{
+	long int file_length;
+	uint8_t *stream = NULL;
+	struct isal_huff_histogram histogram;
+	uint64_t *lit_histogram = histogram.lit_len_histogram;
+	uint64_t *dist_histogram = histogram.dist_histogram;
+	uint8_t header[MAX_HEADER_SIZE];
+	FILE *file;
+	struct huff_tree lit_tree, dist_tree;
+	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
+	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
+	uint64_t bit_count;
+	uint32_t packed_len_table[LEN_TABLE_SIZE];
+	uint32_t packed_dist_table[LONG_DIST_TABLE_SIZE];
+	uint16_t lit_code_table[LIT_TABLE_SIZE];
+	uint16_t dcodes_code_table[DIST_LEN];
+	uint8_t lit_code_size_table[LIT_TABLE_SIZE];
+	uint8_t dcodes_code_size_table[DIST_LEN];
+	int max_dist = convert_dist_to_dist_sym(D);
+
+	if (argc == 1) {
+		printf("Error, no input file.\n");
+		return 1;
+	}
+
+	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
+	memset(lit_tree_array, 0, sizeof(lit_tree_array));
+	memset(dist_tree_array, 0, sizeof(dist_tree_array));
+	memset(lit_huff_table, 0, sizeof(lit_huff_table));
+	memset(dist_huff_table, 0, sizeof(dist_huff_table));
+
+	/* Input files are consumed from the last argument to the first. */
+	while (argc > 1) {
+		printf("Processing %s\n", argv[argc - 1]);
+		/* Binary mode: the data is sampled byte for byte, so no newline
+		 * translation may take place. */
+		file = fopen(argv[argc - 1], "rb");
+		if (file == NULL) {
+			printf("Error opening file\n");
+			return 1;
+		}
+
+		/* Determine the file size; every step can fail. */
+		if (fseek(file, 0, SEEK_END) != 0
+		    || (file_length = ftell(file)) < 0 || fseek(file, 0, SEEK_SET) != 0) {
+			printf("Error occurred when reading file");
+			fclose(file);
+			return 1;
+		}
+
+		/* An empty file contributes nothing to the histogram, and
+		 * malloc(0) may legitimately return NULL, so skip it. */
+		if (file_length == 0) {
+			fclose(file);
+			argc--;
+			continue;
+		}
+
+		stream = malloc(file_length);
+		if (stream == NULL) {
+			printf("Failed to allocate memory to read in file\n");
+			fclose(file);
+			return 1;
+		}
+
+		/* A short read would leave part of stream uninitialized. */
+		if (fread(stream, 1, file_length, file) != (size_t) file_length
+		    || ferror(file)) {
+			printf("Error occurred when reading file");
+			fclose(file);
+			free(stream);
+			return 1;
+		}
+
+		/* Create a histogram of frequency of symbols found in stream to
+		 * generate the huffman tree.*/
+		isal_update_histogram(stream, file_length, &histogram);
+
+		fclose(file);
+		free(stream);
+		argc--;
+	}
+
+	/* Create a huffman tree corresponding to the histograms created in
+	 * gen_histogram*/
+#ifdef LIT_SUB
+	int j;
+	/* Guarantee every possible repeat length is given a symbol. It is hard
+	 * to guarantee data will never have a repeat of a given length */
+	for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
+		if (lit_histogram[j] == 0)
+			lit_histogram[j]++;
+
+	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
+#else
+	lit_tree = create_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
+#endif
+	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
+
+	/* Create a look up table to represent huffman tree above in deflate
+	 * standard form after it has been modified to satisfy max depth
+	 * criteria.*/
+	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) {
+		printf("Error, code with invalid length for Deflate standard.\n");
+		return 1;
+	}
+
+	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) {
+		printf("Error, code with invalid length for Deflate standard.\n");
+		return 1;
+	}
+
+	/* A non-zero result from are_hufftables_useable() is treated as "not
+	 * usable". In that case rebuild both tables with the shorter safe
+	 * code lengths and check again. The braces matter: without them
+	 * "return 1" ran unconditionally and the retry could never succeed. */
+	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
+		if (create_huff_lookup
+		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0) {
+			printf("Error, code with invalid length for Deflate standard.\n");
+			return 1;
+		}
+
+		if (create_huff_lookup
+		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0) {
+			printf("Error, code with invalid length for Deflate standard.\n");
+			return 1;
+		}
+
+		if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
+			printf("Error, hufftable is not usable\n");
+			return 1;
+		}
+	}
+#ifdef PRINT_CODES
+	int i;
+	printf("Lit/Len codes\n");
+	for (i = 0; i < LIT_TABLE_SIZE - 1; i++)
+		printf("Lit %3d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
+		       lit_huff_table[i].length);
+
+	printf("EOB %3d: Code 0x%04x, Code_Len %d\n", 256, lit_huff_table[256].code,
+	       lit_huff_table[256].length);
+
+	for (i = LIT_TABLE_SIZE; i < LIT_LEN; i++)
+		printf("Len %d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
+		       lit_huff_table[i].length);
+	printf("\n");
+
+	printf("Dist codes \n");
+	for (i = 0; i < DIST_LEN; i++)
+		printf("Dist %2d: Code 0x%04x, Code_Len %d\n", i, dist_huff_table[i].code,
+		       dist_huff_table[i].length);
+	printf("\n");
+#endif
+
+	create_code_tables(lit_code_table, lit_code_size_table, LIT_TABLE_SIZE,
+			   lit_huff_table);
+	create_code_tables(dcodes_code_table, dcodes_code_size_table, DIST_LEN,
+			   dist_huff_table);
+	create_packed_len_table(packed_len_table, lit_huff_table);
+	create_packed_dist_table(packed_dist_table, LONG_DIST_TABLE_SIZE, dist_huff_table);
+
+	bit_count =
+	    create_header(header, sizeof(header), lit_huff_table, dist_huff_table, LAST_BLOCK);
+
+	file = fopen("hufftables_c.c", "w");
+	if (file == NULL) {
+		printf("Error creating file hufftables_c.c\n");
+		return 1;
+	}
+
+	fprint_header(file, header, bit_count, lit_code_table, lit_code_size_table,
+		      dcodes_code_table, dcodes_code_size_table, packed_len_table,
+		      packed_dist_table);
+
+	fclose(file);
+
+	return 0;
+}
diff --git a/igzip/huff_codes.c b/igzip/huff_codes.c
new file mode 100644
index 0000000..d69c99d
--- /dev/null
+++ b/igzip/huff_codes.c
@@ -0,0 +1,964 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/* NOTE(review): four system includes stood here with their header names
+ * missing. The three below are the ones the visible code needs (assert,
+ * fixed-width integers, memset); restore the fourth if the build needs it. */
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include "igzip_lib.h"
+#include "huff_codes.h"
+#include "huffman.h"
+
+#define LENGTH_BITS 5
+
+/* The order code length codes are written in the dynamic code header. This is
+ * defined in RFC 1951 page 13 */
+static const uint8_t code_length_code_order[] =
+    { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+
+/**
+ * @brief Inserts element into the min-heap (ordered by frequency).
+ * @param element: the tree node to insert (copied into the heap).
+ * @param heap: the heap; must hold fewer than MAX_HISTHEAP_SIZE elements.
+ * @returns the index at which the element was finally stored.
+ */
+int heap_push(struct huff_tree element, struct histheap *heap)
+{
+	uint16_t index;
+	uint16_t parent;
+	assert(heap->size < MAX_HISTHEAP_SIZE);
+	index = heap->size;
+	heap->size += 1;
+	/* For index 0 this evaluates to 0 (the subtraction is done in int);
+	 * the loop condition below stops at the root anyway. */
+	parent = (index - 1) / 2;
+	/* Sift up: move parents with a larger frequency down one level. */
+	while ((index != 0) && (heap->tree[parent].frequency > element.frequency)) {
+		heap->tree[index] = heap->tree[parent];
+		index = parent;
+		parent = (index - 1) / 2;
+
+	}
+	heap->tree[index] = element;
+
+	return index;
+}
+
+/**
+ * @brief Removes and returns the element with the smallest frequency.
+ * @param heap: the heap; must not be empty.
+ * @returns a copy of the removed root element.
+ */
+struct huff_tree heap_pop(struct histheap *heap)
+{
+	struct huff_tree root, temp;
+	uint16_t index = 0;
+	uint16_t child = 1;
+	assert(heap->size > 0);
+	root = heap->tree[index];
+	heap->size--;
+	/* Move the last element to the root and sift it down. */
+	heap->tree[index] = heap->tree[heap->size];
+
+	/* While the current node has two children ... */
+	while (child + 1 < heap->size) {
+		if (heap->tree[child].frequency < heap->tree[index].frequency
+		    || heap->tree[child + 1].frequency < heap->tree[index].frequency) {
+			/* ... swap with the smaller of the two. */
+			if (heap->tree[child].frequency > heap->tree[child + 1].frequency)
+				child += 1;
+			temp = heap->tree[index];
+			heap->tree[index] = heap->tree[child];
+			heap->tree[child] = temp;
+			index = child;
+			child = 2 * child + 1;
+		} else {
+			break;
+		}
+	}
+
+	/* The current node may still have a single (left) child. */
+	if (child < heap->size) {
+		if (heap->tree[child].frequency < heap->tree[index].frequency) {
+			temp = heap->tree[index];
+			heap->tree[index] = heap->tree[child];
+			heap->tree[child] = temp;
+		}
+	}
+
+	return root;
+
+}
+
+/**
+ * @brief Unlinks and returns the first node of list.
+ * @returns the removed node, or NULL when the list is empty. The returned
+ *          node's own next/previous pointers are left untouched.
+ */
+struct linked_list_node *pop_from_front(struct linked_list *list)
+{
+	struct linked_list_node *temp;
+
+	temp = list->start;
+	if (list->start != NULL) {
+		list->start = list->start->next;
+		if (list->start != NULL)
+			list->start->previous = NULL;
+		else
+			list->end = NULL;	/* the list is now empty */
+		list->length -= 1;
+	}
+	return temp;
+}
+
+/**
+ * @brief Links new_element in as the first node of list.
+ */
+void append_to_front(struct linked_list *list, struct linked_list_node *new_element)
+{
+	new_element->next = list->start;
+	new_element->previous = NULL;
+	if (list->start != NULL)
+		list->start->previous = new_element;
+	else
+		list->end = new_element;	/* first node is also the last */
+	list->start = new_element;
+	list->length += 1;
+
+	return;
+}
+
+/**
+ * @brief Links new_element in as the last node of list.
+ */
+void append_to_back(struct linked_list *list, struct linked_list_node *new_element)
+{
+	new_element->previous = list->end;
+	new_element->next = NULL;
+	if (list->end != NULL)
+		list->end->next = new_element;
+	else
+		list->start = new_element;	/* last node is also the first */
+	list->end = new_element;
+	list->length += 1;
+
+	return;
+}
+
+/**
+ * @brief Adds the literal, length and distance symbol counts of a stream to
+ *        histogram.
+ *
+ * Scans the stream with a table of the last position seen for each hash
+ * value. A previous position closer than D whose match is at least
+ * SHORTEST_MATCH bytes long is counted as one length symbol plus one
+ * distance symbol; otherwise the current byte is counted as a literal. One
+ * end-of-block symbol (256) is counted per call.
+ *
+ * @param start_stream: first byte of the data.
+ * @param length: number of bytes; nothing is counted when length <= 0.
+ * @param histogram: counts are added to the values already present.
+ *
+ * NOTE(review): the tail handling after the main loop reuses the last value
+ * of `literal`. That looks wrong when the loop body never ran (length < 4)
+ * or when the loop ended right after a match; confirm before relying on
+ * exact counts for such inputs.
+ */
+void isal_update_histogram(uint8_t * start_stream, int length,
+			   struct isal_huff_histogram *histogram)
+{
+	uint32_t literal = 0, hash;
+	uint8_t *last_seen[HASH_SIZE];
+	uint8_t *current, *seen, *end_stream, *next_hash, *end;
+	uint32_t match_length;
+	uint32_t dist;
+	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
+	uint64_t *dist_histogram = histogram->dist_histogram;
+
+	if (length <= 0)
+		return;
+
+	end_stream = start_stream + length;
+	memset(last_seen, 0, sizeof(last_seen));	/* Initialize last_seen to be 0. */
+	/* Stop 3 bytes early: each iteration loads 4 bytes at current. */
+	for (current = start_stream; current < end_stream - 3; current++) {
+		literal = *(uint32_t *) current;
+		hash = compute_hash(literal) & HASH_MASK;
+		seen = last_seen[hash];
+		last_seen[hash] = current;
+		/* An unused slot holds a null pointer; the code relies on the
+		 * resulting distance failing the "< D" test. */
+		dist = current - seen;
+		if (dist < D) {
+			match_length = compare258(seen, current, end_stream - current);
+			if (match_length >= SHORTEST_MATCH) {
+				/* Record hash positions inside the match. */
+				next_hash = current;
+#ifdef LIMIT_HASH_UPDATE
+				end = next_hash + 3;
+#else
+				end = next_hash + match_length;
+#endif
+				if (end > end_stream - 3)
+					end = end_stream - 3;
+				next_hash++;
+				for (; next_hash < end; next_hash++) {
+					literal = *(uint32_t *) next_hash;
+					hash = compute_hash(literal) & HASH_MASK;
+					last_seen[hash] = next_hash;
+				}
+
+				dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
+				lit_len_histogram[convert_length_to_len_sym(match_length)] +=
+				    1;
+				/* Skip the matched bytes (the loop adds one more). */
+				current += match_length - 1;
+				continue;
+			}
+		}
+		lit_len_histogram[literal & 0xFF] += 1;
+	}
+	/* Tail: drop the byte already counted so `literal` holds the last
+	 * three bytes, then try one final match on them. */
+	literal = literal >> 8;
+	hash = compute_hash(literal) & HASH_MASK;
+	seen = last_seen[hash];
+	last_seen[hash] = current;
+	dist = current - seen;
+	if (dist < D) {
+		match_length = compare258(seen, current, end_stream - current);
+		if (match_length >= SHORTEST_MATCH) {
+			dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
+			lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
+			lit_len_histogram[256] += 1;
+			return;
+		}
+	} else
+		/* NOTE(review): when dist < D but the match is too short, this
+		 * first tail byte is not counted at all; confirm intent. */
+		lit_len_histogram[literal & 0xFF] += 1;
+	lit_len_histogram[(literal >> 8) & 0xFF] += 1;
+	lit_len_histogram[(literal >> 16) & 0xFF] += 1;
+	lit_len_histogram[256] += 1;
+	return;
+}
+
+/**
+ * @brief Maps a look back distance to its distance symbol.
+ * @param dist: the distance, 1 to 32768 inclusive.
+ * @returns the distance symbol (0 to 29).
+ */
+uint32_t convert_dist_to_dist_sym(uint32_t dist)
+{
+	assert(dist <= 32768 && dist > 0);
+	if (dist <= 2)
+		return dist - 1;
+	else if (dist <= 4)
+		return 0 + (dist - 1) / 1;
+	else if (dist <= 8)
+		return 2 + (dist - 1) / 2;
+	else if (dist <= 16)
+		return 4 + (dist - 1) / 4;
+	else if (dist <= 32)
+		return 6 + (dist - 1) / 8;
+	else if (dist <= 64)
+		return 8 + (dist - 1) / 16;
+	else if (dist <= 128)
+		return 10 + (dist - 1) / 32;
+	else if (dist <= 256)
+		return 12 + (dist - 1) / 64;
+	else if (dist <= 512)
+		return 14 + (dist - 1) / 128;
+	else if (dist <= 1024)
+		return 16 + (dist - 1) / 256;
+	else if (dist <= 2048)
+		return 18 + (dist - 1) / 512;
+	else if (dist <= 4096)
+		return 20 + (dist - 1) / 1024;
+	else if (dist <= 8192)
+		return 22 + (dist - 1) / 2048;
+	else if (dist <= 16384)
+		return 24 + (dist - 1) / 4096;
+	else if (dist <= 32768)
+		return 26 + (dist - 1) / 8192;
+	else
+		return ~0;	/* ~0 is an invalid distance code */
+
+}
+
+/**
+ * @brief Maps a match length to its length symbol.
+ * @param length: the match length, 3 to 258 inclusive.
+ * @returns the length symbol (257 to 285).
+ */
+uint32_t convert_length_to_len_sym(uint32_t length)
+{
+	assert(length > 2 && length < 259);
+
+	/* Based on tables on page 11 in RFC 1951 */
+	if (length < 11)
+		return 257 + length - 3;
+	else if (length < 19)
+		return 261 + (length - 3) / 2;
+	else if (length < 35)
+		return 265 + (length - 3) / 4;
+	else if (length < 67)
+		return 269 + (length - 3) / 8;
+	else if (length < 131)
+		return 273 + (length - 3) / 16;
+	else if (length < 258)
+		return 277 + (length - 3) / 32;
+ else + return 285; +} + /* Builds a huffman tree over only those symbols whose histogram count is nonzero. Leaves occupy tree_array[0..size), parents are appended from tree_array[size] onward. */ +struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array, + uint64_t * histogram, uint32_t size) +{ + /* Assumes there are at least 2 symbols. */ + int i; + uint32_t node_index; + struct huff_tree tree; + struct histheap heap; + + heap.size = 0; + + tree.right = tree.left = NULL; + + /* Initializes heap for construction of the huffman tree */ + for (i = 0; i < size; i++) { + tree.value = i; + tree.frequency = histogram[i]; + tree_array[i] = tree; + + /* If symbol does not appear (has frequency 0), ignore it. */ + if (tree_array[i].frequency != 0) + heap_push(tree, &heap); + } + + node_index = size; + + /* Construct the huffman tree */ + while (heap.size > 1) { + /* Pop two trees and join them under a new parent stored at tree_array[node_index]. */ + tree = heap_pop(&heap); + tree_array[node_index].frequency = tree.frequency; + tree_array[node_index].left = &tree_array[tree.value]; + + tree = heap_pop(&heap); + tree_array[node_index].frequency += tree.frequency; + tree_array[node_index].right = &tree_array[tree.value]; + + tree_array[node_index].value = node_index; + heap_push(tree_array[node_index], &heap); + + node_index += 1; + } + /* The single tree left on the heap is the root. */ + return heap_pop(&heap); +} + /* Same construction as above, except that every symbol is pushed on the heap, including those with a zero count. */ +struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram, + uint32_t size) +{ + int i; + uint32_t node_index; + struct huff_tree tree; + struct histheap heap; + + heap.size = 0; + + tree.right = tree.left = NULL; + + /* Initializes heap for construction of the huffman tree */ + for (i = 0; i < size; i++) { + tree.value = i; + tree.frequency = histogram[i]; + tree_array[i] = tree; + heap_push(tree, &heap); + } + + node_index = size; + + /* Construct the huffman tree */ + while (heap.size > 1) { + /* Pop two trees and join them under a new parent stored at tree_array[node_index]. */ + tree = heap_pop(&heap); + tree_array[node_index].frequency = tree.frequency; + tree_array[node_index].left = &tree_array[tree.value]; + + tree = heap_pop(&heap); + tree_array[node_index].frequency += tree.frequency; + tree_array[node_index].right = &tree_array[tree.value]; + + tree_array[node_index].value = node_index; +
heap_push(tree_array[node_index], &heap); + + node_index += 1; + } + /* The single tree left on the heap is the root. */ + return heap_pop(&heap); +} + /* Fills huff_lookup_table from root: code lengths come from find_code_lengths(), codes from set_huff_codes(). Returns 1 when find_code_lengths() fails and 0 otherwise. */ +int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length, + struct huff_tree root, uint8_t max_depth) +{ + /* Used to create a count of number of elements with a given code length */ + uint16_t count[MAX_HUFF_TREE_DEPTH + 1]; + + memset(count, 0, sizeof(count)); + + if (find_code_lengths(huff_lookup_table, count, root, max_depth) != 0) + return 1; + + set_huff_codes(huff_lookup_table, table_length, count); + + return 0; +} + /* Writes a code length of at most max_depth for every leaf of root into huff_lookup_table and tallies the lengths in count. Returns 1 when the tree cannot be squeezed to max_depth, 0 otherwise. */ +int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count, + struct huff_tree root, uint8_t max_depth) +{ + struct linked_list depth_array[MAX_HUFF_TREE_DEPTH + 2]; + struct linked_list_node linked_lists[MAX_HISTHEAP_SIZE]; + struct linked_list_node *temp; + uint16_t extra_nodes = 0; + int i, j; + /* depth_array[d] lists the leaves found at depth d; the last slot (MAX_HUFF_TREE_DEPTH + 1) collects every deeper leaf. linked_lists[v] is the list node for symbol value v. */ + memset(depth_array, 0, sizeof(depth_array)); + memset(linked_lists, 0, sizeof(linked_lists)); + for (i = 0; i < MAX_HISTHEAP_SIZE; i++) + linked_lists[i].value = i; + + huffman_tree_traversal(depth_array, linked_lists, &extra_nodes, max_depth, root, 0); + + /* This for loop fixes up the huffman tree to have a maximum depth not exceeding + * max_depth.
This algorithm works by removing all elements below max_depth, + * filling up the empty leaves which are created with elements from the huffman + * tree and then iteratively pushing down the least frequent leaf that is above + * max_depth to a depth 1 lower, and moving up a leaf below max_depth to that + * same depth.*/ + for (i = MAX_HUFF_TREE_DEPTH + 1; i > max_depth; i--) { + + /* find element to push up the tree */ + while (depth_array[i].start != NULL) { + if (extra_nodes > 0) { + temp = pop_from_front(&depth_array[i]); + append_to_back(&depth_array[max_depth], temp); + extra_nodes -= 1; + + } else { + assert(depth_array[max_depth].length % 2 == 0); + assert(extra_nodes == 0); + + /* find element to push down in the tree */ + for (j = max_depth - 1; j >= 0; j--) + if (depth_array[j].start != NULL) + break; + + /* No element available to push down further. */ + if (j < 0) + return 1; + /* Both the too-deep leaf and the leaf taken from depth j end up at depth j + 1. */ + temp = pop_from_front(&depth_array[i]); + append_to_front(&depth_array[j + 1], temp); + + temp = pop_from_front(&depth_array[j]); + append_to_back(&depth_array[j + 1], temp); + } + } + } + /* Record each symbol's final depth as its code length and tally the lengths in count. */ + for (i = 0; i < MAX_HUFF_TREE_DEPTH + 2; i++) { + temp = depth_array[i].start; + + while (temp != NULL) { + huff_lookup_table[temp->value].length = i; + count[i] += 1; + temp = temp->next; + } + } + return 0; + +} + +void huffman_tree_traversal(struct linked_list *depth_array, + struct linked_list_node *linked_lists, uint16_t * extra_nodes, + uint8_t max_depth, struct huff_tree current_node, + uint16_t current_depth) +{ + /* This algorithm performs a traversal of the huffman tree.
It is set up + * to visit the leaves in order of frequency and bin elements into a + * linked list by depth. A node whose left pointer is NULL is treated as a leaf; + * leaves deeper than MAX_HUFF_TREE_DEPTH all go into the last depth_array slot, + * and *extra_nodes counts the non-leaf nodes found exactly at max_depth.*/ + if (current_node.left == NULL) { + if (current_depth < MAX_HUFF_TREE_DEPTH + 1) + append_to_front(&depth_array[current_depth], + &linked_lists[current_node.value]); + else + append_to_front(&depth_array[MAX_HUFF_TREE_DEPTH + 1], + &linked_lists[current_node.value]); + return; + + } else if (current_depth == max_depth) + *extra_nodes += 1; + + if (current_node.left->frequency < current_node.right->frequency) { + huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth, + *current_node.right, current_depth + 1); + huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth, + *current_node.left, current_depth + 1); + + } else { + huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth, + *current_node.left, current_depth + 1); + huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth, + *current_node.right, current_depth + 1); + } + +} + +/* + * Returns integer with first length bits reversed and all higher bits zeroed. The four swaps reverse all 16 bits (each result is truncated to 16 bits on assignment); the final shift keeps the top length bits of that reversal. + */ +uint16_t bit_reverse(uint16_t bits, uint8_t length) +{ + bits = ((bits >> 1) & 0x55555555) | ((bits & 0x55555555) << 1); // swap bits + bits = ((bits >> 2) & 0x33333333) | ((bits & 0x33333333) << 2); // swap pairs + bits = ((bits >> 4) & 0x0F0F0F0F) | ((bits & 0x0F0F0F0F) << 4); // swap nibbles + bits = ((bits >> 8) & 0x00FF00FF) | ((bits & 0x00FF00FF) << 8); // swap bytes + return bits >> (16 - length); +} + +void set_huff_codes(struct huff_code *huff_code_table, int table_length, uint16_t * count) +{ + /* Uses the algorithm mentioned in the deflate standard, RFC 1951.
*/ + int i; + uint16_t code = 0; + uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1]; + + next_code[0] = code; + /* next_code[n] is the first code value handed out for length n. */ + for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++) + next_code[i] = (next_code[i - 1] + count[i - 1]) << 1; + /* Entries with length 0 are skipped and keep whatever code they already hold. */ + for (i = 0; i < table_length; i++) { + if (huff_code_table[i].length != 0) { + huff_code_table[i].code = + bit_reverse(next_code[huff_code_table[i].length], + huff_code_table[i].length); + next_code[huff_code_table[i].length] += 1; + } + } + + return; +} + /* Writes the dynamic huffman block header for the given literal/length and distance tables into header (header_length bytes available) and returns its length in bits. */ +int create_header(uint8_t * header, uint32_t header_length, struct huff_code *lit_huff_table, + struct huff_code *dist_huff_table, uint32_t end_of_block) +{ + int i; + uint64_t histogram[HUFF_LEN]; + uint16_t huffman_rep[LIT_LEN + DIST_LEN]; + uint16_t extra_bits[LIT_LEN + DIST_LEN]; + uint16_t length; + struct huff_tree root; + struct huff_tree tree_array[2 * HUFF_LEN - 1]; + struct huff_code lookup_table[HUFF_LEN]; + struct huff_code combined_table[LIT_LEN + DIST_LEN]; + + /* hlit, hdist, and hclen are defined in RFC 1951 page 13 */ + uint32_t hlit, hdist, hclen; + uint64_t bit_count; + + memset(lookup_table, 0, sizeof(lookup_table)); + + /* Calculate hlit */ + for (i = LIT_LEN - 1; i > 256; i--) + if (lit_huff_table[i].length != 0) + break; + + hlit = i - 256; + + /* Calculate hdist */ + for (i = DIST_LEN - 1; i > 0; i--) + if (dist_huff_table[i].length != 0) + break; + + hdist = i; + + /* Combine huffman tables for run length encoding */ + for (i = 0; i < 257 + hlit; i++) + combined_table[i] = lit_huff_table[i]; + for (i = 0; i < 1 + hdist; i++) + combined_table[i + hlit + 257] = dist_huff_table[i]; + /* Clear the whole array: memset takes a byte count, and extra_bits holds 16-bit elements. */ + memset(extra_bits, 0, sizeof(extra_bits)); + memset(histogram, 0, sizeof(histogram)); + + /* Create a run length encoded representation of the literal/length and + * distance huffman trees. */ + length = create_huffman_rep(huffman_rep, histogram, extra_bits, + combined_table, hlit + 257 + hdist + 1); + + /* Create a huffman tree to encode run length encoded representation.
*/ + root = create_symbol_subset_huff_tree(tree_array, histogram, HUFF_LEN); + create_huff_lookup(lookup_table, HUFF_LEN, root, 7); /* depth limit 7: create_huffman_header emits each of these lengths as a 3-bit field; NOTE(review): the return value is ignored here */ + + /* Calculate hclen */ + for (i = CODE_LEN_CODES - 1; i > 3; i--) /* i must be at least 4 */ + if (lookup_table[code_length_code_order[i]].length != 0) + break; + + hclen = i - 3; + + /* Generate actual header. */ + bit_count = create_huffman_header(header, header_length, lookup_table, huffman_rep, + extra_bits, length, end_of_block, hclen, hlit, + hdist); + + return bit_count; +} + /* Walks huff_table[0..len), groups consecutive entries with equal code length into runs and hands each run to flush_repeats(). Returns the number of symbols written to huffman_rep. */ +uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram, + uint16_t * extra_bits, struct huff_code * huff_table, uint16_t len) +{ + uint16_t current_in_index = 0, current_out_index = 0, run_length, last_code; + + while (current_in_index < len) { + last_code = huff_table[current_in_index].length; + run_length = 0; + + while (current_in_index < len + && last_code == huff_table[current_in_index].length) { + run_length += 1; + current_in_index += 1; + } + + current_out_index = flush_repeats(huffman_rep, histogram, extra_bits, + last_code, run_length, current_out_index); + } + return current_out_index; +} + /* Emits run_length copies of the code length last_code into huffman_rep starting at current_index, using symbols 16, 17 and 18 for runs of SHORTEST_MATCH or more, and returns the next free index. */ +uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits, + uint16_t last_code, uint16_t run_length, uint16_t current_index) +{ + int j; + /* A nonzero length is written once literally; only the rest of the run may use symbol 16 below. */ + if (last_code != 0 && last_code < HUFF_LEN && run_length > 0) { + huffman_rep[current_index++] = last_code; + histogram[last_code] += 1; + run_length -= 1; + + } + + if (run_length < SHORTEST_MATCH) { + for (j = 0; j < run_length; j++) { + huffman_rep[current_index++] = last_code; + histogram[last_code] += 1; + } + } else { + if (last_code == 0) { + /* The value 138 is the maximum repeat length + * represented with code 18. The value 10 is the maximum + * repeat length represented with 17.
*/ + for (; run_length > 138; run_length -= 138) { + huffman_rep[current_index] = 0x12; /* symbol 18 */ + extra_bits[current_index++] = 0x7F7; /* (127 << 4) | 7, same layout as the computed entries below */ + histogram[18]++; + } + + if (run_length > 10) { + huffman_rep[current_index] = 18; + extra_bits[current_index++] = ((run_length - 11) << 4) | 7; + histogram[18] += 1; + + } else if (run_length >= SHORTEST_MATCH) { + huffman_rep[current_index] = 17; + extra_bits[current_index++] = ((run_length - 3) << 4) | 3; + histogram[17] += 1; + + } else { + for (j = 0; j < run_length; j++) { + huffman_rep[current_index++] = last_code; + histogram[last_code] += 1; + } + } + + } else { + for (; run_length > 6; run_length -= 6) { + huffman_rep[current_index] = 0x10; /* symbol 16 */ + extra_bits[current_index++] = 0x32; /* (3 << 4) | 2 */ + histogram[16]++; + } + + if (run_length >= SHORTEST_MATCH) { + huffman_rep[current_index] = 16; + extra_bits[current_index++] = ((run_length - 3) << 4) | 2; + histogram[16] += 1; + + } else { + for (j = 0; j < run_length; j++) { + huffman_rep[current_index++] = last_code; + histogram[last_code] += 1; + } + } + } + + } + + return current_index; +} + /* Serializes the header through a BitBuf2 over header/header_length and returns the number of bits produced. Each extra_bits entry is read as (value << 4) | bit count. */ +int create_huffman_header(uint8_t * header, uint32_t header_length, + struct huff_code *lookup_table, uint16_t * huffman_rep, + uint16_t * extra_bits, uint16_t huffman_rep_length, + uint32_t end_of_block, uint32_t hclen, uint32_t hlit, uint32_t hdist) +{ + /* hlit, hdist, hclen are as defined in the deflate standard, head is the + * first three deflate header bits.*/ + int i; + uint32_t head; + uint64_t bit_count; + struct huff_code huffman_value; + struct BitBuf2 header_bitbuf; + + if (end_of_block) + head = 0x05; + else + head = 0x04; + + set_buf(&header_bitbuf, header, header_length); + init(&header_bitbuf); + + write_bits(&header_bitbuf, (head | (hlit << 3) | (hdist << 8) | (hclen << 13)), + DYN_HDR_START_LEN); + /* Pack the hclen + 4 code lengths, 3 bits each, so that index 0 of code_length_code_order lands in the lowest bits. */ + uint64_t tmp = 0; + for (i = hclen + 3; i >= 0; i--) { + tmp = (tmp << 3) | lookup_table[code_length_code_order[i]].length; + } + + write_bits(&header_bitbuf, tmp, (hclen + 4) * 3); + + for (i
= 0; i < huffman_rep_length; i++) { + huffman_value = lookup_table[huffman_rep[i]]; + + write_bits(&header_bitbuf, (uint64_t) huffman_value.code, + (uint32_t) huffman_value.length); + /* Symbols above 15 are followed by extra bits: value in the upper bits of extra_bits[i], bit count in its low 4 bits. */ + if (huffman_rep[i] > 15) { + write_bits(&header_bitbuf, (uint64_t) extra_bits[i] >> 4, + (uint32_t) extra_bits[i] & 0xF); + } + } + bit_count = 8 * buffer_used(&header_bitbuf) + header_bitbuf.m_bit_count; + flush(&header_bitbuf); + + return bit_count; +} + /* Copies hufftable[0..length) into two parallel arrays: codes into code_table, code lengths into code_length_table. */ +void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, uint32_t length, + struct huff_code *hufftable) +{ + int i; + for (i = 0; i < length; i++) { + code_table[i] = hufftable[i].code; + code_length_table[i] = hufftable[i].length; + } +} + /* Fills packed_table with one entry per (length symbol, extra bits value) pair, starting at symbol 257, then one final entry for symbol LIT_LEN - 1. At most 256 entries are written. */ +void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable) +{ + int i, count = 0; + uint16_t extra_bits; + uint16_t extra_bits_count = 0; + + /* Gain extra bits is the next place where the number of extra bits in + * length codes increases. */ + uint16_t gain_extra_bits = LEN_EXTRA_BITS_START; + + for (i = 257; i < LIT_LEN - 1; i++) { + for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) { + if (count > 254) + break; + packed_table[count++] = + (extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) | + (lit_len_hufftable[i].code << LENGTH_BITS) | + (lit_len_hufftable[i].length + extra_bits_count); + } + + if (i == gain_extra_bits) { + gain_extra_bits += LEN_EXTRA_BITS_INTERVAL; + extra_bits_count += 1; + } + } + /* The last length symbol gets a single entry with no extra bits. */ + packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) | + (lit_len_hufftable[LIT_LEN - 1].length); +} + /* Same packing for distance symbols; stops as soon as length entries have been written. */ +void create_packed_dist_table(uint32_t * packed_table, uint32_t length, + struct huff_code *dist_hufftable) +{ + int i, count = 0; + uint16_t extra_bits; + uint16_t extra_bits_count = 0; + + /* Gain extra bits is the next place where the number of extra bits in + * distance codes increases.
*/ + uint16_t gain_extra_bits = DIST_EXTRA_BITS_START; + + for (i = 0; i < DIST_LEN; i++) { + for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) { + if (count >= length) + return; + /* Layout: extra bits above the code, the code above LENGTH_BITS, total bit count (code length plus extra bits) in the low bits. */ + packed_table[count++] = + (extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) | + (dist_hufftable[i].code << LENGTH_BITS) | + (dist_hufftable[i].length + extra_bits_count); + + } + + if (i == gain_extra_bits) { + gain_extra_bits += DIST_EXTRA_BITS_INTERVAL; + extra_bits_count += 1; + } + } +} + /* Adds up the longest literal code, the longest length code plus its extra bits and the longest distance code plus its extra bits, and compares the sum with MAX_BITBUF_BIT_WRITE. */ +int are_hufftables_useable(struct huff_code *lit_len_hufftable, + struct huff_code *dist_hufftable) +{ + int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0; + int dist_extra_bits = 0, len_extra_bits = 0; + int gain_dist_extra_bits = DIST_EXTRA_BITS_START; + int gain_len_extra_bits = LEN_EXTRA_BITS_START; + int max_code_len; + int i; + /* Longest code over the whole literal/length table. */ + for (i = 0; i < LIT_LEN; i++) + if (lit_len_hufftable[i].length > max_lit_code_len) + max_lit_code_len = lit_len_hufftable[i].length; + /* Longest length code including its extra bits; the last symbol (LIT_LEN - 1) is not visited by this loop. */ + for (i = 257; i < LIT_LEN - 1; i++) { + if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len) + max_len_code_len = lit_len_hufftable[i].length + len_extra_bits; + + if (i == gain_len_extra_bits) { + gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL; + len_extra_bits += 1; + } + } + /* Longest distance code including its extra bits. */ + for (i = 0; i < DIST_LEN; i++) { + if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len) + max_dist_code_len = dist_hufftable[i].length + dist_extra_bits; + + if (i == gain_dist_extra_bits) { + gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL; + dist_extra_bits += 1; + } + } + + max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len; + + /* Some versions of igzip can write up to one literal, one length and one + * distance code at the same time.
This checks to make sure that is + * always writeable in bitbuf. Note the sense of the result: nonzero means the + * tables are NOT usable, zero means they are.*/ + return (max_code_len > MAX_BITBUF_BIT_WRITE); +} + /* Builds every table in hufftables from histogram using trees that contain all symbols. Returns 0 on success and one of the INVALID_* codes when a usable code cannot be produced. */ +int isal_create_hufftables(struct isal_hufftables *hufftables, + struct isal_huff_histogram *histogram) +{ + struct huff_tree lit_tree, dist_tree; + struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1]; + struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN]; + uint64_t bit_count; + int max_dist = convert_dist_to_dist_sym(IGZIP_D); + + uint32_t *dist_table = hufftables->dist_table; + uint32_t *len_table = hufftables->len_table; + uint16_t *lit_table = hufftables->lit_table; + uint16_t *dcodes = hufftables->dcodes; + uint8_t *lit_table_sizes = hufftables->lit_table_sizes; + uint8_t *dcodes_sizes = hufftables->dcodes_sizes; + uint8_t *deflate_hdr = hufftables->deflate_hdr; + uint64_t *lit_len_histogram = histogram->lit_len_histogram; + uint64_t *dist_histogram = histogram->dist_histogram; + + memset(hufftables, 0, sizeof(struct isal_hufftables)); + memset(lit_tree_array, 0, sizeof(lit_tree_array)); + memset(dist_tree_array, 0, sizeof(dist_tree_array)); + memset(lit_huff_table, 0, sizeof(lit_huff_table)); + memset(dist_huff_table, 0, sizeof(dist_huff_table)); + /* Only distance symbols 0..max_dist take part in the distance tree. */ + lit_tree = create_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN); + dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1); + + if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) + return INVALID_LIT_LEN_HUFFCODE; + + if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) + return INVALID_DIST_HUFFCODE; + /* A nonzero result means the codes are too long; retry with the shorter MAX_SAFE_* depth limits. */ + if (are_hufftables_useable(lit_huff_table, dist_huff_table)) { + if (create_huff_lookup + (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0) + return INVALID_LIT_LEN_HUFFCODE; + + if (create_huff_lookup + (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0) + return INVALID_DIST_HUFFCODE; + + if
(are_hufftables_useable(lit_huff_table, dist_huff_table)) + return INVALID_HUFFCODE; + } + /* Distance codes below DCODE_OFFSET are not copied into dcodes. */ + create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET, + dist_huff_table + DCODE_OFFSET); + + create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table); + + create_packed_len_table(len_table, lit_huff_table); + create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table); + /* Take the size from the struct member: the local deflate_hdr is a pointer, so sizeof on it would give the pointer size rather than the buffer size. */ + bit_count = + create_header(deflate_hdr, sizeof(hufftables->deflate_hdr), lit_huff_table, dist_huff_table, + LAST_BLOCK); + /* Header length is stored as whole bytes plus leftover bits. */ + hufftables->deflate_hdr_count = bit_count / 8; + hufftables->deflate_hdr_extra_bits = bit_count % 8; + + return 0; +} + /* Variant of isal_create_hufftables whose literal/length tree contains only symbols with a nonzero count. Returns 0 on success and one of the INVALID_* codes otherwise. */ +int isal_create_hufftables_subset(struct isal_hufftables *hufftables, + struct isal_huff_histogram *histogram) +{ + struct huff_tree lit_tree, dist_tree; + struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1]; + struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN]; + uint64_t bit_count; + int j, max_dist = convert_dist_to_dist_sym(IGZIP_D); + + uint32_t *dist_table = hufftables->dist_table; + uint32_t *len_table = hufftables->len_table; + uint16_t *lit_table = hufftables->lit_table; + uint16_t *dcodes = hufftables->dcodes; + uint8_t *lit_table_sizes = hufftables->lit_table_sizes; + uint8_t *dcodes_sizes = hufftables->dcodes_sizes; + uint8_t *deflate_hdr = hufftables->deflate_hdr; + uint64_t *lit_len_histogram = histogram->lit_len_histogram; + uint64_t *dist_histogram = histogram->dist_histogram; + + memset(hufftables, 0, sizeof(struct isal_hufftables)); + memset(lit_tree_array, 0, sizeof(lit_tree_array)); + memset(dist_tree_array, 0, sizeof(dist_tree_array)); + memset(lit_huff_table, 0, sizeof(lit_huff_table)); + memset(dist_huff_table, 0, sizeof(dist_huff_table)); + /* Give every symbol from LIT_TABLE_SIZE upward a nonzero count so it receives a code; this writes into the caller's histogram. */ + for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++) + if (lit_len_histogram[j] == 0) + lit_len_histogram[j]++; + + lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN); + dist_tree =
create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1); + + if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) + return INVALID_LIT_LEN_HUFFCODE; + + if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) + return INVALID_DIST_HUFFCODE; + /* A nonzero result means the codes are too long; retry with the shorter MAX_SAFE_* depth limits. */ + if (are_hufftables_useable(lit_huff_table, dist_huff_table)) { + if (create_huff_lookup + (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0) + return INVALID_LIT_LEN_HUFFCODE; + + if (create_huff_lookup + (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0) + return INVALID_DIST_HUFFCODE; + + if (are_hufftables_useable(lit_huff_table, dist_huff_table)) + return INVALID_HUFFCODE; + } + /* Distance codes below DCODE_OFFSET are not copied into dcodes. */ + create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET, + dist_huff_table + DCODE_OFFSET); + + create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table); + + create_packed_len_table(len_table, lit_huff_table); + create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table); + /* Take the size from the struct member: the local deflate_hdr is a pointer, so sizeof on it would give the pointer size rather than the buffer size. */ + bit_count = + create_header(deflate_hdr, sizeof(hufftables->deflate_hdr), lit_huff_table, dist_huff_table, + LAST_BLOCK); + /* Header length is stored as whole bytes plus leftover bits. */ + hufftables->deflate_hdr_count = bit_count / 8; + hufftables->deflate_hdr_extra_bits = bit_count % 8; + + return 0; +} diff --git a/igzip/huff_codes.h b/igzip/huff_codes.h new file mode 100644 index 0000000..9f6312f --- /dev/null +++ b/igzip/huff_codes.h @@ -0,0 +1,348 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef HUFF_CODES_H +#define HUFF_CODES_H + +#include +#include +#include +#include +#include "igzip_lib.h" +#include "bitbuf2.h" + +#ifdef _MSC_VER +# include +#else +# include +#endif + +#define LIT_LEN IGZIP_LIT_LEN +#define DIST_LEN IGZIP_DIST_LEN +#define CODE_LEN_CODES 19 +#define HUFF_LEN 19 +#ifdef LONGER_HUFFTABLE +# define DCODE_OFFSET 26 +#else +# define DCODE_OFFSET 20 +#endif +#define DYN_HDR_START_LEN 17 +#define MAX_HISTHEAP_SIZE LIT_LEN +#define MAX_HUFF_TREE_DEPTH 15 +#define D IGZIP_D /* Amount of history */ + +#define MAX_DEFLATE_CODE_LEN 15 +#define MAX_SAFE_LIT_CODE_LEN 13 +#define MAX_SAFE_DIST_CODE_LEN 12 + +#define LONG_DIST_TABLE_SIZE 8192 +#define SHORT_DIST_TABLE_SIZE 1024 +#define LEN_TABLE_SIZE 256 +#define LIT_TABLE_SIZE 257 +#define LAST_BLOCK 1 + +#define LEN_EXTRA_BITS_START 264 +#define LEN_EXTRA_BITS_INTERVAL 4 +#define DIST_EXTRA_BITS_START 3 +#define DIST_EXTRA_BITS_INTERVAL 2 + +#define INVALID_LIT_LEN_HUFFCODE 1 +#define INVALID_DIST_HUFFCODE 1 +#define INVALID_HUFFCODE 1 + +/** + * @brief Structure used to store huffman codes + */ +struct huff_code { + uint16_t code; + uint8_t length; +}; + +/** + * @brief Binary tree used to store and create a huffman tree. + */ +struct huff_tree { + uint16_t value; + uint64_t frequency; + struct huff_tree *left; + struct huff_tree *right; +}; + +/** + * @brief Nodes in a doubly linked list. + */ +struct linked_list_node { + uint16_t value; + struct linked_list_node *next; + struct linked_list_node *previous; +}; + +/** + * @brief This structure is a doubly linked list. + */ +struct linked_list { + uint64_t length; + struct linked_list_node *start; + struct linked_list_node *end; +}; + +/** + * @brief This is a binary minheap structure which stores huffman trees. + * @details The huffman trees are sorted by the frequency of the root. + * The structure is represented in a fixed sized array. 
+ */ +struct histheap { + struct huff_tree tree[MAX_HISTHEAP_SIZE]; + uint16_t size; +}; + +/** + * @brief Inserts a hufftree into a histheap. + * @param element: the hufftree to be inserted + * @param heap: the heap which element is being inserted into. + * @requires This function assumes the heap has enough allocated space. + * @returns Returns the index in heap of the inserted element + */ +int heap_push(struct huff_tree element, struct histheap *heap); + +/** + * @brief Removes the top element from the heap and returns it. + */ +struct huff_tree heap_pop(struct histheap *heap); + +/** + * @brief Removes the first element from list and returns it. + */ +struct linked_list_node *pop_from_front(struct linked_list *list); + +/** + * @brief Adds new_element to the front of list. + */ +void append_to_front(struct linked_list *list, struct linked_list_node *new_element); + +/** + * @brief Adds new_element to the end of list. + */ +void append_to_back(struct linked_list *list, struct linked_list_node *new_element); + +/** + * @brief Returns the deflate symbol value for a repeat length. +*/ +uint32_t convert_length_to_len_sym(uint32_t length); + +/** + * @brief Returns the deflate symbol value for a look back distance. + */ +uint32_t convert_dist_to_dist_sym(uint32_t dist); + +/** + * Constructs a huffman tree on tree_array which only uses elements with non-zero frequency. + * @requires Assumes there will be at least two symbols in the produced tree. + * @requires tree_array must have length at least 2*size-1, and size must be less than 286. + * @param tree_array: array of huff_tree elements used to create a huffman tree, the first + * size elements of the array are the leaf elements in the huffman tree. + * @param histogram: a histogram of the frequency of elements in tree_array. + * @param size: the number of leaf elements in the huffman tree. 
+*/ +struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array, + uint64_t * histogram, uint32_t size); + +/** + * @brief Construct a huffman tree on tree_array which uses every symbol. + * @requires tree_array must have length at least 2*size-1, and size must be less than 286. + * @param tree_array: array of huff_tree elements used to create a huffman tree, the first + *        size elements of the array are the leaf elements in the huffman tree. + * @param histogram: a histogram of the frequency of elements in tree_array. + * @param size: the number of leaf elements in the huffman tree. + */ +struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram, + uint32_t size); + +/** + * @brief Creates a deflate compliant huffman tree with maximum depth max_depth. + * @details The huffman tree is represented as a lookup table. + * @param huff_lookup_table: The output lookup table. + * @param table_length: The length of table. + * @param root: the input huffman tree the created tree is based on. + * @param max_depth: maximum depth the huffman tree can have + * @returns Returns 0 if successful and returns 1 otherwise. + */ +int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length, + struct huff_tree root, uint8_t max_depth); + +/** + * @brief Determines the code length for every value in a huffman tree. + * @param huff_lookup_table: An output lookup table used to store the code lengths + *        corresponding to the possible values + * @param count: An output histogram representing code length versus number of occurrences. + * @param root: The root of the huffman tree being analyzed. + * @param max_depth: The largest code length the result may contain. + * @returns Returns 0 if successful and returns 1 otherwise.
+ */ +int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count, + struct huff_tree root, uint8_t max_depth); + +/** + * @brief Creates an array of linked lists. + * @details Each linked list contains all the elements with codes of a given length for + * lengths less than 16, and a list for all elements with codes at least 16. These lists + * are sorted by frequency from least frequent to most frequent within any given code length. + * @param depth_array: depth_array[i] is a linked list of elements with code length i + * @param linked_lists: An input structure the linked lists in depth array are built on. + * @param current_node: the current node being visited in a huffman tree + * @param current_depth: the depth of current_node in a huffman tree + */ +void huffman_tree_traversal(struct linked_list *depth_array, + struct linked_list_node *linked_lists, uint16_t * extra_nodes, + uint8_t max_depth, struct huff_tree current_node, + uint16_t current_depth); + +/** + * @brief Determines the code of each element of a deflate compliant huffman tree and stores + * it in a lookup table + * @requires table has been initialized to already contain the code length for each element. + * @param table: A lookup table used to store the codes. + * @param table_length: The length of table. + * @param count: a histogram representing the number of occurrences of codes of a given length + */ +void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count); + +/* Reverses the first length bits in bits and returns that value */ +uint16_t bit_reverse(uint16_t bits, uint8_t length); + +/** + * @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files. + * @param huff_code_table: A literal/length huffman code lookup table. + * @returns index of the first symbol which fails and 0xFFFF otherwise.
+ */ +uint16_t valid_lit_huff_table(struct huff_code *huff_code_table); + +/** + * @brief Checks if a distance huffman table can be stored in the igzip hufftables files. + * @param huff_code_table: A distance huffman code lookup table. + * @returns the index of the first symbol which fails and 0xFFFF otherwise. + */ +uint16_t valid_dist_huff_table(struct huff_code *huff_code_table); + +/** + * @brief Creates the dynamic huffman deflate header. + * @returns Returns the length of header in bits. + * @requires This function requires header is large enough to store the whole header. + * @param header: The output header. + * @param lit_huff_table: A literal/length code huffman lookup table. + * @param dist_huff_table: A distance huffman code lookup table. + * @param end_of_block: Value determining whether end of block header is produced or not; + * 0 corresponds to not end of block and all other inputs correspond to end of block. + */ +int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table, + struct huff_code *dist_huff_table, uint32_t end_of_block); + +/** + * @brief Creates a run length encoded representation of huff_table. + * @details Also creates a histogram representing the frequency of each symbol + * @returns Returns the number of symbols written into huffman_rep. + * @param huffman_rep: The output run length encoded version of huff_table. + * @param histogram: The output histogram of frequencies of elements in huffman_rep. + * @param extra_bits: An output table storing extra bits associated with huffman_rep. + * @param huff_table: The input huffman_table or concatenation of huffman_tables. + * @param len: The length of huff_table. + */ +uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram, + uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len); + +/** + * @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep.
+ * @param huffman_rep: pointer to array containing the output huffman_rep. + * @param histogram: histogram of elements seen in huffman_rep. + * @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep. + * @param huff_table: a concatenated list of huffman lookup tables. + * @param current_index: The next spot elements will be written in huffman_rep. + */ +uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits, + uint16_t last_code, uint16_t run_length, uint16_t current_index); + +/** + * @brief Creates the header for run length encoded huffman trees. + * @param header: the output header. + * @param lookup_table: a huffman lookup table. + * @param huffman_rep: a run length encoded huffman tree. + * @extra_bits: extra bits associated with the corresponding spot in huffman_rep + * @param huffman_rep_length: the length of huffman_rep. + * @param end_of_block: Value determining whether end of block header is produced or not; + * 0 corresponds to not end of block and all other inputs correspond to end of block. + * @param hclen: Length of huffman code for huffman codes minus 4. + * @param hlit: Length of literal/length table minus 257. + * @parm hdist: Length of distance table minus 1. + */ +int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table, + uint16_t * huffman_rep, uint16_t * extra_bits, + uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen, + uint32_t hlit, uint32_t hdist); + +/** + * @brief Creates a two table representation of huffman codes. 
+ * @param code_table: output table containing the code + * @param code_size_table: output table containing the code length + * @param length: the lenght of hufftable + * @param hufftable: a huffman lookup table + */ +void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, + uint32_t length, struct huff_code *hufftable); + +/** + * @brief Creates a packed representation of length huffman codes. + * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman + * code and bits 8:0 contain the code length. + * @param packed_table: the output table + * @param length: the length of lit_len_hufftable + * @param lit_len_hufftable: a literal/length huffman lookup table + */ +void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable); + +/** + * @brief Creates a packed representation of distance huffman codes. + * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman + * code and bits 8:0 contain the code length. + * @param packed_table: the output table + * @param length: the length of lit_len_hufftable + * @param dist_hufftable: a distance huffman lookup table + */ +void create_packed_dist_table(uint32_t * packed_table, uint32_t length, + struct huff_code *dist_hufftable); + +/** + * @brief Checks to see if the hufftable is usable by igzip + * + * @param lit_len_hufftable: literal/lenght huffman code + * @param dist_hufftable: distance huffman code + * @returns Returns 0 if the table is usable + */ +int are_hufftables_useable(struct huff_code *lit_len_hufftable, + struct huff_code *dist_hufftable); +#endif diff --git a/igzip/huffman.asm b/igzip/huffman.asm new file mode 100644 index 0000000..fbe402c --- /dev/null +++ b/igzip/huffman.asm @@ -0,0 +1,208 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "lz0a_const.asm" + +; Macros for doing Huffman Encoding + +; DIST_TABLE_SIZE: number of entries in dist_table. +; DECODE_OFFSET: bias removed from a distance symbol when indexing the dcodes tables. +%ifdef LONGER_HUFFTABLE + %if (D > 8192) + %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE + % error + %else + %define DIST_TABLE_SIZE 8192 + %define DECODE_OFFSET 26 + %endif +%else + %define DIST_TABLE_SIZE 1024 + %define DECODE_OFFSET 20 +%endif + +%define LEN_TABLE_SIZE 256 +%define LIT_TABLE_SIZE 257 + +; Byte offsets into the hufftables structure. DIST_TABLE_OFFSET and LEN_TABLE_OFFSET are +; biased by one and three 4-byte entries respectively, and the DCODE offsets by DECODE_OFFSET +; entries, so the macros below index the tables directly by distance, length and distance symbol. +%define DIST_TABLE_START (IGZIP_MAX_DEF_HDR_SIZE + 8) +%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) +%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) +%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) +%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) +%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) +%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) +;; NOTE(review): the struct sketch below omits the byte-sized length tables that the macros +;; read at LIT_TABLE_SIZES_OFFSET and DCODE_TABLE_SIZE_OFFSET; check field names and order +;; against the C definition of struct isal_hufftables. 
+;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ +;; struct isal_hufftables { +;; // deflate huffman tree header +;; uint8_t deflate_huff_hdr[IGZIP_MAX_DEF_HDR_SIZE]; +;; +;; //!< Number of whole bytes in deflate_huff_hdr +;; uint32_t deflate_huff_hdr_count; +;; +;; //!< Number of bits in the partial byte in header +;; uint32_t deflate_huff_hdr_extra_bits; +;; +;; //!< bits 4:0 are the code length, bits 31:5 are the code +;; uint32_t dist_table[DIST_TABLE_SIZE]; +;; +;; //!< bits 4:0 are the code length, bits 31:5 are the code +;; uint32_t len_table[LEN_TABLE_SIZE]; +;; +;; //!< the code only; get_lit_code reads the length from the byte table at LIT_TABLE_SIZES_OFFSET +;; uint16_t lit_table[LIT_TABLE_SIZE]; +;; +;; //!< the code only; compute_dist_code reads the length from the byte table at DCODE_TABLE_SIZE_OFFSET +;; uint16_t dcodes[30 - DECODE_OFFSET]; + +;; }; + + +%ifdef LONGER_HUFFTABLE +; Table lookup only: does not use RCX and leaves dist unchanged +; get_dist_code dist, code, len, hufftables +%macro get_dist_code 4 +%define %%dist %1 ; 64-bit IN +%define %%code
%2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 ; address of the hufftable + + mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] + mov %%code, %%len + and %%len, 0x1F; + shr %%code, 5 +%endm + +; Loads the packed dist_table entry (code and length together) without unpacking it +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable + mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] +%endm + +; Splits a packed table entry: len = low 5 bits, code = remaining upper bits +%macro unpack_dist_code 2 +%define %%code %1d ; 32-bit IN (packed entry) / OUT (code) +%define %%len %2d ; 32-bit OUT + + mov %%len, %%code + and %%len, 0x1F; + shr %%code, 5 +%endm + +%else +; Assumes (dist != 0); only expanded by get_dist_code below for dist > DIST_TABLE_SIZE +; Uses RCX, clobbers dist +; void compute_dist_code dist, code, len, hufftables +; NOTE(review): the table loads below name "hufftables" directly rather than the %%hufftables +; argument, so they rely on a definition of hufftables (and WRT_OPT) made outside this macro. 
+%macro compute_dist_code 4 +%define %%dist %1d ; IN, clobbered +%define %%distq %1 +%define %%code %2 ; OUT +%define %%len %3 ; OUT +%define %%hufftables %4 + + dec %%dist + bsr ecx, %%dist ; ecx = msb = bsr(dist) + dec ecx ; ecx = num_extra_bits = msb - 1 + mov %%code, 1 + shl %%code, CL + dec %%code ; code = ((1 << num_extra_bits) - 1) + and %%code, %%dist ; code = extra_bits + shr %%dist, CL ; dist >>= num_extra_bits + lea %%dist, [%%dist + 2*ecx] ; dist = sym = dist + num_extra_bits*2 + mov %%len, ecx ; len = num_extra_bits + movzx ecx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] + movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] + shl %%code, CL ; code = extra_bits << code_length(sym) + or %%code, %%dist ; code = huff_code(sym) | (extra_bits << code_length(sym)) + add %%len, ecx ; len = num_extra_bits + code_length(sym) +%endm + +; Uses RCX, clobbers dist +; get_dist_code dist, code, len, hufftables +; Distances up to DIST_TABLE_SIZE are read from dist_table; larger ones go through compute_dist_code +%macro get_dist_code 4 +%define %%dist %1d ; 32-bit IN, clobbered +%define %%distq %1 ; 64-bit IN, clobbered +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + cmp %%dist, DIST_TABLE_SIZE + jg %%do_compute + mov %%len, [hufftables + DIST_TABLE_OFFSET + 4*%%distq WRT_OPT] + mov %%code, %%len + and %%len, 0x1F; +
shr %%code, 5 + jmp %%done +%%do_compute: + compute_dist_code %%distq, %%code, %%len, %%hufftables +%%done: +%endm + +; NOTE(review): in this (non LONGER_HUFFTABLE) configuration the macro expands to no instructions +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable +%endm + +%endif + + +; "len" can be same register as "length" +; get_len_code length, code, len, hufftables +; len = low 5 bits of the len_table entry, code = remaining upper bits +%macro get_len_code 4 +%define %%length %1 ; 64-bit IN +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] + mov %%code, %%len + and %%len, 0x1F + shr %%code, 5 +%endm + + +; get_lit_code lit, code, len, hufftables +; code comes from the 16-bit table at LIT_TABLE_OFFSET, len from the byte table at LIT_TABLE_SIZES_OFFSET +%macro get_lit_code 4 +%define %%lit %1 ; 64-bit IN or CONST +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] + movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] + +%endm + + +;; Compute hash of first 3 bytes of data +%macro compute_hash 2 +%define %%result %1d ; 32-bit reg +%define %%data %2d ; 32-bit reg (top byte is cleared, low 3 bytes kept) + + and %%data, 0x00FFFFFF + xor %%result, %%result + crc32 %%result, %%data +%endm diff --git a/igzip/huffman.h b/igzip/huffman.h new file mode 100644 index 0000000..0116dea --- /dev/null +++ b/igzip/huffman.h @@ -0,0 +1,226 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution.
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" + +#ifdef _MSC_VER +# include +# define inline __inline +#else +# include +#endif + +#ifndef IGZIP_USE_GZIP_FORMAT +# define DEFLATE 1 +#endif + + +extern uint32_t CrcTable[256]; + +static inline uint32_t bsr(uint32_t val) +{ + uint32_t msb; +#ifdef __LZCNT__ + msb = 16 - __lzcnt16(val); +#else + for(msb = 0; val > 0; val >>= 1) + msb++; +#endif + return msb; +} + +static inline uint32_t tzcnt(uint64_t val) +{ + uint32_t cnt; + +#ifdef __x86_64__ + + cnt = __builtin_ctzll(val) / 8;//__tzcnt_u64(val); + +#else + for(cnt = 8; val > 0; val <<= 8) + cnt -= 1; +#endif + return cnt; +} + +static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len) +{ + assert(dist > DIST_TABLE_SIZE); + + dist -= 1; + uint32_t msb; + uint32_t num_extra_bits; + uint32_t extra_bits; + uint32_t sym; + uint32_t len; + uint32_t code; + + msb = bsr(dist); + 
assert(msb >= 1); + num_extra_bits = msb - 2; + extra_bits = dist & ((1 << num_extra_bits) - 1); + dist >>= num_extra_bits; + sym = dist + 2 * num_extra_bits; + assert(sym < 30); + code = hufftables->dcodes[sym - DECODE_OFFSET]; + len = hufftables->dcodes_sizes[sym - DECODE_OFFSET]; + *p_code = code | (extra_bits << len); + *p_len = len + num_extra_bits; +} + +static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len) +{ + if (dist < 1) + dist = 0; + assert(dist >= 1); + assert(dist <= 32768); + if (dist <= DIST_TABLE_SIZE) { + uint64_t code_len; + code_len = hufftables->dist_table[dist - 1]; + *code = code_len >> 5; + *len = code_len & 0x1F; + } else { + compute_dist_code(hufftables, dist, code, len); + } +} + +static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len) +{ + assert(length >= 3); + assert(length <= 258); + + uint64_t code_len; + code_len = hufftables->len_table[length - 3]; + *code = code_len >> 5; + *len = code_len & 0x1F; +} + +static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len) +{ + assert(lit <= 256); + + *code = hufftables->lit_table[lit]; + *len = hufftables->lit_table_sizes[lit]; +} + +/** + * @brief Returns a hash of the first 3 bytes of input data. + */ +static inline uint32_t compute_hash(uint32_t data) +{ + data &= 0x00FFFFFF; +#ifdef __SSE4_1__ + + return _mm_crc32_u32(0, data); + +#else + /* Use multiplication to create a hash, 0xBDD06057 is a prime number */ + return ((uint64_t)data * 0xB2D06057) >> 16; + +#endif /* __SSE4_1__ */ +} + + +/** + * @brief Returns how long str1 and str2 have the same symbols. + * @param str1: First input string. + * @param str2: Second input string. + * @param max_length: length of the smaller string. 
+ */ +static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length) +{ + uint32_t count; + uint64_t test; + uint64_t loop_length; + + if(max_length > 258) + max_length = 258; + + loop_length = max_length & ~0x7; + + for(count = 0; count < loop_length; count += 8){ + test = *(uint64_t *) str1; + test ^= *(uint64_t *) str2; + if(test != 0) + return count + tzcnt(test); + str1 += 8; + str2 += 8; + } + + switch(max_length % 8){ + + case 7: + if(*str1++ != *str2++) + return count; + count++; + case 6: + if(*str1++ != *str2++) + return count; + count++; + case 5: + if(*str1++ != *str2++) + return count; + count++; + case 4: + if(*str1++ != *str2++) + return count; + count++; + case 3: + if(*str1++ != *str2++) + return count; + count++; + case 2: + if(*str1++ != *str2++) + return count; + count++; + case 1: + if(*str1 != *str2) + return count; + count++; + } + + return count; +} + +static inline void update_crc(uint32_t* crc, uint8_t * start, uint32_t length) +{ +#ifndef DEFLATE + uint8_t *end = start + length; + + while (start < end) + *crc = (*crc >> 8) ^ CrcTable[(*crc & 0x000000FF) ^ *start++]; +#else + return; +#endif +} + diff --git a/igzip/hufftables_c.c b/igzip/hufftables_c.c new file mode 100644 index 0000000..2f37bbc --- /dev/null +++ b/igzip/hufftables_c.c @@ -0,0 +1,2528 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include +#include + +uint8_t const gzip_hdr[10] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff +}; + +const uint32_t gzip_hdr_bytes = 10; +const uint32_t gzip_trl_bytes = 8; + +const struct isal_hufftables hufftables_default = { + + .deflate_hdr = { + 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34, + 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f, + 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb, + 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, + 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb, + 0xb2, 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec, + 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1, + 0x6e, 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb, + 0xb6, 0x6e, 0xdb, 0xb6, 0xaf, 0xeb, 0xb2, 0x6d, + 0xc7, 0x7a, 0xec, 0xdb, 0xb6, 0xed, 0xcb, 0x31, + 0xaf, 0xf3, 0x3c, 0x2d, 0xcb, 0x32, 0x6f, 0xdb, + 0xbc, 0xcc, 0xf3, 0xb2, 0xcd, 0x2f, 0xcb, 0xb2, + 0x2e, 0xc7, 0xb6, 0xad, 0xc7, 0x7e, 0xbc, 0xbf, + 0xee, 0xfb, 0xb1, 0xbf, 0x1e, 
0xc7, 0xb1, 0x1f, + 0xc7, 0xfe, 0x72, 0xbc, 0xec, 0xfb, 0x7e, 0xec, + 0xfb, 0x71, 0xec, 0x2f, 0xc7, 0xeb, 0xbe, 0x1f, + 0xc7, 0xfe, 0x72, 0xbc, 0xec, 0xfb, 0x71, 0x1c, + 0xfb, 0x71, 0xec, 0xfb, 0xf1, 0xbe, 0x1f, 0xc7, + 0xbe, 0x1f, 0xc7, 0xb1, 0xbf, 0x1d, 0x6f, 0xfb, + 0x7e, 0xbc, 0xed, 0xaf, 0xc7, 0xb1, 0xef, 0xc7, + 0xb1, 0x1f, 0xef, 0xfb, 0x31, 0xc6, 0x34, 0xcd, + 0xf3, 0x32, 0xbf, 0x2c, 0xf3, 0xba, 0x6e, 0xeb, + 0x7a, 0x6c, 0xc7, 0xb1, 0x6f, 0xc7, 0x7e, 0x1c, + 0xfb, 0xb6, 0xad, 0xdb, 0xbc, 0x2d, 0xf3, 0x34, + 0x8d, 0x31, 0xa6, 0x31, 0x7a, 0xf4, 0x18, 0xd3, + 0x34, 0x6d, 0xaf }, + + .deflate_hdr_count = 203, + .deflate_hdr_extra_bits = 0, + +#ifdef LONGER_HUFFTABLE + .dist_table = { + 0x00001be9, 0x00003be9, 0x00000be8, 0x000007e9, + 0x000001e7, 0x000009e7, 0x000027ea, 0x000067ea, + 0x000003e9, 0x000013e9, 0x000023e9, 0x000033e9, + 0x000005e8, 0x00000de8, 0x000015e8, 0x00001de8, + 0x00000068, 0x00000468, 0x00000868, 0x00000c68, + 0x00001068, 0x00001468, 0x00001868, 0x00001c68, + 0x00000268, 0x00000668, 0x00000a68, 0x00000e68, + 0x00001268, 0x00001668, 0x00001a68, 0x00001e68, + 0x00000048, 0x00000248, 0x00000448, 0x00000648, + 0x00000848, 0x00000a48, 0x00000c48, 0x00000e48, + 0x00001048, 0x00001248, 0x00001448, 0x00001648, + 0x00001848, 0x00001a48, 0x00001c48, 0x00001e48, + 0x00000148, 0x00000348, 0x00000548, 0x00000748, + 0x00000948, 0x00000b48, 0x00000d48, 0x00000f48, + 0x00001148, 0x00001348, 0x00001548, 0x00001748, + 0x00001948, 0x00001b48, 0x00001d48, 0x00001f48, + 0x000000c9, 0x000002c9, 0x000004c9, 0x000006c9, + 0x000008c9, 0x00000ac9, 0x00000cc9, 0x00000ec9, + 0x000010c9, 0x000012c9, 0x000014c9, 0x000016c9, + 0x000018c9, 0x00001ac9, 0x00001cc9, 0x00001ec9, + 0x000020c9, 0x000022c9, 0x000024c9, 0x000026c9, + 0x000028c9, 0x00002ac9, 0x00002cc9, 0x00002ec9, + 0x000030c9, 0x000032c9, 0x000034c9, 0x000036c9, + 0x000038c9, 0x00003ac9, 0x00003cc9, 0x00003ec9, + 0x0000016a, 0x0000056a, 0x0000096a, 0x00000d6a, + 0x0000116a, 0x0000156a, 0x0000196a, 0x00001d6a, + 
0x0000216a, 0x0000256a, 0x0000296a, 0x00002d6a, + 0x0000316a, 0x0000356a, 0x0000396a, 0x00003d6a, + 0x0000416a, 0x0000456a, 0x0000496a, 0x00004d6a, + 0x0000516a, 0x0000556a, 0x0000596a, 0x00005d6a, + 0x0000616a, 0x0000656a, 0x0000696a, 0x00006d6a, + 0x0000716a, 0x0000756a, 0x0000796a, 0x00007d6a, + 0x000001ca, 0x000003ca, 0x000005ca, 0x000007ca, + 0x000009ca, 0x00000bca, 0x00000dca, 0x00000fca, + 0x000011ca, 0x000013ca, 0x000015ca, 0x000017ca, + 0x000019ca, 0x00001bca, 0x00001dca, 0x00001fca, + 0x000021ca, 0x000023ca, 0x000025ca, 0x000027ca, + 0x000029ca, 0x00002bca, 0x00002dca, 0x00002fca, + 0x000031ca, 0x000033ca, 0x000035ca, 0x000037ca, + 0x000039ca, 0x00003bca, 0x00003dca, 0x00003fca, + 0x000041ca, 0x000043ca, 0x000045ca, 0x000047ca, + 0x000049ca, 0x00004bca, 0x00004dca, 0x00004fca, + 0x000051ca, 0x000053ca, 0x000055ca, 0x000057ca, + 0x000059ca, 0x00005bca, 0x00005dca, 0x00005fca, + 0x000061ca, 0x000063ca, 0x000065ca, 0x000067ca, + 0x000069ca, 0x00006bca, 0x00006dca, 0x00006fca, + 0x000071ca, 0x000073ca, 0x000075ca, 0x000077ca, + 0x000079ca, 0x00007bca, 0x00007dca, 0x00007fca, + 0x0000002a, 0x0000022a, 0x0000042a, 0x0000062a, + 0x0000082a, 0x00000a2a, 0x00000c2a, 0x00000e2a, + 0x0000102a, 0x0000122a, 0x0000142a, 0x0000162a, + 0x0000182a, 0x00001a2a, 0x00001c2a, 0x00001e2a, + 0x0000202a, 0x0000222a, 0x0000242a, 0x0000262a, + 0x0000282a, 0x00002a2a, 0x00002c2a, 0x00002e2a, + 0x0000302a, 0x0000322a, 0x0000342a, 0x0000362a, + 0x0000382a, 0x00003a2a, 0x00003c2a, 0x00003e2a, + 0x0000402a, 0x0000422a, 0x0000442a, 0x0000462a, + 0x0000482a, 0x00004a2a, 0x00004c2a, 0x00004e2a, + 0x0000502a, 0x0000522a, 0x0000542a, 0x0000562a, + 0x0000582a, 0x00005a2a, 0x00005c2a, 0x00005e2a, + 0x0000602a, 0x0000622a, 0x0000642a, 0x0000662a, + 0x0000682a, 0x00006a2a, 0x00006c2a, 0x00006e2a, + 0x0000702a, 0x0000722a, 0x0000742a, 0x0000762a, + 0x0000782a, 0x00007a2a, 0x00007c2a, 0x00007e2a, + 0x0000000a, 0x0000010a, 0x0000020a, 0x0000030a, + 0x0000040a, 0x0000050a, 0x0000060a, 0x0000070a, + 
0x0000080a, 0x0000090a, 0x00000a0a, 0x00000b0a, + 0x00000c0a, 0x00000d0a, 0x00000e0a, 0x00000f0a, + 0x0000100a, 0x0000110a, 0x0000120a, 0x0000130a, + 0x0000140a, 0x0000150a, 0x0000160a, 0x0000170a, + 0x0000180a, 0x0000190a, 0x00001a0a, 0x00001b0a, + 0x00001c0a, 0x00001d0a, 0x00001e0a, 0x00001f0a, + 0x0000200a, 0x0000210a, 0x0000220a, 0x0000230a, + 0x0000240a, 0x0000250a, 0x0000260a, 0x0000270a, + 0x0000280a, 0x0000290a, 0x00002a0a, 0x00002b0a, + 0x00002c0a, 0x00002d0a, 0x00002e0a, 0x00002f0a, + 0x0000300a, 0x0000310a, 0x0000320a, 0x0000330a, + 0x0000340a, 0x0000350a, 0x0000360a, 0x0000370a, + 0x0000380a, 0x0000390a, 0x00003a0a, 0x00003b0a, + 0x00003c0a, 0x00003d0a, 0x00003e0a, 0x00003f0a, + 0x0000400a, 0x0000410a, 0x0000420a, 0x0000430a, + 0x0000440a, 0x0000450a, 0x0000460a, 0x0000470a, + 0x0000480a, 0x0000490a, 0x00004a0a, 0x00004b0a, + 0x00004c0a, 0x00004d0a, 0x00004e0a, 0x00004f0a, + 0x0000500a, 0x0000510a, 0x0000520a, 0x0000530a, + 0x0000540a, 0x0000550a, 0x0000560a, 0x0000570a, + 0x0000580a, 0x0000590a, 0x00005a0a, 0x00005b0a, + 0x00005c0a, 0x00005d0a, 0x00005e0a, 0x00005f0a, + 0x0000600a, 0x0000610a, 0x0000620a, 0x0000630a, + 0x0000640a, 0x0000650a, 0x0000660a, 0x0000670a, + 0x0000680a, 0x0000690a, 0x00006a0a, 0x00006b0a, + 0x00006c0a, 0x00006d0a, 0x00006e0a, 0x00006f0a, + 0x0000700a, 0x0000710a, 0x0000720a, 0x0000730a, + 0x0000740a, 0x0000750a, 0x0000760a, 0x0000770a, + 0x0000780a, 0x0000790a, 0x00007a0a, 0x00007b0a, + 0x00007c0a, 0x00007d0a, 0x00007e0a, 0x00007f0a, + 0x0000012b, 0x0000032b, 0x0000052b, 0x0000072b, + 0x0000092b, 0x00000b2b, 0x00000d2b, 0x00000f2b, + 0x0000112b, 0x0000132b, 0x0000152b, 0x0000172b, + 0x0000192b, 0x00001b2b, 0x00001d2b, 0x00001f2b, + 0x0000212b, 0x0000232b, 0x0000252b, 0x0000272b, + 0x0000292b, 0x00002b2b, 0x00002d2b, 0x00002f2b, + 0x0000312b, 0x0000332b, 0x0000352b, 0x0000372b, + 0x0000392b, 0x00003b2b, 0x00003d2b, 0x00003f2b, + 0x0000412b, 0x0000432b, 0x0000452b, 0x0000472b, + 0x0000492b, 0x00004b2b, 0x00004d2b, 0x00004f2b, + 
0x0000512b, 0x0000532b, 0x0000552b, 0x0000572b, + 0x0000592b, 0x00005b2b, 0x00005d2b, 0x00005f2b, + 0x0000612b, 0x0000632b, 0x0000652b, 0x0000672b, + 0x0000692b, 0x00006b2b, 0x00006d2b, 0x00006f2b, + 0x0000712b, 0x0000732b, 0x0000752b, 0x0000772b, + 0x0000792b, 0x00007b2b, 0x00007d2b, 0x00007f2b, + 0x0000812b, 0x0000832b, 0x0000852b, 0x0000872b, + 0x0000892b, 0x00008b2b, 0x00008d2b, 0x00008f2b, + 0x0000912b, 0x0000932b, 0x0000952b, 0x0000972b, + 0x0000992b, 0x00009b2b, 0x00009d2b, 0x00009f2b, + 0x0000a12b, 0x0000a32b, 0x0000a52b, 0x0000a72b, + 0x0000a92b, 0x0000ab2b, 0x0000ad2b, 0x0000af2b, + 0x0000b12b, 0x0000b32b, 0x0000b52b, 0x0000b72b, + 0x0000b92b, 0x0000bb2b, 0x0000bd2b, 0x0000bf2b, + 0x0000c12b, 0x0000c32b, 0x0000c52b, 0x0000c72b, + 0x0000c92b, 0x0000cb2b, 0x0000cd2b, 0x0000cf2b, + 0x0000d12b, 0x0000d32b, 0x0000d52b, 0x0000d72b, + 0x0000d92b, 0x0000db2b, 0x0000dd2b, 0x0000df2b, + 0x0000e12b, 0x0000e32b, 0x0000e52b, 0x0000e72b, + 0x0000e92b, 0x0000eb2b, 0x0000ed2b, 0x0000ef2b, + 0x0000f12b, 0x0000f32b, 0x0000f52b, 0x0000f72b, + 0x0000f92b, 0x0000fb2b, 0x0000fd2b, 0x0000ff2b, + 0x0000008b, 0x0000018b, 0x0000028b, 0x0000038b, + 0x0000048b, 0x0000058b, 0x0000068b, 0x0000078b, + 0x0000088b, 0x0000098b, 0x00000a8b, 0x00000b8b, + 0x00000c8b, 0x00000d8b, 0x00000e8b, 0x00000f8b, + 0x0000108b, 0x0000118b, 0x0000128b, 0x0000138b, + 0x0000148b, 0x0000158b, 0x0000168b, 0x0000178b, + 0x0000188b, 0x0000198b, 0x00001a8b, 0x00001b8b, + 0x00001c8b, 0x00001d8b, 0x00001e8b, 0x00001f8b, + 0x0000208b, 0x0000218b, 0x0000228b, 0x0000238b, + 0x0000248b, 0x0000258b, 0x0000268b, 0x0000278b, + 0x0000288b, 0x0000298b, 0x00002a8b, 0x00002b8b, + 0x00002c8b, 0x00002d8b, 0x00002e8b, 0x00002f8b, + 0x0000308b, 0x0000318b, 0x0000328b, 0x0000338b, + 0x0000348b, 0x0000358b, 0x0000368b, 0x0000378b, + 0x0000388b, 0x0000398b, 0x00003a8b, 0x00003b8b, + 0x00003c8b, 0x00003d8b, 0x00003e8b, 0x00003f8b, + 0x0000408b, 0x0000418b, 0x0000428b, 0x0000438b, + 0x0000448b, 0x0000458b, 0x0000468b, 0x0000478b, + 
0x0000488b, 0x0000498b, 0x00004a8b, 0x00004b8b, + 0x00004c8b, 0x00004d8b, 0x00004e8b, 0x00004f8b, + 0x0000508b, 0x0000518b, 0x0000528b, 0x0000538b, + 0x0000548b, 0x0000558b, 0x0000568b, 0x0000578b, + 0x0000588b, 0x0000598b, 0x00005a8b, 0x00005b8b, + 0x00005c8b, 0x00005d8b, 0x00005e8b, 0x00005f8b, + 0x0000608b, 0x0000618b, 0x0000628b, 0x0000638b, + 0x0000648b, 0x0000658b, 0x0000668b, 0x0000678b, + 0x0000688b, 0x0000698b, 0x00006a8b, 0x00006b8b, + 0x00006c8b, 0x00006d8b, 0x00006e8b, 0x00006f8b, + 0x0000708b, 0x0000718b, 0x0000728b, 0x0000738b, + 0x0000748b, 0x0000758b, 0x0000768b, 0x0000778b, + 0x0000788b, 0x0000798b, 0x00007a8b, 0x00007b8b, + 0x00007c8b, 0x00007d8b, 0x00007e8b, 0x00007f8b, + 0x0000808b, 0x0000818b, 0x0000828b, 0x0000838b, + 0x0000848b, 0x0000858b, 0x0000868b, 0x0000878b, + 0x0000888b, 0x0000898b, 0x00008a8b, 0x00008b8b, + 0x00008c8b, 0x00008d8b, 0x00008e8b, 0x00008f8b, + 0x0000908b, 0x0000918b, 0x0000928b, 0x0000938b, + 0x0000948b, 0x0000958b, 0x0000968b, 0x0000978b, + 0x0000988b, 0x0000998b, 0x00009a8b, 0x00009b8b, + 0x00009c8b, 0x00009d8b, 0x00009e8b, 0x00009f8b, + 0x0000a08b, 0x0000a18b, 0x0000a28b, 0x0000a38b, + 0x0000a48b, 0x0000a58b, 0x0000a68b, 0x0000a78b, + 0x0000a88b, 0x0000a98b, 0x0000aa8b, 0x0000ab8b, + 0x0000ac8b, 0x0000ad8b, 0x0000ae8b, 0x0000af8b, + 0x0000b08b, 0x0000b18b, 0x0000b28b, 0x0000b38b, + 0x0000b48b, 0x0000b58b, 0x0000b68b, 0x0000b78b, + 0x0000b88b, 0x0000b98b, 0x0000ba8b, 0x0000bb8b, + 0x0000bc8b, 0x0000bd8b, 0x0000be8b, 0x0000bf8b, + 0x0000c08b, 0x0000c18b, 0x0000c28b, 0x0000c38b, + 0x0000c48b, 0x0000c58b, 0x0000c68b, 0x0000c78b, + 0x0000c88b, 0x0000c98b, 0x0000ca8b, 0x0000cb8b, + 0x0000cc8b, 0x0000cd8b, 0x0000ce8b, 0x0000cf8b, + 0x0000d08b, 0x0000d18b, 0x0000d28b, 0x0000d38b, + 0x0000d48b, 0x0000d58b, 0x0000d68b, 0x0000d78b, + 0x0000d88b, 0x0000d98b, 0x0000da8b, 0x0000db8b, + 0x0000dc8b, 0x0000dd8b, 0x0000de8b, 0x0000df8b, + 0x0000e08b, 0x0000e18b, 0x0000e28b, 0x0000e38b, + 0x0000e48b, 0x0000e58b, 0x0000e68b, 0x0000e78b, + 
0x0000e88b, 0x0000e98b, 0x0000ea8b, 0x0000eb8b, + 0x0000ec8b, 0x0000ed8b, 0x0000ee8b, 0x0000ef8b, + 0x0000f08b, 0x0000f18b, 0x0000f28b, 0x0000f38b, + 0x0000f48b, 0x0000f58b, 0x0000f68b, 0x0000f78b, + 0x0000f88b, 0x0000f98b, 0x0000fa8b, 0x0000fb8b, + 0x0000fc8b, 0x0000fd8b, 0x0000fe8b, 0x0000ff8b, + 0x000000ac, 0x000002ac, 0x000004ac, 0x000006ac, + 0x000008ac, 0x00000aac, 0x00000cac, 0x00000eac, + 0x000010ac, 0x000012ac, 0x000014ac, 0x000016ac, + 0x000018ac, 0x00001aac, 0x00001cac, 0x00001eac, + 0x000020ac, 0x000022ac, 0x000024ac, 0x000026ac, + 0x000028ac, 0x00002aac, 0x00002cac, 0x00002eac, + 0x000030ac, 0x000032ac, 0x000034ac, 0x000036ac, + 0x000038ac, 0x00003aac, 0x00003cac, 0x00003eac, + 0x000040ac, 0x000042ac, 0x000044ac, 0x000046ac, + 0x000048ac, 0x00004aac, 0x00004cac, 0x00004eac, + 0x000050ac, 0x000052ac, 0x000054ac, 0x000056ac, + 0x000058ac, 0x00005aac, 0x00005cac, 0x00005eac, + 0x000060ac, 0x000062ac, 0x000064ac, 0x000066ac, + 0x000068ac, 0x00006aac, 0x00006cac, 0x00006eac, + 0x000070ac, 0x000072ac, 0x000074ac, 0x000076ac, + 0x000078ac, 0x00007aac, 0x00007cac, 0x00007eac, + 0x000080ac, 0x000082ac, 0x000084ac, 0x000086ac, + 0x000088ac, 0x00008aac, 0x00008cac, 0x00008eac, + 0x000090ac, 0x000092ac, 0x000094ac, 0x000096ac, + 0x000098ac, 0x00009aac, 0x00009cac, 0x00009eac, + 0x0000a0ac, 0x0000a2ac, 0x0000a4ac, 0x0000a6ac, + 0x0000a8ac, 0x0000aaac, 0x0000acac, 0x0000aeac, + 0x0000b0ac, 0x0000b2ac, 0x0000b4ac, 0x0000b6ac, + 0x0000b8ac, 0x0000baac, 0x0000bcac, 0x0000beac, + 0x0000c0ac, 0x0000c2ac, 0x0000c4ac, 0x0000c6ac, + 0x0000c8ac, 0x0000caac, 0x0000ccac, 0x0000ceac, + 0x0000d0ac, 0x0000d2ac, 0x0000d4ac, 0x0000d6ac, + 0x0000d8ac, 0x0000daac, 0x0000dcac, 0x0000deac, + 0x0000e0ac, 0x0000e2ac, 0x0000e4ac, 0x0000e6ac, + 0x0000e8ac, 0x0000eaac, 0x0000ecac, 0x0000eeac, + 0x0000f0ac, 0x0000f2ac, 0x0000f4ac, 0x0000f6ac, + 0x0000f8ac, 0x0000faac, 0x0000fcac, 0x0000feac, + 0x000100ac, 0x000102ac, 0x000104ac, 0x000106ac, + 0x000108ac, 0x00010aac, 0x00010cac, 0x00010eac, + 
0x000110ac, 0x000112ac, 0x000114ac, 0x000116ac, + 0x000118ac, 0x00011aac, 0x00011cac, 0x00011eac, + 0x000120ac, 0x000122ac, 0x000124ac, 0x000126ac, + 0x000128ac, 0x00012aac, 0x00012cac, 0x00012eac, + 0x000130ac, 0x000132ac, 0x000134ac, 0x000136ac, + 0x000138ac, 0x00013aac, 0x00013cac, 0x00013eac, + 0x000140ac, 0x000142ac, 0x000144ac, 0x000146ac, + 0x000148ac, 0x00014aac, 0x00014cac, 0x00014eac, + 0x000150ac, 0x000152ac, 0x000154ac, 0x000156ac, + 0x000158ac, 0x00015aac, 0x00015cac, 0x00015eac, + 0x000160ac, 0x000162ac, 0x000164ac, 0x000166ac, + 0x000168ac, 0x00016aac, 0x00016cac, 0x00016eac, + 0x000170ac, 0x000172ac, 0x000174ac, 0x000176ac, + 0x000178ac, 0x00017aac, 0x00017cac, 0x00017eac, + 0x000180ac, 0x000182ac, 0x000184ac, 0x000186ac, + 0x000188ac, 0x00018aac, 0x00018cac, 0x00018eac, + 0x000190ac, 0x000192ac, 0x000194ac, 0x000196ac, + 0x000198ac, 0x00019aac, 0x00019cac, 0x00019eac, + 0x0001a0ac, 0x0001a2ac, 0x0001a4ac, 0x0001a6ac, + 0x0001a8ac, 0x0001aaac, 0x0001acac, 0x0001aeac, + 0x0001b0ac, 0x0001b2ac, 0x0001b4ac, 0x0001b6ac, + 0x0001b8ac, 0x0001baac, 0x0001bcac, 0x0001beac, + 0x0001c0ac, 0x0001c2ac, 0x0001c4ac, 0x0001c6ac, + 0x0001c8ac, 0x0001caac, 0x0001ccac, 0x0001ceac, + 0x0001d0ac, 0x0001d2ac, 0x0001d4ac, 0x0001d6ac, + 0x0001d8ac, 0x0001daac, 0x0001dcac, 0x0001deac, + 0x0001e0ac, 0x0001e2ac, 0x0001e4ac, 0x0001e6ac, + 0x0001e8ac, 0x0001eaac, 0x0001ecac, 0x0001eeac, + 0x0001f0ac, 0x0001f2ac, 0x0001f4ac, 0x0001f6ac, + 0x0001f8ac, 0x0001faac, 0x0001fcac, 0x0001feac, + 0x000001ad, 0x000003ad, 0x000005ad, 0x000007ad, + 0x000009ad, 0x00000bad, 0x00000dad, 0x00000fad, + 0x000011ad, 0x000013ad, 0x000015ad, 0x000017ad, + 0x000019ad, 0x00001bad, 0x00001dad, 0x00001fad, + 0x000021ad, 0x000023ad, 0x000025ad, 0x000027ad, + 0x000029ad, 0x00002bad, 0x00002dad, 0x00002fad, + 0x000031ad, 0x000033ad, 0x000035ad, 0x000037ad, + 0x000039ad, 0x00003bad, 0x00003dad, 0x00003fad, + 0x000041ad, 0x000043ad, 0x000045ad, 0x000047ad, + 0x000049ad, 0x00004bad, 0x00004dad, 0x00004fad, + 
0x000051ad, 0x000053ad, 0x000055ad, 0x000057ad, + 0x000059ad, 0x00005bad, 0x00005dad, 0x00005fad, + 0x000061ad, 0x000063ad, 0x000065ad, 0x000067ad, + 0x000069ad, 0x00006bad, 0x00006dad, 0x00006fad, + 0x000071ad, 0x000073ad, 0x000075ad, 0x000077ad, + 0x000079ad, 0x00007bad, 0x00007dad, 0x00007fad, + 0x000081ad, 0x000083ad, 0x000085ad, 0x000087ad, + 0x000089ad, 0x00008bad, 0x00008dad, 0x00008fad, + 0x000091ad, 0x000093ad, 0x000095ad, 0x000097ad, + 0x000099ad, 0x00009bad, 0x00009dad, 0x00009fad, + 0x0000a1ad, 0x0000a3ad, 0x0000a5ad, 0x0000a7ad, + 0x0000a9ad, 0x0000abad, 0x0000adad, 0x0000afad, + 0x0000b1ad, 0x0000b3ad, 0x0000b5ad, 0x0000b7ad, + 0x0000b9ad, 0x0000bbad, 0x0000bdad, 0x0000bfad, + 0x0000c1ad, 0x0000c3ad, 0x0000c5ad, 0x0000c7ad, + 0x0000c9ad, 0x0000cbad, 0x0000cdad, 0x0000cfad, + 0x0000d1ad, 0x0000d3ad, 0x0000d5ad, 0x0000d7ad, + 0x0000d9ad, 0x0000dbad, 0x0000ddad, 0x0000dfad, + 0x0000e1ad, 0x0000e3ad, 0x0000e5ad, 0x0000e7ad, + 0x0000e9ad, 0x0000ebad, 0x0000edad, 0x0000efad, + 0x0000f1ad, 0x0000f3ad, 0x0000f5ad, 0x0000f7ad, + 0x0000f9ad, 0x0000fbad, 0x0000fdad, 0x0000ffad, + 0x000101ad, 0x000103ad, 0x000105ad, 0x000107ad, + 0x000109ad, 0x00010bad, 0x00010dad, 0x00010fad, + 0x000111ad, 0x000113ad, 0x000115ad, 0x000117ad, + 0x000119ad, 0x00011bad, 0x00011dad, 0x00011fad, + 0x000121ad, 0x000123ad, 0x000125ad, 0x000127ad, + 0x000129ad, 0x00012bad, 0x00012dad, 0x00012fad, + 0x000131ad, 0x000133ad, 0x000135ad, 0x000137ad, + 0x000139ad, 0x00013bad, 0x00013dad, 0x00013fad, + 0x000141ad, 0x000143ad, 0x000145ad, 0x000147ad, + 0x000149ad, 0x00014bad, 0x00014dad, 0x00014fad, + 0x000151ad, 0x000153ad, 0x000155ad, 0x000157ad, + 0x000159ad, 0x00015bad, 0x00015dad, 0x00015fad, + 0x000161ad, 0x000163ad, 0x000165ad, 0x000167ad, + 0x000169ad, 0x00016bad, 0x00016dad, 0x00016fad, + 0x000171ad, 0x000173ad, 0x000175ad, 0x000177ad, + 0x000179ad, 0x00017bad, 0x00017dad, 0x00017fad, + 0x000181ad, 0x000183ad, 0x000185ad, 0x000187ad, + 0x000189ad, 0x00018bad, 0x00018dad, 0x00018fad, + 
0x000191ad, 0x000193ad, 0x000195ad, 0x000197ad, + 0x000199ad, 0x00019bad, 0x00019dad, 0x00019fad, + 0x0001a1ad, 0x0001a3ad, 0x0001a5ad, 0x0001a7ad, + 0x0001a9ad, 0x0001abad, 0x0001adad, 0x0001afad, + 0x0001b1ad, 0x0001b3ad, 0x0001b5ad, 0x0001b7ad, + 0x0001b9ad, 0x0001bbad, 0x0001bdad, 0x0001bfad, + 0x0001c1ad, 0x0001c3ad, 0x0001c5ad, 0x0001c7ad, + 0x0001c9ad, 0x0001cbad, 0x0001cdad, 0x0001cfad, + 0x0001d1ad, 0x0001d3ad, 0x0001d5ad, 0x0001d7ad, + 0x0001d9ad, 0x0001dbad, 0x0001ddad, 0x0001dfad, + 0x0001e1ad, 0x0001e3ad, 0x0001e5ad, 0x0001e7ad, + 0x0001e9ad, 0x0001ebad, 0x0001edad, 0x0001efad, + 0x0001f1ad, 0x0001f3ad, 0x0001f5ad, 0x0001f7ad, + 0x0001f9ad, 0x0001fbad, 0x0001fdad, 0x0001ffad, + 0x000201ad, 0x000203ad, 0x000205ad, 0x000207ad, + 0x000209ad, 0x00020bad, 0x00020dad, 0x00020fad, + 0x000211ad, 0x000213ad, 0x000215ad, 0x000217ad, + 0x000219ad, 0x00021bad, 0x00021dad, 0x00021fad, + 0x000221ad, 0x000223ad, 0x000225ad, 0x000227ad, + 0x000229ad, 0x00022bad, 0x00022dad, 0x00022fad, + 0x000231ad, 0x000233ad, 0x000235ad, 0x000237ad, + 0x000239ad, 0x00023bad, 0x00023dad, 0x00023fad, + 0x000241ad, 0x000243ad, 0x000245ad, 0x000247ad, + 0x000249ad, 0x00024bad, 0x00024dad, 0x00024fad, + 0x000251ad, 0x000253ad, 0x000255ad, 0x000257ad, + 0x000259ad, 0x00025bad, 0x00025dad, 0x00025fad, + 0x000261ad, 0x000263ad, 0x000265ad, 0x000267ad, + 0x000269ad, 0x00026bad, 0x00026dad, 0x00026fad, + 0x000271ad, 0x000273ad, 0x000275ad, 0x000277ad, + 0x000279ad, 0x00027bad, 0x00027dad, 0x00027fad, + 0x000281ad, 0x000283ad, 0x000285ad, 0x000287ad, + 0x000289ad, 0x00028bad, 0x00028dad, 0x00028fad, + 0x000291ad, 0x000293ad, 0x000295ad, 0x000297ad, + 0x000299ad, 0x00029bad, 0x00029dad, 0x00029fad, + 0x0002a1ad, 0x0002a3ad, 0x0002a5ad, 0x0002a7ad, + 0x0002a9ad, 0x0002abad, 0x0002adad, 0x0002afad, + 0x0002b1ad, 0x0002b3ad, 0x0002b5ad, 0x0002b7ad, + 0x0002b9ad, 0x0002bbad, 0x0002bdad, 0x0002bfad, + 0x0002c1ad, 0x0002c3ad, 0x0002c5ad, 0x0002c7ad, + 0x0002c9ad, 0x0002cbad, 0x0002cdad, 0x0002cfad, + 
0x0002d1ad, 0x0002d3ad, 0x0002d5ad, 0x0002d7ad, + 0x0002d9ad, 0x0002dbad, 0x0002ddad, 0x0002dfad, + 0x0002e1ad, 0x0002e3ad, 0x0002e5ad, 0x0002e7ad, + 0x0002e9ad, 0x0002ebad, 0x0002edad, 0x0002efad, + 0x0002f1ad, 0x0002f3ad, 0x0002f5ad, 0x0002f7ad, + 0x0002f9ad, 0x0002fbad, 0x0002fdad, 0x0002ffad, + 0x000301ad, 0x000303ad, 0x000305ad, 0x000307ad, + 0x000309ad, 0x00030bad, 0x00030dad, 0x00030fad, + 0x000311ad, 0x000313ad, 0x000315ad, 0x000317ad, + 0x000319ad, 0x00031bad, 0x00031dad, 0x00031fad, + 0x000321ad, 0x000323ad, 0x000325ad, 0x000327ad, + 0x000329ad, 0x00032bad, 0x00032dad, 0x00032fad, + 0x000331ad, 0x000333ad, 0x000335ad, 0x000337ad, + 0x000339ad, 0x00033bad, 0x00033dad, 0x00033fad, + 0x000341ad, 0x000343ad, 0x000345ad, 0x000347ad, + 0x000349ad, 0x00034bad, 0x00034dad, 0x00034fad, + 0x000351ad, 0x000353ad, 0x000355ad, 0x000357ad, + 0x000359ad, 0x00035bad, 0x00035dad, 0x00035fad, + 0x000361ad, 0x000363ad, 0x000365ad, 0x000367ad, + 0x000369ad, 0x00036bad, 0x00036dad, 0x00036fad, + 0x000371ad, 0x000373ad, 0x000375ad, 0x000377ad, + 0x000379ad, 0x00037bad, 0x00037dad, 0x00037fad, + 0x000381ad, 0x000383ad, 0x000385ad, 0x000387ad, + 0x000389ad, 0x00038bad, 0x00038dad, 0x00038fad, + 0x000391ad, 0x000393ad, 0x000395ad, 0x000397ad, + 0x000399ad, 0x00039bad, 0x00039dad, 0x00039fad, + 0x0003a1ad, 0x0003a3ad, 0x0003a5ad, 0x0003a7ad, + 0x0003a9ad, 0x0003abad, 0x0003adad, 0x0003afad, + 0x0003b1ad, 0x0003b3ad, 0x0003b5ad, 0x0003b7ad, + 0x0003b9ad, 0x0003bbad, 0x0003bdad, 0x0003bfad, + 0x0003c1ad, 0x0003c3ad, 0x0003c5ad, 0x0003c7ad, + 0x0003c9ad, 0x0003cbad, 0x0003cdad, 0x0003cfad, + 0x0003d1ad, 0x0003d3ad, 0x0003d5ad, 0x0003d7ad, + 0x0003d9ad, 0x0003dbad, 0x0003ddad, 0x0003dfad, + 0x0003e1ad, 0x0003e3ad, 0x0003e5ad, 0x0003e7ad, + 0x0003e9ad, 0x0003ebad, 0x0003edad, 0x0003efad, + 0x0003f1ad, 0x0003f3ad, 0x0003f5ad, 0x0003f7ad, + 0x0003f9ad, 0x0003fbad, 0x0003fdad, 0x0003ffad, + 0x0000036e, 0x0000076e, 0x00000b6e, 0x00000f6e, + 0x0000136e, 0x0000176e, 0x00001b6e, 0x00001f6e, + 
0x0000236e, 0x0000276e, 0x00002b6e, 0x00002f6e, + 0x0000336e, 0x0000376e, 0x00003b6e, 0x00003f6e, + 0x0000436e, 0x0000476e, 0x00004b6e, 0x00004f6e, + 0x0000536e, 0x0000576e, 0x00005b6e, 0x00005f6e, + 0x0000636e, 0x0000676e, 0x00006b6e, 0x00006f6e, + 0x0000736e, 0x0000776e, 0x00007b6e, 0x00007f6e, + 0x0000836e, 0x0000876e, 0x00008b6e, 0x00008f6e, + 0x0000936e, 0x0000976e, 0x00009b6e, 0x00009f6e, + 0x0000a36e, 0x0000a76e, 0x0000ab6e, 0x0000af6e, + 0x0000b36e, 0x0000b76e, 0x0000bb6e, 0x0000bf6e, + 0x0000c36e, 0x0000c76e, 0x0000cb6e, 0x0000cf6e, + 0x0000d36e, 0x0000d76e, 0x0000db6e, 0x0000df6e, + 0x0000e36e, 0x0000e76e, 0x0000eb6e, 0x0000ef6e, + 0x0000f36e, 0x0000f76e, 0x0000fb6e, 0x0000ff6e, + 0x0001036e, 0x0001076e, 0x00010b6e, 0x00010f6e, + 0x0001136e, 0x0001176e, 0x00011b6e, 0x00011f6e, + 0x0001236e, 0x0001276e, 0x00012b6e, 0x00012f6e, + 0x0001336e, 0x0001376e, 0x00013b6e, 0x00013f6e, + 0x0001436e, 0x0001476e, 0x00014b6e, 0x00014f6e, + 0x0001536e, 0x0001576e, 0x00015b6e, 0x00015f6e, + 0x0001636e, 0x0001676e, 0x00016b6e, 0x00016f6e, + 0x0001736e, 0x0001776e, 0x00017b6e, 0x00017f6e, + 0x0001836e, 0x0001876e, 0x00018b6e, 0x00018f6e, + 0x0001936e, 0x0001976e, 0x00019b6e, 0x00019f6e, + 0x0001a36e, 0x0001a76e, 0x0001ab6e, 0x0001af6e, + 0x0001b36e, 0x0001b76e, 0x0001bb6e, 0x0001bf6e, + 0x0001c36e, 0x0001c76e, 0x0001cb6e, 0x0001cf6e, + 0x0001d36e, 0x0001d76e, 0x0001db6e, 0x0001df6e, + 0x0001e36e, 0x0001e76e, 0x0001eb6e, 0x0001ef6e, + 0x0001f36e, 0x0001f76e, 0x0001fb6e, 0x0001ff6e, + 0x0002036e, 0x0002076e, 0x00020b6e, 0x00020f6e, + 0x0002136e, 0x0002176e, 0x00021b6e, 0x00021f6e, + 0x0002236e, 0x0002276e, 0x00022b6e, 0x00022f6e, + 0x0002336e, 0x0002376e, 0x00023b6e, 0x00023f6e, + 0x0002436e, 0x0002476e, 0x00024b6e, 0x00024f6e, + 0x0002536e, 0x0002576e, 0x00025b6e, 0x00025f6e, + 0x0002636e, 0x0002676e, 0x00026b6e, 0x00026f6e, + 0x0002736e, 0x0002776e, 0x00027b6e, 0x00027f6e, + 0x0002836e, 0x0002876e, 0x00028b6e, 0x00028f6e, + 0x0002936e, 0x0002976e, 0x00029b6e, 0x00029f6e, + 
0x0002a36e, 0x0002a76e, 0x0002ab6e, 0x0002af6e, + 0x0002b36e, 0x0002b76e, 0x0002bb6e, 0x0002bf6e, + 0x0002c36e, 0x0002c76e, 0x0002cb6e, 0x0002cf6e, + 0x0002d36e, 0x0002d76e, 0x0002db6e, 0x0002df6e, + 0x0002e36e, 0x0002e76e, 0x0002eb6e, 0x0002ef6e, + 0x0002f36e, 0x0002f76e, 0x0002fb6e, 0x0002ff6e, + 0x0003036e, 0x0003076e, 0x00030b6e, 0x00030f6e, + 0x0003136e, 0x0003176e, 0x00031b6e, 0x00031f6e, + 0x0003236e, 0x0003276e, 0x00032b6e, 0x00032f6e, + 0x0003336e, 0x0003376e, 0x00033b6e, 0x00033f6e, + 0x0003436e, 0x0003476e, 0x00034b6e, 0x00034f6e, + 0x0003536e, 0x0003576e, 0x00035b6e, 0x00035f6e, + 0x0003636e, 0x0003676e, 0x00036b6e, 0x00036f6e, + 0x0003736e, 0x0003776e, 0x00037b6e, 0x00037f6e, + 0x0003836e, 0x0003876e, 0x00038b6e, 0x00038f6e, + 0x0003936e, 0x0003976e, 0x00039b6e, 0x00039f6e, + 0x0003a36e, 0x0003a76e, 0x0003ab6e, 0x0003af6e, + 0x0003b36e, 0x0003b76e, 0x0003bb6e, 0x0003bf6e, + 0x0003c36e, 0x0003c76e, 0x0003cb6e, 0x0003cf6e, + 0x0003d36e, 0x0003d76e, 0x0003db6e, 0x0003df6e, + 0x0003e36e, 0x0003e76e, 0x0003eb6e, 0x0003ef6e, + 0x0003f36e, 0x0003f76e, 0x0003fb6e, 0x0003ff6e, + 0x0004036e, 0x0004076e, 0x00040b6e, 0x00040f6e, + 0x0004136e, 0x0004176e, 0x00041b6e, 0x00041f6e, + 0x0004236e, 0x0004276e, 0x00042b6e, 0x00042f6e, + 0x0004336e, 0x0004376e, 0x00043b6e, 0x00043f6e, + 0x0004436e, 0x0004476e, 0x00044b6e, 0x00044f6e, + 0x0004536e, 0x0004576e, 0x00045b6e, 0x00045f6e, + 0x0004636e, 0x0004676e, 0x00046b6e, 0x00046f6e, + 0x0004736e, 0x0004776e, 0x00047b6e, 0x00047f6e, + 0x0004836e, 0x0004876e, 0x00048b6e, 0x00048f6e, + 0x0004936e, 0x0004976e, 0x00049b6e, 0x00049f6e, + 0x0004a36e, 0x0004a76e, 0x0004ab6e, 0x0004af6e, + 0x0004b36e, 0x0004b76e, 0x0004bb6e, 0x0004bf6e, + 0x0004c36e, 0x0004c76e, 0x0004cb6e, 0x0004cf6e, + 0x0004d36e, 0x0004d76e, 0x0004db6e, 0x0004df6e, + 0x0004e36e, 0x0004e76e, 0x0004eb6e, 0x0004ef6e, + 0x0004f36e, 0x0004f76e, 0x0004fb6e, 0x0004ff6e, + 0x0005036e, 0x0005076e, 0x00050b6e, 0x00050f6e, + 0x0005136e, 0x0005176e, 0x00051b6e, 0x00051f6e, + 
0x0005236e, 0x0005276e, 0x00052b6e, 0x00052f6e, + 0x0005336e, 0x0005376e, 0x00053b6e, 0x00053f6e, + 0x0005436e, 0x0005476e, 0x00054b6e, 0x00054f6e, + 0x0005536e, 0x0005576e, 0x00055b6e, 0x00055f6e, + 0x0005636e, 0x0005676e, 0x00056b6e, 0x00056f6e, + 0x0005736e, 0x0005776e, 0x00057b6e, 0x00057f6e, + 0x0005836e, 0x0005876e, 0x00058b6e, 0x00058f6e, + 0x0005936e, 0x0005976e, 0x00059b6e, 0x00059f6e, + 0x0005a36e, 0x0005a76e, 0x0005ab6e, 0x0005af6e, + 0x0005b36e, 0x0005b76e, 0x0005bb6e, 0x0005bf6e, + 0x0005c36e, 0x0005c76e, 0x0005cb6e, 0x0005cf6e, + 0x0005d36e, 0x0005d76e, 0x0005db6e, 0x0005df6e, + 0x0005e36e, 0x0005e76e, 0x0005eb6e, 0x0005ef6e, + 0x0005f36e, 0x0005f76e, 0x0005fb6e, 0x0005ff6e, + 0x0006036e, 0x0006076e, 0x00060b6e, 0x00060f6e, + 0x0006136e, 0x0006176e, 0x00061b6e, 0x00061f6e, + 0x0006236e, 0x0006276e, 0x00062b6e, 0x00062f6e, + 0x0006336e, 0x0006376e, 0x00063b6e, 0x00063f6e, + 0x0006436e, 0x0006476e, 0x00064b6e, 0x00064f6e, + 0x0006536e, 0x0006576e, 0x00065b6e, 0x00065f6e, + 0x0006636e, 0x0006676e, 0x00066b6e, 0x00066f6e, + 0x0006736e, 0x0006776e, 0x00067b6e, 0x00067f6e, + 0x0006836e, 0x0006876e, 0x00068b6e, 0x00068f6e, + 0x0006936e, 0x0006976e, 0x00069b6e, 0x00069f6e, + 0x0006a36e, 0x0006a76e, 0x0006ab6e, 0x0006af6e, + 0x0006b36e, 0x0006b76e, 0x0006bb6e, 0x0006bf6e, + 0x0006c36e, 0x0006c76e, 0x0006cb6e, 0x0006cf6e, + 0x0006d36e, 0x0006d76e, 0x0006db6e, 0x0006df6e, + 0x0006e36e, 0x0006e76e, 0x0006eb6e, 0x0006ef6e, + 0x0006f36e, 0x0006f76e, 0x0006fb6e, 0x0006ff6e, + 0x0007036e, 0x0007076e, 0x00070b6e, 0x00070f6e, + 0x0007136e, 0x0007176e, 0x00071b6e, 0x00071f6e, + 0x0007236e, 0x0007276e, 0x00072b6e, 0x00072f6e, + 0x0007336e, 0x0007376e, 0x00073b6e, 0x00073f6e, + 0x0007436e, 0x0007476e, 0x00074b6e, 0x00074f6e, + 0x0007536e, 0x0007576e, 0x00075b6e, 0x00075f6e, + 0x0007636e, 0x0007676e, 0x00076b6e, 0x00076f6e, + 0x0007736e, 0x0007776e, 0x00077b6e, 0x00077f6e, + 0x0007836e, 0x0007876e, 0x00078b6e, 0x00078f6e, + 0x0007936e, 0x0007976e, 0x00079b6e, 0x00079f6e, + 
0x0007a36e, 0x0007a76e, 0x0007ab6e, 0x0007af6e, + 0x0007b36e, 0x0007b76e, 0x0007bb6e, 0x0007bf6e, + 0x0007c36e, 0x0007c76e, 0x0007cb6e, 0x0007cf6e, + 0x0007d36e, 0x0007d76e, 0x0007db6e, 0x0007df6e, + 0x0007e36e, 0x0007e76e, 0x0007eb6e, 0x0007ef6e, + 0x0007f36e, 0x0007f76e, 0x0007fb6e, 0x0007ff6e, + 0x000000ef, 0x000004ef, 0x000008ef, 0x00000cef, + 0x000010ef, 0x000014ef, 0x000018ef, 0x00001cef, + 0x000020ef, 0x000024ef, 0x000028ef, 0x00002cef, + 0x000030ef, 0x000034ef, 0x000038ef, 0x00003cef, + 0x000040ef, 0x000044ef, 0x000048ef, 0x00004cef, + 0x000050ef, 0x000054ef, 0x000058ef, 0x00005cef, + 0x000060ef, 0x000064ef, 0x000068ef, 0x00006cef, + 0x000070ef, 0x000074ef, 0x000078ef, 0x00007cef, + 0x000080ef, 0x000084ef, 0x000088ef, 0x00008cef, + 0x000090ef, 0x000094ef, 0x000098ef, 0x00009cef, + 0x0000a0ef, 0x0000a4ef, 0x0000a8ef, 0x0000acef, + 0x0000b0ef, 0x0000b4ef, 0x0000b8ef, 0x0000bcef, + 0x0000c0ef, 0x0000c4ef, 0x0000c8ef, 0x0000ccef, + 0x0000d0ef, 0x0000d4ef, 0x0000d8ef, 0x0000dcef, + 0x0000e0ef, 0x0000e4ef, 0x0000e8ef, 0x0000ecef, + 0x0000f0ef, 0x0000f4ef, 0x0000f8ef, 0x0000fcef, + 0x000100ef, 0x000104ef, 0x000108ef, 0x00010cef, + 0x000110ef, 0x000114ef, 0x000118ef, 0x00011cef, + 0x000120ef, 0x000124ef, 0x000128ef, 0x00012cef, + 0x000130ef, 0x000134ef, 0x000138ef, 0x00013cef, + 0x000140ef, 0x000144ef, 0x000148ef, 0x00014cef, + 0x000150ef, 0x000154ef, 0x000158ef, 0x00015cef, + 0x000160ef, 0x000164ef, 0x000168ef, 0x00016cef, + 0x000170ef, 0x000174ef, 0x000178ef, 0x00017cef, + 0x000180ef, 0x000184ef, 0x000188ef, 0x00018cef, + 0x000190ef, 0x000194ef, 0x000198ef, 0x00019cef, + 0x0001a0ef, 0x0001a4ef, 0x0001a8ef, 0x0001acef, + 0x0001b0ef, 0x0001b4ef, 0x0001b8ef, 0x0001bcef, + 0x0001c0ef, 0x0001c4ef, 0x0001c8ef, 0x0001ccef, + 0x0001d0ef, 0x0001d4ef, 0x0001d8ef, 0x0001dcef, + 0x0001e0ef, 0x0001e4ef, 0x0001e8ef, 0x0001ecef, + 0x0001f0ef, 0x0001f4ef, 0x0001f8ef, 0x0001fcef, + 0x000200ef, 0x000204ef, 0x000208ef, 0x00020cef, + 0x000210ef, 0x000214ef, 0x000218ef, 0x00021cef, + 
0x000220ef, 0x000224ef, 0x000228ef, 0x00022cef, + 0x000230ef, 0x000234ef, 0x000238ef, 0x00023cef, + 0x000240ef, 0x000244ef, 0x000248ef, 0x00024cef, + 0x000250ef, 0x000254ef, 0x000258ef, 0x00025cef, + 0x000260ef, 0x000264ef, 0x000268ef, 0x00026cef, + 0x000270ef, 0x000274ef, 0x000278ef, 0x00027cef, + 0x000280ef, 0x000284ef, 0x000288ef, 0x00028cef, + 0x000290ef, 0x000294ef, 0x000298ef, 0x00029cef, + 0x0002a0ef, 0x0002a4ef, 0x0002a8ef, 0x0002acef, + 0x0002b0ef, 0x0002b4ef, 0x0002b8ef, 0x0002bcef, + 0x0002c0ef, 0x0002c4ef, 0x0002c8ef, 0x0002ccef, + 0x0002d0ef, 0x0002d4ef, 0x0002d8ef, 0x0002dcef, + 0x0002e0ef, 0x0002e4ef, 0x0002e8ef, 0x0002ecef, + 0x0002f0ef, 0x0002f4ef, 0x0002f8ef, 0x0002fcef, + 0x000300ef, 0x000304ef, 0x000308ef, 0x00030cef, + 0x000310ef, 0x000314ef, 0x000318ef, 0x00031cef, + 0x000320ef, 0x000324ef, 0x000328ef, 0x00032cef, + 0x000330ef, 0x000334ef, 0x000338ef, 0x00033cef, + 0x000340ef, 0x000344ef, 0x000348ef, 0x00034cef, + 0x000350ef, 0x000354ef, 0x000358ef, 0x00035cef, + 0x000360ef, 0x000364ef, 0x000368ef, 0x00036cef, + 0x000370ef, 0x000374ef, 0x000378ef, 0x00037cef, + 0x000380ef, 0x000384ef, 0x000388ef, 0x00038cef, + 0x000390ef, 0x000394ef, 0x000398ef, 0x00039cef, + 0x0003a0ef, 0x0003a4ef, 0x0003a8ef, 0x0003acef, + 0x0003b0ef, 0x0003b4ef, 0x0003b8ef, 0x0003bcef, + 0x0003c0ef, 0x0003c4ef, 0x0003c8ef, 0x0003ccef, + 0x0003d0ef, 0x0003d4ef, 0x0003d8ef, 0x0003dcef, + 0x0003e0ef, 0x0003e4ef, 0x0003e8ef, 0x0003ecef, + 0x0003f0ef, 0x0003f4ef, 0x0003f8ef, 0x0003fcef, + 0x000400ef, 0x000404ef, 0x000408ef, 0x00040cef, + 0x000410ef, 0x000414ef, 0x000418ef, 0x00041cef, + 0x000420ef, 0x000424ef, 0x000428ef, 0x00042cef, + 0x000430ef, 0x000434ef, 0x000438ef, 0x00043cef, + 0x000440ef, 0x000444ef, 0x000448ef, 0x00044cef, + 0x000450ef, 0x000454ef, 0x000458ef, 0x00045cef, + 0x000460ef, 0x000464ef, 0x000468ef, 0x00046cef, + 0x000470ef, 0x000474ef, 0x000478ef, 0x00047cef, + 0x000480ef, 0x000484ef, 0x000488ef, 0x00048cef, + 0x000490ef, 0x000494ef, 0x000498ef, 0x00049cef, + 
0x0004a0ef, 0x0004a4ef, 0x0004a8ef, 0x0004acef, + 0x0004b0ef, 0x0004b4ef, 0x0004b8ef, 0x0004bcef, + 0x0004c0ef, 0x0004c4ef, 0x0004c8ef, 0x0004ccef, + 0x0004d0ef, 0x0004d4ef, 0x0004d8ef, 0x0004dcef, + 0x0004e0ef, 0x0004e4ef, 0x0004e8ef, 0x0004ecef, + 0x0004f0ef, 0x0004f4ef, 0x0004f8ef, 0x0004fcef, + 0x000500ef, 0x000504ef, 0x000508ef, 0x00050cef, + 0x000510ef, 0x000514ef, 0x000518ef, 0x00051cef, + 0x000520ef, 0x000524ef, 0x000528ef, 0x00052cef, + 0x000530ef, 0x000534ef, 0x000538ef, 0x00053cef, + 0x000540ef, 0x000544ef, 0x000548ef, 0x00054cef, + 0x000550ef, 0x000554ef, 0x000558ef, 0x00055cef, + 0x000560ef, 0x000564ef, 0x000568ef, 0x00056cef, + 0x000570ef, 0x000574ef, 0x000578ef, 0x00057cef, + 0x000580ef, 0x000584ef, 0x000588ef, 0x00058cef, + 0x000590ef, 0x000594ef, 0x000598ef, 0x00059cef, + 0x0005a0ef, 0x0005a4ef, 0x0005a8ef, 0x0005acef, + 0x0005b0ef, 0x0005b4ef, 0x0005b8ef, 0x0005bcef, + 0x0005c0ef, 0x0005c4ef, 0x0005c8ef, 0x0005ccef, + 0x0005d0ef, 0x0005d4ef, 0x0005d8ef, 0x0005dcef, + 0x0005e0ef, 0x0005e4ef, 0x0005e8ef, 0x0005ecef, + 0x0005f0ef, 0x0005f4ef, 0x0005f8ef, 0x0005fcef, + 0x000600ef, 0x000604ef, 0x000608ef, 0x00060cef, + 0x000610ef, 0x000614ef, 0x000618ef, 0x00061cef, + 0x000620ef, 0x000624ef, 0x000628ef, 0x00062cef, + 0x000630ef, 0x000634ef, 0x000638ef, 0x00063cef, + 0x000640ef, 0x000644ef, 0x000648ef, 0x00064cef, + 0x000650ef, 0x000654ef, 0x000658ef, 0x00065cef, + 0x000660ef, 0x000664ef, 0x000668ef, 0x00066cef, + 0x000670ef, 0x000674ef, 0x000678ef, 0x00067cef, + 0x000680ef, 0x000684ef, 0x000688ef, 0x00068cef, + 0x000690ef, 0x000694ef, 0x000698ef, 0x00069cef, + 0x0006a0ef, 0x0006a4ef, 0x0006a8ef, 0x0006acef, + 0x0006b0ef, 0x0006b4ef, 0x0006b8ef, 0x0006bcef, + 0x0006c0ef, 0x0006c4ef, 0x0006c8ef, 0x0006ccef, + 0x0006d0ef, 0x0006d4ef, 0x0006d8ef, 0x0006dcef, + 0x0006e0ef, 0x0006e4ef, 0x0006e8ef, 0x0006ecef, + 0x0006f0ef, 0x0006f4ef, 0x0006f8ef, 0x0006fcef, + 0x000700ef, 0x000704ef, 0x000708ef, 0x00070cef, + 0x000710ef, 0x000714ef, 0x000718ef, 0x00071cef, + 
0x000720ef, 0x000724ef, 0x000728ef, 0x00072cef, + 0x000730ef, 0x000734ef, 0x000738ef, 0x00073cef, + 0x000740ef, 0x000744ef, 0x000748ef, 0x00074cef, + 0x000750ef, 0x000754ef, 0x000758ef, 0x00075cef, + 0x000760ef, 0x000764ef, 0x000768ef, 0x00076cef, + 0x000770ef, 0x000774ef, 0x000778ef, 0x00077cef, + 0x000780ef, 0x000784ef, 0x000788ef, 0x00078cef, + 0x000790ef, 0x000794ef, 0x000798ef, 0x00079cef, + 0x0007a0ef, 0x0007a4ef, 0x0007a8ef, 0x0007acef, + 0x0007b0ef, 0x0007b4ef, 0x0007b8ef, 0x0007bcef, + 0x0007c0ef, 0x0007c4ef, 0x0007c8ef, 0x0007ccef, + 0x0007d0ef, 0x0007d4ef, 0x0007d8ef, 0x0007dcef, + 0x0007e0ef, 0x0007e4ef, 0x0007e8ef, 0x0007ecef, + 0x0007f0ef, 0x0007f4ef, 0x0007f8ef, 0x0007fcef, + 0x000800ef, 0x000804ef, 0x000808ef, 0x00080cef, + 0x000810ef, 0x000814ef, 0x000818ef, 0x00081cef, + 0x000820ef, 0x000824ef, 0x000828ef, 0x00082cef, + 0x000830ef, 0x000834ef, 0x000838ef, 0x00083cef, + 0x000840ef, 0x000844ef, 0x000848ef, 0x00084cef, + 0x000850ef, 0x000854ef, 0x000858ef, 0x00085cef, + 0x000860ef, 0x000864ef, 0x000868ef, 0x00086cef, + 0x000870ef, 0x000874ef, 0x000878ef, 0x00087cef, + 0x000880ef, 0x000884ef, 0x000888ef, 0x00088cef, + 0x000890ef, 0x000894ef, 0x000898ef, 0x00089cef, + 0x0008a0ef, 0x0008a4ef, 0x0008a8ef, 0x0008acef, + 0x0008b0ef, 0x0008b4ef, 0x0008b8ef, 0x0008bcef, + 0x0008c0ef, 0x0008c4ef, 0x0008c8ef, 0x0008ccef, + 0x0008d0ef, 0x0008d4ef, 0x0008d8ef, 0x0008dcef, + 0x0008e0ef, 0x0008e4ef, 0x0008e8ef, 0x0008ecef, + 0x0008f0ef, 0x0008f4ef, 0x0008f8ef, 0x0008fcef, + 0x000900ef, 0x000904ef, 0x000908ef, 0x00090cef, + 0x000910ef, 0x000914ef, 0x000918ef, 0x00091cef, + 0x000920ef, 0x000924ef, 0x000928ef, 0x00092cef, + 0x000930ef, 0x000934ef, 0x000938ef, 0x00093cef, + 0x000940ef, 0x000944ef, 0x000948ef, 0x00094cef, + 0x000950ef, 0x000954ef, 0x000958ef, 0x00095cef, + 0x000960ef, 0x000964ef, 0x000968ef, 0x00096cef, + 0x000970ef, 0x000974ef, 0x000978ef, 0x00097cef, + 0x000980ef, 0x000984ef, 0x000988ef, 0x00098cef, + 0x000990ef, 0x000994ef, 0x000998ef, 0x00099cef, + 
0x0009a0ef, 0x0009a4ef, 0x0009a8ef, 0x0009acef, + 0x0009b0ef, 0x0009b4ef, 0x0009b8ef, 0x0009bcef, + 0x0009c0ef, 0x0009c4ef, 0x0009c8ef, 0x0009ccef, + 0x0009d0ef, 0x0009d4ef, 0x0009d8ef, 0x0009dcef, + 0x0009e0ef, 0x0009e4ef, 0x0009e8ef, 0x0009ecef, + 0x0009f0ef, 0x0009f4ef, 0x0009f8ef, 0x0009fcef, + 0x000a00ef, 0x000a04ef, 0x000a08ef, 0x000a0cef, + 0x000a10ef, 0x000a14ef, 0x000a18ef, 0x000a1cef, + 0x000a20ef, 0x000a24ef, 0x000a28ef, 0x000a2cef, + 0x000a30ef, 0x000a34ef, 0x000a38ef, 0x000a3cef, + 0x000a40ef, 0x000a44ef, 0x000a48ef, 0x000a4cef, + 0x000a50ef, 0x000a54ef, 0x000a58ef, 0x000a5cef, + 0x000a60ef, 0x000a64ef, 0x000a68ef, 0x000a6cef, + 0x000a70ef, 0x000a74ef, 0x000a78ef, 0x000a7cef, + 0x000a80ef, 0x000a84ef, 0x000a88ef, 0x000a8cef, + 0x000a90ef, 0x000a94ef, 0x000a98ef, 0x000a9cef, + 0x000aa0ef, 0x000aa4ef, 0x000aa8ef, 0x000aacef, + 0x000ab0ef, 0x000ab4ef, 0x000ab8ef, 0x000abcef, + 0x000ac0ef, 0x000ac4ef, 0x000ac8ef, 0x000accef, + 0x000ad0ef, 0x000ad4ef, 0x000ad8ef, 0x000adcef, + 0x000ae0ef, 0x000ae4ef, 0x000ae8ef, 0x000aecef, + 0x000af0ef, 0x000af4ef, 0x000af8ef, 0x000afcef, + 0x000b00ef, 0x000b04ef, 0x000b08ef, 0x000b0cef, + 0x000b10ef, 0x000b14ef, 0x000b18ef, 0x000b1cef, + 0x000b20ef, 0x000b24ef, 0x000b28ef, 0x000b2cef, + 0x000b30ef, 0x000b34ef, 0x000b38ef, 0x000b3cef, + 0x000b40ef, 0x000b44ef, 0x000b48ef, 0x000b4cef, + 0x000b50ef, 0x000b54ef, 0x000b58ef, 0x000b5cef, + 0x000b60ef, 0x000b64ef, 0x000b68ef, 0x000b6cef, + 0x000b70ef, 0x000b74ef, 0x000b78ef, 0x000b7cef, + 0x000b80ef, 0x000b84ef, 0x000b88ef, 0x000b8cef, + 0x000b90ef, 0x000b94ef, 0x000b98ef, 0x000b9cef, + 0x000ba0ef, 0x000ba4ef, 0x000ba8ef, 0x000bacef, + 0x000bb0ef, 0x000bb4ef, 0x000bb8ef, 0x000bbcef, + 0x000bc0ef, 0x000bc4ef, 0x000bc8ef, 0x000bccef, + 0x000bd0ef, 0x000bd4ef, 0x000bd8ef, 0x000bdcef, + 0x000be0ef, 0x000be4ef, 0x000be8ef, 0x000becef, + 0x000bf0ef, 0x000bf4ef, 0x000bf8ef, 0x000bfcef, + 0x000c00ef, 0x000c04ef, 0x000c08ef, 0x000c0cef, + 0x000c10ef, 0x000c14ef, 0x000c18ef, 0x000c1cef, + 
0x000c20ef, 0x000c24ef, 0x000c28ef, 0x000c2cef, + 0x000c30ef, 0x000c34ef, 0x000c38ef, 0x000c3cef, + 0x000c40ef, 0x000c44ef, 0x000c48ef, 0x000c4cef, + 0x000c50ef, 0x000c54ef, 0x000c58ef, 0x000c5cef, + 0x000c60ef, 0x000c64ef, 0x000c68ef, 0x000c6cef, + 0x000c70ef, 0x000c74ef, 0x000c78ef, 0x000c7cef, + 0x000c80ef, 0x000c84ef, 0x000c88ef, 0x000c8cef, + 0x000c90ef, 0x000c94ef, 0x000c98ef, 0x000c9cef, + 0x000ca0ef, 0x000ca4ef, 0x000ca8ef, 0x000cacef, + 0x000cb0ef, 0x000cb4ef, 0x000cb8ef, 0x000cbcef, + 0x000cc0ef, 0x000cc4ef, 0x000cc8ef, 0x000cccef, + 0x000cd0ef, 0x000cd4ef, 0x000cd8ef, 0x000cdcef, + 0x000ce0ef, 0x000ce4ef, 0x000ce8ef, 0x000cecef, + 0x000cf0ef, 0x000cf4ef, 0x000cf8ef, 0x000cfcef, + 0x000d00ef, 0x000d04ef, 0x000d08ef, 0x000d0cef, + 0x000d10ef, 0x000d14ef, 0x000d18ef, 0x000d1cef, + 0x000d20ef, 0x000d24ef, 0x000d28ef, 0x000d2cef, + 0x000d30ef, 0x000d34ef, 0x000d38ef, 0x000d3cef, + 0x000d40ef, 0x000d44ef, 0x000d48ef, 0x000d4cef, + 0x000d50ef, 0x000d54ef, 0x000d58ef, 0x000d5cef, + 0x000d60ef, 0x000d64ef, 0x000d68ef, 0x000d6cef, + 0x000d70ef, 0x000d74ef, 0x000d78ef, 0x000d7cef, + 0x000d80ef, 0x000d84ef, 0x000d88ef, 0x000d8cef, + 0x000d90ef, 0x000d94ef, 0x000d98ef, 0x000d9cef, + 0x000da0ef, 0x000da4ef, 0x000da8ef, 0x000dacef, + 0x000db0ef, 0x000db4ef, 0x000db8ef, 0x000dbcef, + 0x000dc0ef, 0x000dc4ef, 0x000dc8ef, 0x000dccef, + 0x000dd0ef, 0x000dd4ef, 0x000dd8ef, 0x000ddcef, + 0x000de0ef, 0x000de4ef, 0x000de8ef, 0x000decef, + 0x000df0ef, 0x000df4ef, 0x000df8ef, 0x000dfcef, + 0x000e00ef, 0x000e04ef, 0x000e08ef, 0x000e0cef, + 0x000e10ef, 0x000e14ef, 0x000e18ef, 0x000e1cef, + 0x000e20ef, 0x000e24ef, 0x000e28ef, 0x000e2cef, + 0x000e30ef, 0x000e34ef, 0x000e38ef, 0x000e3cef, + 0x000e40ef, 0x000e44ef, 0x000e48ef, 0x000e4cef, + 0x000e50ef, 0x000e54ef, 0x000e58ef, 0x000e5cef, + 0x000e60ef, 0x000e64ef, 0x000e68ef, 0x000e6cef, + 0x000e70ef, 0x000e74ef, 0x000e78ef, 0x000e7cef, + 0x000e80ef, 0x000e84ef, 0x000e88ef, 0x000e8cef, + 0x000e90ef, 0x000e94ef, 0x000e98ef, 0x000e9cef, + 
0x000ea0ef, 0x000ea4ef, 0x000ea8ef, 0x000eacef, + 0x000eb0ef, 0x000eb4ef, 0x000eb8ef, 0x000ebcef, + 0x000ec0ef, 0x000ec4ef, 0x000ec8ef, 0x000eccef, + 0x000ed0ef, 0x000ed4ef, 0x000ed8ef, 0x000edcef, + 0x000ee0ef, 0x000ee4ef, 0x000ee8ef, 0x000eecef, + 0x000ef0ef, 0x000ef4ef, 0x000ef8ef, 0x000efcef, + 0x000f00ef, 0x000f04ef, 0x000f08ef, 0x000f0cef, + 0x000f10ef, 0x000f14ef, 0x000f18ef, 0x000f1cef, + 0x000f20ef, 0x000f24ef, 0x000f28ef, 0x000f2cef, + 0x000f30ef, 0x000f34ef, 0x000f38ef, 0x000f3cef, + 0x000f40ef, 0x000f44ef, 0x000f48ef, 0x000f4cef, + 0x000f50ef, 0x000f54ef, 0x000f58ef, 0x000f5cef, + 0x000f60ef, 0x000f64ef, 0x000f68ef, 0x000f6cef, + 0x000f70ef, 0x000f74ef, 0x000f78ef, 0x000f7cef, + 0x000f80ef, 0x000f84ef, 0x000f88ef, 0x000f8cef, + 0x000f90ef, 0x000f94ef, 0x000f98ef, 0x000f9cef, + 0x000fa0ef, 0x000fa4ef, 0x000fa8ef, 0x000facef, + 0x000fb0ef, 0x000fb4ef, 0x000fb8ef, 0x000fbcef, + 0x000fc0ef, 0x000fc4ef, 0x000fc8ef, 0x000fccef, + 0x000fd0ef, 0x000fd4ef, 0x000fd8ef, 0x000fdcef, + 0x000fe0ef, 0x000fe4ef, 0x000fe8ef, 0x000fecef, + 0x000ff0ef, 0x000ff4ef, 0x000ff8ef, 0x000ffcef, + 0x000002ef, 0x000006ef, 0x00000aef, 0x00000eef, + 0x000012ef, 0x000016ef, 0x00001aef, 0x00001eef, + 0x000022ef, 0x000026ef, 0x00002aef, 0x00002eef, + 0x000032ef, 0x000036ef, 0x00003aef, 0x00003eef, + 0x000042ef, 0x000046ef, 0x00004aef, 0x00004eef, + 0x000052ef, 0x000056ef, 0x00005aef, 0x00005eef, + 0x000062ef, 0x000066ef, 0x00006aef, 0x00006eef, + 0x000072ef, 0x000076ef, 0x00007aef, 0x00007eef, + 0x000082ef, 0x000086ef, 0x00008aef, 0x00008eef, + 0x000092ef, 0x000096ef, 0x00009aef, 0x00009eef, + 0x0000a2ef, 0x0000a6ef, 0x0000aaef, 0x0000aeef, + 0x0000b2ef, 0x0000b6ef, 0x0000baef, 0x0000beef, + 0x0000c2ef, 0x0000c6ef, 0x0000caef, 0x0000ceef, + 0x0000d2ef, 0x0000d6ef, 0x0000daef, 0x0000deef, + 0x0000e2ef, 0x0000e6ef, 0x0000eaef, 0x0000eeef, + 0x0000f2ef, 0x0000f6ef, 0x0000faef, 0x0000feef, + 0x000102ef, 0x000106ef, 0x00010aef, 0x00010eef, + 0x000112ef, 0x000116ef, 0x00011aef, 0x00011eef, + 
0x000122ef, 0x000126ef, 0x00012aef, 0x00012eef, + 0x000132ef, 0x000136ef, 0x00013aef, 0x00013eef, + 0x000142ef, 0x000146ef, 0x00014aef, 0x00014eef, + 0x000152ef, 0x000156ef, 0x00015aef, 0x00015eef, + 0x000162ef, 0x000166ef, 0x00016aef, 0x00016eef, + 0x000172ef, 0x000176ef, 0x00017aef, 0x00017eef, + 0x000182ef, 0x000186ef, 0x00018aef, 0x00018eef, + 0x000192ef, 0x000196ef, 0x00019aef, 0x00019eef, + 0x0001a2ef, 0x0001a6ef, 0x0001aaef, 0x0001aeef, + 0x0001b2ef, 0x0001b6ef, 0x0001baef, 0x0001beef, + 0x0001c2ef, 0x0001c6ef, 0x0001caef, 0x0001ceef, + 0x0001d2ef, 0x0001d6ef, 0x0001daef, 0x0001deef, + 0x0001e2ef, 0x0001e6ef, 0x0001eaef, 0x0001eeef, + 0x0001f2ef, 0x0001f6ef, 0x0001faef, 0x0001feef, + 0x000202ef, 0x000206ef, 0x00020aef, 0x00020eef, + 0x000212ef, 0x000216ef, 0x00021aef, 0x00021eef, + 0x000222ef, 0x000226ef, 0x00022aef, 0x00022eef, + 0x000232ef, 0x000236ef, 0x00023aef, 0x00023eef, + 0x000242ef, 0x000246ef, 0x00024aef, 0x00024eef, + 0x000252ef, 0x000256ef, 0x00025aef, 0x00025eef, + 0x000262ef, 0x000266ef, 0x00026aef, 0x00026eef, + 0x000272ef, 0x000276ef, 0x00027aef, 0x00027eef, + 0x000282ef, 0x000286ef, 0x00028aef, 0x00028eef, + 0x000292ef, 0x000296ef, 0x00029aef, 0x00029eef, + 0x0002a2ef, 0x0002a6ef, 0x0002aaef, 0x0002aeef, + 0x0002b2ef, 0x0002b6ef, 0x0002baef, 0x0002beef, + 0x0002c2ef, 0x0002c6ef, 0x0002caef, 0x0002ceef, + 0x0002d2ef, 0x0002d6ef, 0x0002daef, 0x0002deef, + 0x0002e2ef, 0x0002e6ef, 0x0002eaef, 0x0002eeef, + 0x0002f2ef, 0x0002f6ef, 0x0002faef, 0x0002feef, + 0x000302ef, 0x000306ef, 0x00030aef, 0x00030eef, + 0x000312ef, 0x000316ef, 0x00031aef, 0x00031eef, + 0x000322ef, 0x000326ef, 0x00032aef, 0x00032eef, + 0x000332ef, 0x000336ef, 0x00033aef, 0x00033eef, + 0x000342ef, 0x000346ef, 0x00034aef, 0x00034eef, + 0x000352ef, 0x000356ef, 0x00035aef, 0x00035eef, + 0x000362ef, 0x000366ef, 0x00036aef, 0x00036eef, + 0x000372ef, 0x000376ef, 0x00037aef, 0x00037eef, + 0x000382ef, 0x000386ef, 0x00038aef, 0x00038eef, + 0x000392ef, 0x000396ef, 0x00039aef, 0x00039eef, + 
0x0003a2ef, 0x0003a6ef, 0x0003aaef, 0x0003aeef, + 0x0003b2ef, 0x0003b6ef, 0x0003baef, 0x0003beef, + 0x0003c2ef, 0x0003c6ef, 0x0003caef, 0x0003ceef, + 0x0003d2ef, 0x0003d6ef, 0x0003daef, 0x0003deef, + 0x0003e2ef, 0x0003e6ef, 0x0003eaef, 0x0003eeef, + 0x0003f2ef, 0x0003f6ef, 0x0003faef, 0x0003feef, + 0x000402ef, 0x000406ef, 0x00040aef, 0x00040eef, + 0x000412ef, 0x000416ef, 0x00041aef, 0x00041eef, + 0x000422ef, 0x000426ef, 0x00042aef, 0x00042eef, + 0x000432ef, 0x000436ef, 0x00043aef, 0x00043eef, + 0x000442ef, 0x000446ef, 0x00044aef, 0x00044eef, + 0x000452ef, 0x000456ef, 0x00045aef, 0x00045eef, + 0x000462ef, 0x000466ef, 0x00046aef, 0x00046eef, + 0x000472ef, 0x000476ef, 0x00047aef, 0x00047eef, + 0x000482ef, 0x000486ef, 0x00048aef, 0x00048eef, + 0x000492ef, 0x000496ef, 0x00049aef, 0x00049eef, + 0x0004a2ef, 0x0004a6ef, 0x0004aaef, 0x0004aeef, + 0x0004b2ef, 0x0004b6ef, 0x0004baef, 0x0004beef, + 0x0004c2ef, 0x0004c6ef, 0x0004caef, 0x0004ceef, + 0x0004d2ef, 0x0004d6ef, 0x0004daef, 0x0004deef, + 0x0004e2ef, 0x0004e6ef, 0x0004eaef, 0x0004eeef, + 0x0004f2ef, 0x0004f6ef, 0x0004faef, 0x0004feef, + 0x000502ef, 0x000506ef, 0x00050aef, 0x00050eef, + 0x000512ef, 0x000516ef, 0x00051aef, 0x00051eef, + 0x000522ef, 0x000526ef, 0x00052aef, 0x00052eef, + 0x000532ef, 0x000536ef, 0x00053aef, 0x00053eef, + 0x000542ef, 0x000546ef, 0x00054aef, 0x00054eef, + 0x000552ef, 0x000556ef, 0x00055aef, 0x00055eef, + 0x000562ef, 0x000566ef, 0x00056aef, 0x00056eef, + 0x000572ef, 0x000576ef, 0x00057aef, 0x00057eef, + 0x000582ef, 0x000586ef, 0x00058aef, 0x00058eef, + 0x000592ef, 0x000596ef, 0x00059aef, 0x00059eef, + 0x0005a2ef, 0x0005a6ef, 0x0005aaef, 0x0005aeef, + 0x0005b2ef, 0x0005b6ef, 0x0005baef, 0x0005beef, + 0x0005c2ef, 0x0005c6ef, 0x0005caef, 0x0005ceef, + 0x0005d2ef, 0x0005d6ef, 0x0005daef, 0x0005deef, + 0x0005e2ef, 0x0005e6ef, 0x0005eaef, 0x0005eeef, + 0x0005f2ef, 0x0005f6ef, 0x0005faef, 0x0005feef, + 0x000602ef, 0x000606ef, 0x00060aef, 0x00060eef, + 0x000612ef, 0x000616ef, 0x00061aef, 0x00061eef, + 
0x000622ef, 0x000626ef, 0x00062aef, 0x00062eef, + 0x000632ef, 0x000636ef, 0x00063aef, 0x00063eef, + 0x000642ef, 0x000646ef, 0x00064aef, 0x00064eef, + 0x000652ef, 0x000656ef, 0x00065aef, 0x00065eef, + 0x000662ef, 0x000666ef, 0x00066aef, 0x00066eef, + 0x000672ef, 0x000676ef, 0x00067aef, 0x00067eef, + 0x000682ef, 0x000686ef, 0x00068aef, 0x00068eef, + 0x000692ef, 0x000696ef, 0x00069aef, 0x00069eef, + 0x0006a2ef, 0x0006a6ef, 0x0006aaef, 0x0006aeef, + 0x0006b2ef, 0x0006b6ef, 0x0006baef, 0x0006beef, + 0x0006c2ef, 0x0006c6ef, 0x0006caef, 0x0006ceef, + 0x0006d2ef, 0x0006d6ef, 0x0006daef, 0x0006deef, + 0x0006e2ef, 0x0006e6ef, 0x0006eaef, 0x0006eeef, + 0x0006f2ef, 0x0006f6ef, 0x0006faef, 0x0006feef, + 0x000702ef, 0x000706ef, 0x00070aef, 0x00070eef, + 0x000712ef, 0x000716ef, 0x00071aef, 0x00071eef, + 0x000722ef, 0x000726ef, 0x00072aef, 0x00072eef, + 0x000732ef, 0x000736ef, 0x00073aef, 0x00073eef, + 0x000742ef, 0x000746ef, 0x00074aef, 0x00074eef, + 0x000752ef, 0x000756ef, 0x00075aef, 0x00075eef, + 0x000762ef, 0x000766ef, 0x00076aef, 0x00076eef, + 0x000772ef, 0x000776ef, 0x00077aef, 0x00077eef, + 0x000782ef, 0x000786ef, 0x00078aef, 0x00078eef, + 0x000792ef, 0x000796ef, 0x00079aef, 0x00079eef, + 0x0007a2ef, 0x0007a6ef, 0x0007aaef, 0x0007aeef, + 0x0007b2ef, 0x0007b6ef, 0x0007baef, 0x0007beef, + 0x0007c2ef, 0x0007c6ef, 0x0007caef, 0x0007ceef, + 0x0007d2ef, 0x0007d6ef, 0x0007daef, 0x0007deef, + 0x0007e2ef, 0x0007e6ef, 0x0007eaef, 0x0007eeef, + 0x0007f2ef, 0x0007f6ef, 0x0007faef, 0x0007feef, + 0x000802ef, 0x000806ef, 0x00080aef, 0x00080eef, + 0x000812ef, 0x000816ef, 0x00081aef, 0x00081eef, + 0x000822ef, 0x000826ef, 0x00082aef, 0x00082eef, + 0x000832ef, 0x000836ef, 0x00083aef, 0x00083eef, + 0x000842ef, 0x000846ef, 0x00084aef, 0x00084eef, + 0x000852ef, 0x000856ef, 0x00085aef, 0x00085eef, + 0x000862ef, 0x000866ef, 0x00086aef, 0x00086eef, + 0x000872ef, 0x000876ef, 0x00087aef, 0x00087eef, + 0x000882ef, 0x000886ef, 0x00088aef, 0x00088eef, + 0x000892ef, 0x000896ef, 0x00089aef, 0x00089eef, + 
0x0008a2ef, 0x0008a6ef, 0x0008aaef, 0x0008aeef, + 0x0008b2ef, 0x0008b6ef, 0x0008baef, 0x0008beef, + 0x0008c2ef, 0x0008c6ef, 0x0008caef, 0x0008ceef, + 0x0008d2ef, 0x0008d6ef, 0x0008daef, 0x0008deef, + 0x0008e2ef, 0x0008e6ef, 0x0008eaef, 0x0008eeef, + 0x0008f2ef, 0x0008f6ef, 0x0008faef, 0x0008feef, + 0x000902ef, 0x000906ef, 0x00090aef, 0x00090eef, + 0x000912ef, 0x000916ef, 0x00091aef, 0x00091eef, + 0x000922ef, 0x000926ef, 0x00092aef, 0x00092eef, + 0x000932ef, 0x000936ef, 0x00093aef, 0x00093eef, + 0x000942ef, 0x000946ef, 0x00094aef, 0x00094eef, + 0x000952ef, 0x000956ef, 0x00095aef, 0x00095eef, + 0x000962ef, 0x000966ef, 0x00096aef, 0x00096eef, + 0x000972ef, 0x000976ef, 0x00097aef, 0x00097eef, + 0x000982ef, 0x000986ef, 0x00098aef, 0x00098eef, + 0x000992ef, 0x000996ef, 0x00099aef, 0x00099eef, + 0x0009a2ef, 0x0009a6ef, 0x0009aaef, 0x0009aeef, + 0x0009b2ef, 0x0009b6ef, 0x0009baef, 0x0009beef, + 0x0009c2ef, 0x0009c6ef, 0x0009caef, 0x0009ceef, + 0x0009d2ef, 0x0009d6ef, 0x0009daef, 0x0009deef, + 0x0009e2ef, 0x0009e6ef, 0x0009eaef, 0x0009eeef, + 0x0009f2ef, 0x0009f6ef, 0x0009faef, 0x0009feef, + 0x000a02ef, 0x000a06ef, 0x000a0aef, 0x000a0eef, + 0x000a12ef, 0x000a16ef, 0x000a1aef, 0x000a1eef, + 0x000a22ef, 0x000a26ef, 0x000a2aef, 0x000a2eef, + 0x000a32ef, 0x000a36ef, 0x000a3aef, 0x000a3eef, + 0x000a42ef, 0x000a46ef, 0x000a4aef, 0x000a4eef, + 0x000a52ef, 0x000a56ef, 0x000a5aef, 0x000a5eef, + 0x000a62ef, 0x000a66ef, 0x000a6aef, 0x000a6eef, + 0x000a72ef, 0x000a76ef, 0x000a7aef, 0x000a7eef, + 0x000a82ef, 0x000a86ef, 0x000a8aef, 0x000a8eef, + 0x000a92ef, 0x000a96ef, 0x000a9aef, 0x000a9eef, + 0x000aa2ef, 0x000aa6ef, 0x000aaaef, 0x000aaeef, + 0x000ab2ef, 0x000ab6ef, 0x000abaef, 0x000abeef, + 0x000ac2ef, 0x000ac6ef, 0x000acaef, 0x000aceef, + 0x000ad2ef, 0x000ad6ef, 0x000adaef, 0x000adeef, + 0x000ae2ef, 0x000ae6ef, 0x000aeaef, 0x000aeeef, + 0x000af2ef, 0x000af6ef, 0x000afaef, 0x000afeef, + 0x000b02ef, 0x000b06ef, 0x000b0aef, 0x000b0eef, + 0x000b12ef, 0x000b16ef, 0x000b1aef, 0x000b1eef, + 
0x000b22ef, 0x000b26ef, 0x000b2aef, 0x000b2eef, + 0x000b32ef, 0x000b36ef, 0x000b3aef, 0x000b3eef, + 0x000b42ef, 0x000b46ef, 0x000b4aef, 0x000b4eef, + 0x000b52ef, 0x000b56ef, 0x000b5aef, 0x000b5eef, + 0x000b62ef, 0x000b66ef, 0x000b6aef, 0x000b6eef, + 0x000b72ef, 0x000b76ef, 0x000b7aef, 0x000b7eef, + 0x000b82ef, 0x000b86ef, 0x000b8aef, 0x000b8eef, + 0x000b92ef, 0x000b96ef, 0x000b9aef, 0x000b9eef, + 0x000ba2ef, 0x000ba6ef, 0x000baaef, 0x000baeef, + 0x000bb2ef, 0x000bb6ef, 0x000bbaef, 0x000bbeef, + 0x000bc2ef, 0x000bc6ef, 0x000bcaef, 0x000bceef, + 0x000bd2ef, 0x000bd6ef, 0x000bdaef, 0x000bdeef, + 0x000be2ef, 0x000be6ef, 0x000beaef, 0x000beeef, + 0x000bf2ef, 0x000bf6ef, 0x000bfaef, 0x000bfeef, + 0x000c02ef, 0x000c06ef, 0x000c0aef, 0x000c0eef, + 0x000c12ef, 0x000c16ef, 0x000c1aef, 0x000c1eef, + 0x000c22ef, 0x000c26ef, 0x000c2aef, 0x000c2eef, + 0x000c32ef, 0x000c36ef, 0x000c3aef, 0x000c3eef, + 0x000c42ef, 0x000c46ef, 0x000c4aef, 0x000c4eef, + 0x000c52ef, 0x000c56ef, 0x000c5aef, 0x000c5eef, + 0x000c62ef, 0x000c66ef, 0x000c6aef, 0x000c6eef, + 0x000c72ef, 0x000c76ef, 0x000c7aef, 0x000c7eef, + 0x000c82ef, 0x000c86ef, 0x000c8aef, 0x000c8eef, + 0x000c92ef, 0x000c96ef, 0x000c9aef, 0x000c9eef, + 0x000ca2ef, 0x000ca6ef, 0x000caaef, 0x000caeef, + 0x000cb2ef, 0x000cb6ef, 0x000cbaef, 0x000cbeef, + 0x000cc2ef, 0x000cc6ef, 0x000ccaef, 0x000cceef, + 0x000cd2ef, 0x000cd6ef, 0x000cdaef, 0x000cdeef, + 0x000ce2ef, 0x000ce6ef, 0x000ceaef, 0x000ceeef, + 0x000cf2ef, 0x000cf6ef, 0x000cfaef, 0x000cfeef, + 0x000d02ef, 0x000d06ef, 0x000d0aef, 0x000d0eef, + 0x000d12ef, 0x000d16ef, 0x000d1aef, 0x000d1eef, + 0x000d22ef, 0x000d26ef, 0x000d2aef, 0x000d2eef, + 0x000d32ef, 0x000d36ef, 0x000d3aef, 0x000d3eef, + 0x000d42ef, 0x000d46ef, 0x000d4aef, 0x000d4eef, + 0x000d52ef, 0x000d56ef, 0x000d5aef, 0x000d5eef, + 0x000d62ef, 0x000d66ef, 0x000d6aef, 0x000d6eef, + 0x000d72ef, 0x000d76ef, 0x000d7aef, 0x000d7eef, + 0x000d82ef, 0x000d86ef, 0x000d8aef, 0x000d8eef, + 0x000d92ef, 0x000d96ef, 0x000d9aef, 0x000d9eef, + 
0x000da2ef, 0x000da6ef, 0x000daaef, 0x000daeef, + 0x000db2ef, 0x000db6ef, 0x000dbaef, 0x000dbeef, + 0x000dc2ef, 0x000dc6ef, 0x000dcaef, 0x000dceef, + 0x000dd2ef, 0x000dd6ef, 0x000ddaef, 0x000ddeef, + 0x000de2ef, 0x000de6ef, 0x000deaef, 0x000deeef, + 0x000df2ef, 0x000df6ef, 0x000dfaef, 0x000dfeef, + 0x000e02ef, 0x000e06ef, 0x000e0aef, 0x000e0eef, + 0x000e12ef, 0x000e16ef, 0x000e1aef, 0x000e1eef, + 0x000e22ef, 0x000e26ef, 0x000e2aef, 0x000e2eef, + 0x000e32ef, 0x000e36ef, 0x000e3aef, 0x000e3eef, + 0x000e42ef, 0x000e46ef, 0x000e4aef, 0x000e4eef, + 0x000e52ef, 0x000e56ef, 0x000e5aef, 0x000e5eef, + 0x000e62ef, 0x000e66ef, 0x000e6aef, 0x000e6eef, + 0x000e72ef, 0x000e76ef, 0x000e7aef, 0x000e7eef, + 0x000e82ef, 0x000e86ef, 0x000e8aef, 0x000e8eef, + 0x000e92ef, 0x000e96ef, 0x000e9aef, 0x000e9eef, + 0x000ea2ef, 0x000ea6ef, 0x000eaaef, 0x000eaeef, + 0x000eb2ef, 0x000eb6ef, 0x000ebaef, 0x000ebeef, + 0x000ec2ef, 0x000ec6ef, 0x000ecaef, 0x000eceef, + 0x000ed2ef, 0x000ed6ef, 0x000edaef, 0x000edeef, + 0x000ee2ef, 0x000ee6ef, 0x000eeaef, 0x000eeeef, + 0x000ef2ef, 0x000ef6ef, 0x000efaef, 0x000efeef, + 0x000f02ef, 0x000f06ef, 0x000f0aef, 0x000f0eef, + 0x000f12ef, 0x000f16ef, 0x000f1aef, 0x000f1eef, + 0x000f22ef, 0x000f26ef, 0x000f2aef, 0x000f2eef, + 0x000f32ef, 0x000f36ef, 0x000f3aef, 0x000f3eef, + 0x000f42ef, 0x000f46ef, 0x000f4aef, 0x000f4eef, + 0x000f52ef, 0x000f56ef, 0x000f5aef, 0x000f5eef, + 0x000f62ef, 0x000f66ef, 0x000f6aef, 0x000f6eef, + 0x000f72ef, 0x000f76ef, 0x000f7aef, 0x000f7eef, + 0x000f82ef, 0x000f86ef, 0x000f8aef, 0x000f8eef, + 0x000f92ef, 0x000f96ef, 0x000f9aef, 0x000f9eef, + 0x000fa2ef, 0x000fa6ef, 0x000faaef, 0x000faeef, + 0x000fb2ef, 0x000fb6ef, 0x000fbaef, 0x000fbeef, + 0x000fc2ef, 0x000fc6ef, 0x000fcaef, 0x000fceef, + 0x000fd2ef, 0x000fd6ef, 0x000fdaef, 0x000fdeef, + 0x000fe2ef, 0x000fe6ef, 0x000feaef, 0x000feeef, + 0x000ff2ef, 0x000ff6ef, 0x000ffaef, 0x000ffeef, + 0x000017f4, 0x000057f4, 0x000097f4, 0x0000d7f4, + 0x000117f4, 0x000157f4, 0x000197f4, 0x0001d7f4, + 
0x000217f4, 0x000257f4, 0x000297f4, 0x0002d7f4, + 0x000317f4, 0x000357f4, 0x000397f4, 0x0003d7f4, + 0x000417f4, 0x000457f4, 0x000497f4, 0x0004d7f4, + 0x000517f4, 0x000557f4, 0x000597f4, 0x0005d7f4, + 0x000617f4, 0x000657f4, 0x000697f4, 0x0006d7f4, + 0x000717f4, 0x000757f4, 0x000797f4, 0x0007d7f4, + 0x000817f4, 0x000857f4, 0x000897f4, 0x0008d7f4, + 0x000917f4, 0x000957f4, 0x000997f4, 0x0009d7f4, + 0x000a17f4, 0x000a57f4, 0x000a97f4, 0x000ad7f4, + 0x000b17f4, 0x000b57f4, 0x000b97f4, 0x000bd7f4, + 0x000c17f4, 0x000c57f4, 0x000c97f4, 0x000cd7f4, + 0x000d17f4, 0x000d57f4, 0x000d97f4, 0x000dd7f4, + 0x000e17f4, 0x000e57f4, 0x000e97f4, 0x000ed7f4, + 0x000f17f4, 0x000f57f4, 0x000f97f4, 0x000fd7f4, + 0x001017f4, 0x001057f4, 0x001097f4, 0x0010d7f4, + 0x001117f4, 0x001157f4, 0x001197f4, 0x0011d7f4, + 0x001217f4, 0x001257f4, 0x001297f4, 0x0012d7f4, + 0x001317f4, 0x001357f4, 0x001397f4, 0x0013d7f4, + 0x001417f4, 0x001457f4, 0x001497f4, 0x0014d7f4, + 0x001517f4, 0x001557f4, 0x001597f4, 0x0015d7f4, + 0x001617f4, 0x001657f4, 0x001697f4, 0x0016d7f4, + 0x001717f4, 0x001757f4, 0x001797f4, 0x0017d7f4, + 0x001817f4, 0x001857f4, 0x001897f4, 0x0018d7f4, + 0x001917f4, 0x001957f4, 0x001997f4, 0x0019d7f4, + 0x001a17f4, 0x001a57f4, 0x001a97f4, 0x001ad7f4, + 0x001b17f4, 0x001b57f4, 0x001b97f4, 0x001bd7f4, + 0x001c17f4, 0x001c57f4, 0x001c97f4, 0x001cd7f4, + 0x001d17f4, 0x001d57f4, 0x001d97f4, 0x001dd7f4, + 0x001e17f4, 0x001e57f4, 0x001e97f4, 0x001ed7f4, + 0x001f17f4, 0x001f57f4, 0x001f97f4, 0x001fd7f4, + 0x002017f4, 0x002057f4, 0x002097f4, 0x0020d7f4, + 0x002117f4, 0x002157f4, 0x002197f4, 0x0021d7f4, + 0x002217f4, 0x002257f4, 0x002297f4, 0x0022d7f4, + 0x002317f4, 0x002357f4, 0x002397f4, 0x0023d7f4, + 0x002417f4, 0x002457f4, 0x002497f4, 0x0024d7f4, + 0x002517f4, 0x002557f4, 0x002597f4, 0x0025d7f4, + 0x002617f4, 0x002657f4, 0x002697f4, 0x0026d7f4, + 0x002717f4, 0x002757f4, 0x002797f4, 0x0027d7f4, + 0x002817f4, 0x002857f4, 0x002897f4, 0x0028d7f4, + 0x002917f4, 0x002957f4, 0x002997f4, 0x0029d7f4, + 
0x002a17f4, 0x002a57f4, 0x002a97f4, 0x002ad7f4, + 0x002b17f4, 0x002b57f4, 0x002b97f4, 0x002bd7f4, + 0x002c17f4, 0x002c57f4, 0x002c97f4, 0x002cd7f4, + 0x002d17f4, 0x002d57f4, 0x002d97f4, 0x002dd7f4, + 0x002e17f4, 0x002e57f4, 0x002e97f4, 0x002ed7f4, + 0x002f17f4, 0x002f57f4, 0x002f97f4, 0x002fd7f4, + 0x003017f4, 0x003057f4, 0x003097f4, 0x0030d7f4, + 0x003117f4, 0x003157f4, 0x003197f4, 0x0031d7f4, + 0x003217f4, 0x003257f4, 0x003297f4, 0x0032d7f4, + 0x003317f4, 0x003357f4, 0x003397f4, 0x0033d7f4, + 0x003417f4, 0x003457f4, 0x003497f4, 0x0034d7f4, + 0x003517f4, 0x003557f4, 0x003597f4, 0x0035d7f4, + 0x003617f4, 0x003657f4, 0x003697f4, 0x0036d7f4, + 0x003717f4, 0x003757f4, 0x003797f4, 0x0037d7f4, + 0x003817f4, 0x003857f4, 0x003897f4, 0x0038d7f4, + 0x003917f4, 0x003957f4, 0x003997f4, 0x0039d7f4, + 0x003a17f4, 0x003a57f4, 0x003a97f4, 0x003ad7f4, + 0x003b17f4, 0x003b57f4, 0x003b97f4, 0x003bd7f4, + 0x003c17f4, 0x003c57f4, 0x003c97f4, 0x003cd7f4, + 0x003d17f4, 0x003d57f4, 0x003d97f4, 0x003dd7f4, + 0x003e17f4, 0x003e57f4, 0x003e97f4, 0x003ed7f4, + 0x003f17f4, 0x003f57f4, 0x003f97f4, 0x003fd7f4, + 0x004017f4, 0x004057f4, 0x004097f4, 0x0040d7f4, + 0x004117f4, 0x004157f4, 0x004197f4, 0x0041d7f4, + 0x004217f4, 0x004257f4, 0x004297f4, 0x0042d7f4, + 0x004317f4, 0x004357f4, 0x004397f4, 0x0043d7f4, + 0x004417f4, 0x004457f4, 0x004497f4, 0x0044d7f4, + 0x004517f4, 0x004557f4, 0x004597f4, 0x0045d7f4, + 0x004617f4, 0x004657f4, 0x004697f4, 0x0046d7f4, + 0x004717f4, 0x004757f4, 0x004797f4, 0x0047d7f4, + 0x004817f4, 0x004857f4, 0x004897f4, 0x0048d7f4, + 0x004917f4, 0x004957f4, 0x004997f4, 0x0049d7f4, + 0x004a17f4, 0x004a57f4, 0x004a97f4, 0x004ad7f4, + 0x004b17f4, 0x004b57f4, 0x004b97f4, 0x004bd7f4, + 0x004c17f4, 0x004c57f4, 0x004c97f4, 0x004cd7f4, + 0x004d17f4, 0x004d57f4, 0x004d97f4, 0x004dd7f4, + 0x004e17f4, 0x004e57f4, 0x004e97f4, 0x004ed7f4, + 0x004f17f4, 0x004f57f4, 0x004f97f4, 0x004fd7f4, + 0x005017f4, 0x005057f4, 0x005097f4, 0x0050d7f4, + 0x005117f4, 0x005157f4, 0x005197f4, 0x0051d7f4, + 
0x005217f4, 0x005257f4, 0x005297f4, 0x0052d7f4, + 0x005317f4, 0x005357f4, 0x005397f4, 0x0053d7f4, + 0x005417f4, 0x005457f4, 0x005497f4, 0x0054d7f4, + 0x005517f4, 0x005557f4, 0x005597f4, 0x0055d7f4, + 0x005617f4, 0x005657f4, 0x005697f4, 0x0056d7f4, + 0x005717f4, 0x005757f4, 0x005797f4, 0x0057d7f4, + 0x005817f4, 0x005857f4, 0x005897f4, 0x0058d7f4, + 0x005917f4, 0x005957f4, 0x005997f4, 0x0059d7f4, + 0x005a17f4, 0x005a57f4, 0x005a97f4, 0x005ad7f4, + 0x005b17f4, 0x005b57f4, 0x005b97f4, 0x005bd7f4, + 0x005c17f4, 0x005c57f4, 0x005c97f4, 0x005cd7f4, + 0x005d17f4, 0x005d57f4, 0x005d97f4, 0x005dd7f4, + 0x005e17f4, 0x005e57f4, 0x005e97f4, 0x005ed7f4, + 0x005f17f4, 0x005f57f4, 0x005f97f4, 0x005fd7f4, + 0x006017f4, 0x006057f4, 0x006097f4, 0x0060d7f4, + 0x006117f4, 0x006157f4, 0x006197f4, 0x0061d7f4, + 0x006217f4, 0x006257f4, 0x006297f4, 0x0062d7f4, + 0x006317f4, 0x006357f4, 0x006397f4, 0x0063d7f4, + 0x006417f4, 0x006457f4, 0x006497f4, 0x0064d7f4, + 0x006517f4, 0x006557f4, 0x006597f4, 0x0065d7f4, + 0x006617f4, 0x006657f4, 0x006697f4, 0x0066d7f4, + 0x006717f4, 0x006757f4, 0x006797f4, 0x0067d7f4, + 0x006817f4, 0x006857f4, 0x006897f4, 0x0068d7f4, + 0x006917f4, 0x006957f4, 0x006997f4, 0x0069d7f4, + 0x006a17f4, 0x006a57f4, 0x006a97f4, 0x006ad7f4, + 0x006b17f4, 0x006b57f4, 0x006b97f4, 0x006bd7f4, + 0x006c17f4, 0x006c57f4, 0x006c97f4, 0x006cd7f4, + 0x006d17f4, 0x006d57f4, 0x006d97f4, 0x006dd7f4, + 0x006e17f4, 0x006e57f4, 0x006e97f4, 0x006ed7f4, + 0x006f17f4, 0x006f57f4, 0x006f97f4, 0x006fd7f4, + 0x007017f4, 0x007057f4, 0x007097f4, 0x0070d7f4, + 0x007117f4, 0x007157f4, 0x007197f4, 0x0071d7f4, + 0x007217f4, 0x007257f4, 0x007297f4, 0x0072d7f4, + 0x007317f4, 0x007357f4, 0x007397f4, 0x0073d7f4, + 0x007417f4, 0x007457f4, 0x007497f4, 0x0074d7f4, + 0x007517f4, 0x007557f4, 0x007597f4, 0x0075d7f4, + 0x007617f4, 0x007657f4, 0x007697f4, 0x0076d7f4, + 0x007717f4, 0x007757f4, 0x007797f4, 0x0077d7f4, + 0x007817f4, 0x007857f4, 0x007897f4, 0x0078d7f4, + 0x007917f4, 0x007957f4, 0x007997f4, 0x0079d7f4, + 
0x007a17f4, 0x007a57f4, 0x007a97f4, 0x007ad7f4, + 0x007b17f4, 0x007b57f4, 0x007b97f4, 0x007bd7f4, + 0x007c17f4, 0x007c57f4, 0x007c97f4, 0x007cd7f4, + 0x007d17f4, 0x007d57f4, 0x007d97f4, 0x007dd7f4, + 0x007e17f4, 0x007e57f4, 0x007e97f4, 0x007ed7f4, + 0x007f17f4, 0x007f57f4, 0x007f97f4, 0x007fd7f4, + 0x008017f4, 0x008057f4, 0x008097f4, 0x0080d7f4, + 0x008117f4, 0x008157f4, 0x008197f4, 0x0081d7f4, + 0x008217f4, 0x008257f4, 0x008297f4, 0x0082d7f4, + 0x008317f4, 0x008357f4, 0x008397f4, 0x0083d7f4, + 0x008417f4, 0x008457f4, 0x008497f4, 0x0084d7f4, + 0x008517f4, 0x008557f4, 0x008597f4, 0x0085d7f4, + 0x008617f4, 0x008657f4, 0x008697f4, 0x0086d7f4, + 0x008717f4, 0x008757f4, 0x008797f4, 0x0087d7f4, + 0x008817f4, 0x008857f4, 0x008897f4, 0x0088d7f4, + 0x008917f4, 0x008957f4, 0x008997f4, 0x0089d7f4, + 0x008a17f4, 0x008a57f4, 0x008a97f4, 0x008ad7f4, + 0x008b17f4, 0x008b57f4, 0x008b97f4, 0x008bd7f4, + 0x008c17f4, 0x008c57f4, 0x008c97f4, 0x008cd7f4, + 0x008d17f4, 0x008d57f4, 0x008d97f4, 0x008dd7f4, + 0x008e17f4, 0x008e57f4, 0x008e97f4, 0x008ed7f4, + 0x008f17f4, 0x008f57f4, 0x008f97f4, 0x008fd7f4, + 0x009017f4, 0x009057f4, 0x009097f4, 0x0090d7f4, + 0x009117f4, 0x009157f4, 0x009197f4, 0x0091d7f4, + 0x009217f4, 0x009257f4, 0x009297f4, 0x0092d7f4, + 0x009317f4, 0x009357f4, 0x009397f4, 0x0093d7f4, + 0x009417f4, 0x009457f4, 0x009497f4, 0x0094d7f4, + 0x009517f4, 0x009557f4, 0x009597f4, 0x0095d7f4, + 0x009617f4, 0x009657f4, 0x009697f4, 0x0096d7f4, + 0x009717f4, 0x009757f4, 0x009797f4, 0x0097d7f4, + 0x009817f4, 0x009857f4, 0x009897f4, 0x0098d7f4, + 0x009917f4, 0x009957f4, 0x009997f4, 0x0099d7f4, + 0x009a17f4, 0x009a57f4, 0x009a97f4, 0x009ad7f4, + 0x009b17f4, 0x009b57f4, 0x009b97f4, 0x009bd7f4, + 0x009c17f4, 0x009c57f4, 0x009c97f4, 0x009cd7f4, + 0x009d17f4, 0x009d57f4, 0x009d97f4, 0x009dd7f4, + 0x009e17f4, 0x009e57f4, 0x009e97f4, 0x009ed7f4, + 0x009f17f4, 0x009f57f4, 0x009f97f4, 0x009fd7f4, + 0x00a017f4, 0x00a057f4, 0x00a097f4, 0x00a0d7f4, + 0x00a117f4, 0x00a157f4, 0x00a197f4, 0x00a1d7f4, + 
0x00a217f4, 0x00a257f4, 0x00a297f4, 0x00a2d7f4, + 0x00a317f4, 0x00a357f4, 0x00a397f4, 0x00a3d7f4, + 0x00a417f4, 0x00a457f4, 0x00a497f4, 0x00a4d7f4, + 0x00a517f4, 0x00a557f4, 0x00a597f4, 0x00a5d7f4, + 0x00a617f4, 0x00a657f4, 0x00a697f4, 0x00a6d7f4, + 0x00a717f4, 0x00a757f4, 0x00a797f4, 0x00a7d7f4, + 0x00a817f4, 0x00a857f4, 0x00a897f4, 0x00a8d7f4, + 0x00a917f4, 0x00a957f4, 0x00a997f4, 0x00a9d7f4, + 0x00aa17f4, 0x00aa57f4, 0x00aa97f4, 0x00aad7f4, + 0x00ab17f4, 0x00ab57f4, 0x00ab97f4, 0x00abd7f4, + 0x00ac17f4, 0x00ac57f4, 0x00ac97f4, 0x00acd7f4, + 0x00ad17f4, 0x00ad57f4, 0x00ad97f4, 0x00add7f4, + 0x00ae17f4, 0x00ae57f4, 0x00ae97f4, 0x00aed7f4, + 0x00af17f4, 0x00af57f4, 0x00af97f4, 0x00afd7f4, + 0x00b017f4, 0x00b057f4, 0x00b097f4, 0x00b0d7f4, + 0x00b117f4, 0x00b157f4, 0x00b197f4, 0x00b1d7f4, + 0x00b217f4, 0x00b257f4, 0x00b297f4, 0x00b2d7f4, + 0x00b317f4, 0x00b357f4, 0x00b397f4, 0x00b3d7f4, + 0x00b417f4, 0x00b457f4, 0x00b497f4, 0x00b4d7f4, + 0x00b517f4, 0x00b557f4, 0x00b597f4, 0x00b5d7f4, + 0x00b617f4, 0x00b657f4, 0x00b697f4, 0x00b6d7f4, + 0x00b717f4, 0x00b757f4, 0x00b797f4, 0x00b7d7f4, + 0x00b817f4, 0x00b857f4, 0x00b897f4, 0x00b8d7f4, + 0x00b917f4, 0x00b957f4, 0x00b997f4, 0x00b9d7f4, + 0x00ba17f4, 0x00ba57f4, 0x00ba97f4, 0x00bad7f4, + 0x00bb17f4, 0x00bb57f4, 0x00bb97f4, 0x00bbd7f4, + 0x00bc17f4, 0x00bc57f4, 0x00bc97f4, 0x00bcd7f4, + 0x00bd17f4, 0x00bd57f4, 0x00bd97f4, 0x00bdd7f4, + 0x00be17f4, 0x00be57f4, 0x00be97f4, 0x00bed7f4, + 0x00bf17f4, 0x00bf57f4, 0x00bf97f4, 0x00bfd7f4, + 0x00c017f4, 0x00c057f4, 0x00c097f4, 0x00c0d7f4, + 0x00c117f4, 0x00c157f4, 0x00c197f4, 0x00c1d7f4, + 0x00c217f4, 0x00c257f4, 0x00c297f4, 0x00c2d7f4, + 0x00c317f4, 0x00c357f4, 0x00c397f4, 0x00c3d7f4, + 0x00c417f4, 0x00c457f4, 0x00c497f4, 0x00c4d7f4, + 0x00c517f4, 0x00c557f4, 0x00c597f4, 0x00c5d7f4, + 0x00c617f4, 0x00c657f4, 0x00c697f4, 0x00c6d7f4, + 0x00c717f4, 0x00c757f4, 0x00c797f4, 0x00c7d7f4, + 0x00c817f4, 0x00c857f4, 0x00c897f4, 0x00c8d7f4, + 0x00c917f4, 0x00c957f4, 0x00c997f4, 0x00c9d7f4, + 
0x00ca17f4, 0x00ca57f4, 0x00ca97f4, 0x00cad7f4, + 0x00cb17f4, 0x00cb57f4, 0x00cb97f4, 0x00cbd7f4, + 0x00cc17f4, 0x00cc57f4, 0x00cc97f4, 0x00ccd7f4, + 0x00cd17f4, 0x00cd57f4, 0x00cd97f4, 0x00cdd7f4, + 0x00ce17f4, 0x00ce57f4, 0x00ce97f4, 0x00ced7f4, + 0x00cf17f4, 0x00cf57f4, 0x00cf97f4, 0x00cfd7f4, + 0x00d017f4, 0x00d057f4, 0x00d097f4, 0x00d0d7f4, + 0x00d117f4, 0x00d157f4, 0x00d197f4, 0x00d1d7f4, + 0x00d217f4, 0x00d257f4, 0x00d297f4, 0x00d2d7f4, + 0x00d317f4, 0x00d357f4, 0x00d397f4, 0x00d3d7f4, + 0x00d417f4, 0x00d457f4, 0x00d497f4, 0x00d4d7f4, + 0x00d517f4, 0x00d557f4, 0x00d597f4, 0x00d5d7f4, + 0x00d617f4, 0x00d657f4, 0x00d697f4, 0x00d6d7f4, + 0x00d717f4, 0x00d757f4, 0x00d797f4, 0x00d7d7f4, + 0x00d817f4, 0x00d857f4, 0x00d897f4, 0x00d8d7f4, + 0x00d917f4, 0x00d957f4, 0x00d997f4, 0x00d9d7f4, + 0x00da17f4, 0x00da57f4, 0x00da97f4, 0x00dad7f4, + 0x00db17f4, 0x00db57f4, 0x00db97f4, 0x00dbd7f4, + 0x00dc17f4, 0x00dc57f4, 0x00dc97f4, 0x00dcd7f4, + 0x00dd17f4, 0x00dd57f4, 0x00dd97f4, 0x00ddd7f4, + 0x00de17f4, 0x00de57f4, 0x00de97f4, 0x00ded7f4, + 0x00df17f4, 0x00df57f4, 0x00df97f4, 0x00dfd7f4, + 0x00e017f4, 0x00e057f4, 0x00e097f4, 0x00e0d7f4, + 0x00e117f4, 0x00e157f4, 0x00e197f4, 0x00e1d7f4, + 0x00e217f4, 0x00e257f4, 0x00e297f4, 0x00e2d7f4, + 0x00e317f4, 0x00e357f4, 0x00e397f4, 0x00e3d7f4, + 0x00e417f4, 0x00e457f4, 0x00e497f4, 0x00e4d7f4, + 0x00e517f4, 0x00e557f4, 0x00e597f4, 0x00e5d7f4, + 0x00e617f4, 0x00e657f4, 0x00e697f4, 0x00e6d7f4, + 0x00e717f4, 0x00e757f4, 0x00e797f4, 0x00e7d7f4, + 0x00e817f4, 0x00e857f4, 0x00e897f4, 0x00e8d7f4, + 0x00e917f4, 0x00e957f4, 0x00e997f4, 0x00e9d7f4, + 0x00ea17f4, 0x00ea57f4, 0x00ea97f4, 0x00ead7f4, + 0x00eb17f4, 0x00eb57f4, 0x00eb97f4, 0x00ebd7f4, + 0x00ec17f4, 0x00ec57f4, 0x00ec97f4, 0x00ecd7f4, + 0x00ed17f4, 0x00ed57f4, 0x00ed97f4, 0x00edd7f4, + 0x00ee17f4, 0x00ee57f4, 0x00ee97f4, 0x00eed7f4, + 0x00ef17f4, 0x00ef57f4, 0x00ef97f4, 0x00efd7f4, + 0x00f017f4, 0x00f057f4, 0x00f097f4, 0x00f0d7f4, + 0x00f117f4, 0x00f157f4, 0x00f197f4, 0x00f1d7f4, + 
0x00f217f4, 0x00f257f4, 0x00f297f4, 0x00f2d7f4, + 0x00f317f4, 0x00f357f4, 0x00f397f4, 0x00f3d7f4, + 0x00f417f4, 0x00f457f4, 0x00f497f4, 0x00f4d7f4, + 0x00f517f4, 0x00f557f4, 0x00f597f4, 0x00f5d7f4, + 0x00f617f4, 0x00f657f4, 0x00f697f4, 0x00f6d7f4, + 0x00f717f4, 0x00f757f4, 0x00f797f4, 0x00f7d7f4, + 0x00f817f4, 0x00f857f4, 0x00f897f4, 0x00f8d7f4, + 0x00f917f4, 0x00f957f4, 0x00f997f4, 0x00f9d7f4, + 0x00fa17f4, 0x00fa57f4, 0x00fa97f4, 0x00fad7f4, + 0x00fb17f4, 0x00fb57f4, 0x00fb97f4, 0x00fbd7f4, + 0x00fc17f4, 0x00fc57f4, 0x00fc97f4, 0x00fcd7f4, + 0x00fd17f4, 0x00fd57f4, 0x00fd97f4, 0x00fdd7f4, + 0x00fe17f4, 0x00fe57f4, 0x00fe97f4, 0x00fed7f4, + 0x00ff17f4, 0x00ff57f4, 0x00ff97f4, 0x00ffd7f4, + 0x010017f4, 0x010057f4, 0x010097f4, 0x0100d7f4, + 0x010117f4, 0x010157f4, 0x010197f4, 0x0101d7f4, + 0x010217f4, 0x010257f4, 0x010297f4, 0x0102d7f4, + 0x010317f4, 0x010357f4, 0x010397f4, 0x0103d7f4, + 0x010417f4, 0x010457f4, 0x010497f4, 0x0104d7f4, + 0x010517f4, 0x010557f4, 0x010597f4, 0x0105d7f4, + 0x010617f4, 0x010657f4, 0x010697f4, 0x0106d7f4, + 0x010717f4, 0x010757f4, 0x010797f4, 0x0107d7f4, + 0x010817f4, 0x010857f4, 0x010897f4, 0x0108d7f4, + 0x010917f4, 0x010957f4, 0x010997f4, 0x0109d7f4, + 0x010a17f4, 0x010a57f4, 0x010a97f4, 0x010ad7f4, + 0x010b17f4, 0x010b57f4, 0x010b97f4, 0x010bd7f4, + 0x010c17f4, 0x010c57f4, 0x010c97f4, 0x010cd7f4, + 0x010d17f4, 0x010d57f4, 0x010d97f4, 0x010dd7f4, + 0x010e17f4, 0x010e57f4, 0x010e97f4, 0x010ed7f4, + 0x010f17f4, 0x010f57f4, 0x010f97f4, 0x010fd7f4, + 0x011017f4, 0x011057f4, 0x011097f4, 0x0110d7f4, + 0x011117f4, 0x011157f4, 0x011197f4, 0x0111d7f4, + 0x011217f4, 0x011257f4, 0x011297f4, 0x0112d7f4, + 0x011317f4, 0x011357f4, 0x011397f4, 0x0113d7f4, + 0x011417f4, 0x011457f4, 0x011497f4, 0x0114d7f4, + 0x011517f4, 0x011557f4, 0x011597f4, 0x0115d7f4, + 0x011617f4, 0x011657f4, 0x011697f4, 0x0116d7f4, + 0x011717f4, 0x011757f4, 0x011797f4, 0x0117d7f4, + 0x011817f4, 0x011857f4, 0x011897f4, 0x0118d7f4, + 0x011917f4, 0x011957f4, 0x011997f4, 0x0119d7f4, + 
0x011a17f4, 0x011a57f4, 0x011a97f4, 0x011ad7f4, + 0x011b17f4, 0x011b57f4, 0x011b97f4, 0x011bd7f4, + 0x011c17f4, 0x011c57f4, 0x011c97f4, 0x011cd7f4, + 0x011d17f4, 0x011d57f4, 0x011d97f4, 0x011dd7f4, + 0x011e17f4, 0x011e57f4, 0x011e97f4, 0x011ed7f4, + 0x011f17f4, 0x011f57f4, 0x011f97f4, 0x011fd7f4, + 0x012017f4, 0x012057f4, 0x012097f4, 0x0120d7f4, + 0x012117f4, 0x012157f4, 0x012197f4, 0x0121d7f4, + 0x012217f4, 0x012257f4, 0x012297f4, 0x0122d7f4, + 0x012317f4, 0x012357f4, 0x012397f4, 0x0123d7f4, + 0x012417f4, 0x012457f4, 0x012497f4, 0x0124d7f4, + 0x012517f4, 0x012557f4, 0x012597f4, 0x0125d7f4, + 0x012617f4, 0x012657f4, 0x012697f4, 0x0126d7f4, + 0x012717f4, 0x012757f4, 0x012797f4, 0x0127d7f4, + 0x012817f4, 0x012857f4, 0x012897f4, 0x0128d7f4, + 0x012917f4, 0x012957f4, 0x012997f4, 0x0129d7f4, + 0x012a17f4, 0x012a57f4, 0x012a97f4, 0x012ad7f4, + 0x012b17f4, 0x012b57f4, 0x012b97f4, 0x012bd7f4, + 0x012c17f4, 0x012c57f4, 0x012c97f4, 0x012cd7f4, + 0x012d17f4, 0x012d57f4, 0x012d97f4, 0x012dd7f4, + 0x012e17f4, 0x012e57f4, 0x012e97f4, 0x012ed7f4, + 0x012f17f4, 0x012f57f4, 0x012f97f4, 0x012fd7f4, + 0x013017f4, 0x013057f4, 0x013097f4, 0x0130d7f4, + 0x013117f4, 0x013157f4, 0x013197f4, 0x0131d7f4, + 0x013217f4, 0x013257f4, 0x013297f4, 0x0132d7f4, + 0x013317f4, 0x013357f4, 0x013397f4, 0x0133d7f4, + 0x013417f4, 0x013457f4, 0x013497f4, 0x0134d7f4, + 0x013517f4, 0x013557f4, 0x013597f4, 0x0135d7f4, + 0x013617f4, 0x013657f4, 0x013697f4, 0x0136d7f4, + 0x013717f4, 0x013757f4, 0x013797f4, 0x0137d7f4, + 0x013817f4, 0x013857f4, 0x013897f4, 0x0138d7f4, + 0x013917f4, 0x013957f4, 0x013997f4, 0x0139d7f4, + 0x013a17f4, 0x013a57f4, 0x013a97f4, 0x013ad7f4, + 0x013b17f4, 0x013b57f4, 0x013b97f4, 0x013bd7f4, + 0x013c17f4, 0x013c57f4, 0x013c97f4, 0x013cd7f4, + 0x013d17f4, 0x013d57f4, 0x013d97f4, 0x013dd7f4, + 0x013e17f4, 0x013e57f4, 0x013e97f4, 0x013ed7f4, + 0x013f17f4, 0x013f57f4, 0x013f97f4, 0x013fd7f4, + 0x014017f4, 0x014057f4, 0x014097f4, 0x0140d7f4, + 0x014117f4, 0x014157f4, 0x014197f4, 0x0141d7f4, + 
0x014217f4, 0x014257f4, 0x014297f4, 0x0142d7f4, + 0x014317f4, 0x014357f4, 0x014397f4, 0x0143d7f4, + 0x014417f4, 0x014457f4, 0x014497f4, 0x0144d7f4, + 0x014517f4, 0x014557f4, 0x014597f4, 0x0145d7f4, + 0x014617f4, 0x014657f4, 0x014697f4, 0x0146d7f4, + 0x014717f4, 0x014757f4, 0x014797f4, 0x0147d7f4, + 0x014817f4, 0x014857f4, 0x014897f4, 0x0148d7f4, + 0x014917f4, 0x014957f4, 0x014997f4, 0x0149d7f4, + 0x014a17f4, 0x014a57f4, 0x014a97f4, 0x014ad7f4, + 0x014b17f4, 0x014b57f4, 0x014b97f4, 0x014bd7f4, + 0x014c17f4, 0x014c57f4, 0x014c97f4, 0x014cd7f4, + 0x014d17f4, 0x014d57f4, 0x014d97f4, 0x014dd7f4, + 0x014e17f4, 0x014e57f4, 0x014e97f4, 0x014ed7f4, + 0x014f17f4, 0x014f57f4, 0x014f97f4, 0x014fd7f4, + 0x015017f4, 0x015057f4, 0x015097f4, 0x0150d7f4, + 0x015117f4, 0x015157f4, 0x015197f4, 0x0151d7f4, + 0x015217f4, 0x015257f4, 0x015297f4, 0x0152d7f4, + 0x015317f4, 0x015357f4, 0x015397f4, 0x0153d7f4, + 0x015417f4, 0x015457f4, 0x015497f4, 0x0154d7f4, + 0x015517f4, 0x015557f4, 0x015597f4, 0x0155d7f4, + 0x015617f4, 0x015657f4, 0x015697f4, 0x0156d7f4, + 0x015717f4, 0x015757f4, 0x015797f4, 0x0157d7f4, + 0x015817f4, 0x015857f4, 0x015897f4, 0x0158d7f4, + 0x015917f4, 0x015957f4, 0x015997f4, 0x0159d7f4, + 0x015a17f4, 0x015a57f4, 0x015a97f4, 0x015ad7f4, + 0x015b17f4, 0x015b57f4, 0x015b97f4, 0x015bd7f4, + 0x015c17f4, 0x015c57f4, 0x015c97f4, 0x015cd7f4, + 0x015d17f4, 0x015d57f4, 0x015d97f4, 0x015dd7f4, + 0x015e17f4, 0x015e57f4, 0x015e97f4, 0x015ed7f4, + 0x015f17f4, 0x015f57f4, 0x015f97f4, 0x015fd7f4, + 0x016017f4, 0x016057f4, 0x016097f4, 0x0160d7f4, + 0x016117f4, 0x016157f4, 0x016197f4, 0x0161d7f4, + 0x016217f4, 0x016257f4, 0x016297f4, 0x0162d7f4, + 0x016317f4, 0x016357f4, 0x016397f4, 0x0163d7f4, + 0x016417f4, 0x016457f4, 0x016497f4, 0x0164d7f4, + 0x016517f4, 0x016557f4, 0x016597f4, 0x0165d7f4, + 0x016617f4, 0x016657f4, 0x016697f4, 0x0166d7f4, + 0x016717f4, 0x016757f4, 0x016797f4, 0x0167d7f4, + 0x016817f4, 0x016857f4, 0x016897f4, 0x0168d7f4, + 0x016917f4, 0x016957f4, 0x016997f4, 0x0169d7f4, + 
0x016a17f4, 0x016a57f4, 0x016a97f4, 0x016ad7f4, + 0x016b17f4, 0x016b57f4, 0x016b97f4, 0x016bd7f4, + 0x016c17f4, 0x016c57f4, 0x016c97f4, 0x016cd7f4, + 0x016d17f4, 0x016d57f4, 0x016d97f4, 0x016dd7f4, + 0x016e17f4, 0x016e57f4, 0x016e97f4, 0x016ed7f4, + 0x016f17f4, 0x016f57f4, 0x016f97f4, 0x016fd7f4, + 0x017017f4, 0x017057f4, 0x017097f4, 0x0170d7f4, + 0x017117f4, 0x017157f4, 0x017197f4, 0x0171d7f4, + 0x017217f4, 0x017257f4, 0x017297f4, 0x0172d7f4, + 0x017317f4, 0x017357f4, 0x017397f4, 0x0173d7f4, + 0x017417f4, 0x017457f4, 0x017497f4, 0x0174d7f4, + 0x017517f4, 0x017557f4, 0x017597f4, 0x0175d7f4, + 0x017617f4, 0x017657f4, 0x017697f4, 0x0176d7f4, + 0x017717f4, 0x017757f4, 0x017797f4, 0x0177d7f4, + 0x017817f4, 0x017857f4, 0x017897f4, 0x0178d7f4, + 0x017917f4, 0x017957f4, 0x017997f4, 0x0179d7f4, + 0x017a17f4, 0x017a57f4, 0x017a97f4, 0x017ad7f4, + 0x017b17f4, 0x017b57f4, 0x017b97f4, 0x017bd7f4, + 0x017c17f4, 0x017c57f4, 0x017c97f4, 0x017cd7f4, + 0x017d17f4, 0x017d57f4, 0x017d97f4, 0x017dd7f4, + 0x017e17f4, 0x017e57f4, 0x017e97f4, 0x017ed7f4, + 0x017f17f4, 0x017f57f4, 0x017f97f4, 0x017fd7f4, + 0x018017f4, 0x018057f4, 0x018097f4, 0x0180d7f4, + 0x018117f4, 0x018157f4, 0x018197f4, 0x0181d7f4, + 0x018217f4, 0x018257f4, 0x018297f4, 0x0182d7f4, + 0x018317f4, 0x018357f4, 0x018397f4, 0x0183d7f4, + 0x018417f4, 0x018457f4, 0x018497f4, 0x0184d7f4, + 0x018517f4, 0x018557f4, 0x018597f4, 0x0185d7f4, + 0x018617f4, 0x018657f4, 0x018697f4, 0x0186d7f4, + 0x018717f4, 0x018757f4, 0x018797f4, 0x0187d7f4, + 0x018817f4, 0x018857f4, 0x018897f4, 0x0188d7f4, + 0x018917f4, 0x018957f4, 0x018997f4, 0x0189d7f4, + 0x018a17f4, 0x018a57f4, 0x018a97f4, 0x018ad7f4, + 0x018b17f4, 0x018b57f4, 0x018b97f4, 0x018bd7f4, + 0x018c17f4, 0x018c57f4, 0x018c97f4, 0x018cd7f4, + 0x018d17f4, 0x018d57f4, 0x018d97f4, 0x018dd7f4, + 0x018e17f4, 0x018e57f4, 0x018e97f4, 0x018ed7f4, + 0x018f17f4, 0x018f57f4, 0x018f97f4, 0x018fd7f4, + 0x019017f4, 0x019057f4, 0x019097f4, 0x0190d7f4, + 0x019117f4, 0x019157f4, 0x019197f4, 0x0191d7f4, + 
0x019217f4, 0x019257f4, 0x019297f4, 0x0192d7f4, + 0x019317f4, 0x019357f4, 0x019397f4, 0x0193d7f4, + 0x019417f4, 0x019457f4, 0x019497f4, 0x0194d7f4, + 0x019517f4, 0x019557f4, 0x019597f4, 0x0195d7f4, + 0x019617f4, 0x019657f4, 0x019697f4, 0x0196d7f4, + 0x019717f4, 0x019757f4, 0x019797f4, 0x0197d7f4, + 0x019817f4, 0x019857f4, 0x019897f4, 0x0198d7f4, + 0x019917f4, 0x019957f4, 0x019997f4, 0x0199d7f4, + 0x019a17f4, 0x019a57f4, 0x019a97f4, 0x019ad7f4, + 0x019b17f4, 0x019b57f4, 0x019b97f4, 0x019bd7f4, + 0x019c17f4, 0x019c57f4, 0x019c97f4, 0x019cd7f4, + 0x019d17f4, 0x019d57f4, 0x019d97f4, 0x019dd7f4, + 0x019e17f4, 0x019e57f4, 0x019e97f4, 0x019ed7f4, + 0x019f17f4, 0x019f57f4, 0x019f97f4, 0x019fd7f4, + 0x01a017f4, 0x01a057f4, 0x01a097f4, 0x01a0d7f4, + 0x01a117f4, 0x01a157f4, 0x01a197f4, 0x01a1d7f4, + 0x01a217f4, 0x01a257f4, 0x01a297f4, 0x01a2d7f4, + 0x01a317f4, 0x01a357f4, 0x01a397f4, 0x01a3d7f4, + 0x01a417f4, 0x01a457f4, 0x01a497f4, 0x01a4d7f4, + 0x01a517f4, 0x01a557f4, 0x01a597f4, 0x01a5d7f4, + 0x01a617f4, 0x01a657f4, 0x01a697f4, 0x01a6d7f4, + 0x01a717f4, 0x01a757f4, 0x01a797f4, 0x01a7d7f4, + 0x01a817f4, 0x01a857f4, 0x01a897f4, 0x01a8d7f4, + 0x01a917f4, 0x01a957f4, 0x01a997f4, 0x01a9d7f4, + 0x01aa17f4, 0x01aa57f4, 0x01aa97f4, 0x01aad7f4, + 0x01ab17f4, 0x01ab57f4, 0x01ab97f4, 0x01abd7f4, + 0x01ac17f4, 0x01ac57f4, 0x01ac97f4, 0x01acd7f4, + 0x01ad17f4, 0x01ad57f4, 0x01ad97f4, 0x01add7f4, + 0x01ae17f4, 0x01ae57f4, 0x01ae97f4, 0x01aed7f4, + 0x01af17f4, 0x01af57f4, 0x01af97f4, 0x01afd7f4, + 0x01b017f4, 0x01b057f4, 0x01b097f4, 0x01b0d7f4, + 0x01b117f4, 0x01b157f4, 0x01b197f4, 0x01b1d7f4, + 0x01b217f4, 0x01b257f4, 0x01b297f4, 0x01b2d7f4, + 0x01b317f4, 0x01b357f4, 0x01b397f4, 0x01b3d7f4, + 0x01b417f4, 0x01b457f4, 0x01b497f4, 0x01b4d7f4, + 0x01b517f4, 0x01b557f4, 0x01b597f4, 0x01b5d7f4, + 0x01b617f4, 0x01b657f4, 0x01b697f4, 0x01b6d7f4, + 0x01b717f4, 0x01b757f4, 0x01b797f4, 0x01b7d7f4, + 0x01b817f4, 0x01b857f4, 0x01b897f4, 0x01b8d7f4, + 0x01b917f4, 0x01b957f4, 0x01b997f4, 0x01b9d7f4, + 
0x01ba17f4, 0x01ba57f4, 0x01ba97f4, 0x01bad7f4, + 0x01bb17f4, 0x01bb57f4, 0x01bb97f4, 0x01bbd7f4, + 0x01bc17f4, 0x01bc57f4, 0x01bc97f4, 0x01bcd7f4, + 0x01bd17f4, 0x01bd57f4, 0x01bd97f4, 0x01bdd7f4, + 0x01be17f4, 0x01be57f4, 0x01be97f4, 0x01bed7f4, + 0x01bf17f4, 0x01bf57f4, 0x01bf97f4, 0x01bfd7f4, + 0x01c017f4, 0x01c057f4, 0x01c097f4, 0x01c0d7f4, + 0x01c117f4, 0x01c157f4, 0x01c197f4, 0x01c1d7f4, + 0x01c217f4, 0x01c257f4, 0x01c297f4, 0x01c2d7f4, + 0x01c317f4, 0x01c357f4, 0x01c397f4, 0x01c3d7f4, + 0x01c417f4, 0x01c457f4, 0x01c497f4, 0x01c4d7f4, + 0x01c517f4, 0x01c557f4, 0x01c597f4, 0x01c5d7f4, + 0x01c617f4, 0x01c657f4, 0x01c697f4, 0x01c6d7f4, + 0x01c717f4, 0x01c757f4, 0x01c797f4, 0x01c7d7f4, + 0x01c817f4, 0x01c857f4, 0x01c897f4, 0x01c8d7f4, + 0x01c917f4, 0x01c957f4, 0x01c997f4, 0x01c9d7f4, + 0x01ca17f4, 0x01ca57f4, 0x01ca97f4, 0x01cad7f4, + 0x01cb17f4, 0x01cb57f4, 0x01cb97f4, 0x01cbd7f4, + 0x01cc17f4, 0x01cc57f4, 0x01cc97f4, 0x01ccd7f4, + 0x01cd17f4, 0x01cd57f4, 0x01cd97f4, 0x01cdd7f4, + 0x01ce17f4, 0x01ce57f4, 0x01ce97f4, 0x01ced7f4, + 0x01cf17f4, 0x01cf57f4, 0x01cf97f4, 0x01cfd7f4, + 0x01d017f4, 0x01d057f4, 0x01d097f4, 0x01d0d7f4, + 0x01d117f4, 0x01d157f4, 0x01d197f4, 0x01d1d7f4, + 0x01d217f4, 0x01d257f4, 0x01d297f4, 0x01d2d7f4, + 0x01d317f4, 0x01d357f4, 0x01d397f4, 0x01d3d7f4, + 0x01d417f4, 0x01d457f4, 0x01d497f4, 0x01d4d7f4, + 0x01d517f4, 0x01d557f4, 0x01d597f4, 0x01d5d7f4, + 0x01d617f4, 0x01d657f4, 0x01d697f4, 0x01d6d7f4, + 0x01d717f4, 0x01d757f4, 0x01d797f4, 0x01d7d7f4, + 0x01d817f4, 0x01d857f4, 0x01d897f4, 0x01d8d7f4, + 0x01d917f4, 0x01d957f4, 0x01d997f4, 0x01d9d7f4, + 0x01da17f4, 0x01da57f4, 0x01da97f4, 0x01dad7f4, + 0x01db17f4, 0x01db57f4, 0x01db97f4, 0x01dbd7f4, + 0x01dc17f4, 0x01dc57f4, 0x01dc97f4, 0x01dcd7f4, + 0x01dd17f4, 0x01dd57f4, 0x01dd97f4, 0x01ddd7f4, + 0x01de17f4, 0x01de57f4, 0x01de97f4, 0x01ded7f4, + 0x01df17f4, 0x01df57f4, 0x01df97f4, 0x01dfd7f4, + 0x01e017f4, 0x01e057f4, 0x01e097f4, 0x01e0d7f4, + 0x01e117f4, 0x01e157f4, 0x01e197f4, 0x01e1d7f4, + 
0x01e217f4, 0x01e257f4, 0x01e297f4, 0x01e2d7f4, + 0x01e317f4, 0x01e357f4, 0x01e397f4, 0x01e3d7f4, + 0x01e417f4, 0x01e457f4, 0x01e497f4, 0x01e4d7f4, + 0x01e517f4, 0x01e557f4, 0x01e597f4, 0x01e5d7f4, + 0x01e617f4, 0x01e657f4, 0x01e697f4, 0x01e6d7f4, + 0x01e717f4, 0x01e757f4, 0x01e797f4, 0x01e7d7f4, + 0x01e817f4, 0x01e857f4, 0x01e897f4, 0x01e8d7f4, + 0x01e917f4, 0x01e957f4, 0x01e997f4, 0x01e9d7f4, + 0x01ea17f4, 0x01ea57f4, 0x01ea97f4, 0x01ead7f4, + 0x01eb17f4, 0x01eb57f4, 0x01eb97f4, 0x01ebd7f4, + 0x01ec17f4, 0x01ec57f4, 0x01ec97f4, 0x01ecd7f4, + 0x01ed17f4, 0x01ed57f4, 0x01ed97f4, 0x01edd7f4, + 0x01ee17f4, 0x01ee57f4, 0x01ee97f4, 0x01eed7f4, + 0x01ef17f4, 0x01ef57f4, 0x01ef97f4, 0x01efd7f4, + 0x01f017f4, 0x01f057f4, 0x01f097f4, 0x01f0d7f4, + 0x01f117f4, 0x01f157f4, 0x01f197f4, 0x01f1d7f4, + 0x01f217f4, 0x01f257f4, 0x01f297f4, 0x01f2d7f4, + 0x01f317f4, 0x01f357f4, 0x01f397f4, 0x01f3d7f4, + 0x01f417f4, 0x01f457f4, 0x01f497f4, 0x01f4d7f4, + 0x01f517f4, 0x01f557f4, 0x01f597f4, 0x01f5d7f4, + 0x01f617f4, 0x01f657f4, 0x01f697f4, 0x01f6d7f4, + 0x01f717f4, 0x01f757f4, 0x01f797f4, 0x01f7d7f4, + 0x01f817f4, 0x01f857f4, 0x01f897f4, 0x01f8d7f4, + 0x01f917f4, 0x01f957f4, 0x01f997f4, 0x01f9d7f4, + 0x01fa17f4, 0x01fa57f4, 0x01fa97f4, 0x01fad7f4, + 0x01fb17f4, 0x01fb57f4, 0x01fb97f4, 0x01fbd7f4, + 0x01fc17f4, 0x01fc57f4, 0x01fc97f4, 0x01fcd7f4, + 0x01fd17f4, 0x01fd57f4, 0x01fd97f4, 0x01fdd7f4, + 0x01fe17f4, 0x01fe57f4, 0x01fe97f4, 0x01fed7f4, + 0x01ff17f4, 0x01ff57f4, 0x01ff97f4, 0x01ffd7f4, + 0x000037f4, 0x000077f4, 0x0000b7f4, 0x0000f7f4, + 0x000137f4, 0x000177f4, 0x0001b7f4, 0x0001f7f4, + 0x000237f4, 0x000277f4, 0x0002b7f4, 0x0002f7f4, + 0x000337f4, 0x000377f4, 0x0003b7f4, 0x0003f7f4, + 0x000437f4, 0x000477f4, 0x0004b7f4, 0x0004f7f4, + 0x000537f4, 0x000577f4, 0x0005b7f4, 0x0005f7f4, + 0x000637f4, 0x000677f4, 0x0006b7f4, 0x0006f7f4, + 0x000737f4, 0x000777f4, 0x0007b7f4, 0x0007f7f4, + 0x000837f4, 0x000877f4, 0x0008b7f4, 0x0008f7f4, + 0x000937f4, 0x000977f4, 0x0009b7f4, 0x0009f7f4, + 
0x000a37f4, 0x000a77f4, 0x000ab7f4, 0x000af7f4, + 0x000b37f4, 0x000b77f4, 0x000bb7f4, 0x000bf7f4, + 0x000c37f4, 0x000c77f4, 0x000cb7f4, 0x000cf7f4, + 0x000d37f4, 0x000d77f4, 0x000db7f4, 0x000df7f4, + 0x000e37f4, 0x000e77f4, 0x000eb7f4, 0x000ef7f4, + 0x000f37f4, 0x000f77f4, 0x000fb7f4, 0x000ff7f4, + 0x001037f4, 0x001077f4, 0x0010b7f4, 0x0010f7f4, + 0x001137f4, 0x001177f4, 0x0011b7f4, 0x0011f7f4, + 0x001237f4, 0x001277f4, 0x0012b7f4, 0x0012f7f4, + 0x001337f4, 0x001377f4, 0x0013b7f4, 0x0013f7f4, + 0x001437f4, 0x001477f4, 0x0014b7f4, 0x0014f7f4, + 0x001537f4, 0x001577f4, 0x0015b7f4, 0x0015f7f4, + 0x001637f4, 0x001677f4, 0x0016b7f4, 0x0016f7f4, + 0x001737f4, 0x001777f4, 0x0017b7f4, 0x0017f7f4, + 0x001837f4, 0x001877f4, 0x0018b7f4, 0x0018f7f4, + 0x001937f4, 0x001977f4, 0x0019b7f4, 0x0019f7f4, + 0x001a37f4, 0x001a77f4, 0x001ab7f4, 0x001af7f4, + 0x001b37f4, 0x001b77f4, 0x001bb7f4, 0x001bf7f4, + 0x001c37f4, 0x001c77f4, 0x001cb7f4, 0x001cf7f4, + 0x001d37f4, 0x001d77f4, 0x001db7f4, 0x001df7f4, + 0x001e37f4, 0x001e77f4, 0x001eb7f4, 0x001ef7f4, + 0x001f37f4, 0x001f77f4, 0x001fb7f4, 0x001ff7f4, + 0x002037f4, 0x002077f4, 0x0020b7f4, 0x0020f7f4, + 0x002137f4, 0x002177f4, 0x0021b7f4, 0x0021f7f4, + 0x002237f4, 0x002277f4, 0x0022b7f4, 0x0022f7f4, + 0x002337f4, 0x002377f4, 0x0023b7f4, 0x0023f7f4, + 0x002437f4, 0x002477f4, 0x0024b7f4, 0x0024f7f4, + 0x002537f4, 0x002577f4, 0x0025b7f4, 0x0025f7f4, + 0x002637f4, 0x002677f4, 0x0026b7f4, 0x0026f7f4, + 0x002737f4, 0x002777f4, 0x0027b7f4, 0x0027f7f4, + 0x002837f4, 0x002877f4, 0x0028b7f4, 0x0028f7f4, + 0x002937f4, 0x002977f4, 0x0029b7f4, 0x0029f7f4, + 0x002a37f4, 0x002a77f4, 0x002ab7f4, 0x002af7f4, + 0x002b37f4, 0x002b77f4, 0x002bb7f4, 0x002bf7f4, + 0x002c37f4, 0x002c77f4, 0x002cb7f4, 0x002cf7f4, + 0x002d37f4, 0x002d77f4, 0x002db7f4, 0x002df7f4, + 0x002e37f4, 0x002e77f4, 0x002eb7f4, 0x002ef7f4, + 0x002f37f4, 0x002f77f4, 0x002fb7f4, 0x002ff7f4, + 0x003037f4, 0x003077f4, 0x0030b7f4, 0x0030f7f4, + 0x003137f4, 0x003177f4, 0x0031b7f4, 0x0031f7f4, + 
0x003237f4, 0x003277f4, 0x0032b7f4, 0x0032f7f4, + 0x003337f4, 0x003377f4, 0x0033b7f4, 0x0033f7f4, + 0x003437f4, 0x003477f4, 0x0034b7f4, 0x0034f7f4, + 0x003537f4, 0x003577f4, 0x0035b7f4, 0x0035f7f4, + 0x003637f4, 0x003677f4, 0x0036b7f4, 0x0036f7f4, + 0x003737f4, 0x003777f4, 0x0037b7f4, 0x0037f7f4, + 0x003837f4, 0x003877f4, 0x0038b7f4, 0x0038f7f4, + 0x003937f4, 0x003977f4, 0x0039b7f4, 0x0039f7f4, + 0x003a37f4, 0x003a77f4, 0x003ab7f4, 0x003af7f4, + 0x003b37f4, 0x003b77f4, 0x003bb7f4, 0x003bf7f4, + 0x003c37f4, 0x003c77f4, 0x003cb7f4, 0x003cf7f4, + 0x003d37f4, 0x003d77f4, 0x003db7f4, 0x003df7f4, + 0x003e37f4, 0x003e77f4, 0x003eb7f4, 0x003ef7f4, + 0x003f37f4, 0x003f77f4, 0x003fb7f4, 0x003ff7f4, + 0x004037f4, 0x004077f4, 0x0040b7f4, 0x0040f7f4, + 0x004137f4, 0x004177f4, 0x0041b7f4, 0x0041f7f4, + 0x004237f4, 0x004277f4, 0x0042b7f4, 0x0042f7f4, + 0x004337f4, 0x004377f4, 0x0043b7f4, 0x0043f7f4, + 0x004437f4, 0x004477f4, 0x0044b7f4, 0x0044f7f4, + 0x004537f4, 0x004577f4, 0x0045b7f4, 0x0045f7f4, + 0x004637f4, 0x004677f4, 0x0046b7f4, 0x0046f7f4, + 0x004737f4, 0x004777f4, 0x0047b7f4, 0x0047f7f4, + 0x004837f4, 0x004877f4, 0x0048b7f4, 0x0048f7f4, + 0x004937f4, 0x004977f4, 0x0049b7f4, 0x0049f7f4, + 0x004a37f4, 0x004a77f4, 0x004ab7f4, 0x004af7f4, + 0x004b37f4, 0x004b77f4, 0x004bb7f4, 0x004bf7f4, + 0x004c37f4, 0x004c77f4, 0x004cb7f4, 0x004cf7f4, + 0x004d37f4, 0x004d77f4, 0x004db7f4, 0x004df7f4, + 0x004e37f4, 0x004e77f4, 0x004eb7f4, 0x004ef7f4, + 0x004f37f4, 0x004f77f4, 0x004fb7f4, 0x004ff7f4, + 0x005037f4, 0x005077f4, 0x0050b7f4, 0x0050f7f4, + 0x005137f4, 0x005177f4, 0x0051b7f4, 0x0051f7f4, + 0x005237f4, 0x005277f4, 0x0052b7f4, 0x0052f7f4, + 0x005337f4, 0x005377f4, 0x0053b7f4, 0x0053f7f4, + 0x005437f4, 0x005477f4, 0x0054b7f4, 0x0054f7f4, + 0x005537f4, 0x005577f4, 0x0055b7f4, 0x0055f7f4, + 0x005637f4, 0x005677f4, 0x0056b7f4, 0x0056f7f4, + 0x005737f4, 0x005777f4, 0x0057b7f4, 0x0057f7f4, + 0x005837f4, 0x005877f4, 0x0058b7f4, 0x0058f7f4, + 0x005937f4, 0x005977f4, 0x0059b7f4, 0x0059f7f4, + 
0x005a37f4, 0x005a77f4, 0x005ab7f4, 0x005af7f4, + 0x005b37f4, 0x005b77f4, 0x005bb7f4, 0x005bf7f4, + 0x005c37f4, 0x005c77f4, 0x005cb7f4, 0x005cf7f4, + 0x005d37f4, 0x005d77f4, 0x005db7f4, 0x005df7f4, + 0x005e37f4, 0x005e77f4, 0x005eb7f4, 0x005ef7f4, + 0x005f37f4, 0x005f77f4, 0x005fb7f4, 0x005ff7f4, + 0x006037f4, 0x006077f4, 0x0060b7f4, 0x0060f7f4, + 0x006137f4, 0x006177f4, 0x0061b7f4, 0x0061f7f4, + 0x006237f4, 0x006277f4, 0x0062b7f4, 0x0062f7f4, + 0x006337f4, 0x006377f4, 0x0063b7f4, 0x0063f7f4, + 0x006437f4, 0x006477f4, 0x0064b7f4, 0x0064f7f4, + 0x006537f4, 0x006577f4, 0x0065b7f4, 0x0065f7f4, + 0x006637f4, 0x006677f4, 0x0066b7f4, 0x0066f7f4, + 0x006737f4, 0x006777f4, 0x0067b7f4, 0x0067f7f4, + 0x006837f4, 0x006877f4, 0x0068b7f4, 0x0068f7f4, + 0x006937f4, 0x006977f4, 0x0069b7f4, 0x0069f7f4, + 0x006a37f4, 0x006a77f4, 0x006ab7f4, 0x006af7f4, + 0x006b37f4, 0x006b77f4, 0x006bb7f4, 0x006bf7f4, + 0x006c37f4, 0x006c77f4, 0x006cb7f4, 0x006cf7f4, + 0x006d37f4, 0x006d77f4, 0x006db7f4, 0x006df7f4, + 0x006e37f4, 0x006e77f4, 0x006eb7f4, 0x006ef7f4, + 0x006f37f4, 0x006f77f4, 0x006fb7f4, 0x006ff7f4, + 0x007037f4, 0x007077f4, 0x0070b7f4, 0x0070f7f4, + 0x007137f4, 0x007177f4, 0x0071b7f4, 0x0071f7f4, + 0x007237f4, 0x007277f4, 0x0072b7f4, 0x0072f7f4, + 0x007337f4, 0x007377f4, 0x0073b7f4, 0x0073f7f4, + 0x007437f4, 0x007477f4, 0x0074b7f4, 0x0074f7f4, + 0x007537f4, 0x007577f4, 0x0075b7f4, 0x0075f7f4, + 0x007637f4, 0x007677f4, 0x0076b7f4, 0x0076f7f4, + 0x007737f4, 0x007777f4, 0x0077b7f4, 0x0077f7f4, + 0x007837f4, 0x007877f4, 0x0078b7f4, 0x0078f7f4, + 0x007937f4, 0x007977f4, 0x0079b7f4, 0x0079f7f4, + 0x007a37f4, 0x007a77f4, 0x007ab7f4, 0x007af7f4, + 0x007b37f4, 0x007b77f4, 0x007bb7f4, 0x007bf7f4, + 0x007c37f4, 0x007c77f4, 0x007cb7f4, 0x007cf7f4, + 0x007d37f4, 0x007d77f4, 0x007db7f4, 0x007df7f4, + 0x007e37f4, 0x007e77f4, 0x007eb7f4, 0x007ef7f4, + 0x007f37f4, 0x007f77f4, 0x007fb7f4, 0x007ff7f4, + 0x008037f4, 0x008077f4, 0x0080b7f4, 0x0080f7f4, + 0x008137f4, 0x008177f4, 0x0081b7f4, 0x0081f7f4, + 
0x008237f4, 0x008277f4, 0x0082b7f4, 0x0082f7f4, + 0x008337f4, 0x008377f4, 0x0083b7f4, 0x0083f7f4, + 0x008437f4, 0x008477f4, 0x0084b7f4, 0x0084f7f4, + 0x008537f4, 0x008577f4, 0x0085b7f4, 0x0085f7f4, + 0x008637f4, 0x008677f4, 0x0086b7f4, 0x0086f7f4, + 0x008737f4, 0x008777f4, 0x0087b7f4, 0x0087f7f4, + 0x008837f4, 0x008877f4, 0x0088b7f4, 0x0088f7f4, + 0x008937f4, 0x008977f4, 0x0089b7f4, 0x0089f7f4, + 0x008a37f4, 0x008a77f4, 0x008ab7f4, 0x008af7f4, + 0x008b37f4, 0x008b77f4, 0x008bb7f4, 0x008bf7f4, + 0x008c37f4, 0x008c77f4, 0x008cb7f4, 0x008cf7f4, + 0x008d37f4, 0x008d77f4, 0x008db7f4, 0x008df7f4, + 0x008e37f4, 0x008e77f4, 0x008eb7f4, 0x008ef7f4, + 0x008f37f4, 0x008f77f4, 0x008fb7f4, 0x008ff7f4, + 0x009037f4, 0x009077f4, 0x0090b7f4, 0x0090f7f4, + 0x009137f4, 0x009177f4, 0x0091b7f4, 0x0091f7f4, + 0x009237f4, 0x009277f4, 0x0092b7f4, 0x0092f7f4, + 0x009337f4, 0x009377f4, 0x0093b7f4, 0x0093f7f4, + 0x009437f4, 0x009477f4, 0x0094b7f4, 0x0094f7f4, + 0x009537f4, 0x009577f4, 0x0095b7f4, 0x0095f7f4, + 0x009637f4, 0x009677f4, 0x0096b7f4, 0x0096f7f4, + 0x009737f4, 0x009777f4, 0x0097b7f4, 0x0097f7f4, + 0x009837f4, 0x009877f4, 0x0098b7f4, 0x0098f7f4, + 0x009937f4, 0x009977f4, 0x0099b7f4, 0x0099f7f4, + 0x009a37f4, 0x009a77f4, 0x009ab7f4, 0x009af7f4, + 0x009b37f4, 0x009b77f4, 0x009bb7f4, 0x009bf7f4, + 0x009c37f4, 0x009c77f4, 0x009cb7f4, 0x009cf7f4, + 0x009d37f4, 0x009d77f4, 0x009db7f4, 0x009df7f4, + 0x009e37f4, 0x009e77f4, 0x009eb7f4, 0x009ef7f4, + 0x009f37f4, 0x009f77f4, 0x009fb7f4, 0x009ff7f4, + 0x00a037f4, 0x00a077f4, 0x00a0b7f4, 0x00a0f7f4, + 0x00a137f4, 0x00a177f4, 0x00a1b7f4, 0x00a1f7f4, + 0x00a237f4, 0x00a277f4, 0x00a2b7f4, 0x00a2f7f4, + 0x00a337f4, 0x00a377f4, 0x00a3b7f4, 0x00a3f7f4, + 0x00a437f4, 0x00a477f4, 0x00a4b7f4, 0x00a4f7f4, + 0x00a537f4, 0x00a577f4, 0x00a5b7f4, 0x00a5f7f4, + 0x00a637f4, 0x00a677f4, 0x00a6b7f4, 0x00a6f7f4, + 0x00a737f4, 0x00a777f4, 0x00a7b7f4, 0x00a7f7f4, + 0x00a837f4, 0x00a877f4, 0x00a8b7f4, 0x00a8f7f4, + 0x00a937f4, 0x00a977f4, 0x00a9b7f4, 0x00a9f7f4, + 
0x00aa37f4, 0x00aa77f4, 0x00aab7f4, 0x00aaf7f4, + 0x00ab37f4, 0x00ab77f4, 0x00abb7f4, 0x00abf7f4, + 0x00ac37f4, 0x00ac77f4, 0x00acb7f4, 0x00acf7f4, + 0x00ad37f4, 0x00ad77f4, 0x00adb7f4, 0x00adf7f4, + 0x00ae37f4, 0x00ae77f4, 0x00aeb7f4, 0x00aef7f4, + 0x00af37f4, 0x00af77f4, 0x00afb7f4, 0x00aff7f4, + 0x00b037f4, 0x00b077f4, 0x00b0b7f4, 0x00b0f7f4, + 0x00b137f4, 0x00b177f4, 0x00b1b7f4, 0x00b1f7f4, + 0x00b237f4, 0x00b277f4, 0x00b2b7f4, 0x00b2f7f4, + 0x00b337f4, 0x00b377f4, 0x00b3b7f4, 0x00b3f7f4, + 0x00b437f4, 0x00b477f4, 0x00b4b7f4, 0x00b4f7f4, + 0x00b537f4, 0x00b577f4, 0x00b5b7f4, 0x00b5f7f4, + 0x00b637f4, 0x00b677f4, 0x00b6b7f4, 0x00b6f7f4, + 0x00b737f4, 0x00b777f4, 0x00b7b7f4, 0x00b7f7f4, + 0x00b837f4, 0x00b877f4, 0x00b8b7f4, 0x00b8f7f4, + 0x00b937f4, 0x00b977f4, 0x00b9b7f4, 0x00b9f7f4, + 0x00ba37f4, 0x00ba77f4, 0x00bab7f4, 0x00baf7f4, + 0x00bb37f4, 0x00bb77f4, 0x00bbb7f4, 0x00bbf7f4, + 0x00bc37f4, 0x00bc77f4, 0x00bcb7f4, 0x00bcf7f4, + 0x00bd37f4, 0x00bd77f4, 0x00bdb7f4, 0x00bdf7f4, + 0x00be37f4, 0x00be77f4, 0x00beb7f4, 0x00bef7f4, + 0x00bf37f4, 0x00bf77f4, 0x00bfb7f4, 0x00bff7f4, + 0x00c037f4, 0x00c077f4, 0x00c0b7f4, 0x00c0f7f4, + 0x00c137f4, 0x00c177f4, 0x00c1b7f4, 0x00c1f7f4, + 0x00c237f4, 0x00c277f4, 0x00c2b7f4, 0x00c2f7f4, + 0x00c337f4, 0x00c377f4, 0x00c3b7f4, 0x00c3f7f4, + 0x00c437f4, 0x00c477f4, 0x00c4b7f4, 0x00c4f7f4, + 0x00c537f4, 0x00c577f4, 0x00c5b7f4, 0x00c5f7f4, + 0x00c637f4, 0x00c677f4, 0x00c6b7f4, 0x00c6f7f4, + 0x00c737f4, 0x00c777f4, 0x00c7b7f4, 0x00c7f7f4, + 0x00c837f4, 0x00c877f4, 0x00c8b7f4, 0x00c8f7f4, + 0x00c937f4, 0x00c977f4, 0x00c9b7f4, 0x00c9f7f4, + 0x00ca37f4, 0x00ca77f4, 0x00cab7f4, 0x00caf7f4, + 0x00cb37f4, 0x00cb77f4, 0x00cbb7f4, 0x00cbf7f4, + 0x00cc37f4, 0x00cc77f4, 0x00ccb7f4, 0x00ccf7f4, + 0x00cd37f4, 0x00cd77f4, 0x00cdb7f4, 0x00cdf7f4, + 0x00ce37f4, 0x00ce77f4, 0x00ceb7f4, 0x00cef7f4, + 0x00cf37f4, 0x00cf77f4, 0x00cfb7f4, 0x00cff7f4, + 0x00d037f4, 0x00d077f4, 0x00d0b7f4, 0x00d0f7f4, + 0x00d137f4, 0x00d177f4, 0x00d1b7f4, 0x00d1f7f4, + 
0x00d237f4, 0x00d277f4, 0x00d2b7f4, 0x00d2f7f4, + 0x00d337f4, 0x00d377f4, 0x00d3b7f4, 0x00d3f7f4, + 0x00d437f4, 0x00d477f4, 0x00d4b7f4, 0x00d4f7f4, + 0x00d537f4, 0x00d577f4, 0x00d5b7f4, 0x00d5f7f4, + 0x00d637f4, 0x00d677f4, 0x00d6b7f4, 0x00d6f7f4, + 0x00d737f4, 0x00d777f4, 0x00d7b7f4, 0x00d7f7f4, + 0x00d837f4, 0x00d877f4, 0x00d8b7f4, 0x00d8f7f4, + 0x00d937f4, 0x00d977f4, 0x00d9b7f4, 0x00d9f7f4, + 0x00da37f4, 0x00da77f4, 0x00dab7f4, 0x00daf7f4, + 0x00db37f4, 0x00db77f4, 0x00dbb7f4, 0x00dbf7f4, + 0x00dc37f4, 0x00dc77f4, 0x00dcb7f4, 0x00dcf7f4, + 0x00dd37f4, 0x00dd77f4, 0x00ddb7f4, 0x00ddf7f4, + 0x00de37f4, 0x00de77f4, 0x00deb7f4, 0x00def7f4, + 0x00df37f4, 0x00df77f4, 0x00dfb7f4, 0x00dff7f4, + 0x00e037f4, 0x00e077f4, 0x00e0b7f4, 0x00e0f7f4, + 0x00e137f4, 0x00e177f4, 0x00e1b7f4, 0x00e1f7f4, + 0x00e237f4, 0x00e277f4, 0x00e2b7f4, 0x00e2f7f4, + 0x00e337f4, 0x00e377f4, 0x00e3b7f4, 0x00e3f7f4, + 0x00e437f4, 0x00e477f4, 0x00e4b7f4, 0x00e4f7f4, + 0x00e537f4, 0x00e577f4, 0x00e5b7f4, 0x00e5f7f4, + 0x00e637f4, 0x00e677f4, 0x00e6b7f4, 0x00e6f7f4, + 0x00e737f4, 0x00e777f4, 0x00e7b7f4, 0x00e7f7f4, + 0x00e837f4, 0x00e877f4, 0x00e8b7f4, 0x00e8f7f4, + 0x00e937f4, 0x00e977f4, 0x00e9b7f4, 0x00e9f7f4, + 0x00ea37f4, 0x00ea77f4, 0x00eab7f4, 0x00eaf7f4, + 0x00eb37f4, 0x00eb77f4, 0x00ebb7f4, 0x00ebf7f4, + 0x00ec37f4, 0x00ec77f4, 0x00ecb7f4, 0x00ecf7f4, + 0x00ed37f4, 0x00ed77f4, 0x00edb7f4, 0x00edf7f4, + 0x00ee37f4, 0x00ee77f4, 0x00eeb7f4, 0x00eef7f4, + 0x00ef37f4, 0x00ef77f4, 0x00efb7f4, 0x00eff7f4, + 0x00f037f4, 0x00f077f4, 0x00f0b7f4, 0x00f0f7f4, + 0x00f137f4, 0x00f177f4, 0x00f1b7f4, 0x00f1f7f4, + 0x00f237f4, 0x00f277f4, 0x00f2b7f4, 0x00f2f7f4, + 0x00f337f4, 0x00f377f4, 0x00f3b7f4, 0x00f3f7f4, + 0x00f437f4, 0x00f477f4, 0x00f4b7f4, 0x00f4f7f4, + 0x00f537f4, 0x00f577f4, 0x00f5b7f4, 0x00f5f7f4, + 0x00f637f4, 0x00f677f4, 0x00f6b7f4, 0x00f6f7f4, + 0x00f737f4, 0x00f777f4, 0x00f7b7f4, 0x00f7f7f4, + 0x00f837f4, 0x00f877f4, 0x00f8b7f4, 0x00f8f7f4, + 0x00f937f4, 0x00f977f4, 0x00f9b7f4, 0x00f9f7f4, + 
0x00fa37f4, 0x00fa77f4, 0x00fab7f4, 0x00faf7f4, + 0x00fb37f4, 0x00fb77f4, 0x00fbb7f4, 0x00fbf7f4, + 0x00fc37f4, 0x00fc77f4, 0x00fcb7f4, 0x00fcf7f4, + 0x00fd37f4, 0x00fd77f4, 0x00fdb7f4, 0x00fdf7f4, + 0x00fe37f4, 0x00fe77f4, 0x00feb7f4, 0x00fef7f4, + 0x00ff37f4, 0x00ff77f4, 0x00ffb7f4, 0x00fff7f4, + 0x010037f4, 0x010077f4, 0x0100b7f4, 0x0100f7f4, + 0x010137f4, 0x010177f4, 0x0101b7f4, 0x0101f7f4, + 0x010237f4, 0x010277f4, 0x0102b7f4, 0x0102f7f4, + 0x010337f4, 0x010377f4, 0x0103b7f4, 0x0103f7f4, + 0x010437f4, 0x010477f4, 0x0104b7f4, 0x0104f7f4, + 0x010537f4, 0x010577f4, 0x0105b7f4, 0x0105f7f4, + 0x010637f4, 0x010677f4, 0x0106b7f4, 0x0106f7f4, + 0x010737f4, 0x010777f4, 0x0107b7f4, 0x0107f7f4, + 0x010837f4, 0x010877f4, 0x0108b7f4, 0x0108f7f4, + 0x010937f4, 0x010977f4, 0x0109b7f4, 0x0109f7f4, + 0x010a37f4, 0x010a77f4, 0x010ab7f4, 0x010af7f4, + 0x010b37f4, 0x010b77f4, 0x010bb7f4, 0x010bf7f4, + 0x010c37f4, 0x010c77f4, 0x010cb7f4, 0x010cf7f4, + 0x010d37f4, 0x010d77f4, 0x010db7f4, 0x010df7f4, + 0x010e37f4, 0x010e77f4, 0x010eb7f4, 0x010ef7f4, + 0x010f37f4, 0x010f77f4, 0x010fb7f4, 0x010ff7f4, + 0x011037f4, 0x011077f4, 0x0110b7f4, 0x0110f7f4, + 0x011137f4, 0x011177f4, 0x0111b7f4, 0x0111f7f4, + 0x011237f4, 0x011277f4, 0x0112b7f4, 0x0112f7f4, + 0x011337f4, 0x011377f4, 0x0113b7f4, 0x0113f7f4, + 0x011437f4, 0x011477f4, 0x0114b7f4, 0x0114f7f4, + 0x011537f4, 0x011577f4, 0x0115b7f4, 0x0115f7f4, + 0x011637f4, 0x011677f4, 0x0116b7f4, 0x0116f7f4, + 0x011737f4, 0x011777f4, 0x0117b7f4, 0x0117f7f4, + 0x011837f4, 0x011877f4, 0x0118b7f4, 0x0118f7f4, + 0x011937f4, 0x011977f4, 0x0119b7f4, 0x0119f7f4, + 0x011a37f4, 0x011a77f4, 0x011ab7f4, 0x011af7f4, + 0x011b37f4, 0x011b77f4, 0x011bb7f4, 0x011bf7f4, + 0x011c37f4, 0x011c77f4, 0x011cb7f4, 0x011cf7f4, + 0x011d37f4, 0x011d77f4, 0x011db7f4, 0x011df7f4, + 0x011e37f4, 0x011e77f4, 0x011eb7f4, 0x011ef7f4, + 0x011f37f4, 0x011f77f4, 0x011fb7f4, 0x011ff7f4, + 0x012037f4, 0x012077f4, 0x0120b7f4, 0x0120f7f4, + 0x012137f4, 0x012177f4, 0x0121b7f4, 0x0121f7f4, + 
0x012237f4, 0x012277f4, 0x0122b7f4, 0x0122f7f4, + 0x012337f4, 0x012377f4, 0x0123b7f4, 0x0123f7f4, + 0x012437f4, 0x012477f4, 0x0124b7f4, 0x0124f7f4, + 0x012537f4, 0x012577f4, 0x0125b7f4, 0x0125f7f4, + 0x012637f4, 0x012677f4, 0x0126b7f4, 0x0126f7f4, + 0x012737f4, 0x012777f4, 0x0127b7f4, 0x0127f7f4, + 0x012837f4, 0x012877f4, 0x0128b7f4, 0x0128f7f4, + 0x012937f4, 0x012977f4, 0x0129b7f4, 0x0129f7f4, + 0x012a37f4, 0x012a77f4, 0x012ab7f4, 0x012af7f4, + 0x012b37f4, 0x012b77f4, 0x012bb7f4, 0x012bf7f4, + 0x012c37f4, 0x012c77f4, 0x012cb7f4, 0x012cf7f4, + 0x012d37f4, 0x012d77f4, 0x012db7f4, 0x012df7f4, + 0x012e37f4, 0x012e77f4, 0x012eb7f4, 0x012ef7f4, + 0x012f37f4, 0x012f77f4, 0x012fb7f4, 0x012ff7f4, + 0x013037f4, 0x013077f4, 0x0130b7f4, 0x0130f7f4, + 0x013137f4, 0x013177f4, 0x0131b7f4, 0x0131f7f4, + 0x013237f4, 0x013277f4, 0x0132b7f4, 0x0132f7f4, + 0x013337f4, 0x013377f4, 0x0133b7f4, 0x0133f7f4, + 0x013437f4, 0x013477f4, 0x0134b7f4, 0x0134f7f4, + 0x013537f4, 0x013577f4, 0x0135b7f4, 0x0135f7f4, + 0x013637f4, 0x013677f4, 0x0136b7f4, 0x0136f7f4, + 0x013737f4, 0x013777f4, 0x0137b7f4, 0x0137f7f4, + 0x013837f4, 0x013877f4, 0x0138b7f4, 0x0138f7f4, + 0x013937f4, 0x013977f4, 0x0139b7f4, 0x0139f7f4, + 0x013a37f4, 0x013a77f4, 0x013ab7f4, 0x013af7f4, + 0x013b37f4, 0x013b77f4, 0x013bb7f4, 0x013bf7f4, + 0x013c37f4, 0x013c77f4, 0x013cb7f4, 0x013cf7f4, + 0x013d37f4, 0x013d77f4, 0x013db7f4, 0x013df7f4, + 0x013e37f4, 0x013e77f4, 0x013eb7f4, 0x013ef7f4, + 0x013f37f4, 0x013f77f4, 0x013fb7f4, 0x013ff7f4, + 0x014037f4, 0x014077f4, 0x0140b7f4, 0x0140f7f4, + 0x014137f4, 0x014177f4, 0x0141b7f4, 0x0141f7f4, + 0x014237f4, 0x014277f4, 0x0142b7f4, 0x0142f7f4, + 0x014337f4, 0x014377f4, 0x0143b7f4, 0x0143f7f4, + 0x014437f4, 0x014477f4, 0x0144b7f4, 0x0144f7f4, + 0x014537f4, 0x014577f4, 0x0145b7f4, 0x0145f7f4, + 0x014637f4, 0x014677f4, 0x0146b7f4, 0x0146f7f4, + 0x014737f4, 0x014777f4, 0x0147b7f4, 0x0147f7f4, + 0x014837f4, 0x014877f4, 0x0148b7f4, 0x0148f7f4, + 0x014937f4, 0x014977f4, 0x0149b7f4, 0x0149f7f4, + 
0x014a37f4, 0x014a77f4, 0x014ab7f4, 0x014af7f4, + 0x014b37f4, 0x014b77f4, 0x014bb7f4, 0x014bf7f4, + 0x014c37f4, 0x014c77f4, 0x014cb7f4, 0x014cf7f4, + 0x014d37f4, 0x014d77f4, 0x014db7f4, 0x014df7f4, + 0x014e37f4, 0x014e77f4, 0x014eb7f4, 0x014ef7f4, + 0x014f37f4, 0x014f77f4, 0x014fb7f4, 0x014ff7f4, + 0x015037f4, 0x015077f4, 0x0150b7f4, 0x0150f7f4, + 0x015137f4, 0x015177f4, 0x0151b7f4, 0x0151f7f4, + 0x015237f4, 0x015277f4, 0x0152b7f4, 0x0152f7f4, + 0x015337f4, 0x015377f4, 0x0153b7f4, 0x0153f7f4, + 0x015437f4, 0x015477f4, 0x0154b7f4, 0x0154f7f4, + 0x015537f4, 0x015577f4, 0x0155b7f4, 0x0155f7f4, + 0x015637f4, 0x015677f4, 0x0156b7f4, 0x0156f7f4, + 0x015737f4, 0x015777f4, 0x0157b7f4, 0x0157f7f4, + 0x015837f4, 0x015877f4, 0x0158b7f4, 0x0158f7f4, + 0x015937f4, 0x015977f4, 0x0159b7f4, 0x0159f7f4, + 0x015a37f4, 0x015a77f4, 0x015ab7f4, 0x015af7f4, + 0x015b37f4, 0x015b77f4, 0x015bb7f4, 0x015bf7f4, + 0x015c37f4, 0x015c77f4, 0x015cb7f4, 0x015cf7f4, + 0x015d37f4, 0x015d77f4, 0x015db7f4, 0x015df7f4, + 0x015e37f4, 0x015e77f4, 0x015eb7f4, 0x015ef7f4, + 0x015f37f4, 0x015f77f4, 0x015fb7f4, 0x015ff7f4, + 0x016037f4, 0x016077f4, 0x0160b7f4, 0x0160f7f4, + 0x016137f4, 0x016177f4, 0x0161b7f4, 0x0161f7f4, + 0x016237f4, 0x016277f4, 0x0162b7f4, 0x0162f7f4, + 0x016337f4, 0x016377f4, 0x0163b7f4, 0x0163f7f4, + 0x016437f4, 0x016477f4, 0x0164b7f4, 0x0164f7f4, + 0x016537f4, 0x016577f4, 0x0165b7f4, 0x0165f7f4, + 0x016637f4, 0x016677f4, 0x0166b7f4, 0x0166f7f4, + 0x016737f4, 0x016777f4, 0x0167b7f4, 0x0167f7f4, + 0x016837f4, 0x016877f4, 0x0168b7f4, 0x0168f7f4, + 0x016937f4, 0x016977f4, 0x0169b7f4, 0x0169f7f4, + 0x016a37f4, 0x016a77f4, 0x016ab7f4, 0x016af7f4, + 0x016b37f4, 0x016b77f4, 0x016bb7f4, 0x016bf7f4, + 0x016c37f4, 0x016c77f4, 0x016cb7f4, 0x016cf7f4, + 0x016d37f4, 0x016d77f4, 0x016db7f4, 0x016df7f4, + 0x016e37f4, 0x016e77f4, 0x016eb7f4, 0x016ef7f4, + 0x016f37f4, 0x016f77f4, 0x016fb7f4, 0x016ff7f4, + 0x017037f4, 0x017077f4, 0x0170b7f4, 0x0170f7f4, + 0x017137f4, 0x017177f4, 0x0171b7f4, 0x0171f7f4, + 
0x017237f4, 0x017277f4, 0x0172b7f4, 0x0172f7f4, + 0x017337f4, 0x017377f4, 0x0173b7f4, 0x0173f7f4, + 0x017437f4, 0x017477f4, 0x0174b7f4, 0x0174f7f4, + 0x017537f4, 0x017577f4, 0x0175b7f4, 0x0175f7f4, + 0x017637f4, 0x017677f4, 0x0176b7f4, 0x0176f7f4, + 0x017737f4, 0x017777f4, 0x0177b7f4, 0x0177f7f4, + 0x017837f4, 0x017877f4, 0x0178b7f4, 0x0178f7f4, + 0x017937f4, 0x017977f4, 0x0179b7f4, 0x0179f7f4, + 0x017a37f4, 0x017a77f4, 0x017ab7f4, 0x017af7f4, + 0x017b37f4, 0x017b77f4, 0x017bb7f4, 0x017bf7f4, + 0x017c37f4, 0x017c77f4, 0x017cb7f4, 0x017cf7f4, + 0x017d37f4, 0x017d77f4, 0x017db7f4, 0x017df7f4, + 0x017e37f4, 0x017e77f4, 0x017eb7f4, 0x017ef7f4, + 0x017f37f4, 0x017f77f4, 0x017fb7f4, 0x017ff7f4, + 0x018037f4, 0x018077f4, 0x0180b7f4, 0x0180f7f4, + 0x018137f4, 0x018177f4, 0x0181b7f4, 0x0181f7f4, + 0x018237f4, 0x018277f4, 0x0182b7f4, 0x0182f7f4, + 0x018337f4, 0x018377f4, 0x0183b7f4, 0x0183f7f4, + 0x018437f4, 0x018477f4, 0x0184b7f4, 0x0184f7f4, + 0x018537f4, 0x018577f4, 0x0185b7f4, 0x0185f7f4, + 0x018637f4, 0x018677f4, 0x0186b7f4, 0x0186f7f4, + 0x018737f4, 0x018777f4, 0x0187b7f4, 0x0187f7f4, + 0x018837f4, 0x018877f4, 0x0188b7f4, 0x0188f7f4, + 0x018937f4, 0x018977f4, 0x0189b7f4, 0x0189f7f4, + 0x018a37f4, 0x018a77f4, 0x018ab7f4, 0x018af7f4, + 0x018b37f4, 0x018b77f4, 0x018bb7f4, 0x018bf7f4, + 0x018c37f4, 0x018c77f4, 0x018cb7f4, 0x018cf7f4, + 0x018d37f4, 0x018d77f4, 0x018db7f4, 0x018df7f4, + 0x018e37f4, 0x018e77f4, 0x018eb7f4, 0x018ef7f4, + 0x018f37f4, 0x018f77f4, 0x018fb7f4, 0x018ff7f4, + 0x019037f4, 0x019077f4, 0x0190b7f4, 0x0190f7f4, + 0x019137f4, 0x019177f4, 0x0191b7f4, 0x0191f7f4, + 0x019237f4, 0x019277f4, 0x0192b7f4, 0x0192f7f4, + 0x019337f4, 0x019377f4, 0x0193b7f4, 0x0193f7f4, + 0x019437f4, 0x019477f4, 0x0194b7f4, 0x0194f7f4, + 0x019537f4, 0x019577f4, 0x0195b7f4, 0x0195f7f4, + 0x019637f4, 0x019677f4, 0x0196b7f4, 0x0196f7f4, + 0x019737f4, 0x019777f4, 0x0197b7f4, 0x0197f7f4, + 0x019837f4, 0x019877f4, 0x0198b7f4, 0x0198f7f4, + 0x019937f4, 0x019977f4, 0x0199b7f4, 0x0199f7f4, + 
0x019a37f4, 0x019a77f4, 0x019ab7f4, 0x019af7f4, + 0x019b37f4, 0x019b77f4, 0x019bb7f4, 0x019bf7f4, + 0x019c37f4, 0x019c77f4, 0x019cb7f4, 0x019cf7f4, + 0x019d37f4, 0x019d77f4, 0x019db7f4, 0x019df7f4, + 0x019e37f4, 0x019e77f4, 0x019eb7f4, 0x019ef7f4, + 0x019f37f4, 0x019f77f4, 0x019fb7f4, 0x019ff7f4, + 0x01a037f4, 0x01a077f4, 0x01a0b7f4, 0x01a0f7f4, + 0x01a137f4, 0x01a177f4, 0x01a1b7f4, 0x01a1f7f4, + 0x01a237f4, 0x01a277f4, 0x01a2b7f4, 0x01a2f7f4, + 0x01a337f4, 0x01a377f4, 0x01a3b7f4, 0x01a3f7f4, + 0x01a437f4, 0x01a477f4, 0x01a4b7f4, 0x01a4f7f4, + 0x01a537f4, 0x01a577f4, 0x01a5b7f4, 0x01a5f7f4, + 0x01a637f4, 0x01a677f4, 0x01a6b7f4, 0x01a6f7f4, + 0x01a737f4, 0x01a777f4, 0x01a7b7f4, 0x01a7f7f4, + 0x01a837f4, 0x01a877f4, 0x01a8b7f4, 0x01a8f7f4, + 0x01a937f4, 0x01a977f4, 0x01a9b7f4, 0x01a9f7f4, + 0x01aa37f4, 0x01aa77f4, 0x01aab7f4, 0x01aaf7f4, + 0x01ab37f4, 0x01ab77f4, 0x01abb7f4, 0x01abf7f4, + 0x01ac37f4, 0x01ac77f4, 0x01acb7f4, 0x01acf7f4, + 0x01ad37f4, 0x01ad77f4, 0x01adb7f4, 0x01adf7f4, + 0x01ae37f4, 0x01ae77f4, 0x01aeb7f4, 0x01aef7f4, + 0x01af37f4, 0x01af77f4, 0x01afb7f4, 0x01aff7f4, + 0x01b037f4, 0x01b077f4, 0x01b0b7f4, 0x01b0f7f4, + 0x01b137f4, 0x01b177f4, 0x01b1b7f4, 0x01b1f7f4, + 0x01b237f4, 0x01b277f4, 0x01b2b7f4, 0x01b2f7f4, + 0x01b337f4, 0x01b377f4, 0x01b3b7f4, 0x01b3f7f4, + 0x01b437f4, 0x01b477f4, 0x01b4b7f4, 0x01b4f7f4, + 0x01b537f4, 0x01b577f4, 0x01b5b7f4, 0x01b5f7f4, + 0x01b637f4, 0x01b677f4, 0x01b6b7f4, 0x01b6f7f4, + 0x01b737f4, 0x01b777f4, 0x01b7b7f4, 0x01b7f7f4, + 0x01b837f4, 0x01b877f4, 0x01b8b7f4, 0x01b8f7f4, + 0x01b937f4, 0x01b977f4, 0x01b9b7f4, 0x01b9f7f4, + 0x01ba37f4, 0x01ba77f4, 0x01bab7f4, 0x01baf7f4, + 0x01bb37f4, 0x01bb77f4, 0x01bbb7f4, 0x01bbf7f4, + 0x01bc37f4, 0x01bc77f4, 0x01bcb7f4, 0x01bcf7f4, + 0x01bd37f4, 0x01bd77f4, 0x01bdb7f4, 0x01bdf7f4, + 0x01be37f4, 0x01be77f4, 0x01beb7f4, 0x01bef7f4, + 0x01bf37f4, 0x01bf77f4, 0x01bfb7f4, 0x01bff7f4, + 0x01c037f4, 0x01c077f4, 0x01c0b7f4, 0x01c0f7f4, + 0x01c137f4, 0x01c177f4, 0x01c1b7f4, 0x01c1f7f4, + 
0x01c237f4, 0x01c277f4, 0x01c2b7f4, 0x01c2f7f4, + 0x01c337f4, 0x01c377f4, 0x01c3b7f4, 0x01c3f7f4, + 0x01c437f4, 0x01c477f4, 0x01c4b7f4, 0x01c4f7f4, + 0x01c537f4, 0x01c577f4, 0x01c5b7f4, 0x01c5f7f4, + 0x01c637f4, 0x01c677f4, 0x01c6b7f4, 0x01c6f7f4, + 0x01c737f4, 0x01c777f4, 0x01c7b7f4, 0x01c7f7f4, + 0x01c837f4, 0x01c877f4, 0x01c8b7f4, 0x01c8f7f4, + 0x01c937f4, 0x01c977f4, 0x01c9b7f4, 0x01c9f7f4, + 0x01ca37f4, 0x01ca77f4, 0x01cab7f4, 0x01caf7f4, + 0x01cb37f4, 0x01cb77f4, 0x01cbb7f4, 0x01cbf7f4, + 0x01cc37f4, 0x01cc77f4, 0x01ccb7f4, 0x01ccf7f4, + 0x01cd37f4, 0x01cd77f4, 0x01cdb7f4, 0x01cdf7f4, + 0x01ce37f4, 0x01ce77f4, 0x01ceb7f4, 0x01cef7f4, + 0x01cf37f4, 0x01cf77f4, 0x01cfb7f4, 0x01cff7f4, + 0x01d037f4, 0x01d077f4, 0x01d0b7f4, 0x01d0f7f4, + 0x01d137f4, 0x01d177f4, 0x01d1b7f4, 0x01d1f7f4, + 0x01d237f4, 0x01d277f4, 0x01d2b7f4, 0x01d2f7f4, + 0x01d337f4, 0x01d377f4, 0x01d3b7f4, 0x01d3f7f4, + 0x01d437f4, 0x01d477f4, 0x01d4b7f4, 0x01d4f7f4, + 0x01d537f4, 0x01d577f4, 0x01d5b7f4, 0x01d5f7f4, + 0x01d637f4, 0x01d677f4, 0x01d6b7f4, 0x01d6f7f4, + 0x01d737f4, 0x01d777f4, 0x01d7b7f4, 0x01d7f7f4, + 0x01d837f4, 0x01d877f4, 0x01d8b7f4, 0x01d8f7f4, + 0x01d937f4, 0x01d977f4, 0x01d9b7f4, 0x01d9f7f4, + 0x01da37f4, 0x01da77f4, 0x01dab7f4, 0x01daf7f4, + 0x01db37f4, 0x01db77f4, 0x01dbb7f4, 0x01dbf7f4, + 0x01dc37f4, 0x01dc77f4, 0x01dcb7f4, 0x01dcf7f4, + 0x01dd37f4, 0x01dd77f4, 0x01ddb7f4, 0x01ddf7f4, + 0x01de37f4, 0x01de77f4, 0x01deb7f4, 0x01def7f4, + 0x01df37f4, 0x01df77f4, 0x01dfb7f4, 0x01dff7f4, + 0x01e037f4, 0x01e077f4, 0x01e0b7f4, 0x01e0f7f4, + 0x01e137f4, 0x01e177f4, 0x01e1b7f4, 0x01e1f7f4, + 0x01e237f4, 0x01e277f4, 0x01e2b7f4, 0x01e2f7f4, + 0x01e337f4, 0x01e377f4, 0x01e3b7f4, 0x01e3f7f4, + 0x01e437f4, 0x01e477f4, 0x01e4b7f4, 0x01e4f7f4, + 0x01e537f4, 0x01e577f4, 0x01e5b7f4, 0x01e5f7f4, + 0x01e637f4, 0x01e677f4, 0x01e6b7f4, 0x01e6f7f4, + 0x01e737f4, 0x01e777f4, 0x01e7b7f4, 0x01e7f7f4, + 0x01e837f4, 0x01e877f4, 0x01e8b7f4, 0x01e8f7f4, + 0x01e937f4, 0x01e977f4, 0x01e9b7f4, 0x01e9f7f4, + 
0x01ea37f4, 0x01ea77f4, 0x01eab7f4, 0x01eaf7f4, + 0x01eb37f4, 0x01eb77f4, 0x01ebb7f4, 0x01ebf7f4, + 0x01ec37f4, 0x01ec77f4, 0x01ecb7f4, 0x01ecf7f4, + 0x01ed37f4, 0x01ed77f4, 0x01edb7f4, 0x01edf7f4, + 0x01ee37f4, 0x01ee77f4, 0x01eeb7f4, 0x01eef7f4, + 0x01ef37f4, 0x01ef77f4, 0x01efb7f4, 0x01eff7f4, + 0x01f037f4, 0x01f077f4, 0x01f0b7f4, 0x01f0f7f4, + 0x01f137f4, 0x01f177f4, 0x01f1b7f4, 0x01f1f7f4, + 0x01f237f4, 0x01f277f4, 0x01f2b7f4, 0x01f2f7f4, + 0x01f337f4, 0x01f377f4, 0x01f3b7f4, 0x01f3f7f4, + 0x01f437f4, 0x01f477f4, 0x01f4b7f4, 0x01f4f7f4, + 0x01f537f4, 0x01f577f4, 0x01f5b7f4, 0x01f5f7f4, + 0x01f637f4, 0x01f677f4, 0x01f6b7f4, 0x01f6f7f4, + 0x01f737f4, 0x01f777f4, 0x01f7b7f4, 0x01f7f7f4, + 0x01f837f4, 0x01f877f4, 0x01f8b7f4, 0x01f8f7f4, + 0x01f937f4, 0x01f977f4, 0x01f9b7f4, 0x01f9f7f4, + 0x01fa37f4, 0x01fa77f4, 0x01fab7f4, 0x01faf7f4, + 0x01fb37f4, 0x01fb77f4, 0x01fbb7f4, 0x01fbf7f4, + 0x01fc37f4, 0x01fc77f4, 0x01fcb7f4, 0x01fcf7f4, + 0x01fd37f4, 0x01fd77f4, 0x01fdb7f4, 0x01fdf7f4, + 0x01fe37f4, 0x01fe77f4, 0x01feb7f4, 0x01fef7f4, + 0x01ff37f4, 0x01ff77f4, 0x01ffb7f4, 0x01fff7f4 }, +#else + .dist_table = { + 0x00001be9, 0x00003be9, 0x00000be8, 0x000007e9, + 0x000001e7, 0x000009e7, 0x000027ea, 0x000067ea, + 0x000003e9, 0x000013e9, 0x000023e9, 0x000033e9, + 0x000005e8, 0x00000de8, 0x000015e8, 0x00001de8, + 0x00000068, 0x00000468, 0x00000868, 0x00000c68, + 0x00001068, 0x00001468, 0x00001868, 0x00001c68, + 0x00000268, 0x00000668, 0x00000a68, 0x00000e68, + 0x00001268, 0x00001668, 0x00001a68, 0x00001e68, + 0x00000048, 0x00000248, 0x00000448, 0x00000648, + 0x00000848, 0x00000a48, 0x00000c48, 0x00000e48, + 0x00001048, 0x00001248, 0x00001448, 0x00001648, + 0x00001848, 0x00001a48, 0x00001c48, 0x00001e48, + 0x00000148, 0x00000348, 0x00000548, 0x00000748, + 0x00000948, 0x00000b48, 0x00000d48, 0x00000f48, + 0x00001148, 0x00001348, 0x00001548, 0x00001748, + 0x00001948, 0x00001b48, 0x00001d48, 0x00001f48, + 0x000000c9, 0x000002c9, 0x000004c9, 0x000006c9, + 0x000008c9, 
0x00000ac9, 0x00000cc9, 0x00000ec9, + 0x000010c9, 0x000012c9, 0x000014c9, 0x000016c9, + 0x000018c9, 0x00001ac9, 0x00001cc9, 0x00001ec9, + 0x000020c9, 0x000022c9, 0x000024c9, 0x000026c9, + 0x000028c9, 0x00002ac9, 0x00002cc9, 0x00002ec9, + 0x000030c9, 0x000032c9, 0x000034c9, 0x000036c9, + 0x000038c9, 0x00003ac9, 0x00003cc9, 0x00003ec9, + 0x0000016a, 0x0000056a, 0x0000096a, 0x00000d6a, + 0x0000116a, 0x0000156a, 0x0000196a, 0x00001d6a, + 0x0000216a, 0x0000256a, 0x0000296a, 0x00002d6a, + 0x0000316a, 0x0000356a, 0x0000396a, 0x00003d6a, + 0x0000416a, 0x0000456a, 0x0000496a, 0x00004d6a, + 0x0000516a, 0x0000556a, 0x0000596a, 0x00005d6a, + 0x0000616a, 0x0000656a, 0x0000696a, 0x00006d6a, + 0x0000716a, 0x0000756a, 0x0000796a, 0x00007d6a, + 0x000001ca, 0x000003ca, 0x000005ca, 0x000007ca, + 0x000009ca, 0x00000bca, 0x00000dca, 0x00000fca, + 0x000011ca, 0x000013ca, 0x000015ca, 0x000017ca, + 0x000019ca, 0x00001bca, 0x00001dca, 0x00001fca, + 0x000021ca, 0x000023ca, 0x000025ca, 0x000027ca, + 0x000029ca, 0x00002bca, 0x00002dca, 0x00002fca, + 0x000031ca, 0x000033ca, 0x000035ca, 0x000037ca, + 0x000039ca, 0x00003bca, 0x00003dca, 0x00003fca, + 0x000041ca, 0x000043ca, 0x000045ca, 0x000047ca, + 0x000049ca, 0x00004bca, 0x00004dca, 0x00004fca, + 0x000051ca, 0x000053ca, 0x000055ca, 0x000057ca, + 0x000059ca, 0x00005bca, 0x00005dca, 0x00005fca, + 0x000061ca, 0x000063ca, 0x000065ca, 0x000067ca, + 0x000069ca, 0x00006bca, 0x00006dca, 0x00006fca, + 0x000071ca, 0x000073ca, 0x000075ca, 0x000077ca, + 0x000079ca, 0x00007bca, 0x00007dca, 0x00007fca, + 0x0000002a, 0x0000022a, 0x0000042a, 0x0000062a, + 0x0000082a, 0x00000a2a, 0x00000c2a, 0x00000e2a, + 0x0000102a, 0x0000122a, 0x0000142a, 0x0000162a, + 0x0000182a, 0x00001a2a, 0x00001c2a, 0x00001e2a, + 0x0000202a, 0x0000222a, 0x0000242a, 0x0000262a, + 0x0000282a, 0x00002a2a, 0x00002c2a, 0x00002e2a, + 0x0000302a, 0x0000322a, 0x0000342a, 0x0000362a, + 0x0000382a, 0x00003a2a, 0x00003c2a, 0x00003e2a, + 0x0000402a, 0x0000422a, 0x0000442a, 0x0000462a, + 0x0000482a, 
0x00004a2a, 0x00004c2a, 0x00004e2a, + 0x0000502a, 0x0000522a, 0x0000542a, 0x0000562a, + 0x0000582a, 0x00005a2a, 0x00005c2a, 0x00005e2a, + 0x0000602a, 0x0000622a, 0x0000642a, 0x0000662a, + 0x0000682a, 0x00006a2a, 0x00006c2a, 0x00006e2a, + 0x0000702a, 0x0000722a, 0x0000742a, 0x0000762a, + 0x0000782a, 0x00007a2a, 0x00007c2a, 0x00007e2a, + 0x0000000a, 0x0000010a, 0x0000020a, 0x0000030a, + 0x0000040a, 0x0000050a, 0x0000060a, 0x0000070a, + 0x0000080a, 0x0000090a, 0x00000a0a, 0x00000b0a, + 0x00000c0a, 0x00000d0a, 0x00000e0a, 0x00000f0a, + 0x0000100a, 0x0000110a, 0x0000120a, 0x0000130a, + 0x0000140a, 0x0000150a, 0x0000160a, 0x0000170a, + 0x0000180a, 0x0000190a, 0x00001a0a, 0x00001b0a, + 0x00001c0a, 0x00001d0a, 0x00001e0a, 0x00001f0a, + 0x0000200a, 0x0000210a, 0x0000220a, 0x0000230a, + 0x0000240a, 0x0000250a, 0x0000260a, 0x0000270a, + 0x0000280a, 0x0000290a, 0x00002a0a, 0x00002b0a, + 0x00002c0a, 0x00002d0a, 0x00002e0a, 0x00002f0a, + 0x0000300a, 0x0000310a, 0x0000320a, 0x0000330a, + 0x0000340a, 0x0000350a, 0x0000360a, 0x0000370a, + 0x0000380a, 0x0000390a, 0x00003a0a, 0x00003b0a, + 0x00003c0a, 0x00003d0a, 0x00003e0a, 0x00003f0a, + 0x0000400a, 0x0000410a, 0x0000420a, 0x0000430a, + 0x0000440a, 0x0000450a, 0x0000460a, 0x0000470a, + 0x0000480a, 0x0000490a, 0x00004a0a, 0x00004b0a, + 0x00004c0a, 0x00004d0a, 0x00004e0a, 0x00004f0a, + 0x0000500a, 0x0000510a, 0x0000520a, 0x0000530a, + 0x0000540a, 0x0000550a, 0x0000560a, 0x0000570a, + 0x0000580a, 0x0000590a, 0x00005a0a, 0x00005b0a, + 0x00005c0a, 0x00005d0a, 0x00005e0a, 0x00005f0a, + 0x0000600a, 0x0000610a, 0x0000620a, 0x0000630a, + 0x0000640a, 0x0000650a, 0x0000660a, 0x0000670a, + 0x0000680a, 0x0000690a, 0x00006a0a, 0x00006b0a, + 0x00006c0a, 0x00006d0a, 0x00006e0a, 0x00006f0a, + 0x0000700a, 0x0000710a, 0x0000720a, 0x0000730a, + 0x0000740a, 0x0000750a, 0x0000760a, 0x0000770a, + 0x0000780a, 0x0000790a, 0x00007a0a, 0x00007b0a, + 0x00007c0a, 0x00007d0a, 0x00007e0a, 0x00007f0a, + 0x0000012b, 0x0000032b, 0x0000052b, 0x0000072b, + 0x0000092b, 
0x00000b2b, 0x00000d2b, 0x00000f2b, + 0x0000112b, 0x0000132b, 0x0000152b, 0x0000172b, + 0x0000192b, 0x00001b2b, 0x00001d2b, 0x00001f2b, + 0x0000212b, 0x0000232b, 0x0000252b, 0x0000272b, + 0x0000292b, 0x00002b2b, 0x00002d2b, 0x00002f2b, + 0x0000312b, 0x0000332b, 0x0000352b, 0x0000372b, + 0x0000392b, 0x00003b2b, 0x00003d2b, 0x00003f2b, + 0x0000412b, 0x0000432b, 0x0000452b, 0x0000472b, + 0x0000492b, 0x00004b2b, 0x00004d2b, 0x00004f2b, + 0x0000512b, 0x0000532b, 0x0000552b, 0x0000572b, + 0x0000592b, 0x00005b2b, 0x00005d2b, 0x00005f2b, + 0x0000612b, 0x0000632b, 0x0000652b, 0x0000672b, + 0x0000692b, 0x00006b2b, 0x00006d2b, 0x00006f2b, + 0x0000712b, 0x0000732b, 0x0000752b, 0x0000772b, + 0x0000792b, 0x00007b2b, 0x00007d2b, 0x00007f2b, + 0x0000812b, 0x0000832b, 0x0000852b, 0x0000872b, + 0x0000892b, 0x00008b2b, 0x00008d2b, 0x00008f2b, + 0x0000912b, 0x0000932b, 0x0000952b, 0x0000972b, + 0x0000992b, 0x00009b2b, 0x00009d2b, 0x00009f2b, + 0x0000a12b, 0x0000a32b, 0x0000a52b, 0x0000a72b, + 0x0000a92b, 0x0000ab2b, 0x0000ad2b, 0x0000af2b, + 0x0000b12b, 0x0000b32b, 0x0000b52b, 0x0000b72b, + 0x0000b92b, 0x0000bb2b, 0x0000bd2b, 0x0000bf2b, + 0x0000c12b, 0x0000c32b, 0x0000c52b, 0x0000c72b, + 0x0000c92b, 0x0000cb2b, 0x0000cd2b, 0x0000cf2b, + 0x0000d12b, 0x0000d32b, 0x0000d52b, 0x0000d72b, + 0x0000d92b, 0x0000db2b, 0x0000dd2b, 0x0000df2b, + 0x0000e12b, 0x0000e32b, 0x0000e52b, 0x0000e72b, + 0x0000e92b, 0x0000eb2b, 0x0000ed2b, 0x0000ef2b, + 0x0000f12b, 0x0000f32b, 0x0000f52b, 0x0000f72b, + 0x0000f92b, 0x0000fb2b, 0x0000fd2b, 0x0000ff2b, + 0x0000008b, 0x0000018b, 0x0000028b, 0x0000038b, + 0x0000048b, 0x0000058b, 0x0000068b, 0x0000078b, + 0x0000088b, 0x0000098b, 0x00000a8b, 0x00000b8b, + 0x00000c8b, 0x00000d8b, 0x00000e8b, 0x00000f8b, + 0x0000108b, 0x0000118b, 0x0000128b, 0x0000138b, + 0x0000148b, 0x0000158b, 0x0000168b, 0x0000178b, + 0x0000188b, 0x0000198b, 0x00001a8b, 0x00001b8b, + 0x00001c8b, 0x00001d8b, 0x00001e8b, 0x00001f8b, + 0x0000208b, 0x0000218b, 0x0000228b, 0x0000238b, + 0x0000248b, 
0x0000258b, 0x0000268b, 0x0000278b, + 0x0000288b, 0x0000298b, 0x00002a8b, 0x00002b8b, + 0x00002c8b, 0x00002d8b, 0x00002e8b, 0x00002f8b, + 0x0000308b, 0x0000318b, 0x0000328b, 0x0000338b, + 0x0000348b, 0x0000358b, 0x0000368b, 0x0000378b, + 0x0000388b, 0x0000398b, 0x00003a8b, 0x00003b8b, + 0x00003c8b, 0x00003d8b, 0x00003e8b, 0x00003f8b, + 0x0000408b, 0x0000418b, 0x0000428b, 0x0000438b, + 0x0000448b, 0x0000458b, 0x0000468b, 0x0000478b, + 0x0000488b, 0x0000498b, 0x00004a8b, 0x00004b8b, + 0x00004c8b, 0x00004d8b, 0x00004e8b, 0x00004f8b, + 0x0000508b, 0x0000518b, 0x0000528b, 0x0000538b, + 0x0000548b, 0x0000558b, 0x0000568b, 0x0000578b, + 0x0000588b, 0x0000598b, 0x00005a8b, 0x00005b8b, + 0x00005c8b, 0x00005d8b, 0x00005e8b, 0x00005f8b, + 0x0000608b, 0x0000618b, 0x0000628b, 0x0000638b, + 0x0000648b, 0x0000658b, 0x0000668b, 0x0000678b, + 0x0000688b, 0x0000698b, 0x00006a8b, 0x00006b8b, + 0x00006c8b, 0x00006d8b, 0x00006e8b, 0x00006f8b, + 0x0000708b, 0x0000718b, 0x0000728b, 0x0000738b, + 0x0000748b, 0x0000758b, 0x0000768b, 0x0000778b, + 0x0000788b, 0x0000798b, 0x00007a8b, 0x00007b8b, + 0x00007c8b, 0x00007d8b, 0x00007e8b, 0x00007f8b, + 0x0000808b, 0x0000818b, 0x0000828b, 0x0000838b, + 0x0000848b, 0x0000858b, 0x0000868b, 0x0000878b, + 0x0000888b, 0x0000898b, 0x00008a8b, 0x00008b8b, + 0x00008c8b, 0x00008d8b, 0x00008e8b, 0x00008f8b, + 0x0000908b, 0x0000918b, 0x0000928b, 0x0000938b, + 0x0000948b, 0x0000958b, 0x0000968b, 0x0000978b, + 0x0000988b, 0x0000998b, 0x00009a8b, 0x00009b8b, + 0x00009c8b, 0x00009d8b, 0x00009e8b, 0x00009f8b, + 0x0000a08b, 0x0000a18b, 0x0000a28b, 0x0000a38b, + 0x0000a48b, 0x0000a58b, 0x0000a68b, 0x0000a78b, + 0x0000a88b, 0x0000a98b, 0x0000aa8b, 0x0000ab8b, + 0x0000ac8b, 0x0000ad8b, 0x0000ae8b, 0x0000af8b, + 0x0000b08b, 0x0000b18b, 0x0000b28b, 0x0000b38b, + 0x0000b48b, 0x0000b58b, 0x0000b68b, 0x0000b78b, + 0x0000b88b, 0x0000b98b, 0x0000ba8b, 0x0000bb8b, + 0x0000bc8b, 0x0000bd8b, 0x0000be8b, 0x0000bf8b, + 0x0000c08b, 0x0000c18b, 0x0000c28b, 0x0000c38b, + 0x0000c48b, 
0x0000c58b, 0x0000c68b, 0x0000c78b, + 0x0000c88b, 0x0000c98b, 0x0000ca8b, 0x0000cb8b, + 0x0000cc8b, 0x0000cd8b, 0x0000ce8b, 0x0000cf8b, + 0x0000d08b, 0x0000d18b, 0x0000d28b, 0x0000d38b, + 0x0000d48b, 0x0000d58b, 0x0000d68b, 0x0000d78b, + 0x0000d88b, 0x0000d98b, 0x0000da8b, 0x0000db8b, + 0x0000dc8b, 0x0000dd8b, 0x0000de8b, 0x0000df8b, + 0x0000e08b, 0x0000e18b, 0x0000e28b, 0x0000e38b, + 0x0000e48b, 0x0000e58b, 0x0000e68b, 0x0000e78b, + 0x0000e88b, 0x0000e98b, 0x0000ea8b, 0x0000eb8b, + 0x0000ec8b, 0x0000ed8b, 0x0000ee8b, 0x0000ef8b, + 0x0000f08b, 0x0000f18b, 0x0000f28b, 0x0000f38b, + 0x0000f48b, 0x0000f58b, 0x0000f68b, 0x0000f78b, + 0x0000f88b, 0x0000f98b, 0x0000fa8b, 0x0000fb8b, + 0x0000fc8b, 0x0000fd8b, 0x0000fe8b, 0x0000ff8b, + 0x000000ac, 0x000002ac, 0x000004ac, 0x000006ac, + 0x000008ac, 0x00000aac, 0x00000cac, 0x00000eac, + 0x000010ac, 0x000012ac, 0x000014ac, 0x000016ac, + 0x000018ac, 0x00001aac, 0x00001cac, 0x00001eac, + 0x000020ac, 0x000022ac, 0x000024ac, 0x000026ac, + 0x000028ac, 0x00002aac, 0x00002cac, 0x00002eac, + 0x000030ac, 0x000032ac, 0x000034ac, 0x000036ac, + 0x000038ac, 0x00003aac, 0x00003cac, 0x00003eac, + 0x000040ac, 0x000042ac, 0x000044ac, 0x000046ac, + 0x000048ac, 0x00004aac, 0x00004cac, 0x00004eac, + 0x000050ac, 0x000052ac, 0x000054ac, 0x000056ac, + 0x000058ac, 0x00005aac, 0x00005cac, 0x00005eac, + 0x000060ac, 0x000062ac, 0x000064ac, 0x000066ac, + 0x000068ac, 0x00006aac, 0x00006cac, 0x00006eac, + 0x000070ac, 0x000072ac, 0x000074ac, 0x000076ac, + 0x000078ac, 0x00007aac, 0x00007cac, 0x00007eac, + 0x000080ac, 0x000082ac, 0x000084ac, 0x000086ac, + 0x000088ac, 0x00008aac, 0x00008cac, 0x00008eac, + 0x000090ac, 0x000092ac, 0x000094ac, 0x000096ac, + 0x000098ac, 0x00009aac, 0x00009cac, 0x00009eac, + 0x0000a0ac, 0x0000a2ac, 0x0000a4ac, 0x0000a6ac, + 0x0000a8ac, 0x0000aaac, 0x0000acac, 0x0000aeac, + 0x0000b0ac, 0x0000b2ac, 0x0000b4ac, 0x0000b6ac, + 0x0000b8ac, 0x0000baac, 0x0000bcac, 0x0000beac, + 0x0000c0ac, 0x0000c2ac, 0x0000c4ac, 0x0000c6ac, + 0x0000c8ac, 
0x0000caac, 0x0000ccac, 0x0000ceac, + 0x0000d0ac, 0x0000d2ac, 0x0000d4ac, 0x0000d6ac, + 0x0000d8ac, 0x0000daac, 0x0000dcac, 0x0000deac, + 0x0000e0ac, 0x0000e2ac, 0x0000e4ac, 0x0000e6ac, + 0x0000e8ac, 0x0000eaac, 0x0000ecac, 0x0000eeac, + 0x0000f0ac, 0x0000f2ac, 0x0000f4ac, 0x0000f6ac, + 0x0000f8ac, 0x0000faac, 0x0000fcac, 0x0000feac, + 0x000100ac, 0x000102ac, 0x000104ac, 0x000106ac, + 0x000108ac, 0x00010aac, 0x00010cac, 0x00010eac, + 0x000110ac, 0x000112ac, 0x000114ac, 0x000116ac, + 0x000118ac, 0x00011aac, 0x00011cac, 0x00011eac, + 0x000120ac, 0x000122ac, 0x000124ac, 0x000126ac, + 0x000128ac, 0x00012aac, 0x00012cac, 0x00012eac, + 0x000130ac, 0x000132ac, 0x000134ac, 0x000136ac, + 0x000138ac, 0x00013aac, 0x00013cac, 0x00013eac, + 0x000140ac, 0x000142ac, 0x000144ac, 0x000146ac, + 0x000148ac, 0x00014aac, 0x00014cac, 0x00014eac, + 0x000150ac, 0x000152ac, 0x000154ac, 0x000156ac, + 0x000158ac, 0x00015aac, 0x00015cac, 0x00015eac, + 0x000160ac, 0x000162ac, 0x000164ac, 0x000166ac, + 0x000168ac, 0x00016aac, 0x00016cac, 0x00016eac, + 0x000170ac, 0x000172ac, 0x000174ac, 0x000176ac, + 0x000178ac, 0x00017aac, 0x00017cac, 0x00017eac, + 0x000180ac, 0x000182ac, 0x000184ac, 0x000186ac, + 0x000188ac, 0x00018aac, 0x00018cac, 0x00018eac, + 0x000190ac, 0x000192ac, 0x000194ac, 0x000196ac, + 0x000198ac, 0x00019aac, 0x00019cac, 0x00019eac, + 0x0001a0ac, 0x0001a2ac, 0x0001a4ac, 0x0001a6ac, + 0x0001a8ac, 0x0001aaac, 0x0001acac, 0x0001aeac, + 0x0001b0ac, 0x0001b2ac, 0x0001b4ac, 0x0001b6ac, + 0x0001b8ac, 0x0001baac, 0x0001bcac, 0x0001beac, + 0x0001c0ac, 0x0001c2ac, 0x0001c4ac, 0x0001c6ac, + 0x0001c8ac, 0x0001caac, 0x0001ccac, 0x0001ceac, + 0x0001d0ac, 0x0001d2ac, 0x0001d4ac, 0x0001d6ac, + 0x0001d8ac, 0x0001daac, 0x0001dcac, 0x0001deac, + 0x0001e0ac, 0x0001e2ac, 0x0001e4ac, 0x0001e6ac, + 0x0001e8ac, 0x0001eaac, 0x0001ecac, 0x0001eeac, + 0x0001f0ac, 0x0001f2ac, 0x0001f4ac, 0x0001f6ac, + 0x0001f8ac, 0x0001faac, 0x0001fcac, 0x0001feac }, +#endif + + .len_table = { + 0x00000004, 0x00000104, 
0x00000185, 0x00000385, + 0x000007c6, 0x00000026, 0x000003a7, 0x00000426, + 0x00000227, 0x00000a27, 0x00000627, 0x00000e27, + 0x00000127, 0x00000927, 0x00000ba8, 0x00001ba8, + 0x00000528, 0x00000d28, 0x00001528, 0x00001d28, + 0x0000066a, 0x0000266a, 0x0000466a, 0x0000666a, + 0x0000166a, 0x0000366a, 0x0000566a, 0x0000766a, + 0x0000276b, 0x0000676b, 0x0000a76b, 0x0000e76b, + 0x00000e6b, 0x00002e6b, 0x00004e6b, 0x00006e6b, + 0x00008e6b, 0x0000ae6b, 0x0000ce6b, 0x0000ee6b, + 0x00001e6b, 0x00003e6b, 0x00005e6b, 0x00007e6b, + 0x00009e6b, 0x0000be6b, 0x0000de6b, 0x0000fe6b, + 0x00005fee, 0x00015fee, 0x00025fee, 0x00035fee, + 0x00045fee, 0x00055fee, 0x00065fee, 0x00075fee, + 0x0000176c, 0x0000576c, 0x0000976c, 0x0000d76c, + 0x0001176c, 0x0001576c, 0x0001976c, 0x0001d76c, + 0x0000dfef, 0x0001dfef, 0x0002dfef, 0x0003dfef, + 0x0004dfef, 0x0005dfef, 0x0006dfef, 0x0007dfef, + 0x0008dfef, 0x0009dfef, 0x000adfef, 0x000bdfef, + 0x000cdfef, 0x000ddfef, 0x000edfef, 0x000fdfef, + 0x00003fef, 0x00013fef, 0x00023fef, 0x00033fef, + 0x00043fef, 0x00053fef, 0x00063fef, 0x00073fef, + 0x00083fef, 0x00093fef, 0x000a3fef, 0x000b3fef, + 0x000c3fef, 0x000d3fef, 0x000e3fef, 0x000f3fef, + 0x000051ee, 0x0000d1ee, 0x000151ee, 0x0001d1ee, + 0x000251ee, 0x0002d1ee, 0x000351ee, 0x0003d1ee, + 0x000451ee, 0x0004d1ee, 0x000551ee, 0x0005d1ee, + 0x000651ee, 0x0006d1ee, 0x000751ee, 0x0007d1ee, + 0x0000376d, 0x0000776d, 0x0000b76d, 0x0000f76d, + 0x0001376d, 0x0001776d, 0x0001b76d, 0x0001f76d, + 0x0002376d, 0x0002776d, 0x0002b76d, 0x0002f76d, + 0x0003376d, 0x0003776d, 0x0003b76d, 0x0003f76d, + 0x0000bff0, 0x0001bff0, 0x0002bff0, 0x0003bff0, + 0x0004bff0, 0x0005bff0, 0x0006bff0, 0x0007bff0, + 0x0008bff0, 0x0009bff0, 0x000abff0, 0x000bbff0, + 0x000cbff0, 0x000dbff0, 0x000ebff0, 0x000fbff0, + 0x0010bff0, 0x0011bff0, 0x0012bff0, 0x0013bff0, + 0x0014bff0, 0x0015bff0, 0x0016bff0, 0x0017bff0, + 0x0018bff0, 0x0019bff0, 0x001abff0, 0x001bbff0, + 0x001cbff0, 0x001dbff0, 0x001ebff0, 0x001fbff0, + 0x000031ef, 0x0000b1ef, 
0x000131ef, 0x0001b1ef, + 0x000231ef, 0x0002b1ef, 0x000331ef, 0x0003b1ef, + 0x000431ef, 0x0004b1ef, 0x000531ef, 0x0005b1ef, + 0x000631ef, 0x0006b1ef, 0x000731ef, 0x0007b1ef, + 0x000831ef, 0x0008b1ef, 0x000931ef, 0x0009b1ef, + 0x000a31ef, 0x000ab1ef, 0x000b31ef, 0x000bb1ef, + 0x000c31ef, 0x000cb1ef, 0x000d31ef, 0x000db1ef, + 0x000e31ef, 0x000eb1ef, 0x000f31ef, 0x000fb1ef, + 0x00007ff0, 0x00017ff0, 0x00027ff0, 0x00037ff0, + 0x00047ff0, 0x00057ff0, 0x00067ff0, 0x00077ff0, + 0x00087ff0, 0x00097ff0, 0x000a7ff0, 0x000b7ff0, + 0x000c7ff0, 0x000d7ff0, 0x000e7ff0, 0x000f7ff0, + 0x00107ff0, 0x00117ff0, 0x00127ff0, 0x00137ff0, + 0x00147ff0, 0x00157ff0, 0x00167ff0, 0x00177ff0, + 0x00187ff0, 0x00197ff0, 0x001a7ff0, 0x001b7ff0, + 0x001c7ff0, 0x001d7ff0, 0x001e7ff0, 0x001f7ff0, + 0x0000fff0, 0x0001fff0, 0x0002fff0, 0x0003fff0, + 0x0004fff0, 0x0005fff0, 0x0006fff0, 0x0007fff0, + 0x0008fff0, 0x0009fff0, 0x000afff0, 0x000bfff0, + 0x000cfff0, 0x000dfff0, 0x000efff0, 0x000ffff0, + 0x0010fff0, 0x0011fff0, 0x0012fff0, 0x0013fff0, + 0x0014fff0, 0x0015fff0, 0x0016fff0, 0x0017fff0, + 0x0018fff0, 0x0019fff0, 0x001afff0, 0x001bfff0, + 0x001cfff0, 0x001dfff0, 0x001efff0, 0x000071ea }, + + .lit_table = { + 0x007b, 0x004f, 0x027b, 0x044f, 0x024f, 0x017b, 0x064f, 0x037b, + 0x014f, 0x0019, 0x0002, 0x054f, 0x00fb, 0x02fb, 0x034f, 0x074f, + 0x00cf, 0x04cf, 0x01fb, 0x02cf, 0x03fb, 0x0007, 0x0207, 0x0107, + 0x06cf, 0x01cf, 0x0307, 0x05cf, 0x0087, 0x0287, 0x03cf, 0x07cf, + 0x0004, 0x003d, 0x0022, 0x0187, 0x002f, 0x0387, 0x000b, 0x0059, + 0x0012, 0x0039, 0x0047, 0x00bd, 0x0079, 0x007d, 0x0032, 0x00fd, + 0x0003, 0x042f, 0x010b, 0x0247, 0x008b, 0x022f, 0x0147, 0x062f, + 0x012f, 0x052f, 0x018b, 0x0005, 0x004b, 0x0045, 0x014b, 0x032f, + 0x072f, 0x0083, 0x00cb, 0x0043, 0x01cb, 0x00c3, 0x002b, 0x012b, + 0x0023, 0x00ab, 0x00af, 0x0347, 0x01ab, 0x00a3, 0x006b, 0x016b, + 0x00eb, 0x00c7, 0x0063, 0x00e3, 0x0025, 0x01eb, 0x001b, 0x04af, + 0x0013, 0x02af, 0x02c7, 0x011b, 0x009b, 0x019b, 0x01c7, 0x0065, + 0x06af, 
0x000a, 0x0093, 0x002a, 0x001a, 0x0014, 0x0015, 0x0055, + 0x0035, 0x003a, 0x005b, 0x015b, 0x0006, 0x0075, 0x0026, 0x0016, + 0x000d, 0x00db, 0x0036, 0x000e, 0x002e, 0x001e, 0x004d, 0x002d, + 0x0053, 0x006d, 0x01af, 0x01db, 0x003b, 0x00d3, 0x05af, 0x03c7, + 0x03af, 0x07af, 0x006f, 0x046f, 0x026f, 0x066f, 0x016f, 0x056f, + 0x036f, 0x076f, 0x00ef, 0x04ef, 0x0027, 0x0227, 0x02ef, 0x0127, + 0x0327, 0x00a7, 0x02a7, 0x01a7, 0x03a7, 0x06ef, 0x01ef, 0x05ef, + 0x0067, 0x03ef, 0x07ef, 0x0267, 0x0167, 0x0367, 0x00e7, 0x001f, + 0x041f, 0x021f, 0x061f, 0x02e7, 0x01e7, 0x03e7, 0x011f, 0x0017, + 0x0217, 0x051f, 0x031f, 0x0117, 0x0317, 0x0097, 0x0297, 0x071f, + 0x009f, 0x049f, 0x029f, 0x069f, 0x019f, 0x0197, 0x0397, 0x059f, + 0x039f, 0x0057, 0x0257, 0x0157, 0x0357, 0x079f, 0x005f, 0x045f, + 0x025f, 0x00d7, 0x02d7, 0x065f, 0x015f, 0x055f, 0x01d7, 0x035f, + 0x075f, 0x03d7, 0x0037, 0x00df, 0x04df, 0x02df, 0x06df, 0x01df, + 0x05df, 0x03df, 0x0237, 0x07df, 0x003f, 0x0137, 0x0337, 0x043f, + 0x023f, 0x063f, 0x00b7, 0x02b7, 0x01b7, 0x03b7, 0x0077, 0x013f, + 0x053f, 0x033f, 0x073f, 0x00bf, 0x0277, 0x0177, 0x04bf, 0x02bf, + 0x06bf, 0x01bf, 0x05bf, 0x0377, 0x00f7, 0x02f7, 0x01f7, 0x03f7, + 0x000f, 0x03bf, 0x07bf, 0x020f, 0x010f, 0x007f, 0x047f, 0x030f, + 0x027f, 0x067f, 0x017f, 0x057f, 0x037f, 0x077f, 0x00ff, 0x008f, + 0x04ff }, + + .lit_table_sizes = { + 0xa, 0xb, 0xa, 0xb, 0xb, 0xa, 0xb, 0xa, + 0xb, 0x7, 0x6, 0xb, 0xa, 0xa, 0xb, 0xb, + 0xb, 0xb, 0xa, 0xb, 0xa, 0xa, 0xa, 0xa, + 0xb, 0xb, 0xa, 0xb, 0xa, 0xa, 0xb, 0xb, + 0x5, 0x8, 0x6, 0xa, 0xb, 0xa, 0x9, 0x7, + 0x6, 0x7, 0xa, 0x8, 0x7, 0x8, 0x6, 0x8, + 0x8, 0xb, 0x9, 0xa, 0x9, 0xb, 0xa, 0xb, + 0xb, 0xb, 0x9, 0x7, 0x9, 0x7, 0x9, 0xb, + 0xb, 0x8, 0x9, 0x8, 0x9, 0x8, 0x9, 0x9, + 0x8, 0x9, 0xb, 0xa, 0x9, 0x8, 0x9, 0x9, + 0x9, 0xa, 0x8, 0x8, 0x7, 0x9, 0x9, 0xb, + 0x8, 0xb, 0xa, 0x9, 0x9, 0x9, 0xa, 0x7, + 0xb, 0x6, 0x8, 0x6, 0x6, 0x5, 0x7, 0x7, + 0x7, 0x6, 0x9, 0x9, 0x6, 0x7, 0x6, 0x6, + 0x7, 0x9, 0x6, 0x6, 0x6, 0x6, 0x7, 0x7, + 0x8, 0x7, 0xb, 0x9, 
0x9, 0x8, 0xb, 0xa, + 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, + 0xb, 0xb, 0xb, 0xb, 0xa, 0xa, 0xb, 0xa, + 0xa, 0xa, 0xa, 0xa, 0xa, 0xb, 0xb, 0xb, + 0xa, 0xb, 0xb, 0xa, 0xa, 0xa, 0xa, 0xb, + 0xb, 0xb, 0xb, 0xa, 0xa, 0xa, 0xb, 0xa, + 0xa, 0xb, 0xb, 0xa, 0xa, 0xa, 0xa, 0xb, + 0xb, 0xb, 0xb, 0xb, 0xb, 0xa, 0xa, 0xb, + 0xb, 0xa, 0xa, 0xa, 0xa, 0xb, 0xb, 0xb, + 0xb, 0xa, 0xa, 0xb, 0xb, 0xb, 0xa, 0xb, + 0xb, 0xa, 0xa, 0xb, 0xb, 0xb, 0xb, 0xb, + 0xb, 0xb, 0xa, 0xb, 0xb, 0xa, 0xa, 0xb, + 0xb, 0xb, 0xa, 0xa, 0xa, 0xa, 0xa, 0xb, + 0xb, 0xb, 0xb, 0xb, 0xa, 0xa, 0xb, 0xb, + 0xb, 0xb, 0xb, 0xa, 0xa, 0xa, 0xa, 0xa, + 0xa, 0xb, 0xb, 0xa, 0xa, 0xb, 0xb, 0xa, + 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xa, + 0xb }, + +#ifndef LONGER_HUFFTABLE + .dcodes = { + 0x000d, 0x001b, 0x0007, 0x0017, 0x00bf, 0x01bf, 0x007f, 0x017f, + 0x00ff, 0x01ff }, + .dcodes_sizes = { 0x4, 0x5, 0x5, 0x5, 0x9, 0x9, 0x9, 0x9,0x9, 0x9 } +#else + .dcodes = { 0x007f, 0x017f, 0x00ff, 0x01ff }, + .dcodes_sizes = { 0x9, 0x9, 0x9, 0x9 } +#endif +}; diff --git a/igzip/igzip.c b/igzip/igzip.c new file mode 100644 index 0000000..94b0c3a --- /dev/null +++ b/igzip/igzip.c @@ -0,0 +1,882 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#define ASM + +#include +#include +#ifdef _WIN32 +# include +#endif +/* DEFLATE is defined unless IGZIP_USE_GZIP_FORMAT is set; the gzip header, + * trailer and CRC code in this file is compiled only under #ifndef DEFLATE */ +#ifndef IGZIP_USE_GZIP_FORMAT +# define DEFLATE 1 +#endif +/* MAX_SYNC_FLUSH_SIZE is parenthesized so that it expands as a single value + * when it appears inside a larger expression (e.g. next to * or -) */ +#define MAX_WRITE_BITS_SIZE 8 +#define FORCE_FLUSH 64 +#define MIN_OBUF_SIZE 224 +#define NON_EMPTY_BLOCK_SIZE 6 +#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE) + +#include "huffman.h" +#include "bitbuf2.h" +#include "igzip_lib.h" +#include "repeated_char_result.h" +/* Data and routines provided by other translation units */ +extern const uint8_t gzip_hdr[]; +extern const uint32_t gzip_hdr_bytes; +extern const uint32_t gzip_trl_bytes; +extern const struct isal_hufftables hufftables_default; + +extern uint32_t crc32_gzip(uint32_t init_crc, const unsigned char *buf, uint64_t len); +/* Prototypes of the file-local (static) stateless helpers */ +static int write_stored_block_stateless(struct isal_zstream *stream, uint32_t stored_len, + uint32_t crc32); +#ifndef DEFLATE +static int write_gzip_header_stateless(struct isal_zstream *stream); +#endif +static int write_deflate_header_stateless(struct isal_zstream *stream); +static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream); +static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in, + uint32_t 
crc32); + +void isal_deflate_body_stateless(struct isal_zstream *stream); + +unsigned int detect_repeated_char(uint8_t * buf, uint32_t size); +/* Sizes used by write_stored_block_stateless: the number of bytes copied out for + * each block header, and the largest payload placed in a single block */ +#define STORED_BLK_HDR_BZ 5 +#define STORED_BLK_MAX_BZ 65535 + +void isal_deflate_body(struct isal_zstream *stream); +void isal_deflate_finish(struct isal_zstream *stream); +uint32_t crc_512to32_01(uint32_t * crc); +uint32_t get_crc(uint32_t * crc); + +/*****************************************************************/ + +/* Forward declarations */ +static inline void reset_match_history(struct isal_zstream *stream); +void write_header(struct isal_zstream *stream); +void write_deflate_header(struct isal_zstream *stream); +void write_trailer(struct isal_zstream *stream); +/* Three-field version record; the meaning of each field is not documented here */ +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +/* Version info */ +struct slver isal_deflate_init_slver_01030081; +struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 }; + +struct slver isal_deflate_slver_01030082; +struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 }; + +struct slver isal_deflate_stateless_slver_01010083; +struct slver isal_deflate_stateless_slver = { 0x0083, 0x01, 0x01 }; + +/*****************************************************************/ +/* Returns b_bytes_valid plus the distance from file_start to buffer, as a uint32_t */ +uint32_t file_size(struct isal_zstate *state) +{ + return state->b_bytes_valid + (uint32_t) (state->buffer - state->file_start); +} +/* Does nothing unless at least 8 bytes of output space are available. + * Otherwise writes one value through the bit buffer which holds, from the low + * bits up: the code for symbol 256 (only when has_eob is clear), flush_size + 3 + * zero bits, and the 32-bit pattern 0xFFFF0000. Then advances the output + * cursor, clears has_eob, selects the next state and, for FULL_FLUSH, resets + * the match history. + * NOTE(review): the pattern looks like the empty stored block used as a deflate + * sync marker - confirm against the format specification. */ +static +void sync_flush(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint64_t bits_to_write = 0xFFFF0000, bits_len; + uint64_t code = 0, len = 0, bytes; + int flush_size; + + if (stream->avail_out >= 8) { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + if (!state->has_eob) + get_lit_code(stream->hufftables, 256, &code, &len); + /* Padding that makes m_bit_count + len + 3 + flush_size a multiple of 8 + * (unsigned negation, so the remainder is taken modulo 2^64) */ + flush_size = (-(state->bitbuf.m_bit_count + len + 3)) % 8; + /* Total bits: 32 pattern bits + code bits + padding + 3 */ + bits_to_write <<= flush_size + 3; + bits_len = 32 + len + flush_size + 3; + +#ifdef USE_BITBUFB /* Write Bits Always */ + state->state = ZSTATE_NEW_HDR; +#else /* Not Write Bits 
Always */ + state->state = ZSTATE_FLUSH_WRITE_BUFFER; +#endif + state->has_eob = 0; + /* Put the fetched code in the low len bits, below the padding and pattern */ + if (len > 0) + bits_to_write = (bits_to_write << len) | code; + + write_bits(&state->bitbuf, bits_to_write, bits_len); + /* Advance the caller's output cursor and counters by what the bit buffer used */ + bytes = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + + if (stream->flush == FULL_FLUSH) { + /* Clear match history so there are no cross + * block length distance pairs */ + reset_match_history(stream); + } + } +} +/* When at least 8 bytes of output space are available: flushes the bit buffer + * into the output, updates the output cursor and counters, and moves the state + * to ZSTATE_NEW_HDR. Otherwise leaves everything unchanged. */ +static void flush_write_buffer(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int bytes = 0; + if (stream->avail_out >= 8) { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + flush(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + state->state = ZSTATE_NEW_HDR; + } +} +/* Runs the stage handlers in a fixed order. Every check re-reads state->state + * (plain ifs, not else-ifs), so a handler that advances the state lets the + * following stage run within the same call. */ +static void isal_deflate_int(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) + write_header(stream); + + if (state->state == ZSTATE_BODY) + isal_deflate_body(stream); + + if (state->state == ZSTATE_FLUSH_READ_BUFFER) + isal_deflate_finish(stream); + + if (state->state == ZSTATE_SYNC_FLUSH) + sync_flush(stream); + + if (state->state == ZSTATE_FLUSH_WRITE_BUFFER) + flush_write_buffer(stream); + + if (state->state == ZSTATE_TRL) + write_trailer(stream); +} +/* Emits one block that describes repeated_length copies of a single byte, + * starting from a precomputed header taken from repeated_char_header and + * followed by repeat codes, literals and an end-of-block code. Consumes + * repeated_length bytes of input on success. + * Returns COMP_OK, or STATELESS_OVERFLOW when the output space check fails. */ +static uint32_t write_constant_compressed_stateless(struct isal_zstream *stream, + uint32_t repeated_char, + uint32_t repeated_length, + uint32_t end_of_stream) +{ + /* Assumes repeated_length is at least 1. + * Assumes the input end_of_stream is either 0 or 1. 
*/ + struct isal_zstate *state = &stream->internal_state; + uint32_t rep_bits = ((repeated_length - 1) / 258) * 2; + uint32_t rep_bytes = rep_bits / 8; + uint32_t rep_extra = (repeated_length - 1) % 258; + uint32_t bytes; + /* rep_bits: two bits for every complete run of 258 after the first byte; + * rep_extra: the leftover repeats, encoded explicitly further down. + * NOTE(review): presumably each 258-long repeat costs two zero bits with the + * precomputed tables - confirm against repeated_char_result.h */ + /* Guarantee there is enough space for the header even in the worst case */ + if (stream->avail_out < HEADER_LENGTH + MAX_FIXUP_CODE_LENGTH + rep_bytes + 8) + return STATELESS_OVERFLOW; + + /* Assumes the repeated char is either 0 or 0xFF. */ + memcpy(stream->next_out, repeated_char_header[repeated_char & 1], HEADER_LENGTH); + /* Set bit 0 of the first header byte when the caller marks this block as last */ + if (end_of_stream > 0) + stream->next_out[0] |= 1; + /* Zero-fill the whole bytes covered by rep_bits; the remaining rep_bits % 8 + * bits are pre-loaded into the bit buffer below */ + memset(stream->next_out + HEADER_LENGTH, 0, rep_bytes); + stream->avail_out -= HEADER_LENGTH + rep_bytes; + stream->next_out += HEADER_LENGTH + rep_bytes; + stream->total_out += HEADER_LENGTH + rep_bytes; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + /* These two lines are basically a modified version of init. */ + state->bitbuf.m_bits = 0; + state->bitbuf.m_bit_count = rep_bits % 8; + + /* Add smaller repeat codes as necessary. Code280 can describe repeat + * lengths of 115-130 bits. Code10 can describe repeat lengths of 10 + * bits. If more than 230 bits, fill code with two code280s. Else if + * more than 115 repeates, fill with code10s until one code280 can + * finish the rest of the repeats. 
Else, fill with code10s and + * literals */ + if (rep_extra > 115) { + /* 131..229: emit CODE_10s until at most 130 repeats remain */ while (rep_extra > 130 && rep_extra < 230) { + write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH); + rep_extra -= 10; + } + /* 230 and above: a first CODE_280 takes rep_extra / 2 and the CODE_280 + * written after this block takes what is left */ + if (rep_extra >= 230) { + write_bits(&state->bitbuf, + CODE_280 | ((rep_extra / 2 - 115) << CODE_280_LENGTH), + CODE_280_TOTAL_LENGTH); + rep_extra -= rep_extra / 2; + } + /* At this point rep_extra is within 115..130 and fits a single CODE_280 */ + write_bits(&state->bitbuf, + CODE_280 | ((rep_extra - 115) << CODE_280_LENGTH), + CODE_280_TOTAL_LENGTH); + + } else { + /* 115 or fewer: tens as CODE_10, the remaining units as literals */ while (rep_extra >= 10) { + + write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH); + rep_extra -= 10; + } + + for (; rep_extra > 0; rep_extra--) + write_bits(&state->bitbuf, CODE_LIT, CODE_LIT_LENGTH); + } + + write_bits(&state->bitbuf, END_OF_BLOCK, END_OF_BLOCK_LEN); + /* The whole run has been consumed from the input */ + stream->next_in += repeated_length; + stream->avail_in -= repeated_length; + stream->total_in += repeated_length; + /* Account for the bytes the bit buffer wrote after the header and zero fill */ + bytes = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + + return COMP_OK; +} +/* Counts how far the pattern of the first 8 bytes of in continues: first whole + * 8-byte words equal to the first word, then single bytes equal to that word + * truncated to uint8_t. Returns the count of matching bytes as an int. + * in is dereferenced as a uint64_t before length is looked at, so length must + * be at least 8 (the caller in this file checks avail_in >= 8). */ +int detect_repeated_char_length(uint8_t * in, uint32_t length) +{ + /* This currently assumes the first 8 bytes are the same character. + * This won't work effectively if the input stream isn't aligned well. 
*/ + uint8_t *p_8, *end = in + length; + uint64_t *p_64 = (uint64_t *) in; + uint64_t w = *p_64; + uint8_t c = (uint8_t) w; + /* Word-wise scan while a full 8-byte word still fits before end */ + for (; (p_64 <= (uint64_t *) (end - 8)) && (w == *p_64); p_64++) ; + + p_8 = (uint8_t *) p_64; + /* Byte-wise scan of the tail */ + for (; (p_8 < end) && (c == *p_8); p_8++) ; + + return p_8 - in; +} +/* Compressed path of the stateless API. next_in / avail_in are the caller's + * snapshot of the original input and are used for the gzip CRC and the trailer; + * the cursors inside stream advance as data is consumed. + * Returns COMP_OK, or STATELESS_OVERFLOW when any step reports failure. */ +static int isal_deflate_int_stateless(struct isal_zstream *stream, uint8_t * next_in, + const uint32_t avail_in) +{ + uint32_t crc32 = 0; + uint32_t repeated_char_length; + +#ifndef DEFLATE + if (write_gzip_header_stateless(stream)) + return STATELESS_OVERFLOW; +#endif + /* Only measure a run when the first 8 input bytes, read as one 64-bit word, + * are all 0x00 or all 0xFF */ + if (avail_in >= 8 + && (*(uint64_t *) stream->next_in == 0 + || *(uint64_t *) stream->next_in == ~(uint64_t) 0)) + repeated_char_length = + detect_repeated_char_length(stream->next_in, stream->avail_in); + else + repeated_char_length = 0; + /* The whole input is one run: emit it as the last block and finish. + * NOTE(review): with avail_in == 0 this comparison is also true (0 == 0) and + * next_in[0] would be read with repeated_length 0; isal_deflate_stateless + * takes the stored-block path when stored_len < dyn_min_len - confirm that + * this always covers empty input. */ + if (stream->avail_in == repeated_char_length) { + if (write_constant_compressed_stateless(stream, + stream->next_in[0], + repeated_char_length, 1) != COMP_OK) + return STATELESS_OVERFLOW; + +#ifndef DEFLATE + crc32 = crc32_gzip(0x0, next_in, avail_in); +#endif + + /* write_trailer_stateless is required because it flushes out the last of the output */ + if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK) + return STATELESS_OVERFLOW; + return COMP_OK; + + } else if (repeated_char_length >= MIN_REPEAT_LEN) { + /* Long enough leading run: emit it as a non-final block first */ if (write_constant_compressed_stateless + (stream, stream->next_in[0], repeated_char_length, 0) != COMP_OK) + return STATELESS_OVERFLOW; + } + /* Write the deflate header, then compress the input that is left */ + if (write_deflate_header_unaligned_stateless(stream) != COMP_OK) + return STATELESS_OVERFLOW; + if (stream->avail_out < 8) + return STATELESS_OVERFLOW; + + isal_deflate_body_stateless(stream); + /* has_eob still clear after the body is reported as an overflow */ + if (!stream->internal_state.has_eob) + return STATELESS_OVERFLOW; + +#ifndef DEFLATE + crc32 = crc32_gzip(0x0, next_in, avail_in); +#endif + + if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK) + return STATELESS_OVERFLOW; + + return COMP_OK; +} +/* Copies all of stream->avail_in input bytes to the output as stored blocks of + * at most STORED_BLK_MAX_BZ bytes each, wrapped in the gzip header and trailer + * when DEFLATE is not defined. stored_len is the caller-computed total output + * size; crc32 is only used for the gzip trailer. + * Returns COMP_OK, or STATELESS_OVERFLOW when avail_out < stored_len. */ +static int write_stored_block_stateless(struct 
isal_zstream *stream, + uint32_t stored_len, uint32_t crc32) +{ + uint64_t stored_blk_hdr; + uint32_t copy_size; + uint32_t avail_in; + +#ifndef DEFLATE + uint64_t gzip_trl; +#endif + + if (stream->avail_out < stored_len) + return STATELESS_OVERFLOW; + /* Output accounting is done up front from the caller-supplied stored_len */ + stream->avail_out -= stored_len; + stream->total_out += stored_len; + avail_in = stream->avail_in; + +#ifndef DEFLATE + memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes); + stream->next_out += gzip_hdr_bytes; +#endif + /* One stored block per iteration. The low STORED_BLK_HDR_BZ bytes of + * stored_blk_hdr are copied out as the block header: bits 0-7 flags, bits + * 8-23 the length, bits 24-39 the low 16 bits of ~length (0xFFFF00 is + * length 0xFFFF with complement 0). + * NOTE(review): copying the low bytes with memcpy assumes a little-endian + * host. */ + do { + if (avail_in >= STORED_BLK_MAX_BZ) { + stored_blk_hdr = 0xFFFF00; + copy_size = STORED_BLK_MAX_BZ; + } else { + stored_blk_hdr = ~avail_in; + stored_blk_hdr <<= 24; + stored_blk_hdr |= (avail_in & 0xFFFF) << 8; + copy_size = avail_in; + } + + avail_in -= copy_size; + + /* Handle BFINAL bit */ + if (avail_in == 0) + stored_blk_hdr |= 0x1; + + memcpy(stream->next_out, &stored_blk_hdr, STORED_BLK_HDR_BZ); + stream->next_out += STORED_BLK_HDR_BZ; + + memcpy(stream->next_out, stream->next_in, copy_size); + stream->next_out += copy_size; + stream->next_in += copy_size; + stream->total_in += copy_size; + } while (avail_in != 0); + /* gzip trailer: crc32 in the low 32 bits, the original input length + * (stream->avail_in has not been modified yet) in the high 32 bits */ +#ifndef DEFLATE + gzip_trl = stream->avail_in; + gzip_trl <<= 32; + gzip_trl |= crc32 & 0xFFFFFFFF; + memcpy(stream->next_out, &gzip_trl, gzip_trl_bytes); + stream->next_out += gzip_trl_bytes; +#endif + + stream->avail_in = 0; + return COMP_OK; +} +/* Sets every entry of the head table to the current position + * (b_bytes_processed plus the offset of buffer from file_start) minus + * (IGZIP_D + 1), truncated to 16 bits. The entry count is sizeof(head) / 2, + * i.e. it relies on 2-byte entries. */ +static inline void reset_match_history(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint16_t *head = stream->internal_state.head; + int i = 0; + + for (i = 0; i < sizeof(state->head) / 2; i++) + head[i] = + (uint16_t) (state->b_bytes_processed + state->buffer - state->file_start - + (IGZIP_D + 1)); +} +/* Resets the stream totals and flush mode, selects hufftables_default, clears + * the internal compressor state (state ZSTATE_NEW_HDR, empty temporary output, + * fresh bit buffer, CRC words) and resets the match history. */ +void isal_deflate_init_01(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + + stream->total_in = 0; + stream->total_out = 0; + stream->hufftables = (struct isal_hufftables *)&hufftables_default; + stream->flush = 0; + + state->b_bytes_valid = 
0; + state->b_bytes_processed = 0; + state->has_eob = 0; + state->has_eob_hdr = 0; + state->left_over = 0; + state->last_flush = 0; + state->has_gzip_hdr = 0; + state->state = ZSTATE_NEW_HDR; + state->count = 0; + + state->tmp_out_start = 0; + state->tmp_out_end = 0; + + state->file_start = state->buffer; + + init(&state->bitbuf); + /* All CRC words are zeroed, then the first one is seeded. + * NOTE(review): the meaning of the seed value is not visible in this file. */ + memset(state->crc, 0, sizeof(state->crc)); + *state->crc = 0x9db42487; + + reset_match_history(stream); + + return; +} +/* One-shot compression of the whole input. Tries the compressed path unless a + * stored block is estimated to be smaller, and falls back to stored blocks when + * the compressed path does not return COMP_OK. + * Returns COMP_OK or STATELESS_OVERFLOW. */ +int isal_deflate_stateless(struct isal_zstream *stream) +{ + uint8_t *next_in = stream->next_in; + const uint32_t avail_in = stream->avail_in; + + uint8_t *next_out = stream->next_out; + const uint32_t avail_out = stream->avail_out; + + uint32_t crc32 = 0; + uint32_t stored_len; + uint32_t dyn_min_len; + uint32_t min_len; + uint32_t select_stored_blk = 0; + /* Stored size: one header per block of up to STORED_BLK_MAX_BZ bytes plus + * the payload; empty input still needs one header. + * NOTE(review): avail_in + STORED_BLK_MAX_BZ - 1 is 32-bit arithmetic and + * wraps for inputs within 64 KiB of UINT32_MAX. */ + if (avail_in == 0) + stored_len = STORED_BLK_HDR_BZ; + else + stored_len = + STORED_BLK_HDR_BZ * ((avail_in + STORED_BLK_MAX_BZ - 1) / + STORED_BLK_MAX_BZ) + avail_in; + + /* + at least 1 byte compressed data in the case of empty dynamic block which only + contains the EOB + */ + + dyn_min_len = stream->hufftables->deflate_hdr_count + 1; +#ifndef DEFLATE + dyn_min_len += gzip_hdr_bytes + gzip_trl_bytes + 1; + stored_len += gzip_hdr_bytes + gzip_trl_bytes; +#endif + + min_len = dyn_min_len; + /* Prefer stored blocks when they are smaller than the smallest compressed output */ + if (stored_len < dyn_min_len) { + min_len = stored_len; + select_stored_blk = 1; + } + + /* + the output buffer should be no less than 8 bytes + while empty stored deflate block is 5 bytes only + */ + if (avail_out < min_len || stream->avail_out < 8) + return STATELESS_OVERFLOW; + + if (!select_stored_blk) { + if (isal_deflate_int_stateless(stream, next_in, avail_in) == COMP_OK) + return COMP_OK; + } + if (avail_out < stored_len) + return STATELESS_OVERFLOW; + /* Fallback: re-initialize, then restore the saved cursors and zero the totals + * so the stored-block writer starts from the original positions */ + isal_deflate_init(stream); + + stream->next_in = next_in; + stream->avail_in = avail_in; + stream->total_in = 0; + + stream->next_out = next_out; + stream->avail_out = avail_out; + stream->total_out = 0; + 
+#ifndef DEFLATE + crc32 = crc32_gzip(0x0, next_in, avail_in); +#endif + return write_stored_block_stateless(stream, stored_len, crc32); +} +/* Streaming entry point. First delivers any output parked in tmp_out_buff, then + * runs the state machine; when fewer than 8 (but more than 0) output bytes + * remain it runs the state machine once more into tmp_out_buff and copies out + * as much as fits. + * Returns COMP_OK, or INVALID_FLUSH when stream->flush is 3 or greater. */ +int isal_deflate(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t size; + int ret = COMP_OK; + + if (stream->flush < 3) { + + state->last_flush = stream->flush; + /* A state at or above TMP_OFFSET_SIZE marks bytes still waiting in + * tmp_out_buff; the offset is removed once all of them are delivered */ + if (state->state >= TMP_OFFSET_SIZE) { + size = state->tmp_out_end - state->tmp_out_start; + if (size > stream->avail_out) + size = stream->avail_out; + memcpy(stream->next_out, state->tmp_out_buff + state->tmp_out_start, + size); + stream->next_out += size; + stream->avail_out -= size; + stream->total_out += size; + state->tmp_out_start += size; + + if (state->tmp_out_start == state->tmp_out_end) + state->state -= TMP_OFFSET_SIZE; + + if (stream->avail_out == 0 || state->state == ZSTATE_END) + return ret; + } + assert(state->tmp_out_start == state->tmp_out_end); + + isal_deflate_int(stream); + + if (stream->avail_out == 0) + return ret; + + else if (stream->avail_out < 8) { + uint8_t *next_out; + uint32_t avail_out; + uint32_t total_out; + /* Save the caller's output fields and point the stream at tmp_out_buff */ + next_out = stream->next_out; + avail_out = stream->avail_out; + total_out = stream->total_out; + + stream->next_out = state->tmp_out_buff; + stream->avail_out = sizeof(state->tmp_out_buff); + stream->total_out = 0; + + isal_deflate_int(stream); + /* total_out was zeroed above, so it now holds the bytes placed in tmp_out_buff */ + state->tmp_out_start = 0; + state->tmp_out_end = stream->total_out; + + stream->next_out = next_out; + stream->avail_out = avail_out; + stream->total_out = total_out; + if (state->tmp_out_end) { + size = state->tmp_out_end; + if (size > stream->avail_out) + size = stream->avail_out; + memcpy(stream->next_out, state->tmp_out_buff, size); + stream->next_out += size; + stream->avail_out -= size; + stream->total_out += size; + state->tmp_out_start += size; + /* Leftover bytes: flag the state so the next call drains them first */ if (state->tmp_out_start != state->tmp_out_end) + state->state += TMP_OFFSET_SIZE; + + } + } + } else + ret = INVALID_FLUSH; + + return ret; +} +/* gzip builds only: copies the complete gzip header to the output in one step. + * Returns STATELESS_OVERFLOW when avail_out is not larger than gzip_hdr_bytes, + * COMP_OK otherwise. */ +#ifndef DEFLATE +static int 
write_gzip_header_stateless(struct isal_zstream *stream) +{ + if (gzip_hdr_bytes >= stream->avail_out) + return STATELESS_OVERFLOW; + + stream->avail_out -= gzip_hdr_bytes; + stream->total_out += gzip_hdr_bytes; + + memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes); + + stream->next_out += gzip_hdr_bytes; + + return COMP_OK; +} +/* Resumable gzip header writer: state->count is the number of header bytes + * already emitted. Copies as many of the remaining bytes as fit in avail_out; + * once the header is complete, resets count to 0 and sets has_gzip_hdr. */ +static void write_gzip_header(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int bytes_to_write = gzip_hdr_bytes; + + bytes_to_write -= state->count; + + if (bytes_to_write > stream->avail_out) + bytes_to_write = stream->avail_out; + + memcpy(stream->next_out, gzip_hdr + state->count, bytes_to_write); + state->count += bytes_to_write; + + if (state->count == gzip_hdr_bytes) { + state->count = 0; + state->has_gzip_hdr = 1; + } + + stream->avail_out -= bytes_to_write; + stream->total_out += bytes_to_write; + stream->next_out += bytes_to_write; + +} +#endif +/* Byte-aligned header writer: copies the deflate_hdr_count whole header bytes + * with memcpy, then writes deflate_hdr_extra_bits bits taken from the byte at + * index deflate_hdr_count through the bit buffer. + * Returns STATELESS_OVERFLOW unless avail_out exceeds deflate_hdr_count + 8. */ +static int write_deflate_header_stateless(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct isal_hufftables *hufftables = stream->hufftables; + uint32_t count; + + if (hufftables->deflate_hdr_count + 8 >= stream->avail_out) + return STATELESS_OVERFLOW; + + memcpy(stream->next_out, hufftables->deflate_hdr, hufftables->deflate_hdr_count); + + stream->avail_out -= hufftables->deflate_hdr_count; + stream->total_out += hufftables->deflate_hdr_count; + stream->next_out += hufftables->deflate_hdr_count; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + write_bits(&state->bitbuf, hufftables->deflate_hdr[hufftables->deflate_hdr_count], + hufftables->deflate_hdr_extra_bits); + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + + return COMP_OK; +} +/* Header writer for the case where the bit buffer may already hold pending + * bits: delegates to write_deflate_header_stateless when m_bit_count is 0, + * otherwise pushes the whole header through write_bits. + * Returns COMP_OK or STATELESS_OVERFLOW. */ +static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + 
struct isal_hufftables *hufftables = stream->hufftables; + unsigned int count; + uint64_t bit_count; + uint64_t *header_next; + uint64_t *header_end; + uint64_t header_bits; + + if (state->bitbuf.m_bit_count == 0) + return write_deflate_header_stateless(stream); + + if (hufftables->deflate_hdr_count + 16 >= stream->avail_out) + return STATELESS_OVERFLOW; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + header_next = (uint64_t *) hufftables->deflate_hdr; + header_end = header_next + hufftables->deflate_hdr_count / 8; + + /* Write out Complete Header bits */ + for (; header_next < header_end; header_next++) { + header_bits = *header_next; + write_bits(&state->bitbuf, header_bits, 32); + header_bits >>= 32; + write_bits(&state->bitbuf, header_bits, 32); + } + + header_bits = *header_next; + bit_count = + (hufftables->deflate_hdr_count & 0x7) * 8 + hufftables->deflate_hdr_extra_bits; + + if (bit_count > MAX_BITBUF_BIT_WRITE) { + write_bits(&state->bitbuf, header_bits, MAX_BITBUF_BIT_WRITE); + header_bits >>= MAX_BITBUF_BIT_WRITE; + bit_count -= MAX_BITBUF_BIT_WRITE; + + } + + write_bits(&state->bitbuf, header_bits, bit_count); + + /* check_space flushes extra bytes in bitbuf. 
Required because + * write_bits_always fails when the next commit makes the buffer + * length exceed 64 bits */ + check_space(&state->bitbuf, FORCE_FLUSH); + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + + return COMP_OK; +} + +void write_header(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct isal_hufftables *hufftables = stream->hufftables; + uint32_t count; + + state->state = ZSTATE_HDR; + + if (state->bitbuf.m_bit_count != 0) { + if (stream->avail_out < 8) + return; + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + flush(&state->bitbuf); + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + } +#ifndef DEFLATE + if (!state->has_gzip_hdr) + write_gzip_header(stream); +#endif + + count = hufftables->deflate_hdr_count - state->count; + + if (count != 0) { + if (count > stream->avail_out) + count = stream->avail_out; + + memcpy(stream->next_out, hufftables->deflate_hdr + state->count, count); + + if (state->count == 0 && count > 0) { + if (!stream->end_of_stream) + *stream->next_out &= 0xfe; + else + state->has_eob_hdr = 1; + } + + stream->next_out += count; + stream->avail_out -= count; + stream->total_out += count; + state->count += count; + + count = hufftables->deflate_hdr_count - state->count; + } + + if ((count == 0) && (stream->avail_out >= 8)) { + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + write_bits(&state->bitbuf, + hufftables->deflate_hdr[hufftables->deflate_hdr_count], + hufftables->deflate_hdr_extra_bits); + + state->state = ZSTATE_BODY; + state->count = 0; + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + } + +} + +uint32_t get_crc_01(uint32_t * crc) +{ + return 
crc_512to32_01(crc); +} + +void write_trailer(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + unsigned int bytes; + + if (stream->avail_out >= 8) { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + /* the flush() will pad to the next byte and write up to 8 bytes + * to the output stream/buffer. + */ + if (!state->has_eob_hdr) { + /* If the final header has not been written, write a + * final block. This block is a static huffman block + * which only contains the end of block symbol. The code + * that happens to do this is the first 10 bits of + * 0x003 */ + state->has_eob_hdr = 1; + write_bits(&state->bitbuf, 0x003, 10); + if (is_full(&state->bitbuf)) { + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + return; + } + } + + flush(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + +#ifndef DEFLATE + uint32_t *crc = state->crc; + + if (!is_full(&state->bitbuf)) { + *(uint64_t *) stream->next_out = + ((uint64_t) file_size(state) << 32) | get_crc(crc); + stream->next_out += 8; + bytes += 8; + state->state = ZSTATE_END; + } +#else + state->state = ZSTATE_END; +#endif + + stream->avail_out -= bytes; + stream->total_out += bytes; + } +} + +static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in, + uint32_t crc32) +{ + int ret = COMP_OK; + struct isal_zstate *state = &stream->internal_state; + unsigned int bytes; + + if (stream->avail_out < 8) { + ret = STATELESS_OVERFLOW; + } else { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + /* the flush() will pad to the next byte and write up to 8 bytes + * to the output stream/buffer. 
+ */ + flush(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); +#ifndef DEFLATE + if (is_full(&state->bitbuf)) { + ret = STATELESS_OVERFLOW; + } else { + *(uint64_t *) stream->next_out = ((uint64_t) avail_in << 32) | crc32; + stream->next_out += 8; + bytes += 8; + } +#endif + stream->avail_out -= bytes; + stream->total_out += bytes; + } + + return ret; +} diff --git a/igzip/igzip_base.c b/igzip/igzip_base.c new file mode 100644 index 0000000..49bf0b1 --- /dev/null +++ b/igzip/igzip_base.c @@ -0,0 +1,292 @@ +#include +#include "igzip_lib.h" +#include "huffman.h" +#include "huff_codes.h" +#include "bitbuf2.h" + +extern const struct isal_hufftables hufftables_default; + +void isal_deflate_init_base(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int i; + + uint32_t *crc = state->crc; + + stream->total_in = 0; + stream->total_out = 0; + stream->hufftables = (struct isal_hufftables *)&hufftables_default; + stream->flush = 0; + state->b_bytes_valid = 0; + state->b_bytes_processed = 0; + state->has_eob = 0; + state->has_eob_hdr = 0; + state->left_over = 0; + state->last_flush = 0; + state->has_gzip_hdr = 0; + state->state = ZSTATE_NEW_HDR; + state->count = 0; + + state->tmp_out_start = 0; + state->tmp_out_end = 0; + + state->file_start = state->buffer; + + init(&state->bitbuf); + + *crc = ~0; + + for (i = 0; i < HASH_SIZE; i++) + state->head[i] = (uint16_t) - (IGZIP_D + 1); + return; +} + +uint32_t get_crc_base(uint32_t * crc) +{ + return ~*crc; +} + +static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state, + uint8_t * start_in) +{ + uint32_t bytes_written; + + stream->total_in += stream->next_in - start_in; + + bytes_written = buffer_used(&state->bitbuf); + stream->total_out += bytes_written; + stream->next_out += bytes_written; + stream->avail_out -= bytes_written; + +} + +void isal_deflate_body_base(struct isal_zstream *stream) +{ + uint32_t 
literal, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + uint16_t match_length; + uint32_t dist, bytes_to_buffer, offset; + uint64_t code, code_len, code2, code_len2; + struct isal_zstate *state = &stream->internal_state; + uint16_t *last_seen = state->head; + uint32_t *crc = state->crc; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + return; + } + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + start_in = stream->next_in; + + while (stream->avail_in != 0) { + bytes_to_buffer = + IGZIP_D + IGZIP_LA - (state->b_bytes_valid - state->b_bytes_processed); + + if (bytes_to_buffer > IGZIP_D) + bytes_to_buffer = IGZIP_D; + + if (stream->avail_in < IGZIP_D) + bytes_to_buffer = stream->avail_in; + + if (bytes_to_buffer > BSIZE - state->b_bytes_valid) { + if (state->b_bytes_valid - state->b_bytes_processed > IGZIP_LA) { + /* There was an out buffer overflow last round, + * complete the processing of data */ + bytes_to_buffer = 0; + + } else { + /* Not enough room in the buffer, shift the + * buffer down to make space for the new data */ + offset = state->b_bytes_processed - IGZIP_D; // state->b_bytes_valid - (IGZIP_D + IGZIP_LA); + memmove(state->buffer, state->buffer + offset, + IGZIP_D + IGZIP_LA); + + state->b_bytes_processed -= offset; + state->b_bytes_valid -= offset; + state->file_start -= offset; + + stream->avail_in -= bytes_to_buffer; + memcpy(state->buffer + state->b_bytes_valid, stream->next_in, + bytes_to_buffer); + update_crc(crc, stream->next_in, bytes_to_buffer); + stream->next_in += bytes_to_buffer; + } + } else { + /* There is enough space in the buffer, copy in the new data */ + stream->avail_in -= bytes_to_buffer; + memcpy(state->buffer + state->b_bytes_valid, stream->next_in, + bytes_to_buffer); + update_crc(crc, stream->next_in, bytes_to_buffer); + stream->next_in += bytes_to_buffer; + } + + state->b_bytes_valid += bytes_to_buffer; + 
+ end_in = state->buffer + state->b_bytes_valid - IGZIP_LA; + + next_in = state->b_bytes_processed + state->buffer; + + while (next_in < end_in) { + + if (is_full(&state->bitbuf)) { + state->b_bytes_processed = next_in - state->buffer; + update_state(stream, state, start_in); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH_MASK; + dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - state->file_start); + + if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */ + assert(next_in - dist >= state->buffer); + assert(dist != 0); + + match_length = compare258(next_in - dist, next_in, 258); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH_MASK; + last_seen[hash] = + (uint64_t) (next_hash - state->file_start); + } + + get_len_code(stream->hufftables, match_length, &code, + &code_len); + get_dist_code(stream->hufftables, dist, &code2, + &code_len2); + + code |= code2 << code_len; + code_len += code_len2; + + write_bits(&state->bitbuf, code, code_len); + + next_in += match_length; + + continue; + } + } + + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + next_in++; + } + + state->b_bytes_processed = next_in - state->buffer; + + } + + update_state(stream, state, start_in); + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + return; + } + + return; + +} + +void isal_deflate_finish_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *next_in, *end_in, *end, *next_hash; + uint16_t match_length; + uint32_t dist; + uint64_t code, code_len, code2, 
code_len2; + struct isal_zstate *state = &stream->internal_state; + uint16_t *last_seen = state->head; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + end_in = state->b_bytes_valid + (uint8_t *) state->buffer; + + next_in = state->b_bytes_processed + state->buffer; + + while (next_in < end_in) { + + if (is_full(&state->bitbuf)) { + state->b_bytes_processed = next_in - state->buffer; + update_state(stream, state, stream->next_in); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH_MASK; + dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - state->file_start); + + if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */ + assert(next_in - dist >= state->buffer); + match_length = compare258(next_in - dist, next_in, end_in - next_in); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH_MASK; + last_seen[hash] = + (uint64_t) (next_hash - state->file_start); + } + + get_len_code(stream->hufftables, match_length, &code, + &code_len); + get_dist_code(stream->hufftables, dist, &code2, &code_len2); + + code |= code2 << code_len; + code_len += code_len2; + + write_bits(&state->bitbuf, code, code_len); + + next_in += match_length; + + continue; + } + } + + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + next_in++; + + } + + state->b_bytes_processed = next_in - state->buffer; + + if (is_full(&state->bitbuf) || state->left_over > 0) { + update_state(stream, state, stream->next_in); + return; + } + + get_lit_code(stream->hufftables, 256, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + state->has_eob = 1; + + 
update_state(stream, state, stream->next_in); + + if (stream->end_of_stream == 1) + state->state = ZSTATE_TRL; + else + state->state = ZSTATE_SYNC_FLUSH; + + return; +} diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm new file mode 100644 index 0000000..a4a3516 --- /dev/null +++ b/igzip/igzip_body.asm @@ -0,0 +1,751 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%ifndef TEST + +extern fold_4 + +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "bitbuf2.asm" +%include "huffman.asm" +%include "igzip_compare_types.asm" + +%include "reg_sizes.asm" + +%include "stdmac.asm" + +%if (ARCH == 04) + %define MOVDQA vmovdqa +%else + %define MOVDQA movdqa +%endif + +%ifdef DEBUG +%macro MARK 1 +global %1 +%1: +%endm +%else +%macro MARK 1 +%endm +%endif + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define tmp2 rcx +%define hash2 rcx + +%define b_bytes_valid rax +%define curr_data rax +%define code rax +%define tmp5 rax + +%define tmp4 rbx +%define dist rbx +%define code2 rbx + +%define x rdx +%define len rdx +%define hash rdx +%define code_len3 rdx + +%define tmp1 rsi +%define code_len2 rsi + +%define blen rdi +%define file_start rdi + +%define m_bit_count rbp + +%define in_buf r8 +%define curr_data2 r8 +%define len2 r8 +%define tmp6 r8 + +%define m_bits r9 + +%define f_i r10 + +%define m_out_buf r11 + +%define f_end_i r12 +%define dist2 r12 +%define tmp7 r12 +%define code4 r12 + +%define tmp3 r13 +%define code3 r13 + +%define stream r14 + +%define hufftables r15 + +%define crc_0 xmm0 ; in/out: crc state +%define crc_1 xmm1 ; in/out: crc state +%define crc_2 xmm2 ; in/out: crc state +%define crc_3 xmm3 ; in/out: crc state +%define crc_fold xmm4 ; in: (loaded from fold_4) + +%define xtmp0 xmm5 ; tmp +%define xtmp1 xmm6 ; tmp +%define xtmp2 xmm7 ; tmp +%define xtmp3 xmm8 ; tmp +%define xtmp4 xmm9 ; tmp + +%define ytmp0 ymm5 ; tmp +%define ytmp1 ymm6 ; tmp + +%if (ARCH == 04) +%define vtmp0 ymm5 ; tmp +%define vtmp1 ymm6 ; tmp +%define vtmp2 ymm7 ; tmp +%define vtmp3 ymm8 ; tmp +%define vtmp4 ymm9 ; tmp +%else +%define vtmp0 xmm5 ; tmp +%define 
vtmp1 xmm6 ; tmp +%define vtmp2 xmm7 ; tmp +%define vtmp3 xmm8 ; tmp +%define vtmp4 xmm9 ; tmp +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define b_bytes_processed f_i + +blen_mem_offset equ 0 ; local variable (8 bytes) +in_buf_mem_offset equ 8 +f_end_i_mem_offset equ 16 +empty_buffer_flag equ 24 +gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes) +xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) +stack_size equ 4*8 + 8*8 + 4*16 + 8 +;;; 8 because stack address is odd multiple of 8 after a function call and +;;; we want it aligned to 16 bytes + +; void isal_deflate_body ( isal_zstream *stream ) +; arg 1: rcx: addr of stream +global isal_deflate_body_ %+ ARCH +isal_deflate_body_ %+ ARCH %+ : +%ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi +%endif + + ;; do nothing if (avail_in == 0) + cmp dword [rcx + _avail_in], 0 + jne skip1 + + ;; Set stream's next state + mov rdx, ZSTATE_FLUSH_READ_BUFFER + mov rax, ZSTATE_BODY + cmp dword [rcx + _end_of_stream], 0 + cmovne rax, rdx + cmp dword [rcx + _flush], _NO_FLUSH + cmovne rax, rdx + mov dword [rcx + _internal_state_state], eax + ret +skip1: + +%ifdef ALIGN_STACK + push rbp + mov rbp, rsp + sub rsp, stack_size + and rsp, ~15 +%else + sub rsp, stack_size +%endif + + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rsi + mov [rsp + gpr_save_mem_offset + 2*8], rdi + mov [rsp + gpr_save_mem_offset + 3*8], rbp + mov [rsp + gpr_save_mem_offset + 4*8], r12 + mov [rsp + gpr_save_mem_offset + 5*8], r13 + mov [rsp + gpr_save_mem_offset + 6*8], r14 + mov [rsp + gpr_save_mem_offset + 7*8], r15 + MOVDQA [rsp + xmm_save_mem_offset + 0*16], xmm6 + MOVDQA [rsp + xmm_save_mem_offset + 1*16], xmm7 + MOVDQA [rsp + xmm_save_mem_offset + 2*16], xmm8 + MOVDQA [rsp + xmm_save_mem_offset + 
3*16], xmm9 + + mov stream, rcx + + MOVDQA crc_0, [stream + _internal_state_crc + 0*16] + MOVDQA crc_1, [stream + _internal_state_crc + 1*16] + MOVDQA crc_2, [stream + _internal_state_crc + 2*16] + MOVDQA crc_3, [stream + _internal_state_crc + 3*16] + MOVDQA crc_fold, [fold_4] + mov dword [stream + _internal_state_has_eob], 0 + + ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); + mov m_out_buf, [stream + _next_out] + mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf + mov tmp1 %+ d, [stream + _avail_out] + add tmp1, m_out_buf + sub tmp1, SLOP +skip_SLOP: + mov [stream + _internal_state_bitbuf_m_out_end], tmp1 + + mov m_bits, [stream + _internal_state_bitbuf_m_bits] + mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count] + + mov hufftables, [stream + _hufftables] + ; in_buf = stream->next_in + mov in_buf, [stream + _next_in] + mov blen %+ d, [stream + _avail_in] + + mov dword [rsp + empty_buffer_flag], 0 + cmp dword [stream + _flush], _FULL_FLUSH + sete byte [rsp + empty_buffer_flag] + cmp dword [stream + _internal_state_b_bytes_processed], 0 + sete byte [rsp + empty_buffer_flag + 1] + + ; while (blen != 0) +MARK __Compute_X_ %+ ARCH +loop1: + ; x = D + LA - (state->b_bytes_valid - state->b_bytes_processed); + mov b_bytes_valid %+ d, [stream + _internal_state_b_bytes_valid] + mov b_bytes_processed %+ d, [stream + _internal_state_b_bytes_processed] + lea x, [b_bytes_processed + D + LA] + sub x, b_bytes_valid + + ; if (x > D) x = D; + cmp x, D + cmova x, [const_D] + + ; if (blen < D) x = blen; + cmp blen, D + cmovb x, blen + + ;; process x bytes starting at in_buf + + ;; If there isn't enough room, shift buffer down + ; if (x > BSIZE - state->b_bytes_valid) { + mov tmp1, BSIZE + sub tmp1, b_bytes_valid + cmp x, tmp1 + jbe skip_move + + ; if (state->b_bytes_processed < state->b_bytes_valid - LA) { + mov tmp1, b_bytes_valid + sub tmp1, LA + cmp b_bytes_processed, tmp1 + jae do_move + + ;; We need to move an odd amount, skip move for 
this copy of loop + xor x,x + mov [rsp + blen_mem_offset], blen + jmp skip_move_zero + +MARK __shift_data_down_ %+ ARCH +do_move: + ; offset = state->b_bytes_valid - (D + LA); + mov tmp4, b_bytes_valid + sub tmp4, D + LA + ; copy_D_LA(state->buffer, state->buffer + offset); + lea tmp1, [stream + _internal_state_buffer] + lea tmp2, [tmp1 + tmp4] + copy_D_LA tmp1, tmp2, tmp3, vtmp0, vtmp1, vtmp2, vtmp3 + ; tmp1 clobbered + + ; state->file_start -= offset; + sub [stream + _internal_state_file_start], tmp4 + ; state->b_bytes_processed -= offset; + sub b_bytes_processed, tmp4 + mov b_bytes_valid, D + LA + +MARK __copy_in_ %+ ARCH +skip_move: + sub blen, x + + mov [rsp + blen_mem_offset], blen + + ; copy_in(state->buffer + state->b_bytes_valid, in_buf, x); + lea tmp1, [stream + _internal_state_buffer + b_bytes_valid] + mov tmp2, in_buf + mov tmp3, x + + + COPY_IN_CRC tmp1, tmp2, tmp3, tmp4, crc_0, crc_1, crc_2, crc_3, crc_fold, \ + xtmp0, xtmp1, xtmp2, xtmp3, xtmp4 + + ; in_buf += x; + add in_buf, x +MARK __prepare_loop_ %+ ARCH +skip_move_zero: + mov [rsp + in_buf_mem_offset], in_buf + ; state->b_bytes_valid += x; + add b_bytes_valid, x + mov [stream + _internal_state_b_bytes_valid], b_bytes_valid %+ d + + ; f_end_i = state->b_bytes_valid - LA; +%ifnidn f_end_i, b_bytes_valid + mov f_end_i, b_bytes_valid +%endif + sub f_end_i, LA + ; if (f_end_i <= 0) continue; + cmp f_end_i, 0 + jle continue_while + + ; f_start_i = state->b_bytes_processed; + ;; f_i and b_bytes_processed are same register, just store b_bytes_proc + mov [stream + _internal_state_b_bytes_processed], b_bytes_processed %+ d + + ; f_start_i += (uint32_t)(state->buffer - state->file_start); + mov file_start, [stream + _internal_state_file_start] + lea tmp1, [stream + _internal_state_buffer] + sub tmp1, file_start + add f_i, tmp1 + add f_end_i, tmp1 + mov [rsp + f_end_i_mem_offset], f_end_i + + ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { + cmp f_i, f_end_i + jge end_loop_2 + +MARK 
__misc_compute_hash_lookup_ %+ ARCH + mov curr_data %+ d, [file_start + f_i] + + cmp dword [rsp + empty_buffer_flag], 0 + jne write_first_byte + + mov curr_data2, curr_data + + compute_hash hash, curr_data + jmp loop2 + + align 16 + +loop2: + shr curr_data2, 8 + xor hash2 %+ d, hash2 %+ d + crc32 hash2 %+ d, curr_data2 %+ d + + ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + and hash %+ d, HASH_MASK + and hash2 %+ d, HASH_MASK + + ; if (state->bitbuf.is_full()) { + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja bitbuf_full + + xor dist, dist + xor dist2, dist2 + xor tmp3, tmp3 + + lea tmp1, [file_start + f_i] + lea tmp6, [tmp1 - 1] + + mov dist %+ w, f_i %+ w + sub dist %+ w, word [stream + _internal_state_head + 2 * hash] + + ; state->head[hash] = (uint16_t) f_i; + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + inc f_i + + mov dist2 %+ w, f_i %+ w + sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2] + dec dist2 + + ; state->head[hash2] = (uint16_t) f_i; + mov [stream + _internal_state_head + 2 * hash2], f_i %+ w + + mov tmp2, tmp1 + sub tmp2, dist + dec dist + + ; if ((dist-1) < (D-1)) { + cmp dist %+ d, (D-1) + cmovae tmp2, tmp6 + cmovae dist, tmp3 + inc dist + + cmp dist2 %+ d, (D-1) + cmovae dist2, tmp3 + inc dist2 + +MARK __compare_ %+ ARCH + ; len = compare258(state->file_start + f_i, + ; state->file_start + f_i - dist); + + ;; Speculatively load distance code (except for when large windows are used) + get_packed_dist_code dist, code2, hufftables + + ;; Check for long len/dist match (>7) with first literal + mov len, [tmp1] + xor len, [tmp2] + jz compare_loop + +%ifdef USE_HSWNI + blsmsk tmp3, len + or tmp3, 0xFFFFFF +%endif + + lea tmp1, [file_start + f_i] + mov tmp2, tmp1 + sub tmp2, dist2 + + ;; Speculatively load distance code (except for when large windows are used) + get_packed_dist_code dist2, code4, hufftables + + ;; Check for len/dist match (>7) with second literal + mov len2, [tmp1] + xor len2, 
[tmp2] + jz compare_loop2 + +%ifdef USE_HSWNI + ;; Check for len/dist match for first literal + test tmp3, len2 + jz len_dist_lit_huffman_pre + + cmp tmp3, 0xFFFFFF + je encode_2_literals + jmp len_dist_huffman_pre + + +MARK __len_dist_lit_huffman_ %+ ARCH +len_dist_lit_huffman_pre: + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, code_len3, hufftables +%else + ;; Speculatively load the code for the first literal + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, rcx, hufftables + + ;; Check for len/dist match for first literal + test len, 0xFFFFFF + jz len_dist_huffman_pre + + ;; Speculatively load the code for the second literal + shr curr_data, 8 + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + + shl code2, cl + or code2, code3 + add code_len2, rcx + + ;; Check for len/dist match for second literal + test len2, 0xFFFFFF + jnz write_lit_bits + +MARK __len_dist_lit_huffman_ %+ ARCH +len_dist_lit_huffman_pre: + mov code_len3, rcx +%endif + bsf len2, len2 + shr len2, 3 + +len_dist_lit_huffman: +%ifndef LONGER_HUFFTABLE + mov tmp4, dist2 + get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx +%else + unpack_dist_code code4, code_len2 +%endif + get_len_code len2, code, rcx, hufftables ;; rcx is code_len + +%ifdef USE_HSWNI + shlx code4, code4, rcx +%else + shl code4, cl +%endif + or code4, code + add code_len2, rcx + + mov rcx, code_len3 + +%ifdef USE_HSWNI + shlx code4, code4, rcx +%else + shl code4, cl +%endif + or code4, code3 + add code_len2, rcx + + mov code2, code4 + ;; Setup for updating hash + lea tmp3, [f_i + 1] ; tmp3 <= k + add f_i, len2 + + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + mov tmp5 %+ d, [file_start + tmp3] + mov tmp7, tmp5 + shr tmp7, 8 + + compute_hash hash, tmp5 + and hash %+ d, HASH_MASK + + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + + add tmp3,1 + + jmp update_hash_for_symbol + ;; encode as dist/len + +MARK 
__len_dist_huffman_ %+ ARCH +len_dist_huffman_pre: + bsf len, len + shr len, 3 +len_dist_huffman: + dec f_i + + ; get_dist_code(dist, &code2, &code_len2); +%ifndef LONGER_HUFFTABLE + mov tmp3, dist ; since code2 and dist are rbx + get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx +%else + unpack_dist_code code2, code_len2 +%endif + ; get_len_code(len, &code, &code_len); + get_len_code len, code, rcx, hufftables ;; rcx is code_len + + ; code2 <<= code_len + ; code2 |= code + ; code_len2 += code_len +%ifdef USE_HSWNI + shlx code2, code2, rcx +%else + shl code2, cl +%endif + or code2, code + add code_len2, rcx + + ;; Setup for updating hash + lea tmp3, [f_i + 2] ; tmp3 <= k + add f_i, len + mov tmp7 %+ d, [file_start + tmp3] + +MARK __update_hash_for_symbol_ %+ ARCH +update_hash_for_symbol: + mov curr_data %+ d, [file_start + f_i] + mov curr_data2, curr_data + compute_hash hash, curr_data +%ifdef LIMIT_HASH_UPDATE + ; only update hash twice, first hash was already calculated. 
+ + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + compute_hash hash2, tmp7 + and hash2 %+ d, HASH_MASK + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + +%else +loop3: + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + mov tmp7 %+ d, [file_start + tmp3] + compute_hash hash2, tmp7 + and hash2 %+ d, HASH_MASK + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + add tmp3,1 + cmp tmp3, f_i + jl loop3 +%endif + + +MARK __write_len_dist_bits_ %+ ARCH + mov f_end_i, [rsp + f_end_i_mem_offset] + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ; continue + cmp f_i, f_end_i + jl loop2 + jmp end_loop_2 + + +MARK __write_lit_bits_ %+ ARCH +%ifdef USE_HSWNI +encode_2_literals: + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, rcx, hufftables + + shr curr_data, 8 + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + + ;; Calculate code associated with both literals + shlx code2, code2, rcx + or code2, code3 + add code_len2, rcx +%endif +write_lit_bits: + mov f_end_i, [rsp + f_end_i_mem_offset] + add f_i, 1 + mov curr_data %+ d, [file_start + f_i] + mov curr_data2, curr_data + + compute_hash hash, curr_data + + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ; continue + cmp f_i, f_end_i + jl loop2 + + +MARK __end_loops_ %+ ARCH +end_loop_2: + + ; state->b_bytes_processed = f_i - (state->buffer - state->file_start); + add f_i, [stream + _internal_state_file_start] + sub f_i, stream + sub f_i, _internal_state_buffer + mov [stream + _internal_state_b_bytes_processed], f_i %+ d + + ; continue +continue_while: + mov blen, [rsp + blen_mem_offset] + mov in_buf, [rsp + in_buf_mem_offset] + cmp blen, 0 + jnz loop1 + +end: + ;; update input buffer + ; stream->total_in += (uint32_t)(in_buf - stream->next_in); // bytes copied + mov tmp1 %+ d, [stream + _total_in] + mov in_buf, [rsp + in_buf_mem_offset] + add tmp1, 
in_buf + sub tmp1, [stream + _next_in] + mov [stream + _total_in], tmp1 %+ d + + mov [stream + _next_in], in_buf + mov [stream + _avail_in], blen %+ d + + cmp blen, 0 + jne skip2 + + ;; Set stream's next state + mov tmp1, ZSTATE_FLUSH_READ_BUFFER + mov tmp5, ZSTATE_BODY + cmp dword [stream + _end_of_stream], 0 + cmovne tmp5, tmp1 + cmp dword [stream + _flush], _NO_FLUSH + cmovne tmp5, tmp1 + mov dword [stream + _internal_state_state], tmp5 %+ d +skip2: + mov [stream + _next_out], m_out_buf + ; offset = state->bitbuf.buffer_used(); + sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] + sub [stream + _avail_out], m_out_buf %+ d + add [stream + _total_out], m_out_buf %+ d + + mov [stream + _internal_state_bitbuf_m_bits], m_bits + mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d + + + MOVDQA [stream + _internal_state_crc + 0*16], crc_0 + MOVDQA [stream + _internal_state_crc + 1*16], crc_1 + MOVDQA [stream + _internal_state_crc + 2*16], crc_2 + MOVDQA [stream + _internal_state_crc + 3*16], crc_3 + + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rsi, [rsp + gpr_save_mem_offset + 1*8] + mov rdi, [rsp + gpr_save_mem_offset + 2*8] + mov rbp, [rsp + gpr_save_mem_offset + 3*8] + mov r12, [rsp + gpr_save_mem_offset + 4*8] + mov r13, [rsp + gpr_save_mem_offset + 5*8] + mov r14, [rsp + gpr_save_mem_offset + 6*8] + mov r15, [rsp + gpr_save_mem_offset + 7*8] + MOVDQA xmm6, [rsp + xmm_save_mem_offset + 0*16] + MOVDQA xmm7, [rsp + xmm_save_mem_offset + 1*16] + MOVDQA xmm8, [rsp + xmm_save_mem_offset + 2*16] + MOVDQA xmm9, [rsp + xmm_save_mem_offset + 3*16] + +%ifndef ALIGN_STACK + add rsp, stack_size +%else + mov rsp, rbp + pop rbp +%endif + ret + +MARK __bitbuf_full_ %+ ARCH +bitbuf_full: + mov blen, [rsp + blen_mem_offset] + ; state->b_bytes_processed = f_i - (state->buffer - state->file_start); + add f_i, [stream + _internal_state_file_start] + sub f_i, stream + sub f_i, _internal_state_buffer + mov [stream + _internal_state_b_bytes_processed], f_i 
%+ d + jmp end + +MARK __compare_loops_ %+ ARCH +compare_loop: +%if (COMPARE_TYPE == 1) + compare250 tmp1, tmp2, len, tmp3 +%elif (COMPARE_TYPE == 2) + compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1 +%elif (COMPARE_TYPE == 3) + compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1 +%else + %error Unknown Compare type COMPARE_TYPE + % error +%endif + jmp len_dist_huffman + +compare_loop2: +%if (COMPARE_TYPE == 1) + compare250 tmp1, tmp2, len2, tmp3 +%elif (COMPARE_TYPE == 2) + compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1 +%elif (COMPARE_TYPE == 3) + compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1 +%else +%error Unknown Compare type COMPARE_TYPE + % error +%endif + and curr_data, 0xff + get_lit_code curr_data, code3, code_len3, hufftables + jmp len_dist_lit_huffman + +MARK __write_first_byte_ %+ ARCH +write_first_byte: + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja bitbuf_full + + mov dword [rsp + empty_buffer_flag], 0 + compute_hash hash, curr_data + and hash %+ d, HASH_MASK + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + jmp write_lit_bits + +section .data + align 4 +const_D: dq D + +%endif ;; ifndef TEST diff --git a/igzip/igzip_body_01.asm b/igzip/igzip_body_01.asm new file mode 100644 index 0000000..0d130c7 --- /dev/null +++ b/igzip/igzip_body_01.asm @@ -0,0 +1,8 @@ +%define ARCH 01 + +%ifndef COMPARE_TYPE +%define COMPARE_TYPE 2 +%endif + +%include "igzip_buffer_utils_01.asm" +%include "igzip_body.asm" diff --git a/igzip/igzip_body_04.asm b/igzip/igzip_body_04.asm new file mode 100644 index 0000000..97b134b --- /dev/null +++ b/igzip/igzip_body_04.asm @@ -0,0 +1,9 @@ +%define ARCH 04 +%define USE_HSWNI + +%ifndef COMPARE_TYPE +%define COMPARE_TYPE 3 +%endif + +%include "igzip_buffer_utils_04.asm" +%include "igzip_body.asm" diff --git a/igzip/igzip_buffer_utils_01.asm b/igzip/igzip_buffer_utils_01.asm new file mode 100644 index 0000000..c6cb834 --- 
/dev/null +++ b/igzip/igzip_buffer_utils_01.asm @@ -0,0 +1,543 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef BUFFER_UTILS +%define BUFFER_UTILS + +%include "options.asm" + +extern pshufb_shf_table +extern mask3 + +%ifdef FIX_CACHE_READ +%define movntdqa movdqa +%else +%macro prefetchnta 1 +%endm +%endif + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; code for doing the CRC calculation as part of copy-in, using pclmulqdq + +; "shift" 4 input registers down 4 places +; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1 +%macro FOLD4 7 +%define %%xmm0 %1 ; xmm reg, in/out +%define %%xmm1 %2 ; xmm reg, in/out +%define %%xmm2 %3 ; xmm reg, in/out +%define %%xmm3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp +%define %%tmp1 %7 ; xmm reg, tmp + + movaps %%tmp0, %%xmm0 + movaps %%tmp1, %%xmm1 + + pclmulqdq %%xmm0, %%const, 0x01 + pclmulqdq %%xmm1, %%const, 0x01 + + pclmulqdq %%tmp0, %%const, 0x10 + pclmulqdq %%tmp1, %%const, 0x10 + + xorps %%xmm0, %%tmp0 + xorps %%xmm1, %%tmp1 + + + movaps %%tmp0, %%xmm2 + movaps %%tmp1, %%xmm3 + + pclmulqdq %%xmm2, %%const, 0x01 + pclmulqdq %%xmm3, %%const, 0x01 + + pclmulqdq %%tmp0, %%const, 0x10 + pclmulqdq %%tmp1, %%const, 0x10 + + xorps %%xmm2, %%tmp0 + xorps %%xmm3, %%tmp1 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 3 input registers down 4 places +; macro FOLD3 x0, x1, x2, x3, const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out D A' B' C' +%macro FOLD3 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + movdqa %%tmp0, %%x3 + + movaps %%x3, %%x2 + pclmulqdq %%x2, %%const, 0x01 + pclmulqdq %%x3, %%const, 0x10 + xorps %%x3, %%x2 + + movaps %%x2, %%x1 + pclmulqdq %%x1, %%const, 0x01 + pclmulqdq %%x2, %%const, 0x10 + xorps %%x2, %%x1 + + movaps %%x1, %%x0 + pclmulqdq 
%%x0, %%const, 0x01 + pclmulqdq %%x1, %%const, 0x10 + xorps %%x1, %%x0 + + movdqa %%x0, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 2 input registers down 4 places +; macro FOLD2 x0, x1, x2, x3, const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out C D A' B' +%macro FOLD2 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + movdqa %%tmp0, %%x3 + + movaps %%x3, %%x1 + pclmulqdq %%x1, %%const, 0x01 + pclmulqdq %%x3, %%const, 0x10 + xorps %%x3, %%x1 + + movdqa %%x1, %%tmp0 + movdqa %%tmp0, %%x2 + + movaps %%x2, %%x0 + pclmulqdq %%x0, %%const, 0x01 + pclmulqdq %%x2, %%const, 0x10 + xorps %%x2, %%x0 + + movdqa %%x0, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 1 input registers down 4 places +; macro FOLD1 x0, x1, x2, x3, const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out B C D A' +%macro FOLD1 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + movdqa %%tmp0, %%x3 + + movaps %%x3, %%x0 + pclmulqdq %%x0, %%const, 0x01 + pclmulqdq %%x3, %%const, 0x10 + xorps %%x3, %%x0 + + movdqa %%x0, %%x1 + movdqa %%x1, %%x2 + movdqa %%x2, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3 + +; XP X3 X2 X1 X0 tmp2 +; Initial state xI HG FE DC BA +; after shift IH GF ED CB A0 +; after fold ff GF ED CB ff = merge(IH, A0) +; +%macro PARTIAL_FOLD 12 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm 
reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%xp %5 ; xmm partial reg, in/clobbered +%define %%size %6 ; GPR, in/clobbered (1...15) +%define %%const %7 ; xmm reg, in +%define %%shl %8 ; xmm reg, tmp +%define %%shr %9 ; xmm reg, tmp +%define %%tmp2 %10 ; xmm reg, tmp +%define %%tmp3 %11 ; xmm reg, tmp +%define %%gtmp %12 ; GPR, tmp + + ; {XP X3 X2 X1 X0} = {xI HG FE DC BA} + shl %%size, 4 ; size *= 16 + lea %%gtmp, [pshufb_shf_table - 16 WRT_OPT] + movdqa %%shl, [%%gtmp + %%size] ; shl constant + movdqa %%shr, %%shl + pxor %%shr, [mask3 WRT_OPT] ; shr constant + + movdqa %%tmp2, %%x0 ; tmp2 = BA + pshufb %%tmp2, %%shl ; tmp2 = A0 + + pshufb %%x0, %%shr ; x0 = 0B + movdqa %%tmp3, %%x1 ; tmp3 = DC + pshufb %%tmp3, %%shl ; tmp3 = C0 + por %%x0, %%tmp3 ; x0 = CB + + pshufb %%x1, %%shr ; x1 = 0D + movdqa %%tmp3, %%x2 ; tmp3 = FE + pshufb %%tmp3, %%shl ; tmp3 = E0 + por %%x1, %%tmp3 ; x1 = ED + + pshufb %%x2, %%shr ; x2 = 0F + movdqa %%tmp3, %%x3 ; tmp3 = HG + pshufb %%tmp3, %%shl ; tmp3 = G0 + por %%x2, %%tmp3 ; x2 = GF + + pshufb %%x3, %%shr ; x3 = 0H + pshufb %%xp, %%shl ; xp = I0 + por %%x3, %%xp ; x3 = IH + + ; fold tmp2 into X3 + movaps %%tmp3, %%tmp2 + pclmulqdq %%tmp2, %%const, 0x01 + pclmulqdq %%tmp3, %%const, 0x10 + xorps %%x3, %%tmp2 + xorps %%x3, %%tmp3 +%endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; LOAD_FRACTIONAL_XMM: Packs xmm register with data when data input is less than 16 bytes. +; Returns 0 if data has length 0. +; Input: The input data (src), that data's length (size). +; Output: The packed xmm register (xmm_out). +; size is clobbered. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro LOAD_FRACTIONAL_XMM 3 +%define %%xmm_out %1 ; %%xmm_out is an xmm register +%define %%src %2 +%define %%size %3 + + pxor %%xmm_out, %%xmm_out + + cmp %%size, 0 + je %%_done + + add %%src, %%size + + cmp %%size, 8 + jl %%_byte_loop + + sub %%src, 8 + pinsrq %%xmm_out, [%%src], 0 ;Read in 8 bytes if they exists + sub %%size, 8 + + je %%_done + +%%_byte_loop: ;Read in data 1 byte at a time while data is left + pslldq %%xmm_out, 1 + + dec %%src + pinsrb %%xmm_out, BYTE [%%src], 0 + dec %%size + + jg %%_byte_loop + +%%_done: + +%endmacro ; LOAD_FRACTIONAL_XMM + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; copy x bytes (rounded up to 16 bytes) from src to dst +; src & dst are unaligned +; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold, +; xt0, xt1, xt2, xt3, xt4 +%macro COPY_IN_CRC 14 +%define %%dst %1 ; reg, in/clobbered +%define %%src %2 ; reg, in/clobbered +%define %%size %3 ; reg, in/clobbered +%define %%tmp %4 ; reg, tmp +%define %%x0 %5 ; xmm, in/out: crc state +%define %%x1 %6 ; xmm, in/out: crc state +%define %%x2 %7 ; xmm, in/out: crc state +%define %%x3 %8 ; xmm, in/out: crc state +%define %%xfold %9 ; xmm, in: (loaded from fold4) +%define %%xtmp0 %10 ; xmm, tmp +%define %%xtmp1 %11 ; xmm, tmp +%define %%xtmp2 %12 ; xmm, tmp +%define %%xtmp3 %13 ; xmm, tmp +%define %%xtmp4 %14 ; xmm, tmp + + cmp %%size, 16 + jl %%lt_16 + + ; align source + xor %%tmp, %%tmp + sub %%tmp, %%src + and %%tmp, 15 + jz %%already_aligned + + ; need to align, tmp contains number of bytes to transfer + movdqu %%xtmp0, [%%src] + movdqu [%%dst], %%xtmp0 + add %%dst, %%tmp + add %%src, %%tmp + sub %%size, %%tmp + +%ifndef DEFLATE + push %%dst + + PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, 
%%tmp, %%xfold, \ + %%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst + pop %%dst +%endif + +%%already_aligned: + sub %%size, 64 + jl %%end_loop + jmp %%loop +align 16 +%%loop: + movntdqa %%xtmp0, [%%src+0*16] + movntdqa %%xtmp1, [%%src+1*16] + movntdqa %%xtmp2, [%%src+2*16] + +%ifndef DEFLATE + FOLD4 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3, %%xtmp4 +%endif + movntdqa %%xtmp3, [%%src+3*16] + + movdqu [%%dst+0*16], %%xtmp0 + movdqu [%%dst+1*16], %%xtmp1 + movdqu [%%dst+2*16], %%xtmp2 + movdqu [%%dst+3*16], %%xtmp3 + +%ifndef DEFLATE + pxor %%x0, %%xtmp0 + pxor %%x1, %%xtmp1 + pxor %%x2, %%xtmp2 + pxor %%x3, %%xtmp3 +%endif + add %%src, 4*16 + add %%dst, 4*16 + sub %%size, 4*16 + jge %%loop + +%%end_loop: + ; %%size contains (num bytes left - 64) + add %%size, 16 + jge %%three_full_regs + add %%size, 16 + jge %%two_full_regs + add %%size, 16 + jge %%one_full_reg + add %%size, 16 + +%%no_full_regs: ; 0 <= %%size < 16, no full regs + jz %%done ; if no bytes left, we're done + jmp %%partial + + ;; Handle case where input is <16 bytes +%%lt_16: + test %%size, %%size + jz %%done ; if no bytes left, we're done + jmp %%partial + + +%%one_full_reg: + movntdqa %%xtmp0, [%%src+0*16] + +%ifndef DEFLATE + FOLD1 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + movdqu [%%dst+0*16], %%xtmp0 + +%ifndef DEFLATE + pxor %%x3, %%xtmp0 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 1*16 + add %%src, 1*16 + jmp %%partial + + +%%two_full_regs: + movntdqa %%xtmp0, [%%src+0*16] + movntdqa %%xtmp1, [%%src+1*16] + +%ifndef DEFLATE + FOLD2 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + movdqu [%%dst+0*16], %%xtmp0 + movdqu [%%dst+1*16], %%xtmp1 + +%ifndef DEFLATE + pxor %%x2, %%xtmp0 + pxor %%x3, %%xtmp1 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 2*16 + add %%src, 2*16 + jmp %%partial + + +%%three_full_regs: + movntdqa %%xtmp0, [%%src+0*16] + movntdqa %%xtmp1, [%%src+1*16] + movntdqa %%xtmp2, [%%src+2*16] + 
+%ifndef DEFLATE + FOLD3 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + movdqu [%%dst+0*16], %%xtmp0 + movdqu [%%dst+1*16], %%xtmp1 + movdqu [%%dst+2*16], %%xtmp2 + +%ifndef DEFLATE + pxor %%x1, %%xtmp0 + pxor %%x2, %%xtmp1 + pxor %%x3, %%xtmp2 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 3*16 + add %%src, 3*16 + + ; fall through to %%partial +%%partial: ; 0 <= %%size < 16 + +%ifndef DEFLATE + mov %%tmp, %%size +%endif + + LOAD_FRACTIONAL_XMM %%xtmp0, %%src, %%size + + movdqu [%%dst], %%xtmp0 + +%ifndef DEFLATE + PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \ + %%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst +%endif + +%%done: +%endm + + +;%assign K 1024; +;%assign D 8 * K; ; Amount of history +;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary + +; copy D + LA bytes from src to dst +; dst is aligned +;void copy_D_LA(uint8_t *dst, uint8_t *src); +; arg 1: rcx : dst +; arg 2: rdx : src +; copy_D_LA dst, src, tmp, xtmp0, xtmp1, xtmp2, xtmp3 +%macro copy_D_LA 7 +%define %%dst %1 ; reg, clobbered +%define %%src %2 ; reg, clobbered +%define %%tmp %3 +%define %%xtmp0 %4 +%define %%xtmp1 %5 +%define %%xtmp2 %6 +%define %%xtmp3 %7 + +%assign %%SIZE (D + LA) / 16 ; number of DQ words to be copied +%assign %%SIZE4 %%SIZE/4 + + lea %%tmp, [%%dst + 4 * 16 * %%SIZE4] + jmp %%copy_D_LA_1 +align 16 +%%copy_D_LA_1: + movdqu %%xtmp0, [%%src] + movdqu %%xtmp1, [%%src+16] + movdqu %%xtmp2, [%%src+32] + movdqu %%xtmp3, [%%src+48] + movdqa [%%dst], %%xtmp0 + movdqa [%%dst+16], %%xtmp1 + movdqa [%%dst+32], %%xtmp2 + movdqa [%%dst+48], %%xtmp3 + add %%src, 4*16 + add %%dst, 4*16 + cmp %%dst, %%tmp + jne %%copy_D_LA_1 +%assign %%i 0 +%rep (%%SIZE - 4 * %%SIZE4) + +%if (%%i == 0) + movdqu %%xtmp0, [%%src + %%i*16] +%elif (%%i == 1) + movdqu %%xtmp1, [%%src + %%i*16] +%elif (%%i == 2) + movdqu %%xtmp2, [%%src + %%i*16] +%elif (%%i == 3) + movdqu %%xtmp3, [%%src + %%i*16] +%else + %error too many i + % error +%endif 
+ +%assign %%i %%i+1 +%endrep +%assign %%i 0 +%rep (%%SIZE - 4 * %%SIZE4) + +%if (%%i == 0) + movdqa [%%dst + %%i*16], %%xtmp0 +%elif (%%i == 1) + movdqa [%%dst + %%i*16], %%xtmp1 +%elif (%%i == 2) + movdqa [%%dst + %%i*16], %%xtmp2 +%elif (%%i == 3) + movdqa [%%dst + %%i*16], %%xtmp3 +%else + %error too many i + % error +%endif + +%assign %%i %%i+1 +%endrep +%endm +%endif diff --git a/igzip/igzip_buffer_utils_04.asm b/igzip/igzip_buffer_utils_04.asm new file mode 100644 index 0000000..94487cf --- /dev/null +++ b/igzip/igzip_buffer_utils_04.asm @@ -0,0 +1,552 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef BUFFER_UTILS +%define BUFFER_UTILS + +%include "options.asm" + +extern pshufb_shf_table +extern mask3 + +%ifdef FIX_CACHE_READ +%define vmovntdqa vmovdqa +%else +%macro prefetchnta 1 +%endm +%endif + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; code for doing the CRC calculation as part of copy-in, using pclmulqdq + +; "shift" 4 input registers down 4 places +; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1 +%macro FOLD4 7 +%define %%xmm0 %1 ; xmm reg, in/out +%define %%xmm1 %2 ; xmm reg, in/out +%define %%xmm2 %3 ; xmm reg, in/out +%define %%xmm3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp +%define %%tmp1 %7 ; xmm reg, tmp + + vmovaps %%tmp0, %%xmm0 + vmovaps %%tmp1, %%xmm1 + + vpclmulqdq %%xmm0, %%const, 0x01 + vpclmulqdq %%xmm1, %%const, 0x01 + + vpclmulqdq %%tmp0, %%const, 0x10 + vpclmulqdq %%tmp1, %%const, 0x10 + + vxorps %%xmm0, %%tmp0 + vxorps %%xmm1, %%tmp1 + + + vmovaps %%tmp0, %%xmm2 + vmovaps %%tmp1, %%xmm3 + + vpclmulqdq %%xmm2, %%const, 0x01 + vpclmulqdq %%xmm3, %%const, 0x01 + + vpclmulqdq %%tmp0, %%const, 0x10 + vpclmulqdq %%tmp1, %%const, 0x10 + + vxorps %%xmm2, %%tmp0 + vxorps %%xmm3, %%tmp1 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 3 input registers down 4 places +; macro FOLD3 x0, x1, x2, x3, 
const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out D A' B' C' +%macro FOLD3 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + vmovdqa %%tmp0, %%x3 + + vmovaps %%x3, %%x2 + vpclmulqdq %%x2, %%const, 0x01 + vpclmulqdq %%x3, %%const, 0x10 + vxorps %%x3, %%x2 + + vmovaps %%x2, %%x1 + vpclmulqdq %%x1, %%const, 0x01 + vpclmulqdq %%x2, %%const, 0x10 + vxorps %%x2, %%x1 + + vmovaps %%x1, %%x0 + vpclmulqdq %%x0, %%const, 0x01 + vpclmulqdq %%x1, %%const, 0x10 + vxorps %%x1, %%x0 + + vmovdqa %%x0, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 2 input registers down 4 places +; macro FOLD2 x0, x1, x2, x3, const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out C D A' B' +%macro FOLD2 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + vmovdqa %%tmp0, %%x3 + + vmovaps %%x3, %%x1 + vpclmulqdq %%x1, %%const, 0x01 + vpclmulqdq %%x3, %%const, 0x10 + vxorps %%x3, %%x1 + + vmovdqa %%x1, %%tmp0 + vmovdqa %%tmp0, %%x2 + + vmovaps %%x2, %%x0 + vpclmulqdq %%x0, %%const, 0x01 + vpclmulqdq %%x2, %%const, 0x10 + vxorps %%x2, %%x0 + + vmovdqa %%x0, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; "shift" 1 input registers down 4 places +; macro FOLD1 x0, x1, x2, x3, const, tmp0 +; x0 x1 x2 x3 +; In A B C D +; Out B C D A' +%macro FOLD1 6 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%const %5 ; xmm reg, in +%define %%tmp0 %6 ; xmm reg, tmp + + vmovdqa %%tmp0, %%x3 + + vmovaps %%x3, %%x0 + vpclmulqdq %%x0, %%const, 0x01 + vpclmulqdq %%x3, %%const, 0x10 + vxorps %%x3, %%x0 + + 
vmovdqa %%x0, %%x1 + vmovdqa %%x1, %%x2 + vmovdqa %%x2, %%tmp0 +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3 + +; XP X3 X2 X1 X0 tmp2 +; Initial state xI HG FE DC BA +; after shift IH GF ED CB A0 +; after fold ff GF ED CB ff = merge(IH, A0) +; +%macro PARTIAL_FOLD 12 +%define %%x0 %1 ; xmm reg, in/out +%define %%x1 %2 ; xmm reg, in/out +%define %%x2 %3 ; xmm reg, in/out +%define %%x3 %4 ; xmm reg, in/out +%define %%xp %5 ; xmm partial reg, in/clobbered +%define %%size %6 ; GPR, in/clobbered (1...15) +%define %%const %7 ; xmm reg, in +%define %%shl %8 ; xmm reg, tmp +%define %%shr %9 ; xmm reg, tmp +%define %%tmp2 %10 ; xmm reg, tmp +%define %%tmp3 %11 ; xmm reg, tmp +%define %%gtmp %12 ; GPR, tmp + + ; {XP X3 X2 X1 X0} = {xI HG FE DC BA} + shl %%size, 4 ; size *= 16 + lea %%gtmp, [pshufb_shf_table - 16 WRT_OPT] + vmovdqa %%shl, [%%gtmp + %%size] ; shl constant + vmovdqa %%shr, %%shl + vpxor %%shr, [mask3 WRT_OPT] ; shr constant + + vmovdqa %%tmp2, %%x0 ; tmp2 = BA + vpshufb %%tmp2, %%shl ; tmp2 = A0 + + vpshufb %%x0, %%shr ; x0 = 0B + vmovdqa %%tmp3, %%x1 ; tmp3 = DC + vpshufb %%tmp3, %%shl ; tmp3 = C0 + vpor %%x0, %%tmp3 ; x0 = CB + + vpshufb %%x1, %%shr ; x1 = 0D + vmovdqa %%tmp3, %%x2 ; tmp3 = FE + vpshufb %%tmp3, %%shl ; tmp3 = E0 + vpor %%x1, %%tmp3 ; x1 = ED + + vpshufb %%x2, %%shr ; x2 = 0F + vmovdqa %%tmp3, %%x3 ; tmp3 = HG + vpshufb %%tmp3, %%shl ; tmp3 = G0 + vpor %%x2, %%tmp3 ; x2 = GF + + vpshufb %%x3, %%shr ; x3 = 0H + vpshufb %%xp, %%shl ; xp = I0 + vpor %%x3, %%xp ; x3 = IH + + ; fold tmp2 into X3 + vmovaps %%tmp3, %%tmp2 + vpclmulqdq %%tmp2, %%const, 0x01 + vpclmulqdq %%tmp3, %%const, 0x10 + vxorps %%x3, %%tmp2 + vxorps %%x3, %%tmp3 +%endm + + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; LOAD_FRACTIONAL_XMM: Packs xmm register with data when data input is less than 16 bytes. +; Returns 0 if data has length 0. +; Input: The input data (src), that data's length (size). +; Output: The packed xmm register (xmm_out). +; size is clobbered. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro LOAD_FRACTIONAL_XMM 3 +%define %%xmm_out %1 ; %%xmm_out is an xmm register +%define %%src %2 +%define %%size %3 + + vpxor %%xmm_out, %%xmm_out + + cmp %%size, 0 + je %%_done + + add %%src, %%size + + cmp %%size, 8 + jl %%_byte_loop + + sub %%src, 8 + vpinsrq %%xmm_out, [%%src], 0 ;Read in 8 bytes if they exists + sub %%size, 8 + + je %%_done + +%%_byte_loop: ;Read in data 1 byte at a time while data is left + vpslldq %%xmm_out, 1 + + dec %%src + vpinsrb %%xmm_out, BYTE [%%src], 0 + dec %%size + + jg %%_byte_loop + +%%_done: + +%endmacro ; LOAD_FRACTIONAL_XMM + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; copy x bytes (rounded up to 16 bytes) from src to dst +; src & dst are unaligned +; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold, +; xt0, xt1, xt2, xt3, xt4 +%macro COPY_IN_CRC 14 +%define %%dst %1 ; reg, in/clobbered +%define %%src %2 ; reg, in/clobbered +%define %%size %3 ; reg, in/clobbered +%define %%tmp %4 ; reg, tmp +%define %%x0 %5 ; xmm, in/out: crc state +%define %%x1 %6 ; xmm, in/out: crc state +%define %%x2 %7 ; xmm, in/out: crc state +%define %%x3 %8 ; xmm, in/out: crc state +%define %%xfold %9 ; xmm, in: (loaded from fold4) +%define %%xtmp0 %10 ; xmm, tmp +%define %%xtmp1 %11 ; xmm, tmp +%define %%xtmp2 %12 ; xmm, tmp +%define %%xtmp3 %13 ; xmm, tmp +%define %%xtmp4 %14 ; xmm, tmp + + cmp %%size, 16 + jl %%lt_16 + 
+ ; align source + xor %%tmp, %%tmp + sub %%tmp, %%src + and %%tmp, 15 + jz %%already_aligned + + ; need to align, tmp contains number of bytes to transfer + vmovdqu %%xtmp0, [%%src] + vmovdqu [%%dst], %%xtmp0 + add %%dst, %%tmp + add %%src, %%tmp + sub %%size, %%tmp + +%ifndef DEFLATE + push %%dst + + PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \ + %%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst + pop %%dst +%endif + +%%already_aligned: + sub %%size, 64 + jl %%end_loop + jmp %%loop +align 16 +%%loop: + vmovntdqa %%xtmp0, [%%src+0*16] + vmovntdqa %%xtmp1, [%%src+1*16] + vmovntdqa %%xtmp2, [%%src+2*16] + +%ifndef DEFLATE + FOLD4 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3, %%xtmp4 +%endif + vmovntdqa %%xtmp3, [%%src+3*16] + + vmovdqu [%%dst+0*16], %%xtmp0 + vmovdqu [%%dst+1*16], %%xtmp1 + vmovdqu [%%dst+2*16], %%xtmp2 + vmovdqu [%%dst+3*16], %%xtmp3 + +%ifndef DEFLATE + vpxor %%x0, %%xtmp0 + vpxor %%x1, %%xtmp1 + vpxor %%x2, %%xtmp2 + vpxor %%x3, %%xtmp3 +%endif + add %%src, 4*16 + add %%dst, 4*16 + sub %%size, 4*16 + jge %%loop + +%%end_loop: + ; %%size contains (num bytes left - 64) + add %%size, 16 + jge %%three_full_regs + add %%size, 16 + jge %%two_full_regs + add %%size, 16 + jge %%one_full_reg + add %%size, 16 + +%%no_full_regs: ; 0 <= %%size < 16, no full regs + jz %%done ; if no bytes left, we're done + jmp %%partial + + ;; Handle case where input is <16 bytes +%%lt_16: + test %%size, %%size + jz %%done ; if no bytes left, we're done + jmp %%partial + + +%%one_full_reg: + vmovntdqa %%xtmp0, [%%src+0*16] + +%ifndef DEFLATE + FOLD1 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + vmovdqu [%%dst+0*16], %%xtmp0 + +%ifndef DEFLATE + vpxor %%x3, %%xtmp0 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 1*16 + add %%src, 1*16 + jmp %%partial + + +%%two_full_regs: + vmovntdqa %%xtmp0, [%%src+0*16] + vmovntdqa %%xtmp1, [%%src+1*16] + +%ifndef DEFLATE + FOLD2 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + vmovdqu 
[%%dst+0*16], %%xtmp0 + vmovdqu [%%dst+1*16], %%xtmp1 + +%ifndef DEFLATE + vpxor %%x2, %%xtmp0 + vpxor %%x3, %%xtmp1 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 2*16 + add %%src, 2*16 + jmp %%partial + + +%%three_full_regs: + vmovntdqa %%xtmp0, [%%src+0*16] + vmovntdqa %%xtmp1, [%%src+1*16] + vmovntdqa %%xtmp2, [%%src+2*16] + +%ifndef DEFLATE + FOLD3 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3 +%endif + vmovdqu [%%dst+0*16], %%xtmp0 + vmovdqu [%%dst+1*16], %%xtmp1 + vmovdqu [%%dst+2*16], %%xtmp2 + +%ifndef DEFLATE + vpxor %%x1, %%xtmp0 + vpxor %%x2, %%xtmp1 + vpxor %%x3, %%xtmp2 +%endif + test %%size, %%size + jz %%done ; if no bytes left, we're done + + add %%dst, 3*16 + add %%src, 3*16 + + ; fall through to %%partial +%%partial: ; 0 <= %%size < 16 + +%ifndef DEFLATE + mov %%tmp, %%size +%endif + + LOAD_FRACTIONAL_XMM %%xtmp0, %%src, %%size + + vmovdqu [%%dst], %%xtmp0 + +%ifndef DEFLATE + PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \ + %%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst +%endif + +%%done: +%endm + + +;%assign K 1024; +;%assign D 8 * K; ; Amount of history +;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary + +; copy D + LA bytes from src to dst +; dst is aligned +;void copy_D_LA(uint8_t *dst, uint8_t *src); +; arg 1: rcx : dst +; arg 2: rdx : src +; copy_D_LA dst, src, tmp, xtmp0, xtmp1, xtmp2, xtmp3 +%macro copy_D_LA 7 +%define %%dst %1 ; reg, clobbered +%define %%src %2 ; reg, clobbered +%define %%tmp %3 +%define %%ytmp0 %4 +%define %%ytmp1 %5 +%define %%ytmp2 %6 +%define %%ytmp3 %7 + +%define %%xtmp0 %4x + +%assign %%SIZE (D + LA) / 32 ; number of DQ words to be copied +%assign %%SIZE4 %%SIZE/4 +%assign %%MOD16 ((D + LA) - 32 * %%SIZE) / 16 + + lea %%tmp, [%%dst + 4 * 32 * %%SIZE4] + jmp %%copy_D_LA_1 +align 16 +%%copy_D_LA_1: + vmovdqu %%ytmp0, [%%src] + vmovdqu %%ytmp1, [%%src + 1 * 32] + vmovdqu %%ytmp2, [%%src + 2 * 32] + vmovdqu %%ytmp3, [%%src + 3 * 32] + vmovdqa 
[%%dst], %%ytmp0 + vmovdqa [%%dst + 1 * 32], %%ytmp1 + vmovdqa [%%dst + 2 * 32], %%ytmp2 + vmovdqa [%%dst + 3 * 32], %%ytmp3 + add %%src, 4*32 + add %%dst, 4*32 + cmp %%dst, %%tmp + jne %%copy_D_LA_1 +%assign %%i 0 +%rep (%%SIZE - 4 * %%SIZE4) + +%if (%%i == 0) + vmovdqu %%ytmp0, [%%src + %%i*32] +%elif (%%i == 1) + vmovdqu %%ytmp1, [%%src + %%i*32] +%elif (%%i == 2) + vmovdqu %%ytmp2, [%%src + %%i*32] +%elif (%%i == 3) + vmovdqu %%ytmp3, [%%src + %%i*32] +%else + %error too many i + % error +%endif + +%assign %%i %%i+1 +%endrep +%assign %%i 0 +%rep (%%SIZE - 4 * %%SIZE4) + +%if (%%i == 0) + vmovdqa [%%dst + %%i*32], %%ytmp0 +%elif (%%i == 1) + vmovdqa [%%dst + %%i*32], %%ytmp1 +%elif (%%i == 2) + vmovdqa [%%dst + %%i*32], %%ytmp2 +%elif (%%i == 3) + vmovdqa [%%dst + %%i*32], %%ytmp3 +%else + %error too many i + % error +%endif + +%assign %%i %%i+1 +%endrep + +%rep %%MOD16 + vmovdqu %%xtmp0, [%%src + (%%SIZE - 4 * %%SIZE4)*32] + vmovdqa [%%dst + (%%SIZE - 4 * %%SIZE4)*32], %%xtmp0 +%endrep + +%endm +%endif diff --git a/igzip/igzip_check.c b/igzip/igzip_check.c new file mode 100644 index 0000000..0ef79d9 --- /dev/null +++ b/igzip/igzip_check.c @@ -0,0 +1,1285 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" +#include "igzip_inflate_ref.h" +#include "crc_inflate.h" +#include + +#ifndef RANDOMS +# define RANDOMS 50 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define IBUF_SIZE (1024*1024) + +#ifndef IGZIP_USE_GZIP_FORMAT +# define DEFLATE 1 +#endif + +#define str1 "Short test string" +#define str2 "one two three four five six seven eight nine ten eleven twelve " \ + "thirteen fourteen fifteen sixteen" + +#define TYPE0_HDR_SIZE 5 /* Size of a type 0 blocks header in bytes */ +#define TYPE0_MAX_SIZE 65535 /* Max length of a type 0 block in bytes (excludes the header) */ + +#define MAX_LOOPS 20 +/* Defines for the possible error conditions */ +enum IGZIP_TEST_ERROR_CODES { + IGZIP_COMP_OK, + + MALLOC_FAILED, + FILE_READ_FAILED, + + COMPRESS_INCORRECT_STATE, + COMPRESS_INPUT_STREAM_INTEGRITY_ERROR, + COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR, + COMPRESS_END_OF_STREAM_NOT_SET, + 
COMPRESS_ALL_INPUT_FAIL, + COMPRESS_OUT_BUFFER_OVERFLOW, + COMPRESS_LOOP_COUNT_OVERFLOW, + COMPRESS_GENERAL_ERROR, + + INFLATE_END_OF_INPUT, + INFLATE_INVALID_BLOCK_HEADER, + INFLATE_INVALID_SYMBOL, + INFLATE_OUT_BUFFER_OVERFLOW, + INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH, + INFLATE_LEFTOVER_INPUT, + INFLATE_INCORRECT_OUTPUT_SIZE, + INFLATE_INVALID_LOOK_BACK_DISTANCE, + INVALID_GZIP_HEADER, + INCORRECT_GZIP_TRAILER, + INFLATE_GENERAL_ERROR, + + INVALID_FLUSH_ERROR, + + OVERFLOW_TEST_ERROR, + RESULT_ERROR +}; + +const int hdr_bytes = 300; + +#ifndef DEFLATE +const uint8_t gzip_hdr[10] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff +}; + +const uint32_t gzip_hdr_bytes = 10; +const uint32_t gzip_trl_bytes = 8; + +const int trl_bytes = 8; +const int gzip_extra_bytes = 18; + +#else +const int trl_bytes = 0; +const int gzip_extra_bytes = 0; + +#endif + +#define HISTORY_SIZE 32*1024 +#define MIN_LENGTH 3 +#define MIN_DIST 1 + +/* Create random compressible data. This is achieved by randomly choosing a + * random character, or to repeat previous data in the stream for a random + * length and look back distance. 
The probability of a random character or a
+ * repeat being chosen is semi-randomly chosen by setting max_repeat_data to be
+ * differing values.
+ *
+ * data: destination buffer; exactly size bytes are written to it.
+ * size: number of bytes to generate; nothing is written when size <= 0.
+ *
+ * All randomness comes from rand(), so the output is reproducible for a given
+ * srand() seed. */
+void create_rand_repeat_data(uint8_t * data, int size)
+{
+	uint32_t next_data;
+	uint8_t *data_start = data;
+	uint32_t length, distance;
+	uint32_t max_repeat_data = 256;
+	uint32_t power = rand() % 32;
+	/* An array of the powers of 2 (except the final element which is 0) */
+	const uint32_t power_of_2_array[] = {
+		0x00000001, 0x00000002, 0x00000004, 0x00000008,
+		0x00000010, 0x00000020, 0x00000040, 0x00000080,
+		0x00000100, 0x00000200, 0x00000400, 0x00000800,
+		0x00001000, 0x00002000, 0x00004000, 0x00008000,
+		0x00010000, 0x00020000, 0x00040000, 0x00080000,
+		0x00100000, 0x00200000, 0x00400000, 0x00800000,
+		0x01000000, 0x02000000, 0x04000000, 0x08000000,
+		0x10000000, 0x20000000, 0x40000000, 0x00000000
+	};
+
+	/* Draws below 256 become literal bytes, anything above becomes a repeat,
+	 * so a larger max_repeat_data makes repeats more likely. With the final
+	 * (zero) table entry every draw is a literal. */
+	max_repeat_data += power_of_2_array[power];
+
+	/* Always start with one literal so that a repeat has history to copy. */
+	if (size-- > 0)
+		*data++ = rand();
+
+	while (size > 0) {
+		next_data = rand() % max_repeat_data;
+		if (next_data < 256) {
+			*data++ = next_data;
+			size--;
+		} else if (size < 3) {
+			/* Not enough room left for a minimum-length repeat. */
+			*data++ = rand() % 256;
+			size--;
+		} else {
+			/* Repeat length, limited to the space that remains. */
+			length = (rand() % 256) + MIN_LENGTH;
+			if (length > size)
+				length = (rand() % (size - 2)) + MIN_LENGTH;
+
+			/* HISTORY_SIZE expands to the unparenthesized 32*1024, so
+			 * "rand() % HISTORY_SIZE" would parse as (rand() % 32) * 1024.
+			 * Parenthesize it so the modulus spans the whole window. */
+			distance = (rand() % (HISTORY_SIZE)) + MIN_DIST;
+			/* Never look back past the start of the buffer. */
+			if (distance > data - data_start)
+				distance = (rand() % (data - data_start)) + MIN_DIST;
+
+			size -= length;
+			if (distance <= length) {
+				/* Source and destination may overlap: copy forwards one
+				 * byte at a time so already-copied bytes are reused. */
+				while (length-- > 0) {
+					*data = *(data - distance);
+					data++;
+				}
+			} else {
+				memcpy(data, data - distance, length);
+				/* Keep data in step with size; without this the next
+				 * writes overwrite the copy and the tail of the buffer
+				 * is never filled. */
+				data += length;
+			}
+		}
+	}
+}
+
+void print_error(int error_code)
+{
+	switch (error_code) {
+	case IGZIP_COMP_OK:
+		break;
+	case MALLOC_FAILED:
+		printf("error: failed to allocate memory\n");
+		break;
+	case FILE_READ_FAILED:
+		printf("error: failed to read in file\n");
+		break;
+	case COMPRESS_INCORRECT_STATE:
+		printf("error: incorrect stream internal state\n");
+		break;
+	case COMPRESS_INPUT_STREAM_INTEGRITY_ERROR:
+		printf("error:
inconsistent stream input buffer\n"); + break; + case COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR: + printf("error: inconsistent stream output buffer\n"); + break; + case COMPRESS_END_OF_STREAM_NOT_SET: + printf("error: end of stream not set\n"); + break; + case COMPRESS_ALL_INPUT_FAIL: + printf("error: not all input data compressed\n"); + break; + case COMPRESS_OUT_BUFFER_OVERFLOW: + printf("error: output buffer overflow while compressing data\n"); + break; + case COMPRESS_GENERAL_ERROR: + printf("error: compression failed\n"); + break; + case INFLATE_END_OF_INPUT: + printf("error: did not decompress all input\n"); + break; + case INFLATE_INVALID_BLOCK_HEADER: + printf("error: invalid header\n"); + break; + case INFLATE_INVALID_SYMBOL: + printf("error: invalid symbol found when decompressing input\n"); + break; + case INFLATE_OUT_BUFFER_OVERFLOW: + printf("error: output buffer overflow while decompressing data\n"); + break; + case INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH: + printf("error: invalid length bits in non-compressed block\n"); + break; + case INFLATE_GENERAL_ERROR: + printf("error: decompression failed\n"); + break; + case INFLATE_LEFTOVER_INPUT: + printf("error: the trailer of igzip output contains junk\n"); + break; + case INFLATE_INCORRECT_OUTPUT_SIZE: + printf("error: incorrect amount of data was decompressed\n"); + break; + case INFLATE_INVALID_LOOK_BACK_DISTANCE: + printf("error: invalid look back distance found while decompressing\n"); + break; + case INVALID_GZIP_HEADER: + printf("error: incorrect gzip header found when inflating data\n"); + break; + case INCORRECT_GZIP_TRAILER: + printf("error: incorrect gzip trailer found when inflating data\n"); + break; + case INVALID_FLUSH_ERROR: + printf("error: invalid flush did not cause compression to error\n"); + break; + case RESULT_ERROR: + printf("error: decompressed data is not the same as the compressed data\n"); + break; + case OVERFLOW_TEST_ERROR: + printf("error: overflow undetected\n"); + break; 
+ default: + printf("error: unknown error code\n"); + } +} + +void print_uint8_t(uint8_t * array, uint64_t length) +{ + int i; + + const int line_size = 16; + printf("Length = %lu", length); + for (i = 0; i < length; i++) { + if ((i % line_size) == 0) + printf("\n0x%08x\t", i); + else + printf(" "); + printf("0x%02x,", array[i]); + } + printf("\n"); +} + +#ifndef DEFLATE +uint32_t check_gzip_header(uint8_t * z_buf) +{ + /* These values are defined in RFC 1952 page 4 */ + const uint8_t ID1 = 0x1f, ID2 = 0x8b, CM = 0x08, FLG = 0; + uint32_t ret = 0; + int i; + /* Verify that the gzip header is the one used in hufftables_c.c */ + for (i = 0; i < gzip_hdr_bytes; i++) + if (z_buf[i] != gzip_hdr[i]) + ret = INVALID_GZIP_HEADER; + + /* Verify that the gzip header is a valid gzip header */ + if (*z_buf++ != ID1) + ret = INVALID_GZIP_HEADER; + + if (*z_buf++ != ID2) + ret = INVALID_GZIP_HEADER; + + /* Verfiy compression method is Deflate */ + if (*z_buf++ != CM) + ret = INVALID_GZIP_HEADER; + + /* The following comparison is specific to how gzip headers are written in igzip */ + /* Verify no extra flags are set */ + if (*z_buf != FLG) + ret = INVALID_GZIP_HEADER; + + /* The last 6 bytes in the gzip header do not contain any information + * important to decomrpessing the data */ + + return ret; +} + +uint32_t check_gzip_trl(struct inflate_state * gstream) +{ + uint8_t *index = NULL; + uint32_t crc, ret = 0; + + index = gstream->out_buffer.next_out - gstream->out_buffer.total_out; + crc = find_crc(index, gstream->out_buffer.total_out); + + if (gstream->out_buffer.total_out != *(uint32_t *) (gstream->in_buffer.next_in + 4) || + crc != *(uint32_t *) gstream->in_buffer.next_in) + ret = INCORRECT_GZIP_TRAILER; + + return ret; +} +#endif + +/* Inflate the compressed data and check that the decompressed data agrees with the input data */ +int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) +{ + /* Test inflate with reference inflate */ + + int ret = 0; + 
struct inflate_state gstream; + uint32_t test_size = in_size; + uint8_t *test_buf = NULL; + int mem_result = 0; + + assert(in_buf != NULL); + + if (in_size > 0) { + test_buf = malloc(test_size); + + if (test_buf == NULL) + return MALLOC_FAILED; + } + if (test_buf != NULL) + memset(test_buf, 0xff, test_size); + +#ifndef DEFLATE + int gzip_hdr_result, gzip_trl_result; + + gzip_hdr_result = check_gzip_header(z_buf); + z_buf += gzip_hdr_bytes; + z_size -= gzip_hdr_bytes; +#endif + + igzip_inflate_init(&gstream, z_buf, z_size, test_buf, test_size); + ret = igzip_inflate(&gstream); + + if (test_buf != NULL) + mem_result = memcmp(in_buf, test_buf, in_size); + +#ifdef VERBOSE + int i; + if (mem_result) + for (i = 0; i < in_size; i++) { + if (in_buf[i] != test_buf[i]) { + printf("First incorrect data at 0x%x of 0x%x, 0x%x != 0x%x\n", + i, in_size, in_buf[i], test_buf[i]); + break; + } + } +#endif + +#ifndef DEFLATE + gzip_trl_result = check_gzip_trl(&gstream); + gstream.in_buffer.avail_in -= gzip_trl_bytes; + gstream.in_buffer.next_in += gzip_trl_bytes; +#endif + + if (test_buf != NULL) + free(test_buf); + + switch (ret) { + case 0: + break; + case END_OF_INPUT: + return INFLATE_END_OF_INPUT; + break; + case INVALID_BLOCK_HEADER: + return INFLATE_INVALID_BLOCK_HEADER; + break; + case INVALID_SYMBOL: + return INFLATE_INVALID_SYMBOL; + break; + case OUT_BUFFER_OVERFLOW: + return INFLATE_OUT_BUFFER_OVERFLOW; + break; + case INVALID_NON_COMPRESSED_BLOCK_LENGTH: + return INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH; + break; + case INVALID_LOOK_BACK_DISTANCE: + return INFLATE_INVALID_LOOK_BACK_DISTANCE; + break; + default: + return INFLATE_GENERAL_ERROR; + break; + } + + if (gstream.in_buffer.avail_in != 0) + return INFLATE_LEFTOVER_INPUT; + + if (gstream.out_buffer.total_out != in_size) + return INFLATE_INCORRECT_OUTPUT_SIZE; + + if (mem_result) + return RESULT_ERROR; + +#ifndef DEFLATE + if (gzip_hdr_result) + return INVALID_GZIP_HEADER; + + if (gzip_trl_result) + return 
INCORRECT_GZIP_TRAILER; +#endif + + return 0; +} + +/* Check if that the state of the data stream is consistent */ +int stream_valid_check(struct isal_zstream *stream, uint8_t * in_buf, uint32_t in_size, + uint8_t * out_buf, uint32_t out_size, uint32_t in_processed, + uint32_t out_processed, uint32_t data_size) +{ + uint32_t total_in, in_buffer_size, total_out, out_buffer_size; + + total_in = + (in_size == + 0) ? in_processed : (in_processed - in_size) + (stream->next_in - in_buf); + in_buffer_size = (in_size == 0) ? 0 : stream->next_in - in_buf + stream->avail_in; + + /* Check for a consistent amount of data processed */ + if (total_in != stream->total_in || in_buffer_size != in_size) + return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR; + + total_out = + (out_size == 0) ? out_processed : out_processed + (stream->next_out - out_buf); + out_buffer_size = (out_size == 0) ? 0 : stream->next_out - out_buf + stream->avail_out; + + /* Check for a consistent amount of data compressed */ + if (total_out != stream->total_out || out_buffer_size != out_size) { + return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR; + } + + return 0; +} + +/* Performs compression with checks to discover and verify the state of the + * stream + * stream: compress data structure which has been initialized to use + * in_buf and out_buf as the buffers + * data_size: size of all input data + * compressed_size: size of all available output buffers + * in_buf: next buffer of data to be compressed + * in_size: size of in_buf + * out_buf: next out put buffer where data is stored + * out_size: size of out_buf + * in_processed: the amount of input data which has been loaded into buffers + * to be compressed, this includes the data in in_buf + * out_processed: the amount of output data which has been compressed and stored, + * this does not include the data in the current out_buf +*/ +int isal_deflate_with_checks(struct isal_zstream *stream, uint32_t data_size, + uint32_t compressed_size, uint8_t * in_buf, uint32_t 
in_size, + uint32_t in_processed, uint8_t * out_buf, uint32_t out_size, + uint32_t out_processed) +{ + int ret, stream_check; + struct isal_zstate *state = &stream->internal_state; + +#ifdef VERBOSE + printf("Pre compression\n"); + printf + ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n", + data_size, in_processed, in_size, stream->avail_in, stream->total_in); + printf + ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n", + compressed_size, out_processed, out_size, stream->avail_out, stream->total_out); +#endif + + ret = isal_deflate(stream); + +#ifdef VERBOSE + printf("Post compression\n"); + printf + ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n", + data_size, in_processed, in_size, stream->avail_in, stream->total_in); + printf + ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n", + compressed_size, out_processed, out_size, stream->avail_out, stream->total_out); + printf("\n\n"); +#endif + + /* Verify the stream is in a valid state */ + stream_check = stream_valid_check(stream, in_buf, in_size, out_buf, out_size, + in_processed, out_processed, data_size); + + if (stream_check != 0) + return stream_check; + + if (ret != IGZIP_COMP_OK) + return COMPRESS_GENERAL_ERROR; + + /* Check if the compression is completed */ + if (state->state != ZSTATE_END) + if (compressed_size - out_processed - (out_size - stream->avail_out) <= 0) + return COMPRESS_OUT_BUFFER_OVERFLOW; + + return ret; + +} + +/* Compress the input data into the output buffer where the input buffer and + * output buffer are randomly segmented to test state information for the + * compression*/ +int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf, + uint32_t * compressed_size, uint32_t flush_type) +{ + int ret = IGZIP_COMP_OK; + uint8_t *in_buf 
= NULL, *out_buf = NULL; + uint32_t in_size = 0, out_size = 0; + uint32_t in_processed = 0, out_processed = 0; + struct isal_zstream stream; + struct isal_zstate *state = &stream.internal_state; + uint32_t loop_count = 0; + +#ifdef VERBOSE + printf("Starting Compress Multi Pass\n"); +#endif + + create_rand_repeat_data((uint8_t *) & stream, sizeof(stream)); + + isal_deflate_init(&stream); + + if (state->state != ZSTATE_NEW_HDR) + return COMPRESS_INCORRECT_STATE; + + stream.flush = flush_type; + stream.end_of_stream = 0; + + /* These are set here to allow the loop to run correctly */ + stream.avail_in = 0; + stream.avail_out = 0; + + while (1) { + loop_count++; + + /* Setup in buffer for next round of compression */ + if (stream.avail_in == 0) { + if (flush_type != SYNC_FLUSH || state->state == ZSTATE_NEW_HDR) { + /* Randomly choose size of the next out buffer */ + in_size = rand() % (data_size + 1); + + /* Limit size of buffer to be smaller than maximum */ + if (in_size >= data_size - in_processed) { + in_size = data_size - in_processed; + stream.end_of_stream = 1; + } + + if (in_size != 0) { + if (in_buf != NULL) { + free(in_buf); + in_buf = NULL; + } + + in_buf = malloc(in_size); + if (in_buf == NULL) { + ret = MALLOC_FAILED; + break; + } + memcpy(in_buf, data + in_processed, in_size); + in_processed += in_size; + + stream.avail_in = in_size; + stream.next_in = in_buf; + } + } + } + + /* Setup out buffer for next round of compression */ + if (stream.avail_out == 0) { + /* Save compressed data inot compressed_buf */ + if (out_buf != NULL) { + memcpy(compressed_buf + out_processed, out_buf, + out_size - stream.avail_out); + out_processed += out_size - stream.avail_out; + } + + /* Randomly choose size of the next out buffer */ + out_size = rand() % (*compressed_size + 1); + + /* Limit size of buffer to be smaller than maximum */ + if (out_size > *compressed_size - out_processed) + out_size = *compressed_size - out_processed; + + if (out_size != 0) { + if (out_buf != 
NULL) { + free(out_buf); + out_buf = NULL; + } + + out_buf = malloc(out_size); + if (out_buf == NULL) { + ret = MALLOC_FAILED; + break; + } + + stream.avail_out = out_size; + stream.next_out = out_buf; + } + } + + ret = + isal_deflate_with_checks(&stream, data_size, *compressed_size, in_buf, + in_size, in_processed, out_buf, out_size, + out_processed); + + if (ret) { + if (ret == COMPRESS_OUT_BUFFER_OVERFLOW + || ret == COMPRESS_INCORRECT_STATE) + memcpy(compressed_buf + out_processed, out_buf, out_size); + break; + } + + /* Check if the compression is completed */ + if (state->state == ZSTATE_END) { + memcpy(compressed_buf + out_processed, out_buf, out_size); + *compressed_size = stream.total_out; + break; + } + + } + + if (in_buf != NULL) + free(in_buf); + if (out_buf != NULL) + free(out_buf); + + if (ret == COMPRESS_OUT_BUFFER_OVERFLOW && flush_type == SYNC_FLUSH + && loop_count >= MAX_LOOPS) + ret = COMPRESS_LOOP_COUNT_OVERFLOW; + + return ret; + +} + +/* Compress the input data into the outbuffer in one call to isal_deflate */ +int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf, + uint32_t * compressed_size, uint32_t flush_type) +{ + int ret = IGZIP_COMP_OK; + struct isal_zstream stream; + struct isal_zstate *state = &stream.internal_state; + +#ifdef VERBOSE + printf("Starting Compress Single Pass\n"); +#endif + + create_rand_repeat_data((uint8_t *) & stream, sizeof(stream)); + + isal_deflate_init(&stream); + + if (state->state != ZSTATE_NEW_HDR) + return COMPRESS_INCORRECT_STATE; + + stream.flush = flush_type; + stream.avail_in = data_size; + stream.next_in = data; + stream.avail_out = *compressed_size; + stream.next_out = compressed_buf; + stream.end_of_stream = 1; + + ret = + isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size, + data_size, compressed_buf, *compressed_size, 0); + + /* Check if the compression is completed */ + if (state->state == ZSTATE_END) + *compressed_size = stream.total_out; 
+ else if (flush_type == SYNC_FLUSH && stream.avail_out < 16) + ret = COMPRESS_OUT_BUFFER_OVERFLOW; + + return ret; + +} + +/* Statelessly compress the input buffer into the output buffer */ +int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf, + uint32_t * compressed_size) +{ + int ret = IGZIP_COMP_OK; + struct isal_zstream stream; + + create_rand_repeat_data((uint8_t *) & stream, sizeof(stream)); + + isal_deflate_init(&stream); + + stream.avail_in = data_size; + stream.end_of_stream = 1; + stream.next_in = data; + stream.flush = NO_FLUSH; + + stream.avail_out = *compressed_size; + stream.next_out = compressed_buf; + + ret = isal_deflate_stateless(&stream); + + /* verify the stream */ + if (stream.next_in - data != stream.total_in || + stream.total_in + stream.avail_in != data_size) + return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR; + + if (stream.next_out - compressed_buf != stream.total_out || + stream.total_out + stream.avail_out != *compressed_size) + return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR; + + if (ret != IGZIP_COMP_OK) { + if (ret == STATELESS_OVERFLOW) + return COMPRESS_OUT_BUFFER_OVERFLOW; + else + return COMPRESS_GENERAL_ERROR; + } + + if (!stream.end_of_stream) { + return COMPRESS_END_OF_STREAM_NOT_SET; + } + + if (stream.avail_in != 0) + return COMPRESS_ALL_INPUT_FAIL; + + *compressed_size = stream.total_out; + + return ret; + +} + +/*Compress the input buffer into the output buffer, but switch the flush type in + * the middle of the compression to test what happens*/ +int compress_swap_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf, + uint32_t * compressed_size, uint32_t flush_type) +{ + int ret = IGZIP_COMP_OK; + struct isal_zstream stream; + struct isal_zstate *state = &stream.internal_state; + uint32_t partial_size; + +#ifdef VERBOSE + printf("Starting Compress Swap Flush\n"); +#endif + + isal_deflate_init(&stream); + + if (state->state != ZSTATE_NEW_HDR) + return COMPRESS_INCORRECT_STATE; + + 
partial_size = rand() % (data_size + 1); + + stream.flush = flush_type; + stream.avail_in = partial_size; + stream.next_in = data; + stream.avail_out = *compressed_size; + stream.next_out = compressed_buf; + stream.end_of_stream = 0; + + ret = + isal_deflate_with_checks(&stream, data_size, *compressed_size, data, partial_size, + partial_size, compressed_buf, *compressed_size, 0); + + if (ret) + return ret; + + if (flush_type == NO_FLUSH) + flush_type = SYNC_FLUSH; + else + flush_type = NO_FLUSH; + + stream.flush = flush_type; + stream.avail_in = data_size - partial_size; + stream.next_in = data + partial_size; + stream.end_of_stream = 1; + + ret = + isal_deflate_with_checks(&stream, data_size, *compressed_size, data + partial_size, + data_size - partial_size, data_size, compressed_buf, + *compressed_size, 0); + + if (ret == COMPRESS_GENERAL_ERROR) + return INVALID_FLUSH_ERROR; + + *compressed_size = stream.total_out; + + return ret; +} + +/* Test isal_deflate_stateless */ +int test_compress_stateless(uint8_t * in_buf, uint32_t in_size) +{ + int ret = IGZIP_COMP_OK; + uint32_t z_size, overflow; + uint8_t *z_buf = NULL; + + /* Test non-overflow case where a type 0 block is not written */ + z_size = 2 * in_size + hdr_bytes + trl_bytes; + + z_buf = malloc(z_size); + + if (z_buf == NULL) + return MALLOC_FAILED; + create_rand_repeat_data(z_buf, z_size); + + ret = compress_stateless(in_buf, in_size, z_buf, &z_size); + + if (!ret) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + if (z_buf != NULL) { + free(z_buf); + z_buf = NULL; + } + + print_error(ret); + + /*Test non-overflow case where a type 0 block is possible to be written */ + z_size = + TYPE0_HDR_SIZE * ((in_size + TYPE0_MAX_SIZE - 1) / TYPE0_MAX_SIZE) + in_size + + gzip_extra_bytes; + + if (z_size == gzip_extra_bytes) + z_size += TYPE0_HDR_SIZE; + + if (z_size < 8) + z_size = 8; + + z_buf = malloc(z_size); + + if (z_buf == NULL) + return MALLOC_FAILED; + + create_rand_repeat_data(z_buf, z_size); + + ret 
= compress_stateless(in_buf, in_size, z_buf, &z_size); + if (!ret) + ret = inflate_check(z_buf, z_size, in_buf, in_size); +#ifdef VERBOSE + if (ret) { + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); + } +#endif + + if (!ret) { + free(z_buf); + z_buf = NULL; + + /* Test random overflow case */ + z_size = rand() % z_size; + + if (z_size > in_size) + z_size = rand() & in_size; + + if (z_size > 0) { + z_buf = malloc(z_size); + + if (z_buf == NULL) + return MALLOC_FAILED; + } + + overflow = compress_stateless(in_buf, in_size, z_buf, &z_size); + + if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) { +#ifdef VERBOSE + printf("overflow error = %d\n", overflow); + print_error(overflow); + if (overflow == 0) { + overflow = inflate_check(z_buf, z_size, in_buf, in_size); + printf("inflate ret = %d\n", overflow); + print_error(overflow); + } + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + ret = OVERFLOW_TEST_ERROR; + } + } + + print_error(ret); + + if (z_buf != NULL) + free(z_buf); + + return ret; +} + +/* Test isal_deflate */ +int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) +{ + int ret = IGZIP_COMP_OK, fin_ret = IGZIP_COMP_OK; + uint32_t overflow = 0; + uint32_t z_size, z_size_max, z_compressed_size; + uint8_t *z_buf = NULL; + + /* Test a non overflow case */ + if (flush_type == NO_FLUSH) + z_size_max = 2 * in_size + hdr_bytes + trl_bytes + 2; + else if (flush_type == SYNC_FLUSH) + z_size_max = 2 * in_size + MAX_LOOPS * (hdr_bytes + trl_bytes + 5); + else { + printf("Invalid Flush Parameter\n"); + return COMPRESS_GENERAL_ERROR; + } + + z_size = z_size_max; + + z_buf = malloc(z_size); + if (z_buf == NULL) { + print_error(MALLOC_FAILED); + return MALLOC_FAILED; + } + create_rand_repeat_data(z_buf, z_size_max); + + ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, 
flush_type); + + if (!ret) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + if (ret) { +#ifdef VERBOSE + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + printf("Failed on compress single pass\n"); + print_error(ret); + } + + fin_ret |= ret; + + z_compressed_size = z_size; + z_size = z_size_max; + create_rand_repeat_data(z_buf, z_size_max); + + ret = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type); + + if (!ret) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + if (ret) { +#ifdef VERBOSE + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + printf("Failed on compress multi pass\n"); + print_error(ret); + } + + fin_ret |= ret; + + ret = 0; + + /* Test random overflow case */ + if (flush_type == SYNC_FLUSH && z_compressed_size > in_size) + z_compressed_size = in_size + 1; + + z_size = rand() % z_compressed_size; + create_rand_repeat_data(z_buf, z_size_max); + + overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type); + + if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) { + if (overflow == 0) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + /* Rarely single pass overflow will compresses data + * better than the initial run. This is to stop that + * case from erroring. 
*/ + if (overflow != 0 || ret != 0) { +#ifdef VERBOSE + printf("overflow error = %d\n", overflow); + print_error(overflow); + printf("inflate ret = %d\n", ret); + print_error(overflow); + + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + printf("Failed on compress multi pass overflow\n"); + print_error(ret); + ret = OVERFLOW_TEST_ERROR; + } + } + + fin_ret |= ret; + + if (flush_type == NO_FLUSH) { + create_rand_repeat_data(z_buf, z_size_max); + + overflow = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type); + + if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) { + if (overflow == 0) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + /* Rarely multi pass overflow will compresses data + * better than the initial run. This is to stop that + * case from erroring */ + if (overflow != 0 || ret != 0) { +#ifdef VERBOSE + printf("overflow error = %d\n", overflow); + print_error(overflow); + printf("inflate ret = %d\n", ret); + print_error(overflow); + + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + printf("Failed on compress multi pass overflow\n"); + print_error(ret); + ret = OVERFLOW_TEST_ERROR; + } + } + fin_ret |= ret; + } + + free(z_buf); + + return fin_ret; +} + +/* Test swapping flush types in the middle of compression */ +int test_flush(uint8_t * in_buf, uint32_t in_size) +{ + int fin_ret = IGZIP_COMP_OK, ret; + uint32_t z_size, flush_type = 0; + uint8_t *z_buf = NULL; + + z_size = 2 * in_size + 2 * (hdr_bytes + trl_bytes) + 8; + + z_buf = malloc(z_size); + + if (z_buf == NULL) + return MALLOC_FAILED; + + create_rand_repeat_data(z_buf, z_size); + + while (flush_type == NO_FLUSH || flush_type == SYNC_FLUSH) + flush_type = rand(); + + /* Test invalid flush */ + ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type); + + if (ret == 
COMPRESS_GENERAL_ERROR) + ret = 0; + else { + printf("Failed when passing invalid flush parameter\n"); + ret = INVALID_FLUSH_ERROR; + } + + fin_ret |= ret; + print_error(ret); + + create_rand_repeat_data(z_buf, z_size); + + /* Test the valid case of SYNC_FLUSH followed by NO_FLUSH */ + ret = compress_swap_flush(in_buf, in_size, z_buf, &z_size, rand() % 2); + + if (!ret) + ret = inflate_check(z_buf, z_size, in_buf, in_size); + + if (ret) { +#ifdef VERBOSE + printf("Compressed array: "); + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); +#endif + printf("Failed on swapping from SYNC_FLUSH to NO_FLUSH\n"); + print_error(ret); + } + + fin_ret |= ret; + print_error(ret); + + return fin_ret; +} + +int get_filesize(FILE * f) +{ + int curr, end; + + curr = ftell(f); /* Save current position */ + fseek(f, 0L, SEEK_END); + end = ftell(f); + fseek(f, curr, SEEK_SET); /* Restore position */ + return end; +} + +/* Run multiple compression tests on data stored in a file */ +int test_compress_file(char *file_name) +{ + int ret = IGZIP_COMP_OK; + uint32_t in_size; + uint8_t *in_buf = NULL; + FILE *in_file = NULL; + + in_file = fopen(file_name, "rb"); + if (!in_file) + return FILE_READ_FAILED; + + in_size = get_filesize(in_file); + if (in_size != 0) { + in_buf = malloc(in_size); + if (in_buf == NULL) + return MALLOC_FAILED; + fread(in_buf, 1, in_size, in_file); + } + + ret |= test_compress_stateless(in_buf, in_size); + ret |= test_compress(in_buf, in_size, NO_FLUSH); + ret |= test_compress(in_buf, in_size, SYNC_FLUSH); + ret |= test_flush(in_buf, in_size); + + if (ret) + printf("Failed on file %s\n", file_name); + + if (in_buf != NULL) + free(in_buf); + + return ret; +} + +int main(int argc, char *argv[]) +{ + int i = 0, ret = 0, fin_ret = 0; + uint32_t in_size = 0, offset = 0; + uint8_t *in_buf; + +#ifndef VERBOSE + setbuf(stdout, NULL); +#endif + + printf("Window Size: %d K\n", HIST_SIZE); + printf("Test Seed : %d\n", 
TEST_SEED); + printf("Randoms : %d\n", RANDOMS); + srand(TEST_SEED); + + in_buf = malloc(IBUF_SIZE); + if (in_buf == NULL) { + fprintf(stderr, "Can't allocate in_buf memory\n"); + return -1; + } + + if (argc > 1) { + printf("igzip_rand_test files: "); + + for (i = 1; i < argc; i++) { + ret |= test_compress_file(argv[i]); + if (ret) + return ret; + } + + printf("................"); + printf("%s\n", ret ? "Fail" : "Pass"); + fin_ret |= ret; + } + + printf("igzip_rand_test stateless: "); + + ret = test_compress_stateless((uint8_t *) str1, sizeof(str1)); + if (ret) + return ret; + + ret |= test_compress_stateless((uint8_t *) str2, sizeof(str2)); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress_stateless(in_buf, in_size); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? "Fail" : "Pass"); + + printf("igzip_rand_test NO_FLUSH: "); + + ret = test_compress((uint8_t *) str1, sizeof(str1), NO_FLUSH); + if (ret) + return ret; + + ret |= test_compress((uint8_t *) str2, sizeof(str2), NO_FLUSH); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress(in_buf, in_size, NO_FLUSH); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? 
"Fail" : "Pass"); + + printf("igzip_rand_test SYNC_FLUSH: "); + + ret = test_compress((uint8_t *) str1, sizeof(str1), SYNC_FLUSH); + if (ret) + return ret; + + ret |= test_compress((uint8_t *) str2, sizeof(str2), SYNC_FLUSH); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress(in_buf, in_size, SYNC_FLUSH); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? "Fail" : "Pass"); + + printf("igzip rand test finished: %s\n", + fin_ret ? "Some tests failed" : "All tests passed"); + + return fin_ret != IGZIP_COMP_OK; +} diff --git a/igzip/igzip_compare_types.asm b/igzip/igzip_compare_types.asm new file mode 100644 index 0000000..6a49424 --- /dev/null +++ b/igzip/igzip_compare_types.asm @@ -0,0 +1,416 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%ifndef UTILS_ASM +%define UTILS_ASM +; compare macro + +;; sttni2 is faster, but it can't be debugged +;; so following code is based on "mine5" + +;; compare 258 bytes = 8 * 32 + 2 +;; tmp16 is a 16-bit version of tmp +;; compare258 src1, src2, result, tmp +%macro compare258 4 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 +%define %%tmp %4 +%define %%tmp16 %4w ; tmp as a 16-bit register + + xor %%result, %%result +%%loop1: + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare + add %%result, 8 + + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare + add %%result, 8 + + cmp %%result, 256 + jb %%loop1 + + ; compare last two bytes + mov %%tmp16, [%%src1 + %%result] + xor %%tmp16, [%%src2 + %%result] + jnz %%miscompare16 + + ; no miscompares, return 258 + add %%result, 2 + jmp %%end + +%%miscompare16: + and %%tmp, 0xFFFF +%%miscompare: + bsf %%tmp, %%tmp + shr %%tmp, 3 + add %%result, %%tmp +%%end: +%endm + +;; compare 258 bytes = 8 * 32 + 2 +;; tmp16 is a 16-bit version of tmp +;; compare258 src1, src2, result, tmp 
+%macro compare250 4
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%tmp %4
+%define %%tmp16 %4w ; tmp as a 16-bit register
+
+	;; result = number of leading bytes (8..258) that are equal in src1 and
+	;; src2.  Counting starts at 8: the first 8 bytes are not examined here.
+	;; Clobbers tmp.
+	mov %%result, 8
+	mov %%tmp, [%%src1 + 8]
+	xor %%tmp, [%%src2 + 8]
+	jnz %%miscompare
+	add %%result, 8
+
+%%loop1:
+	;; two 8-byte compares per iteration; xor is non-zero on any difference
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare
+	add %%result, 8
+
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare
+	add %%result, 8
+
+	cmp %%result, 256
+	jb %%loop1
+
+	; compare last two bytes
+	mov %%tmp16, [%%src1 + %%result]
+	xor %%tmp16, [%%src2 + %%result]
+	jnz %%miscompare16
+
+	; no miscompares, return 258
+	add %%result, 2
+	jmp %%end
+
+%%miscompare16:
+	and %%tmp, 0xFFFF
+%%miscompare:
+	;; lowest set bit of the xor = first differing bit; >> 3 turns the bit
+	;; index into a byte index
+	bsf %%tmp, %%tmp
+	shr %%tmp, 3
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compare 258 bytes = 8 * 32 + 2
+;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
+;; compare258_x src1, src2, result, tmp, xtmp1, xtmp2
+;; result = number of leading equal bytes (0..258); clobbers tmp, xtmp1, xtmp2
+%macro compare258_x 6
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%tmp %4
+%define %%tmp32 %4d ; tmp as a 32-bit register
+%define %%tmp16 %4w ; tmp as a 16-bit register
+%define %%xtmp %5
+%define %%xtmp2 %6
+
+	xor %%result, %%result
+%%loop1:
+	;; pcmpeqb/pmovmskb give one mask bit per equal byte; xor with 0xFFFF
+	;; inverts it so that set bits mark differing bytes
+	movdqu %%xtmp, [%%src1 + %%result]
+	movdqu %%xtmp2, [%%src2 + %%result]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare
+	add %%result, 16
+
+	movdqu %%xtmp, [%%src1 + %%result]
+	movdqu %%xtmp2, [%%src2 + %%result]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare
+	add %%result, 16
+
+	cmp %%result, 256
+	jb %%loop1
+
+	; compare last two bytes
+	mov %%tmp16, [%%src1 + %%result]
+	xor %%tmp16, [%%src2 + %%result]
+	jnz %%miscompare16
+
+	; no miscompares, return 258
+	add %%result, 2
+	jmp %%end
+
+%%miscompare16:
+	;; tmp holds a 16-bit xor here: bit index -> byte index needs >> 3
+	and %%tmp, 0xFFFF
+	bsf %%tmp, %%tmp
+	shr %%tmp, 3
+	add %%result, %%tmp
+	jmp %%end
+%%miscompare:
+	;; tmp holds a byte mask here: the bit index already is the byte index
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
+;; were already checked
+;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
+;; compare250_x src1, src2, result, tmp, xtmp1, xtmp2
+;; result = number of leading equal bytes (8..258); clobbers tmp, xtmp1, xtmp2
+%macro compare250_x 6
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%tmp %4
+%define %%tmp32 %4d ; tmp as a 32-bit register
+%define %%xtmp %5
+%define %%xtmp2 %6
+
+	mov %%result, 8
+	movdqu %%xtmp, [%%src1 + 8]
+	movdqu %%xtmp2, [%%src2 + 8]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare
+	add %%result, 16
+%%loop1:
+	movdqu %%xtmp, [%%src1 + %%result]
+	movdqu %%xtmp2, [%%src2 + %%result]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare
+	add %%result, 16
+
+	movdqu %%xtmp, [%%src1 + %%result]
+	movdqu %%xtmp2, [%%src2 + %%result]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare
+	add %%result, 16
+
+	cmp %%result, 258 - 16
+	jb %%loop1
+
+	;; final 16-byte compare starts at offset 248 and therefore covers
+	;; bytes beyond 258; a mismatch found there is clamped below
+	movdqu %%xtmp, [%%src1 + %%result]
+	movdqu %%xtmp2, [%%src2 + %%result]
+	pcmpeqb %%xtmp, %%xtmp2
+	pmovmskb %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare_last
+	; no miscompares, return 258
+	mov %%result, 258
+	jmp %%end
+
+%%miscompare_last:
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+
+	;; Guarantee the result has length at most 258.
+	mov %%tmp, 258
+	cmp %%result, 258
+	cmova %%result, %%tmp
+	jmp %%end
+%%miscompare:
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compare 258 bytes = 8 * 32 + 2
+;; compares 32 bytes at a time, using vpcmpeqb/vpmovmskb
+;; compare258_y src1, src2, result, tmp, ytmp1, ytmp2
+;; result = number of leading equal bytes (0..258); clobbers tmp, ytmp1, ytmp2
+%macro compare258_y 6
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%tmp %4
+%define %%tmp16 %4w ; tmp as a 16-bit register
+%define %%tmp32 %4d ; tmp as a 32-bit register
+%define %%ytmp %5
+%define %%ytmp2 %6
+
+	xor %%result, %%result
+%%loop1:
+	;; 32 mask bits, one per byte; inverted so set bits mark differences
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare
+	add %%result, 32
+
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare
+	add %%result, 32
+
+	cmp %%result, 256
+	jb %%loop1
+
+	; compare last two bytes
+	mov %%tmp16, [%%src1 + %%result]
+	xor %%tmp16, [%%src2 + %%result]
+	jnz %%miscompare16
+
+	; no miscompares, return 258
+	add %%result, 2
+	jmp %%end
+
+%%miscompare16:
+	;; 16-bit xor result: convert bit index to byte index
+	and %%tmp, 0xFFFF
+	bsf %%tmp, %%tmp
+	shr %%tmp, 3
+	add %%result, %%tmp
+	jmp %%end
+%%miscompare:
+	;; byte mask: bit index is the byte index
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+%%end:
+%endm
+
+
+;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
+;; were already checked
+;; compares 32 bytes at a time, using vpcmpeqb/vpmovmskb
+;; compare250_y src1, src2, result, tmp, ytmp1, ytmp2
+;; result = number of leading equal bytes (8..258); clobbers tmp, ytmp1, ytmp2
+%macro compare250_y 6
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%tmp %4
+%define %%tmp16 %4w ; tmp as a 16-bit register
+%define %%tmp32 %4d ; tmp as a 32-bit register
+%define %%ytmp %5
+%define %%ytmp2 %6
+
+	mov %%result, 8
+	vmovdqu %%ytmp, [%%src1 + 8]
+	vmovdqu %%ytmp2, [%%src2 + 8]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare
+	add %%result, 32
+%%loop1:
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare
+	add %%result, 32
+
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare
+	add %%result, 32
+
+	cmp %%result, 258 - 32
+	jb %%loop1
+
+	;; final 32-byte compare starts at offset 232 and therefore covers
+	;; bytes beyond 258; a mismatch found there is clamped below
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare_last
+	mov %%result, 258
+	jmp %%end
+
+%%miscompare_last:
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+
+	;; Guarantee the result has length at most 258.
+	mov %%tmp, 258
+	cmp %%result, 258
+	cmova %%result, %%tmp
+	jmp %%end
+
+%%miscompare:
+	bsf %%tmp, %%tmp
+	add %%result, %%tmp
+%%end:
+%endm
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; compare size, src1, src2, result, tmp
+;; result = number of leading bytes, at most size, that are equal in src1
+;; and src2.  size is used as the loop counter and is destroyed; tmp is
+;; clobbered.
+%macro compare 5
+%define %%size %1
+%define %%src1 %2
+%define %%src2 %3
+%define %%result %4
+%define %%tmp %5
+%define %%tmp8 %5b ; tmp as a 8-bit register
+
+	xor %%result, %%result
+	;; bias size by 7 so the 8-byte loop runs only while 8 or more bytes remain
+	sub %%size, 7
+	jle %%lab2
+%%loop1:
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare
+	add %%result, 8
+	sub %%size, 8
+	jg %%loop1
+%%lab2:
+	;; if we fall through from above, we have found no mismatches,
+	;; %%size+7 is the number of bytes left to look at, and %%result is the
+	;; number of bytes that have matched
+	add %%size, 7
+	jle %%end
+%%loop3:
+	;; tail: one byte at a time
+	mov %%tmp8, [%%src1 + %%result]
+	cmp %%tmp8, [%%src2 + %%result]
+	jne %%end
+	inc %%result
+	dec %%size
+	jg %%loop3
+	jmp %%end
+%%miscompare:
+	;; first differing bit of the 64-bit xor, converted to a byte index
+	bsf %%tmp, %%tmp
+	shr %%tmp, 3
+	add %%result, %%tmp
+%%end:
+%endm
+
+%endif ;UTILS_ASM
diff --git 
a/igzip/igzip_example.c b/igzip/igzip_example.c new file mode 100644 index 0000000..9d2b997 --- /dev/null +++ b/igzip/igzip_example.c @@ -0,0 +1,86 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch.  They are restored from the identifiers used below (uint8_t,
+ * FILE/printf, assert); <stdlib.h> is added for exit().  Confirm against
+ * the upstream file. */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "igzip_lib.h"
+
+#define BUF_SIZE 8192
+
+struct isal_zstream stream;
+
+/*
+ * igzip_example infile outfile
+ *
+ * Compresses infile into outfile with isal_deflate(), feeding the input in
+ * BUF_SIZE pieces and draining the output buffer after every call.
+ * Returns 0 on success and a non-zero status on any usage or I/O error.
+ */
+int main(int argc, char *argv[])
+{
+	uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
+	FILE *in, *out;
+	size_t produced;
+
+	if (argc != 3) {
+		fprintf(stderr, "Usage: igzip_example infile outfile\n");
+		exit(EXIT_FAILURE);
+	}
+	in = fopen(argv[1], "rb");
+	if (!in) {
+		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+	out = fopen(argv[2], "wb");
+	if (!out) {
+		fprintf(stderr, "Can't open %s for writing\n", argv[2]);
+		fclose(in);
+		exit(EXIT_FAILURE);
+	}
+
+	printf("igzip_example\nWindow Size: %d K\n", HIST_SIZE);
+	fflush(0);
+
+	isal_deflate_init(&stream);
+	stream.end_of_stream = 0;
+	stream.flush = NO_FLUSH;
+
+	do {
+		stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
+		if (ferror(in)) {
+			fprintf(stderr, "Error reading %s\n", argv[1]);
+			exit(EXIT_FAILURE);
+		}
+		/* feof() only promises a non-zero value at end of file, while
+		 * the assembly routine in this patch compares end_of_stream
+		 * with exactly 1 - so normalise to 0/1 */
+		stream.end_of_stream = feof(in) ? 1 : 0;
+		stream.next_in = inbuf;
+		do {
+			stream.avail_out = BUF_SIZE;
+			stream.next_out = outbuf;
+
+			isal_deflate(&stream);
+
+			produced = BUF_SIZE - stream.avail_out;
+			if (fwrite(outbuf, 1, produced, out) != produced) {
+				fprintf(stderr, "Error writing %s\n", argv[2]);
+				exit(EXIT_FAILURE);
+			}
+			/* a completely filled output buffer means there may be
+			 * more pending output for the same input */
+		} while (stream.avail_out == 0);
+
+		assert(stream.avail_in == 0);
+	} while (stream.internal_state.state != ZSTATE_END);
+
+	/* fclose() flushes buffered data, so a failure here is a write error */
+	if (fclose(out) != 0) {
+		fprintf(stderr, "Error writing %s\n", argv[2]);
+		exit(EXIT_FAILURE);
+	}
+	fclose(in);
+
+	printf("End of igzip_example\n\n");
+	return 0;
+}
diff --git a/igzip/igzip_file_perf.c b/igzip/igzip_file_perf.c
new file mode 100644
index 0000000..4f74faa
--- /dev/null
+++ b/igzip/igzip_file_perf.c
@@ -0,0 +1,180 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch.  They are restored from the identifiers used below (printf,
+ * malloc/exit, memset, uint32_t); <limits.h> is added for INT_MAX.
+ * Confirm against the upstream file. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+#define BUF_SIZE 1024
+#define MIN_TEST_LOOPS   100
+#ifndef RUN_MEM_SIZE
+# define RUN_MEM_SIZE 500000000
+#endif
+
+struct isal_zstream stream;
+
+/*
+ * Returns the size of the open file f in bytes, leaving the file position
+ * where it was.  Returns -1 if the size cannot be determined or does not
+ * fit in an int.
+ */
+int get_filesize(FILE * f)
+{
+	long curr, end;
+
+	curr = ftell(f);	/* Save current position */
+	if (curr < 0 || fseek(f, 0L, SEEK_END) != 0)
+		return -1;
+	end = ftell(f);
+	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
+		return -1;
+	if (end < 0 || end > INT_MAX)
+		return -1;
+	return (int)end;
+}
+
+/*
+ * igzip_file_perf infile [outfile]
+ *
+ * Loads infile into memory, compresses it repeatedly with the default
+ * Huffman tables to time isal_deflate(), then compresses it once more with
+ * tables built from the file's own histogram and prints both ratios.  The
+ * last compressed image is written to outfile when one is given.
+ * Returns 0 on success and a non-zero status on any error.
+ */
+int main(int argc, char *argv[])
+{
+	FILE *in, *out = NULL;
+	unsigned char *inbuf, *outbuf;
+	int i, infile_size, iterations, outbuf_size;
+	struct isal_huff_histogram histogram;
+	struct isal_hufftables hufftables_custom;
+
+	memset(&histogram, 0, sizeof(histogram));
+
+	if (argc > 3 || argc < 2) {
+		fprintf(stderr, "Usage: igzip_file_perf  infile [outfile]\n"
+			"\t - Runs multiple iterations of igzip on a file to "
+			"get more accurate time results.\n");
+		exit(EXIT_FAILURE);
+	}
+	in = fopen(argv[1], "rb");
+	if (!in) {
+		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+	if (argc > 2) {
+		out = fopen(argv[2], "wb");
+		if (!out) {
+			fprintf(stderr, "Can't open %s for writing\n", argv[2]);
+			exit(EXIT_FAILURE);
+		}
+		printf("outfile=%s\n", argv[2]);
+	}
+	printf("Window Size: %d K\n", HIST_SIZE);
+	printf("igzip_file_perf: \n");
+	fflush(0);
+	/* Allocate space for entire input file and output
+	 * (assuming some possible expansion on output size)
+	 */
+	infile_size = get_filesize(in);
+	if (infile_size < 0 || infile_size > INT_MAX / 2) {
+		/* unknown size, or doubling it below would overflow an int */
+		fprintf(stderr, "Can't determine a usable size for %s\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+
+	if (infile_size != 0) {
+		outbuf_size = infile_size * 2;
+		iterations = RUN_MEM_SIZE / infile_size;
+	} else {
+		outbuf_size = BUF_SIZE;
+		iterations = MIN_TEST_LOOPS;
+	}
+	if (iterations < MIN_TEST_LOOPS)
+		iterations = MIN_TEST_LOOPS;
+
+	/* malloc(0) may legitimately return NULL, so never ask for 0 bytes */
+	inbuf = malloc(infile_size ? (size_t)infile_size : 1);
+	if (inbuf == NULL) {
+		fprintf(stderr, "Can't allocate input buffer memory\n");
+		exit(EXIT_FAILURE);
+	}
+	outbuf = malloc(outbuf_size);
+	if (outbuf == NULL) {
+		fprintf(stderr, "Can't allocate output buffer memory\n");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations);
+	/* Read complete input file into buffer */
+	stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
+	if (stream.avail_in != (uint32_t) infile_size) {
+		fprintf(stderr, "Couldn't fit all of input file into buffer\n");
+		exit(EXIT_FAILURE);
+	}
+
+	struct perf start, stop;
+	perf_start(&start);
+
+	/* Timed runs with the default Huffman tables */
+	for (i = 0; i < iterations; i++) {
+		isal_deflate_init(&stream);
+		stream.end_of_stream = 1;	/* Do the entire file at once */
+		stream.flush = NO_FLUSH;
+		stream.next_in = inbuf;
+		stream.avail_in = infile_size;
+		stream.next_out = outbuf;
+		stream.avail_out = outbuf_size;
+		isal_deflate(&stream);
+		if (stream.avail_in != 0)
+			break;
+	}
+	perf_stop(&stop);
+
+	if (stream.avail_in != 0) {
+		fprintf(stderr, "Could not compress all of inbuf\n");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("  file %s - in_size=%d out_size=%d iter=%d ratio_default=%3.1f%%", argv[1],
+	       infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
+
+	/* One untimed run with tables derived from this file's histogram */
+	isal_update_histogram(inbuf, infile_size, &histogram);
+	isal_create_hufftables(&hufftables_custom, &histogram);
+
+	isal_deflate_init(&stream);
+	stream.end_of_stream = 1;	/* Do the entire file at once */
+	stream.flush = NO_FLUSH;
+	stream.next_in = inbuf;
+	stream.avail_in = infile_size;
+	stream.next_out = outbuf;
+	stream.avail_out = outbuf_size;
+	stream.hufftables = &hufftables_custom;
+	isal_deflate(&stream);
+
+	printf(" ratio_custom=%3.1f%%\n", 100.0 * stream.total_out / infile_size);
+
+	if (stream.avail_in != 0) {
+		fprintf(stderr, "Could not compress all of inbuf\n");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("igzip_file: ");
+	perf_print(stop, start, (long long)infile_size * i);
+
+	if (argc > 2 && out) {
+		printf("writing %s\n", argv[2]);
+		if (fwrite(outbuf, 1, stream.total_out, out) != stream.total_out
+		    || fclose(out) != 0) {
+			fprintf(stderr, "Error writing %s\n", argv[2]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	fclose(in);
+	free(inbuf);
+	free(outbuf);
+	printf("End of igzip_file_perf\n\n");
+	fflush(0);
+	return 0;
+}
diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm
new file mode 100644
index 
0000000..69b9281 --- /dev/null +++ b/igzip/igzip_finish.asm @@ -0,0 +1,311 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+
+%include "stdmac.asm"
+%include "reg_sizes.asm"
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Register assignment.  Several names deliberately share one register;
+;; names that share a register must never be live at the same time.
+%define tmp1 rax
+
+%define f_index rbx
+%define code rbx
+%define tmp4 rbx
+%define tmp5 rbx
+%define tmp6 rbx
+
+%define tmp2 rcx
+%define hash rcx
+
+%define tmp3 rdx
+
+%define stream rsi
+
+%define f_i rdi
+
+%define code_len2 rbp
+
+%define m_out_buf r8
+
+%define m_bits r9
+
+%define dist r10
+
+%define m_bit_count r11
+
+;; code2 and f_end_i both live in r12: f_end_i is therefore kept in the
+;; stack slot below and re-read from memory inside the loop.
+%define code2 r12
+
+%define f_end_i r12
+
+%define file_start r13
+
+%define len r14
+
+%define hufftables r15
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+f_end_i_mem_offset equ 0 ; local variable (8 bytes)
+stack_size equ 8
+; void isal_deflate_finish ( isal_zstream *stream )
+; arg 1: rcx: addr of stream
+;        (on elf64 builds the argument arrives in rdi and is copied to rcx
+;         by the %ifidn block below)
+;
+; Encodes the bytes between b_bytes_processed and b_bytes_valid of the
+; internal buffer, one literal or one length/distance pair per loop
+; iteration, until the input is used up or the output buffer is full.
+; When all input was encoded it may also emit the end-of-block symbol (256)
+; and advance state->state - see end_loop_2.
+global isal_deflate_finish_01
+isal_deflate_finish_01:
+	PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
+	sub rsp, stack_size
+
+%ifidn __OUTPUT_FORMAT__, elf64
+	mov rcx, rdi
+%endif
+
+	mov stream, rcx
+
+	; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
+	mov m_out_buf, [stream + _next_out]
+	mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
+	mov tmp1 %+ d, [stream + _avail_out]
+	add tmp1, m_out_buf
+	; m_out_end is placed SLOP bytes before the real end of the buffer
+	sub tmp1, SLOP
+; NOTE(review): nothing in this routine jumps to skip_SLOP - looks like a
+; leftover label; confirm before removing.
+skip_SLOP:
+	mov [stream + _internal_state_bitbuf_m_out_end], tmp1
+
+	mov m_bits, [stream + _internal_state_bitbuf_m_bits]
+	mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
+
+	mov hufftables, [stream + _hufftables]
+
+	; f_i = state->b_bytes_processed;
+	; f_end_i = state->b_bytes_valid;
+	mov f_i %+ d, [stream + _internal_state_b_bytes_processed]
+	mov f_end_i %+ d, [stream + _internal_state_b_bytes_valid]
+
+	; f_i += (uint32_t)(state->buffer - state->file_start);
+	; f_end_i += (uint32_t)(state->buffer - state->file_start);
+	mov file_start, [stream + _internal_state_file_start]
+	lea tmp1, [stream + _internal_state_buffer]
+	sub tmp1, file_start
+	add f_i, tmp1
+	add f_end_i, tmp1
+	; spill f_end_i: r12 is reused as code2 inside the loop
+	mov [rsp + f_end_i_mem_offset], f_end_i
+	; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
+	cmp f_i, f_end_i
+	jge end_loop_2
+
+	; tmp1 = the 4 bytes at file_start + f_i, input of compute_hash below
+	mov tmp1 %+ d, [file_start + f_i]
+
+loop2:
+	; if (state->bitbuf.is_full()) {
+	cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+	ja end_loop_2
+
+	; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
+	compute_hash hash, tmp1
+	and hash %+ d, HASH_MASK
+
+	; f_index = state->head[hash];
+	movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
+
+	; state->head[hash] = (uint16_t) f_i;
+	mov [stream + _internal_state_head + 2 * hash], f_i %+ w
+
+	; dist = f_i - f_index; // mod 64k
+	mov dist %+ d, f_i %+ d
+	sub dist %+ d, f_index %+ d
+	and dist %+ d, 0xFFFF
+
+	; if ((dist-1) < (D-1)) {
+	;   unsigned compare: dist == 0 wraps to 0xFFFFFFFF and is rejected too
+	mov tmp1 %+ d, dist %+ d
+	sub tmp1 %+ d, 1
+	cmp tmp1 %+ d, (D-1)
+	jae encode_literal
+
+	; len = f_end_i - f_i;
+	mov tmp4, [rsp + f_end_i_mem_offset]
+	sub tmp4, f_i
+
+	; if (len > 258) len = 258;
+	cmp tmp4, 258
+	cmovg tmp4, [c258]
+
+	; len = compare(state->file_start + f_i,
+	;               state->file_start + f_i - dist, len);
+	lea tmp1, [file_start + f_i]
+	mov tmp2, tmp1
+	sub tmp2, dist
+	compare tmp4, tmp1, tmp2, len, tmp3
+
+	; if (len >= SHORTEST_MATCH) {
+	cmp len, SHORTEST_MATCH
+	jb encode_literal
+
+	;; encode as dist/len
+
+	; get_dist_code(dist, &code2, &code_len2);
+	get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
+
+	; get_len_code(len, &code, &code_len);
+	get_len_code len, code, rcx, hufftables ;; rcx is code_len
+
+	; code2 <<= code_len
+	; code2 |= code
+	; code_len2 += code_len
+%ifdef USE_HSWNI
+	shlx code2, code2, rcx
+%else
+	shl code2, cl
+%endif
+	or code2, code
+	add code_len2, rcx
+
+	; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
+	;   (implemented as f_i += len with the loop running while k < f_i,
+	;    which visits the same k values and leaves f_i on the next
+	;    unprocessed byte)
+	lea tmp3, [f_i + 1] ; tmp3 <= k
+	add f_i, len
+%ifdef LIMIT_HASH_UPDATE
+	; only update hash twice
+
+	; hash = compute_hash(state->file_start + k) & HASH_MASK;
+	mov tmp6 %+ d, [file_start + tmp3]
+	compute_hash hash, tmp6
+	and hash %+ d, HASH_MASK
+	; state->head[hash] = k;
+	mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+
+	add tmp3, 1
+
+	; hash = compute_hash(state->file_start + k) & HASH_MASK;
+	mov tmp6 %+ d, [file_start + tmp3]
+	compute_hash hash, tmp6
+	and hash %+ d, HASH_MASK
+	; state->head[hash] = k;
+	mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+
+%else
+loop3:
+	; hash = compute_hash(state->file_start + k) & HASH_MASK;
+	mov tmp6 %+ d, [file_start + tmp3]
+	compute_hash hash, tmp6
+	and hash %+ d, HASH_MASK
+	; state->head[hash] = k;
+	mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+	inc tmp3
+	cmp tmp3, f_i
+	jl loop3
+%endif
+
+	; preload the 4 bytes at the new f_i for the next compute_hash
+	mov tmp1 %+ d, [file_start + f_i]
+
+	write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
+
+	; continue
+	cmp f_i, [rsp + f_end_i_mem_offset]
+	jl loop2
+	jmp end_loop_2
+
+encode_literal:
+	; preload the 4 bytes at f_i + 1 for the next compute_hash
+	mov tmp1 %+ d, [file_start + f_i + 1]
+
+	; get_lit_code(state->file_start[f_i], &code2, &code_len2);
+	movzx tmp5, byte [file_start + f_i]
+	get_lit_code tmp5, code2, code_len2, hufftables
+
+	write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
+
+	; continue
+	add f_i, 1
+	cmp f_i, [rsp + f_end_i_mem_offset]
+	jl loop2
+
+end_loop_2:
+
+	; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
+	cmp f_i, [rsp + f_end_i_mem_offset]
+	jl not_end
+	cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+	ja not_end
+
+	; end_of_stream is tested for equality with 1, not for non-zero.
+	; With end_of_stream == 1 and left_over > 0 the end-of-block symbol
+	; is not written on this call.
+	cmp dword [stream + _end_of_stream], 1
+	jne cont
+	cmp dword [stream + _internal_state_left_over], 0
+	jg not_end
+
+cont:
+	; get_lit_code(256, &code2, &code_len2);
+	get_lit_code 256, code2, code_len2, hufftables
+
+	write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
+
+	mov dword [stream + _internal_state_has_eob], 1
+	cmp dword [stream + _end_of_stream], 1
+	jne sync_flush
+	; state->state = ZSTATE_TRL;
+	mov dword [stream + _internal_state_state], ZSTATE_TRL
+	jmp not_end
+
+sync_flush:
+	; state->state = ZSTATE_SYNC_FLUSH;
+	mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
+	; }
+not_end:
+
+	; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
+	add f_i, [stream + _internal_state_file_start]
+	sub f_i, stream
+	sub f_i, _internal_state_buffer
+	mov [stream + _internal_state_b_bytes_processed], f_i %+ d
+
+	; // update output buffer
+	; stream->next_out = state->bitbuf.buffer_ptr();
+	mov [stream + _next_out], m_out_buf
+	; len = state->bitbuf.buffer_used();
+	sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
+
+	; stream->avail_out -= len;
+	sub [stream + _avail_out], m_out_buf %+ d
+	; stream->total_out += len;
+	add [stream + _total_out], m_out_buf %+ d
+
+	; save the bit buffer state for the next call
+	mov [stream + _internal_state_bitbuf_m_bits], m_bits
+	mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
+	add rsp, stack_size
+	POP_ALL
+	ret
+
+section .data
+	; NOTE(review): c258 is an 8-byte dq read by a 64-bit cmovg, but the
+	; alignment requested is only 4 - confirm whether align 8 was intended.
+	align 4
+c258: dq 258
diff --git a/igzip/igzip_inflate_perf.c b/igzip/igzip_inflate_perf.c
new file mode 100644
index 0000000..930377e
--- /dev/null
+++ b/igzip/igzip_inflate_perf.c
@@ -0,0 +1,151 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch.  They are restored from the identifiers used below (printf,
+ * malloc/exit, compress2/compressBound/Z_OK); <limits.h> is added for
+ * INT_MAX.  Confirm against the upstream file. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <zlib.h>
+#include "huff_codes.h"
+#include "igzip_inflate_ref.h"
+#include "test.h"
+
+#define BUF_SIZE 1024
+#define MIN_TEST_LOOPS   100
+#ifndef RUN_MEM_SIZE
+# define RUN_MEM_SIZE 1000000000
+#endif
+
+/*
+ * Returns the size of the open file f in bytes, leaving the file position
+ * where it was.  Returns -1 if the size cannot be determined or does not
+ * fit in an int.
+ */
+int get_filesize(FILE * f)
+{
+	long curr, end;
+
+	curr = ftell(f);	/* Save current position */
+	if (curr < 0 || fseek(f, 0L, SEEK_END) != 0)
+		return -1;
+	end = ftell(f);
+	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
+		return -1;
+	if (end < 0 || end > INT_MAX)
+		return -1;
+	return (int)end;
+}
+
+/*
+ * igzip_inflate_perf infile [outfile]
+ *
+ * Compresses infile in memory with zlib, then times repeated decompression
+ * of that image with igzip_inflate().
+ * Returns 0 on success and a non-zero status on any setup error.
+ */
+int main(int argc, char *argv[])
+{
+	FILE *in, *out = NULL;
+	unsigned char *inbuf, *outbuf, *tempbuf;
+	int i, infile_size, iterations, outbuf_size, check;
+	uLongf inbuf_size;	/* the type compress2() expects for destLen */
+	struct inflate_state state;
+
+	if (argc > 3 || argc < 2) {
+		fprintf(stderr, "Usage: igzip_inflate_file_perf  infile\n"
+			"\t - Runs multiple iterations of igzip on a file to "
+			"get more accurate time results.\n");
+		exit(EXIT_FAILURE);
+	}
+	in = fopen(argv[1], "rb");
+	if (!in) {
+		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+	if (argc > 2) {
+		out = fopen(argv[2], "wb");
+		if (!out) {
+			fprintf(stderr, "Can't open %s for writing\n", argv[2]);
+			exit(EXIT_FAILURE);
+		}
+		printf("outfile=%s\n", argv[2]);
+	}
+	printf("igzip_inflate_perf: \n");
+	fflush(0);
+	/* Allocate space for entire input file and output
+	 * (assuming some possible expansion on output size)
+	 */
+	infile_size = get_filesize(in);
+
+	if (infile_size > 0) {
+		outbuf_size = infile_size;
+		iterations = RUN_MEM_SIZE / infile_size;
+	} else if (infile_size == 0) {
+		printf("Error: input file has 0 size\n");
+		exit(EXIT_FAILURE);
+	} else {
+		fprintf(stderr, "Can't determine a usable size for %s\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+	if (iterations < MIN_TEST_LOOPS)
+		iterations = MIN_TEST_LOOPS;
+
+	tempbuf = malloc(infile_size);
+	if (tempbuf == NULL) {
+		fprintf(stderr, "Can't allocate temp buffer memory\n");
+		exit(EXIT_FAILURE);
+	}
+	inbuf_size = compressBound(infile_size);
+	inbuf = malloc(inbuf_size);
+	if (inbuf == NULL) {
+		fprintf(stderr, "Can't allocate input buffer memory\n");
+		exit(EXIT_FAILURE);
+	}
+	outbuf = malloc(infile_size);
+	if (outbuf == NULL) {
+		fprintf(stderr, "Can't allocate output buffer memory\n");
+		exit(EXIT_FAILURE);
+	}
+	/* Read complete input file into the temp buffer */
+	if (fread(tempbuf, 1, infile_size, in) != (size_t)infile_size) {
+		fprintf(stderr, "Couldn't read all of input file\n");
+		exit(EXIT_FAILURE);
+	}
+	fclose(in);
+
+	/* Build the compressed image that the timed loop will inflate */
+	i = compress2(inbuf, &inbuf_size, tempbuf, infile_size, 9);
+	if (i != Z_OK) {
+		printf("Compression of input file failed\n");
+		exit(EXIT_FAILURE);
+	}
+	printf("igzip_inflate_perf: %s %d iterations\n", argv[1], iterations);
+	struct perf start, stop;
+	perf_start(&start);
+
+	for (i = 0; i < iterations; i++) {
+		/* compress2() emits a zlib stream; skip its 2-byte header so
+		 * the decoder starts at the raw deflate data */
+		igzip_inflate_init(&state, inbuf + 2, inbuf_size - 2, outbuf, outbuf_size);
+
+		check = igzip_inflate(&state);
+		if (check) {
+			printf("Error in decompression with error %d\n", check);
+			break;
+		}
+	}
+	perf_stop(&stop);
+
+	printf("  file %s - in_size=%d out_size=%d iter=%d\n", argv[1],
+	       infile_size, state.out_buffer.total_out, i);
+
+	printf("igzip_file: ");
+	perf_print(stop, start, (long long)infile_size * i);
+
+	printf("End of igzip_inflate_perf\n\n");
+	fflush(0);
+
+	/* NOTE(review): outfile is opened (and truncated) but nothing is
+	 * ever written to it - confirm whether the inflated data was meant
+	 * to be saved there. */
+	if (out)
+		fclose(out);
+
+	free(inbuf);
+	free(outbuf);
+	free(tempbuf);
+
+	return 0;
+}
diff --git a/igzip/igzip_inflate_ref.c b/igzip/igzip_inflate_ref.c
new file mode 100644
index 0000000..46464bc
--- /dev/null
+++ b/igzip/igzip_inflate_ref.c
@@ -0,0 +1,668 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "igzip_inflate_ref.h"
+
+/*
+ * Copies repeat_length bytes to dest from lookback_distance bytes before
+ * dest.  The copy runs forward one byte at a time, so when
+ * lookback_distance < repeat_length bytes written earlier in this call are
+ * read again and the pattern repeats.
+ */
+void inline byte_copy(uint8_t * dest, uint64_t lookback_distance, int repeat_length)
+{
+	uint8_t *src = dest - lookback_distance;
+
+	for (; repeat_length > 0; repeat_length--)
+		*dest++ = *src++;
+}
+
+/*
+ * Returns integer with first length bits reversed and all higher bits zeroed
+ *
+ * All 16 bits are reversed and the result is shifted down by 16 - length;
+ * length must not exceed 16.
+ */
+uint16_t inline bit_reverse2(uint16_t bits, uint8_t length)
+{
+	bits = ((bits >> 1) & 0x5555) | ((bits & 0x5555) << 1);	// swap bits
+	bits = ((bits >> 2) & 0x3333) | ((bits & 0x3333) << 2);	// swap pairs
+	bits = ((bits >> 4) & 0x0F0F) | ((bits & 0x0F0F) << 4);	// swap nibbles
+	bits = ((bits >> 8) & 0x00FF) | ((bits & 0x00FF) << 8);	// swap bytes
+	return bits >> (16 - length);
+}
+
+/* Empties the bit buffer: no bits held, count 0. */
+void inline init_inflate_in_buffer(struct inflate_in_buffer *inflate_in)
+{
+	inflate_in->read_in = 0;
+	inflate_in->read_in_length = 0;
+}
+
+/* Points the input buffer at in_stream, which holds in_size bytes. */
+void inline set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t * in_stream,
+				  uint32_t in_size)
+{
+	inflate_in->next_in = inflate_in->start = in_stream;
+	inflate_in->avail_in = in_size;
+}
+
+/* Points the output buffer at out_stream (out_size bytes) and resets
+ * total_out to 0. */
+void inline set_inflate_out_buffer(struct inflate_out_buffer *inflate_out,
+				   uint8_t * out_stream, uint32_t out_size)
+{
+	inflate_out->next_out = out_stream;
+	inflate_out->avail_out = out_size;
+	inflate_out->total_out = 0;
+}
+
+/*
+ * Discards the bits held in read_in and hands the whole bytes among them
+ * back to the input stream by moving next_in/avail_in backwards.  Bits of a
+ * partially consumed byte are dropped.
+ */
+void inline inflate_in_clear_bits(struct inflate_in_buffer *inflate_in)
+{
+	uint8_t bytes;
+
+	bytes = inflate_in->read_in_length / 8;
+
+	inflate_in->read_in = 0;
+	inflate_in->read_in_length = 0;
+	inflate_in->next_in -= bytes;
+	inflate_in->avail_in += bytes;
+}
+
+/*
+ * Refills read_in from the input stream.  min_required is accepted for
+ * interface compatibility and is not used.
+ */
+void inline inflate_in_load(struct inflate_in_buffer *inflate_in, int min_required)
+{
+	uint64_t temp = 0;
+	uint8_t new_bytes;
+
+	if (inflate_in->avail_in >= 8) {
+		/* If there is enough space to load a 64 bits, load the data and use
+		 * that to fill read_in */
+		new_bytes = 8 - (inflate_in->read_in_length + 7) / 8;
+		/* memcpy instead of a uint64_t pointer cast: next_in has no
+		 * alignment guarantee, and the cast also breaks the aliasing
+		 * rules.  The loaded bytes are used in native byte order,
+		 * exactly as before. */
+		memcpy(&temp, inflate_in->next_in, sizeof(temp));
+
+		inflate_in->read_in |= temp << inflate_in->read_in_length;
+		inflate_in->next_in += new_bytes;
+		inflate_in->avail_in -= new_bytes;
+		inflate_in->read_in_length += new_bytes * 8;
+
+	} else {
+		/* Else fill the read_in buffer 1 byte at a time */
+		while (inflate_in->read_in_length < 57 && inflate_in->avail_in > 0) {
+			temp = *inflate_in->next_in;
+			inflate_in->read_in |= temp << inflate_in->read_in_length;
+			inflate_in->next_in++;
+			inflate_in->avail_in--;
+			inflate_in->read_in_length += 8;
+
+		}
+	}
+
+}
+
+/*
+ * Returns the next bit_count (< 57) bits of input without consuming them.
+ */
+uint64_t inline inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
+{
+	assert(bit_count < 57);
+
+	/* Load inflate_in if not enough data is in the read_in buffer */
+	if (inflate_in->read_in_length < bit_count)
+		inflate_in_load(inflate_in, 0);
+
+	/* the mask must be built in 64 bits: bit_count may be as large as 56
+	 * and shifting an int by 31 or more is undefined */
+	return (inflate_in->read_in) & (((uint64_t) 1 << bit_count) - 1);
+}
+
+/* Consumes bit_count bits that are already present in read_in. */
+void inline inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
+{
+
+	inflate_in->read_in >>= bit_count;
+	inflate_in->read_in_length -= bit_count;
+}
+
+/*
+ * Returns the next bit_count (< 57) bits of input and consumes them.
+ */
+uint64_t inline inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
+{
+	uint64_t ret;
+	assert(bit_count < 57);
+
+	/* Load inflate_in if not enough data is in the read_in buffer */
+	if (inflate_in->read_in_length < bit_count)
+		inflate_in_load(inflate_in, bit_count);
+
+	/* 64-bit mask, see inflate_in_peek_bits */
+	ret = (inflate_in->read_in) & (((uint64_t) 1 << bit_count) - 1);
+	inflate_in->read_in >>= bit_count;
+	inflate_in->read_in_length -= bit_count;
+
+	return ret;
+}
+
+/*
+ * Builds the decode tables for the fixed (static) Huffman codes into
+ * state->lit_huff_code and state->dist_huff_code.  Always returns 0.
+ */
+int inline setup_static_header(struct inflate_state *state)
+{
+	/* This could be turned into a memcpy of this functions output for
+	 * higher speed, but then DECODE_LOOKUP_SIZE couldn't be changed without
+	 * regenerating the table. */
+
+	int i;
+	struct huff_code lit_code[LIT_LEN + 2];
+	struct huff_code dist_code[DIST_LEN + 2];
+
+	/* These tables are based on the static huffman tree described in RFC
+	 * 1951 */
+	/* entry n = number of symbols whose code is n bits long */
+	uint16_t lit_count[16] = {
+		0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
+	};
+	uint16_t dist_count[16] = {
+		0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	};
+
+	/* These for loops set the code lengths for the static literal/length
+	 * and distance codes defined in the deflate standard RFC 1951 */
+	for (i = 0; i < 144; i++)
+		lit_code[i].length = 8;
+
+	for (i = 144; i < 256; i++)
+		lit_code[i].length = 9;
+
+	for (i = 256; i < 280; i++)
+		lit_code[i].length = 7;
+
+	for (i = 280; i < LIT_LEN + 2; i++)
+		lit_code[i].length = 8;
+
+	for (i = 0; i < DIST_LEN + 2; i++)
+		dist_code[i].length = 5;
+
+	make_inflate_huff_code(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count);
+	make_inflate_huff_code(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count);
+
+	return 0;
+}
+
+void inline make_inflate_huff_code(struct inflate_huff_code *result,
+				   struct huff_code *huff_code_table, int table_length,
+				   uint16_t * count)
+{
+	int i, j;
+	uint16_t code = 0;
+	uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
+	uint16_t 
long_code_list[LIT_LEN]; + uint32_t long_code_length = 0; + uint16_t temp_code_list[1 << (15 - DECODE_LOOKUP_SIZE)]; + uint32_t temp_code_length; + uint32_t long_code_lookup_length = 0; + uint32_t max_length; + uint16_t first_bits; + uint32_t code_length; + uint16_t long_bits; + uint16_t min_increment; + + memset(result, 0, sizeof(struct inflate_huff_code)); + + next_code[0] = code; + + for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++) + next_code[i] = (next_code[i - 1] + count[i - 1]) << 1; + + for (i = 0; i < table_length; i++) { + if (huff_code_table[i].length != 0) { + /* Determine the code for symbol i */ + huff_code_table[i].code = + bit_reverse2(next_code[huff_code_table[i].length], + huff_code_table[i].length); + + next_code[huff_code_table[i].length] += 1; + + if (huff_code_table[i].length <= DECODE_LOOKUP_SIZE) { + /* Set lookup table to return the current symbol + * concatenated with the code length when the + * first DECODE_LENGTH bits of the address are + * the same as the code for the current + * symbol. The first 9 bits are the code, bits + * 14:10 are the code length, bit 15 is a flag + * representing this is a symbol*/ + for (j = 0; j < (1 << (DECODE_LOOKUP_SIZE - + huff_code_table[i].length)); j++) + + result->small_code_lookup[(j << + huff_code_table[i].length) + + huff_code_table[i].code] + = i | (huff_code_table[i].length) << 9; + + } else { + /* Store the element in a list of elements with long codes. */ + long_code_list[long_code_length] = i; + long_code_length++; + } + } + } + + for (i = 0; i < long_code_length; i++) { + /*Set the look up table to point to a hint where the symbol can be found + * in the list of long codes and add the current symbol to the list of + * long codes. 
*/ + if (huff_code_table[long_code_list[i]].code == 0xFFFF) + continue; + + max_length = huff_code_table[long_code_list[i]].length; + first_bits = + huff_code_table[long_code_list[i]].code & ((1 << DECODE_LOOKUP_SIZE) - 1); + + temp_code_list[0] = long_code_list[i]; + temp_code_length = 1; + + for (j = i + 1; j < long_code_length; j++) { + if ((huff_code_table[long_code_list[j]].code & + ((1 << DECODE_LOOKUP_SIZE) - 1)) == first_bits) { + if (max_length < huff_code_table[long_code_list[j]].length) + max_length = huff_code_table[long_code_list[j]].length; + temp_code_list[temp_code_length] = long_code_list[j]; + temp_code_length++; + } + } + + for (j = 0; j < temp_code_length; j++) { + code_length = huff_code_table[temp_code_list[j]].length; + long_bits = + huff_code_table[temp_code_list[j]].code >> DECODE_LOOKUP_SIZE; + min_increment = 1 << (code_length - DECODE_LOOKUP_SIZE); + for (; long_bits < (1 << (max_length - DECODE_LOOKUP_SIZE)); + long_bits += min_increment) { + result->long_code_lookup[long_code_lookup_length + long_bits] = + temp_code_list[j] | (code_length << 9); + } + huff_code_table[temp_code_list[j]].code = 0xFFFF; + } + result->small_code_lookup[first_bits] = + long_code_lookup_length | (max_length << 9) | 0x8000; + long_code_lookup_length += 1 << (max_length - DECODE_LOOKUP_SIZE); + + } +} + +uint16_t inline decode_next(struct inflate_in_buffer *in_buffer, + struct inflate_huff_code *huff_code) +{ + uint16_t next_bits; + uint16_t next_sym; + + next_bits = inflate_in_peek_bits(in_buffer, DECODE_LOOKUP_SIZE); + + /* next_sym is a possible symbol decoded from next_bits. If bit 15 is 0, + * next_code is a symbol. Bits 9:0 represent the symbol, and bits 14:10 + * represent the length of that symbols huffman code. If next_sym is not + * a symbol, it provides a hint of where the large symbols containin + * this code are located. 
Note the hint is at largest the location the + * first actual symbol in the long code list.*/ + next_sym = huff_code->small_code_lookup[next_bits]; + + if (next_sym < 0x8000) { + /* Return symbol found if next_code is a complete huffman code + * and shift in buffer over by the length of the next_code */ + inflate_in_shift_bits(in_buffer, next_sym >> 9); + + return next_sym & 0x1FF; + + } else { + /* If a symbol is not found, perform a linear search of the long code + * list starting from the hint in next_sym */ + next_bits = inflate_in_peek_bits(in_buffer, (next_sym - 0x8000) >> 9); + next_sym = + huff_code->long_code_lookup[(next_sym & 0x1FF) + + (next_bits >> DECODE_LOOKUP_SIZE)]; + inflate_in_shift_bits(in_buffer, next_sym >> 9); + return next_sym & 0x1FF; + + } +} + +int inline setup_dynamic_header(struct inflate_state *state) +{ + int i, j; + struct huff_code code_huff[CODE_LEN_CODES]; + struct huff_code lit_and_dist_huff[LIT_LEN + DIST_LEN]; + struct huff_code *previous = NULL, *current; + struct inflate_huff_code inflate_code_huff; + uint8_t hclen, hdist, hlit; + uint16_t code_count[16], lit_count[16], dist_count[16]; + uint16_t *count; + uint16_t symbol; + + /* This order is defined in RFC 1951 page 13 */ + const uint8_t code_length_code_order[CODE_LEN_CODES] = { + 0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06, + 0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02, + 0x0e, 0x01, 0x0f + }; + + memset(code_count, 0, sizeof(code_count)); + memset(lit_count, 0, sizeof(lit_count)); + memset(dist_count, 0, sizeof(dist_count)); + memset(code_huff, 0, sizeof(code_huff)); + memset(lit_and_dist_huff, 0, sizeof(lit_and_dist_huff)); + + /* These variables are defined in the deflate standard, RFC 1951 */ + hlit = inflate_in_read_bits(&state->in_buffer, 5); + hdist = inflate_in_read_bits(&state->in_buffer, 5); + hclen = inflate_in_read_bits(&state->in_buffer, 4); + + /* Create the code huffman code for decoding the lit/len and dist huffman codes */ + for (i = 0; i < hclen + 
4; i++) { + code_huff[code_length_code_order[i]].length = + inflate_in_read_bits(&state->in_buffer, 3); + + code_count[code_huff[code_length_code_order[i]].length] += 1; + } + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + make_inflate_huff_code(&inflate_code_huff, code_huff, CODE_LEN_CODES, code_count); + + /* Decode the lit/len and dist huffman codes using the code huffman code */ + count = lit_count; + current = lit_and_dist_huff; + + while (current < lit_and_dist_huff + LIT_LEN + hdist + 1) { + /* If finished decoding the lit/len huffman code, start decoding + * the distance code these decodings are in the same loop + * because the len/lit and dist huffman codes are run length + * encoded together. */ + if (current == lit_and_dist_huff + 257 + hlit) + current = lit_and_dist_huff + LIT_LEN; + + if (current == lit_and_dist_huff + LIT_LEN) + count = dist_count; + + symbol = decode_next(&state->in_buffer, &inflate_code_huff); + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + if (symbol < 16) { + /* If a length is found, update the current lit/len/dist + * to have length symbol */ + count[symbol]++; + current->length = symbol; + previous = current; + current++; + + } else if (symbol == 16) { + /* If a repeat length is found, update the next repeat + * length lit/len/dist elements to have the value of the + * repeated length */ + if (previous == NULL) /* No elements available to be repeated */ + return INVALID_BLOCK_HEADER; + + i = 3 + inflate_in_read_bits(&state->in_buffer, 2); + for (j = 0; j < i; j++) { + *current = *previous; + count[current->length]++; + previous = current; + + if (current == lit_and_dist_huff + 256 + hlit) { + current = lit_and_dist_huff + LIT_LEN; + count = dist_count; + + } else + current++; + } + + } else if (symbol == 17) { + /* If a repeat zeroes if found, update then next + * repeated zeroes length lit/len/dist elements to have + * length 0. 
*/ + i = 3 + inflate_in_read_bits(&state->in_buffer, 3); + + for (j = 0; j < i; j++) { + previous = current; + + if (current == lit_and_dist_huff + 256 + hlit) { + current = lit_and_dist_huff + LIT_LEN; + count = dist_count; + + } else + current++; + } + + } else if (symbol == 18) { + /* If a repeat zeroes if found, update then next + * repeated zeroes length lit/len/dist elements to have + * length 0. */ + i = 11 + inflate_in_read_bits(&state->in_buffer, 7); + + for (j = 0; j < i; j++) { + previous = current; + + if (current == lit_and_dist_huff + 256 + hlit) { + current = lit_and_dist_huff + LIT_LEN; + count = dist_count; + + } else + current++; + } + } else + return INVALID_BLOCK_HEADER; + + } + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + make_inflate_huff_code(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN, lit_count); + make_inflate_huff_code(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN], DIST_LEN, + dist_count); + + return 0; +} + +int read_header(struct inflate_state *state) +{ + state->new_block = 0; + + /* btype and bfinal are defined in RFC 1951, bfinal represents whether + * the current block is the end of block, and btype represents the + * encoding method on the current block. 
*/ + state->bfinal = inflate_in_read_bits(&state->in_buffer, 1); + state->btype = inflate_in_read_bits(&state->in_buffer, 2); + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + if (state->btype == 0) { + inflate_in_clear_bits(&state->in_buffer); + return 0; + + } else if (state->btype == 1) + return setup_static_header(state); + + else if (state->btype == 2) + return setup_dynamic_header(state); + + return INVALID_BLOCK_HEADER; +} + +void igzip_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size, + uint8_t * out_stream, uint64_t out_size) +{ + + init_inflate_in_buffer(&state->in_buffer); + + set_inflate_in_buffer(&state->in_buffer, in_stream, in_size); + set_inflate_out_buffer(&state->out_buffer, out_stream, out_size); + + state->new_block = 1; + state->bfinal = 0; +} + +int igzip_inflate(struct inflate_state *state) +{ + /* The following tables are based on the tables in the deflate standard, + * RFC 1951 page 11. */ + const uint16_t len_start[29] = { + 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f, + 0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73, + 0x83, 0xa3, 0xc3, 0xe3, 0x102 + }; + const uint8_t len_extra_bit_count[29] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2, + 0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4, + 0x5, 0x5, 0x5, 0x5, 0x0 + }; + const uint32_t dist_start[30] = { + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d, + 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1, + 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01, + 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001 + }; + const uint8_t dist_extra_bit_count[30] = { + 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, + 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, + 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa, + 0xb, 0xb, 0xc, 0xc, 0xd, 0xd + }; + + uint16_t next_lit, len, nlen; + uint8_t next_dist; + uint32_t repeat_length; + uint32_t 
look_back_dist; + uint32_t tmp; + + while (state->new_block == 0 || state->bfinal == 0) { + if (state->new_block != 0) { + tmp = read_header(state); + + if (tmp) + return tmp; + } + + if (state->btype == 0) { + /* If the block is uncompressed, perform a memcopy while + * updating state data */ + if (state->in_buffer.avail_in < 4) + return END_OF_INPUT; + + len = *(uint16_t *) state->in_buffer.next_in; + state->in_buffer.next_in += 2; + nlen = *(uint16_t *) state->in_buffer.next_in; + state->in_buffer.next_in += 2; + + /* Check if len and nlen match */ + if (len != (~nlen & 0xffff)) + return INVALID_NON_COMPRESSED_BLOCK_LENGTH; + + if (state->out_buffer.avail_out < len) + return OUT_BUFFER_OVERFLOW; + + if (state->in_buffer.avail_in < len) + len = state->in_buffer.avail_in; + + else + state->new_block = 1; + + memcpy(state->out_buffer.next_out, state->in_buffer.next_in, len); + + state->out_buffer.next_out += len; + state->out_buffer.avail_out -= len; + state->out_buffer.total_out += len; + state->in_buffer.next_in += len; + state->in_buffer.avail_in -= len + 4; + + if (state->in_buffer.avail_in == 0 && state->new_block == 0) + return END_OF_INPUT; + + } else { + /* Else decode a huffman encoded block */ + while (state->new_block == 0) { + /* While not at the end of block, decode the next + * symbol */ + + next_lit = + decode_next(&state->in_buffer, &state->lit_huff_code); + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + if (next_lit < 256) { + /* If the next symbol is a literal, + * write out the symbol and update state + * data accordingly. 
*/ + if (state->out_buffer.avail_out < 1) + return OUT_BUFFER_OVERFLOW; + + *state->out_buffer.next_out = next_lit; + state->out_buffer.next_out++; + state->out_buffer.avail_out--; + state->out_buffer.total_out++; + + } else if (next_lit == 256) { + /* If the next symbol is the end of + * block, update the state data + * accordingly */ + state->new_block = 1; + + } else if (next_lit < 286) { + /* Else if the next symbol is a repeat + * length, read in the length extra + * bits, the distance code, the distance + * extra bits. Then write out the + * corresponding data and update the + * state data accordingly*/ + repeat_length = + len_start[next_lit - 257] + + inflate_in_read_bits(&state->in_buffer, + len_extra_bit_count[next_lit - + 257]); + + if (state->out_buffer.avail_out < repeat_length) + return OUT_BUFFER_OVERFLOW; + + next_dist = decode_next(&state->in_buffer, + &state->dist_huff_code); + + look_back_dist = dist_start[next_dist] + + inflate_in_read_bits(&state->in_buffer, + dist_extra_bit_count + [next_dist]); + + if (state->in_buffer.read_in_length < 0) + return END_OF_INPUT; + + if (look_back_dist > state->out_buffer.total_out) + return INVALID_LOOK_BACK_DISTANCE; + + if (look_back_dist > repeat_length) { + memcpy(state->out_buffer.next_out, + state->out_buffer.next_out - + look_back_dist, repeat_length); + } else + byte_copy(state->out_buffer.next_out, + look_back_dist, repeat_length); + + state->out_buffer.next_out += repeat_length; + state->out_buffer.avail_out -= repeat_length; + state->out_buffer.total_out += repeat_length; + + } else + /* Else the read in bits do not + * correspond to any valid symbol */ + return INVALID_SYMBOL; + } + } + } + state->in_buffer.next_in -= state->in_buffer.read_in_length / 8; + state->in_buffer.avail_in += state->in_buffer.read_in_length / 8; + + return DECOMPRESSION_FINISHED; +} diff --git a/igzip/igzip_inflate_ref.h b/igzip/igzip_inflate_ref.h new file mode 100644 index 0000000..ba1badd --- /dev/null +++ 
b/igzip/igzip_inflate_ref.h @@ -0,0 +1,150 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef INFLATE_H
+#define INFLATE_H
+
+/* NOTE(review): the header name was missing from this include in the copy
+ * under review; stdint.h provides the fixed width types used below - confirm
+ * against upstream. */
+#include <stdint.h>
+#include "huff_codes.h"
+
+/* Return values of igzip_inflate and the header parsing functions */
+#define DECOMPRESSION_FINISHED 0
+#define END_OF_INPUT 1
+#define OUT_BUFFER_OVERFLOW 2
+#define INVALID_BLOCK_HEADER 3
+#define INVALID_SYMBOL 4
+#define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5
+#define INVALID_LOOK_BACK_DISTANCE 6
+
+/* Number of code bits resolved by the first (small) lookup table */
+#define DECODE_LOOKUP_SIZE 10
+
+#if DECODE_LOOKUP_SIZE > 15
+# undef DECODE_LOOKUP_SIZE
+# define DECODE_LOOKUP_SIZE 15
+#endif
+
+/* Number of entries in the long code lookup table. The expansions are fully
+ * parenthesized so the macro can be used inside larger expressions. */
+#if DECODE_LOOKUP_SIZE > 7
+# define MAX_LONG_CODE (((2 << 8) + 1) * (2 << (15 - DECODE_LOOKUP_SIZE)) + 32)
+#else
+# define MAX_LONG_CODE ((2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32)
+#endif
+
+/* Buffer used to manage decompressed output */
+struct inflate_out_buffer{
+	uint8_t *next_out;
+	uint32_t avail_out;
+	uint32_t total_out;
+};
+
+/* Buffer used to manage compressed input */
+struct inflate_in_buffer{
+	uint8_t *start;
+	uint8_t *next_in;
+	uint32_t avail_in;
+	uint64_t read_in;
+	int32_t read_in_length;
+};
+
+/* Data structure used to store a huffman code for fast look up */
+struct inflate_huff_code{
+	uint16_t small_code_lookup[ 1 << (DECODE_LOOKUP_SIZE)];
+	uint16_t long_code_lookup[MAX_LONG_CODE];
+};
+
+/* Structure containing the current state of decompression of data */
+struct inflate_state {
+	struct inflate_out_buffer out_buffer;
+	struct inflate_in_buffer in_buffer;
+	struct inflate_huff_code lit_huff_code;
+	struct inflate_huff_code dist_huff_code;
+	uint8_t new_block;
+	uint8_t bfinal;
+	uint8_t btype;
+};
+
+/* Copies repeat_length bytes starting at dest - lookback_distance into dest.
+ * When the source and destination ranges overlap, bytes written earlier in the
+ * same copy are read again as source data. */
+void byte_copy(uint8_t *dest, uint64_t lookback_distance, int repeat_length);
+
+/* Initialize a struct inflate_in_buffer for use */
+void init_inflate_in_buffer(struct inflate_in_buffer *inflate_in);
+
+/* Set up the in_stream used for the in_buffer */
+void set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t *in_stream,
+			   uint32_t in_size);
+
+/* Set up the out_stream used for the out_buffer */
+void set_inflate_out_buffer(struct inflate_out_buffer *inflate_out, uint8_t *out_stream,
+			    uint32_t out_size);
+
+/* Load data from the in_stream into a buffer to allow for handling unaligned data */
+void inflate_in_load(struct inflate_in_buffer *inflate_in, int min_load);
+
+/* Returns the next bit_count bits from the in stream without consuming them */
+uint64_t inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
+
+/* Shifts the in stream over by bit_count bits */
+void inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
+
+/* Returns the next bit_count bits from the in stream and shifts the stream over
+ * by bit_count bits */
+uint64_t inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
+
+/* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the
+ * deflate static header */
+int setup_static_header(struct inflate_state *state);
+
+/* Sets result to the inflate_huff_code corresponding to the huffcode defined by
+ * the lengths in huff_code_table, where count is a histogram of the appearance
+ * of each code length */
+void make_inflate_huff_code(struct inflate_huff_code *result, struct huff_code *huff_code_table,
+			    int table_length, uint16_t * count);
+
+/* Decodes the next symbol in in_buffer using the huff code defined by
+ * huff_code */
+uint16_t decode_next(struct inflate_in_buffer *in_buffer, struct inflate_huff_code *huff_code);
+
+/* Reads data from the in_buffer and sets the huff code corresponding to that
+ * data */
+int setup_dynamic_header(struct inflate_state *state);
+
+/* Reads in the header pointed to by in_stream and sets up state to reflect that
+ * header information */
+int read_header(struct inflate_state *state);
+
+/* Initialize a struct inflate_state for deflate compressed input data at in_stream and to output
+ * data into out_stream */
+void igzip_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size,
+			uint8_t *out_stream, uint64_t out_size);
+
+/* Decompress deflate data. This function assumes a call to igzip_inflate_init
+ * has been made to set up the state structure to allow for decompression. */
+int igzip_inflate(struct inflate_state *state);
+
+#endif //INFLATE_H
diff --git a/igzip/igzip_inflate_test.c b/igzip/igzip_inflate_test.c
new file mode 100644
index 0000000..1c745b5
--- /dev/null
+++ b/igzip/igzip_inflate_test.c
@@ -0,0 +1,182 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/* NOTE(review): the system header names were missing from the includes in the
+ * copy under review. These are the headers the code below needs (printf/fopen,
+ * malloc/exit, memcmp, compress2) - confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include "igzip_inflate_ref.h"
+#include "huff_codes.h"
+
+/* Do not use a file larger than memory can support, because compression and
+ * decompression are done in a stateless manner. test() takes the input length
+ * as an int, so accepted files must also stay below this limit. */
+#define MAX_INPUT_FILE_SIZE (2ULL * 1024 * 1024 * 1024)
+
+/*
+ * Compress uncompressed_stream with zlib at level 9, decompress the result
+ * with igzip_inflate into uncompressed_test_stream and compare the two.
+ * On entry *compressed_length is the capacity of compressed_stream; on return
+ * it is the compressed size. Returns 0 on success, the zlib or igzip_inflate
+ * error code on failure, or -1 when the decompressed data does not match.
+ */
+int test(uint8_t * compressed_stream, uint64_t * compressed_length,
+	 uint8_t * uncompressed_stream, int uncompressed_length,
+	 uint8_t * uncompressed_test_stream)
+{
+	struct inflate_state state;
+	/* zlib wants a uLongf, which is not 64 bit on every platform */
+	uLongf zlib_length = *compressed_length;
+	int ret;
+
+	ret =
+	    compress2(compressed_stream, &zlib_length, uncompressed_stream,
+		      uncompressed_length, 9);
+	*compressed_length = zlib_length;
+	if (ret) {
+		printf("Failed compressing input with exit code %d", ret);
+		return ret;
+	}
+
+	/* Skip the first 2 bytes of the compress2 output (the zlib stream
+	 * header); igzip_inflate starts at the first deflate block */
+	igzip_inflate_init(&state, compressed_stream + 2, *compressed_length - 2,
+			   uncompressed_test_stream, uncompressed_length);
+	ret = igzip_inflate(&state);
+
+	switch (ret) {
+	case 0:
+		break;
+	case END_OF_INPUT:
+		printf(" did not decompress all input\n");
+		return END_OF_INPUT;
+	case INVALID_BLOCK_HEADER:
+		printf("  invalid header\n");
+		return INVALID_BLOCK_HEADER;
+	case INVALID_SYMBOL:
+		printf(" invalid symbol\n");
+		return INVALID_SYMBOL;
+	case OUT_BUFFER_OVERFLOW:
+		printf(" out buffer overflow\n");
+		return OUT_BUFFER_OVERFLOW;
+	case INVALID_NON_COMPRESSED_BLOCK_LENGTH:
+		printf("Invalid length bits in non-compressed block\n");
+		return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
+	case INVALID_LOOK_BACK_DISTANCE:
+		printf("Invalid lookback distance");
+		return INVALID_LOOK_BACK_DISTANCE;
+	default:
+		printf(" error\n");
+		return -1;
+	}
+
+	if (state.out_buffer.total_out != uncompressed_length) {
+		printf("incorrect amount of data was decompressed from compressed data\n");
+		/* total_out is a uint32_t, so it is printed with %u */
+		printf("%u decompressed of %d compressed", state.out_buffer.total_out,
+		       uncompressed_length);
+		return -1;
+	}
+	if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length)) {
+		printf(" decompressed data is not the same as the compressed data\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Run test() on every file named on the command line. Stops at the first
+ * failing file, dumps its compressed bytes and returns the failure code.
+ */
+int main(int argc, char **argv)
+{
+	int i, j, ret = 0, fin_ret = 0;
+	FILE *file;
+	uint64_t compressed_length, file_length, uncompressed_length;
+	uint8_t *uncompressed_stream, *compressed_stream, *uncompressed_test_stream;
+
+	if (argc == 1)
+		printf("Error, no input file\n");
+
+	for (i = 1; i < argc; i++) {
+		/* Binary mode: the input is arbitrary data, not text */
+		file = fopen(argv[i], "rb");
+		if (file == NULL) {
+			printf("Error opening file %s\n", argv[i]);
+			return 1;
+		} else
+			printf("Starting file %s", argv[i]);
+
+		fseek(file, 0, SEEK_END);
+		file_length = ftell(file);
+		fseek(file, 0, SEEK_SET);
+		file_length -= ftell(file);
+		/* >= rather than >: a length of exactly 2 GiB does not fit
+		 * the int parameter of test() */
+		if (file_length >= MAX_INPUT_FILE_SIZE) {
+			printf("File too large to run on this test\n");
+			fclose(file);
+			continue;
+		}
+		compressed_length = compressBound(file_length);
+		uncompressed_stream = malloc(file_length);
+		compressed_stream = malloc(compressed_length);
+		uncompressed_test_stream = malloc(file_length);
+
+		if (uncompressed_stream == NULL || compressed_stream == NULL
+		    || uncompressed_test_stream == NULL) {
+			printf("Failed to allocate memory\n");
+			/* Non-zero status so the failure is visible to the caller */
+			exit(1);
+		}
+
+		uncompressed_length = fread(uncompressed_stream, 1, file_length, file);
+		ret =
+		    test(compressed_stream, &compressed_length, uncompressed_stream,
+			 uncompressed_length, uncompressed_test_stream);
+		if (ret) {
+			/* Dump the compressed stream, 32 bytes per row */
+			for (j = 0; j < compressed_length; j++) {
+				if ((j & 31) == 0)
+					printf("\n");
+				else
+					printf(" ");
+				printf("0x%02x,", compressed_stream[j]);
+
+			}
+			printf("\n");
+
+		}
+
+		fclose(file);
+		free(compressed_stream);
+		free(uncompressed_stream);
+		free(uncompressed_test_stream);
+
+		if (ret) {
+			printf(" ... Fail with exit code %d\n", ret);
+			return ret;
+		} else
+			printf(" ... Pass\n");
+
+		fin_ret |= ret;
+	}
+	return fin_ret;
+}
diff --git a/igzip/igzip_multibinary.asm b/igzip/igzip_multibinary.asm
new file mode 100644
index 0000000..9fb2741
--- /dev/null
+++ b/igzip/igzip_multibinary.asm
@@ -0,0 +1,73 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Entry points for the igzip deflate functions. Each public name declared
+; below is paired with a list of externally defined implementations.
+
+default rel
+[bits 64]
+
+; WRT_OPT expands to "wrt ..plt" only when the output format is elf64 and to
+; nothing otherwise. It is not referenced again in this file.
+; NOTE(review): presumably consumed by the macros in multibinary.asm - confirm.
+%ifidn __OUTPUT_FORMAT__, elf64
+%define WRT_OPT         wrt ..plt
+%else
+%define WRT_OPT
+%endif
+
+%include "reg_sizes.asm"
+
+; Implementations defined in other files. Only the stateless body and the body
+; have an _04 variant.
+extern isal_deflate_body_stateless_base
+extern isal_deflate_body_stateless_01
+extern isal_deflate_body_stateless_04
+
+extern isal_deflate_body_base
+extern isal_deflate_body_01
+extern isal_deflate_body_04
+extern isal_deflate_finish_base
+extern isal_deflate_finish_01
+
+extern get_crc_base
+extern get_crc_01
+
+extern isal_deflate_init_base
+extern isal_deflate_init_01
+
+section .text
+
+; mbin_interface and mbin_dispatch_init5 are not defined in this file.
+; NOTE(review): presumably they come from multibinary.asm, with the four
+; implementation arguments ordered from the baseline to the most capable
+; instruction set - confirm against that file.
+%include "multibinary.asm"
+
+mbin_interface		isal_deflate_init
+mbin_dispatch_init5	isal_deflate_init, isal_deflate_init_base, isal_deflate_init_01, isal_deflate_init_01, isal_deflate_init_01
+
+mbin_interface		isal_deflate_body_stateless
+mbin_dispatch_init5	isal_deflate_body_stateless, isal_deflate_body_stateless_base, isal_deflate_body_stateless_01, isal_deflate_body_stateless_01, isal_deflate_body_stateless_04
+
+mbin_interface		isal_deflate_body
+mbin_dispatch_init5	isal_deflate_body, isal_deflate_body_base, isal_deflate_body_01, isal_deflate_body_01, isal_deflate_body_04
+mbin_interface		isal_deflate_finish
+mbin_dispatch_init5	isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
+
+mbin_interface		get_crc
+mbin_dispatch_init5	get_crc, get_crc_base, get_crc_01, get_crc_01, get_crc_01
diff --git a/igzip/igzip_perf.c b/igzip/igzip_perf.c
new file mode 100644
index 0000000..1ccd039
--- /dev/null
+++ b/igzip/igzip_perf.c
@@ -0,0 +1,92 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include
+#include
+#include
+#include
+#include "igzip_lib.h"
+#include "test.h"
+
+#define TEST_LEN (1024*1024)
+#define IBUF_SIZE (1024*1024)
+#define OBUF_SIZE (1024*1024)
+
+#define TEST_LOOPS 400
+#define TEST_TYPE_STR "_warm"
+
+/* Fill data[0..size) with the repeating lowercase sequence 'b','c',...,'z','a','b',... */
+void create_data(unsigned char *data, int size)
+{
+	char c = 'a';
+	while (size--)
+		*data++ = c = c < 'z' ? c + 1 : 'a';
+}
+
+/* Time TEST_LOOPS complete deflate passes over a TEST_LEN byte buffer and
+ * report the result through perf_start/perf_stop/perf_print.
+ * Returns 0 on success, -1 if the last stream no longer has end_of_stream set. */
+int main(int argc, char *argv[])
+{
+	int i = 1;
+	struct isal_zstream stream;
+	/* Static storage: two 1 MiB automatic arrays do not fit in the 1 MiB
+	 * default stack of the Windows build (Makefile.nmake). */
+	static unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE];
+
+	printf("Window Size: %d K\n", HIST_SIZE);
+	printf("igzip_perf: \n");
+	fflush(0);
+	create_data(inbuf, TEST_LEN);
+
+	struct perf start, stop;
+	perf_start(&start);
+
+	for (i = 0; i < TEST_LOOPS; i++) {
+		isal_deflate_init(&stream);
+
+		stream.avail_in = TEST_LEN;
+		stream.end_of_stream = 1;
+		stream.next_in = inbuf;
+		stream.flush = NO_FLUSH;
+
+		/* zbuf is reused from its start on every call; the compressed
+		 * bytes are discarded, only the time taken matters here. */
+		do {
+			stream.avail_out = OBUF_SIZE;
+			stream.next_out = zbuf;
+			isal_deflate(&stream);
+		} while (stream.avail_out == 0);
+	}
+
+	perf_stop(&stop);
+
+	printf("igzip" TEST_TYPE_STR ": ");
+	perf_print(stop, start, (long long)TEST_LEN * i);
+
+	if (!stream.end_of_stream) {
+		printf("error: compression test could not fit into allocated buffers\n");
+		return -1;
+	}
+	printf("End of igzip_perf\n\n");
+	fflush(0);
+	return 0;
+}
diff --git a/igzip/igzip_rand_test.c b/igzip/igzip_rand_test.c
new file mode 100644
index 0000000..f3d3ff4
--- /dev/null
+++ b/igzip/igzip_rand_test.c
@@ -0,0 +1,1614 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" +#include "igzip_inflate_ref.h" +#include "crc_inflate.h" +#include + +#ifndef RANDOMS +# define RANDOMS 400 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define IBUF_SIZE (1024*1024) + +#ifndef IGZIP_USE_GZIP_FORMAT +# define DEFLATE 1 +#endif + +#define PAGE_SIZE 4*1024 + +#define str1 "Short test string" +#define str2 "one two three four five six seven eight nine ten eleven twelve " \ + "thirteen fourteen fifteen sixteen" + +#define TYPE0_HDR_SIZE 5 /* Size of a type 0 blocks header in bytes */ +#define TYPE0_MAX_SIZE 65535 /* Max length of a type 0 block in bytes (excludes the header) */ + +#define MAX_LOOPS 20 +/* Defines for the possible error conditions */ +enum IGZIP_TEST_ERROR_CODES { + IGZIP_COMP_OK, + + MALLOC_FAILED, + FILE_READ_FAILED, + + COMPRESS_INCORRECT_STATE, + COMPRESS_INPUT_STREAM_INTEGRITY_ERROR, + COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR, + COMPRESS_END_OF_STREAM_NOT_SET, + COMPRESS_ALL_INPUT_FAIL, + COMPRESS_OUT_BUFFER_OVERFLOW, + COMPRESS_LOOP_COUNT_OVERFLOW, + COMPRESS_GENERAL_ERROR, + + INFLATE_END_OF_INPUT, + INFLATE_INVALID_BLOCK_HEADER, + INFLATE_INVALID_SYMBOL, + INFLATE_OUT_BUFFER_OVERFLOW, + INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH, + INFLATE_LEFTOVER_INPUT, + INFLATE_INCORRECT_OUTPUT_SIZE, + INFLATE_INVALID_LOOK_BACK_DISTANCE, + INVALID_GZIP_HEADER, + INCORRECT_GZIP_TRAILER, + INFLATE_GENERAL_ERROR, + + INVALID_FLUSH_ERROR, + + OVERFLOW_TEST_ERROR, + RESULT_ERROR +}; + +const int hdr_bytes = 300; + +#ifndef DEFLATE +const uint8_t gzip_hdr[10] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff +}; + +const uint32_t gzip_hdr_bytes = 10; +const uint32_t gzip_trl_bytes = 8; + +const int trl_bytes = 8; +const int gzip_extra_bytes = 18; + +#else +const int trl_bytes = 0; +const int gzip_extra_bytes = 0; + +#endif + +struct isal_hufftables *hufftables = NULL; + +#define 
HISTORY_SIZE (32*1024)	/* parenthesized: expanded inside "rand() % HISTORY_SIZE" */
+#define MIN_LENGTH 3
+#define MIN_DIST 1
+
+/* Create random compressible data. This is achieved by randomly choosing a
+ * random character, or to repeat previous data in the stream for a random
+ * length and look back distance. The probability of a random character or a
+ * repeat being chosen is semi-randomly chosen by setting max_repeat_data to be
+ * differing values.
+ *
+ * data: buffer to fill
+ * size: number of bytes to write; nothing is written when size <= 0
+ */
+void create_rand_repeat_data(uint8_t * data, int size)
+{
+	uint32_t next_data;
+	uint8_t *data_start = data;
+	uint32_t length, distance;
+	uint32_t max_repeat_data = 256;
+	uint32_t power = rand() % 32;
+	/* An array of the powers of 2 (except the final element which is 0) */
+	const uint32_t power_of_2_array[] = {
+		0x00000001, 0x00000002, 0x00000004, 0x00000008,
+		0x00000010, 0x00000020, 0x00000040, 0x00000080,
+		0x00000100, 0x00000200, 0x00000400, 0x00000800,
+		0x00001000, 0x00002000, 0x00004000, 0x00008000,
+		0x00010000, 0x00020000, 0x00040000, 0x00080000,
+		0x00100000, 0x00200000, 0x00400000, 0x00800000,
+		0x01000000, 0x02000000, 0x04000000, 0x08000000,
+		0x10000000, 0x20000000, 0x40000000, 0x00000000
+	};
+
+	/* Draws below 256 emit a literal byte, anything above emits a repeat,
+	 * so a larger max_repeat_data means more repeated data. */
+	max_repeat_data += power_of_2_array[power];
+
+	/* The first byte is always a literal so that a repeat has history */
+	if (size-- > 0)
+		*data++ = rand();
+
+	while (size > 0) {
+		next_data = rand() % max_repeat_data;
+		if (next_data < 256) {
+			*data++ = next_data;
+			size--;
+		} else if (size < 3) {
+			/* Not enough room left for a minimum length repeat */
+			*data++ = rand() % 256;
+			size--;
+		} else {
+			length = (rand() % 256) + MIN_LENGTH;
+			if (length > size)
+				length = (rand() % (size - 2)) + MIN_LENGTH;
+
+			distance = (rand() % HISTORY_SIZE) + MIN_DIST;
+			if (distance > data - data_start)
+				distance = (rand() % (data - data_start)) + MIN_DIST;
+
+			size -= length;
+			if (distance <= length) {
+				/* Source and destination may overlap: copy bytewise */
+				while (length-- > 0) {
+					*data = *(data - distance);
+					data++;
+				}
+			} else {
+				memcpy(data, data - distance, length);
+				/* Advance past the copy; otherwise the next write
+				 * lands on top of it and the tail of the buffer is
+				 * never filled. */
+				data += length;
+			}
+		}
+	}
+}
+
+void print_error(int error_code)
+{
+	switch (error_code) {
+	case IGZIP_COMP_OK:
+		break;
+	case MALLOC_FAILED:
+		printf("error: failed to allocate memory\n");
+ break; + case FILE_READ_FAILED: + printf("error: failed to read in file\n"); + break; + case COMPRESS_INCORRECT_STATE: + printf("error: incorrect stream internal state\n"); + break; + case COMPRESS_INPUT_STREAM_INTEGRITY_ERROR: + printf("error: inconsistent stream input buffer\n"); + break; + case COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR: + printf("error: inconsistent stream output buffer\n"); + break; + case COMPRESS_END_OF_STREAM_NOT_SET: + printf("error: end of stream not set\n"); + break; + case COMPRESS_ALL_INPUT_FAIL: + printf("error: not all input data compressed\n"); + break; + case COMPRESS_OUT_BUFFER_OVERFLOW: + printf("error: output buffer overflow while compressing data\n"); + break; + case COMPRESS_GENERAL_ERROR: + printf("error: compression failed\n"); + break; + case INFLATE_END_OF_INPUT: + printf("error: did not decompress all input\n"); + break; + case INFLATE_INVALID_BLOCK_HEADER: + printf("error: invalid header\n"); + break; + case INFLATE_INVALID_SYMBOL: + printf("error: invalid symbol found when decompressing input\n"); + break; + case INFLATE_OUT_BUFFER_OVERFLOW: + printf("error: output buffer overflow while decompressing data\n"); + break; + case INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH: + printf("error: invalid length bits in non-compressed block\n"); + break; + case INFLATE_GENERAL_ERROR: + printf("error: decompression failed\n"); + break; + case INFLATE_LEFTOVER_INPUT: + printf("error: the trailer of igzip output contains junk\n"); + break; + case INFLATE_INCORRECT_OUTPUT_SIZE: + printf("error: incorrect amount of data was decompressed\n"); + break; + case INFLATE_INVALID_LOOK_BACK_DISTANCE: + printf("error: invalid look back distance found while decompressing\n"); + break; + case INVALID_GZIP_HEADER: + printf("error: incorrect gzip header found when inflating data\n"); + break; + case INCORRECT_GZIP_TRAILER: + printf("error: incorrect gzip trailer found when inflating data\n"); + break; + case INVALID_FLUSH_ERROR: + printf("error: 
invalid flush did not cause compression to error\n");
+		break;
+	case RESULT_ERROR:
+		printf("error: decompressed data is not the same as the compressed data\n");
+		break;
+	case OVERFLOW_TEST_ERROR:
+		printf("error: overflow undetected\n");
+		break;
+	case COMPRESS_LOOP_COUNT_OVERFLOW:
+		printf("error: compression loop count overflow\n");
+		break;
+	default:
+		printf("error: unknown error code\n");
+	}
+}
+
+/* Print length bytes of array as hex, 16 per line, each line prefixed with
+ * the offset of its first byte. */
+void print_uint8_t(uint8_t * array, uint64_t length)
+{
+	const int line_size = 16;
+	uint64_t i;
+
+	/* uint64_t is not unsigned long on every target, so cast explicitly */
+	printf("Length = %llu", (unsigned long long)length);
+	for (i = 0; i < length; i++) {
+		if ((i % line_size) == 0)
+			printf("\n0x%08x\t", (unsigned int)i);
+		else
+			printf(" ");
+		printf("0x%02x,", array[i]);
+	}
+	printf("\n");
+}
+
+#ifndef DEFLATE
+/* Check the gzip header at z_buf against the expected gzip_hdr bytes and the
+ * RFC 1952 magic/method/flag values.
+ * Returns 0 when it matches, INVALID_GZIP_HEADER otherwise. */
+uint32_t check_gzip_header(uint8_t * z_buf)
+{
+	/* These values are defined in RFC 1952 page 4 */
+	const uint8_t ID1 = 0x1f, ID2 = 0x8b, CM = 0x08, FLG = 0;
+	uint32_t ret = 0;
+	int i;
+	/* Verify that the gzip header is the one used in hufftables_c.c */
+	for (i = 0; i < gzip_hdr_bytes; i++)
+		if (z_buf[i] != gzip_hdr[i])
+			ret = INVALID_GZIP_HEADER;
+
+	/* Verify that the gzip header is a valid gzip header */
+	if (*z_buf++ != ID1)
+		ret = INVALID_GZIP_HEADER;
+
+	if (*z_buf++ != ID2)
+		ret = INVALID_GZIP_HEADER;
+
+	/* Verify compression method is Deflate */
+	if (*z_buf++ != CM)
+		ret = INVALID_GZIP_HEADER;
+
+	/* The following comparison is specific to how gzip headers are written in igzip */
+	/* Verify no extra flags are set */
+	if (*z_buf != FLG)
+		ret = INVALID_GZIP_HEADER;
+
+	/* The last 6 bytes in the gzip header do not contain any information
+	 * important to decompressing the data */
+
+	return ret;
+}
+
+/* Compare the 8 byte gzip trailer at gstream->in_buffer.next_in (crc followed
+ * by size) with the crc and size of the data that was decompressed.
+ * Returns 0 when both match, INCORRECT_GZIP_TRAILER otherwise. */
+uint32_t check_gzip_trl(struct inflate_state * gstream)
+{
+	uint8_t *index = NULL;
+	uint32_t crc, ret = 0;
+	uint32_t trl_crc, trl_size;
+
+	index = gstream->out_buffer.next_out - gstream->out_buffer.total_out;
+	crc = find_crc(index, gstream->out_buffer.total_out);
+
+	/* The trailer starts at an arbitrary byte offset, so copy the fields
+	 * out instead of dereferencing a cast uint32_t pointer. Values are
+	 * read in host byte order, exactly as before. */
+	memcpy(&trl_crc, gstream->in_buffer.next_in, sizeof(trl_crc));
+	memcpy(&trl_size, gstream->in_buffer.next_in + 4, sizeof(trl_size));
+
+	if (gstream->out_buffer.total_out != trl_size || crc != trl_crc)
+		ret = INCORRECT_GZIP_TRAILER;
+
+	return ret;
+}
+#endif
+
+/* Inflate the compressed data and check that the decompressed data agrees with the input data
+ *
+ * z_buf/z_size: compressed data (including the gzip header and trailer
+ *               unless DEFLATE is defined)
+ * in_buf/in_size: the original data the output is compared against
+ *
+ * Returns 0 on success, MALLOC_FAILED, one of the INFLATE_* codes mapped from
+ * the igzip_inflate return value, INFLATE_LEFTOVER_INPUT,
+ * INFLATE_INCORRECT_OUTPUT_SIZE, RESULT_ERROR, INVALID_GZIP_HEADER or
+ * INCORRECT_GZIP_TRAILER.
+ */
+int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size)
+{
+	/* Test inflate with reference inflate */
+
+	int ret = 0;
+	struct inflate_state gstream;
+	uint32_t test_size = in_size;
+	uint8_t *test_buf = NULL;
+	int mem_result = 0;
+
+	if (in_size > 0) {
+		assert(in_buf != NULL);
+		test_buf = malloc(test_size);
+
+		if (test_buf == NULL)
+			return MALLOC_FAILED;
+	}
+	if (test_buf != NULL)
+		memset(test_buf, 0xff, test_size);
+
+#ifndef DEFLATE
+	int gzip_hdr_result, gzip_trl_result = 0;
+
+	gzip_hdr_result = check_gzip_header(z_buf);
+	z_buf += gzip_hdr_bytes;
+	z_size -= gzip_hdr_bytes;
+#endif
+
+	igzip_inflate_init(&gstream, z_buf, z_size, test_buf, test_size);
+	ret = igzip_inflate(&gstream);
+
+	if (test_buf != NULL)
+		mem_result = memcmp(in_buf, test_buf, in_size);
+
+#ifdef VERBOSE
+	int i;
+	if (mem_result)
+		for (i = 0; i < in_size; i++) {
+			if (in_buf[i] != test_buf[i]) {
+				printf("First incorrect data at 0x%x of 0x%x, 0x%x != 0x%x\n",
+				       i, in_size, in_buf[i], test_buf[i]);
+				break;
+			}
+		}
+#endif
+
+#ifndef DEFLATE
+	/* Only look at the trailer after a successful inflate and when all of
+	 * its bytes are present; a failed inflate returns its own error from
+	 * the switch below before the trailer result would be used. */
+	if (ret == 0) {
+		if (gstream.in_buffer.avail_in >= gzip_trl_bytes) {
+			gzip_trl_result = check_gzip_trl(&gstream);
+			gstream.in_buffer.avail_in -= gzip_trl_bytes;
+			gstream.in_buffer.next_in += gzip_trl_bytes;
+		} else
+			gzip_trl_result = INCORRECT_GZIP_TRAILER;
+	}
+#endif
+
+	free(test_buf);
+
+	switch (ret) {
+	case 0:
+		break;
+	case END_OF_INPUT:
+		return INFLATE_END_OF_INPUT;
+	case INVALID_BLOCK_HEADER:
+		return INFLATE_INVALID_BLOCK_HEADER;
+	case INVALID_SYMBOL:
+		return INFLATE_INVALID_SYMBOL;
+	case OUT_BUFFER_OVERFLOW:
+		return INFLATE_OUT_BUFFER_OVERFLOW;
+	case INVALID_NON_COMPRESSED_BLOCK_LENGTH:
+		return INFLATE_INVALID_NON_COMPRESSED_BLOCK_LENGTH;
+	case INVALID_LOOK_BACK_DISTANCE:
+		return INFLATE_INVALID_LOOK_BACK_DISTANCE;
+	default:
+		return INFLATE_GENERAL_ERROR;
+	}
+
+	if (gstream.in_buffer.avail_in != 0)
+		return INFLATE_LEFTOVER_INPUT;
+
+	if (gstream.out_buffer.total_out != in_size)
+		return INFLATE_INCORRECT_OUTPUT_SIZE;
+
+	if (mem_result)
+		return RESULT_ERROR;
+
+#ifndef DEFLATE
+	if (gzip_hdr_result)
+		return INVALID_GZIP_HEADER;
+
+	if (gzip_trl_result)
+		return INCORRECT_GZIP_TRAILER;
+#endif
+
+	return 0;
+}
+
+/* Check that the state of the data stream is consistent */
+int stream_valid_check(struct isal_zstream *stream, uint8_t * in_buf, uint32_t in_size,
+		       uint8_t * out_buf, uint32_t out_size, uint32_t in_processed,
+		       uint32_t out_processed, uint32_t data_size)
+{
+	uint32_t total_in, in_buffer_size, total_out, out_buffer_size;
+
+	total_in =
+	    (in_size ==
+	     0) ? in_processed : (in_processed - in_size) + (stream->next_in - in_buf);
+	in_buffer_size = (in_size == 0) ? 0 : stream->next_in - in_buf + stream->avail_in;
+
+	/* Check for a consistent amount of data processed */
+	if (total_in != stream->total_in || in_buffer_size != in_size)
+		return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR;
+
+	total_out =
+	    (out_size == 0) ? out_processed : out_processed + (stream->next_out - out_buf);
+	out_buffer_size = (out_size == 0) ?
0 : stream->next_out - out_buf + stream->avail_out;
+
+	/* Check for a consistent amount of data compressed */
+	if (total_out != stream->total_out || out_buffer_size != out_size) {
+		return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR;
+	}
+
+	return 0;
+}
+
+/* Performs compression with checks to discover and verify the state of the
+ * stream
+ * stream: compress data structure which has been initialized to use
+ * in_buf and out_buf as the buffers
+ * data_size: size of all input data
+ * compressed_size: size of all available output buffers
+ * in_buf: next buffer of data to be compressed
+ * in_size: size of in_buf
+ * out_buf: next output buffer where data is stored
+ * out_size: size of out_buf
+ * in_processed: the amount of input data which has been loaded into buffers
+ * to be compressed, this includes the data in in_buf
+ * out_processed: the amount of output data which has been compressed and stored,
+ * this does not include the data in the current out_buf
+ *
+ * Returns the stream_valid_check error if the stream is inconsistent,
+ * COMPRESS_GENERAL_ERROR if isal_deflate failed, COMPRESS_OUT_BUFFER_OVERFLOW
+ * if all of compressed_size is used and the stream has not reached ZSTATE_END,
+ * otherwise the isal_deflate return value.
+*/
+int isal_deflate_with_checks(struct isal_zstream *stream, uint32_t data_size,
+			     uint32_t compressed_size, uint8_t * in_buf, uint32_t in_size,
+			     uint32_t in_processed, uint8_t * out_buf, uint32_t out_size,
+			     uint32_t out_processed)
+{
+	int ret, stream_check;
+	struct isal_zstate *state = &stream->internal_state;
+
+#ifdef VERBOSE
+	printf("Pre compression\n");
+	printf
+	    ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n",
+	     data_size, in_processed, in_size, stream->avail_in, stream->total_in);
+	printf
+	    ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+	     compressed_size, out_processed, out_size, stream->avail_out, stream->total_out);
+#endif
+
+	ret = isal_deflate(stream);
+
+#ifdef VERBOSE
+	printf("Post compression\n");
+	printf
+	    ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n",
+	     data_size, in_processed, in_size, stream->avail_in, stream->total_in);
+	printf
+	    ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+	     compressed_size, out_processed, out_size, stream->avail_out, stream->total_out);
+	printf("\n\n");
+#endif
+
+	/* Verify the stream is in a valid state */
+	stream_check = stream_valid_check(stream, in_buf, in_size, out_buf, out_size,
+					  in_processed, out_processed, data_size);
+
+	if (stream_check != 0)
+		return stream_check;
+
+	if (ret != IGZIP_COMP_OK)
+		return COMPRESS_GENERAL_ERROR;
+
+	/* Check if the compression is completed */
+	if (state->state != ZSTATE_END) {
+		/* Compare by addition: the former unsigned subtraction
+		 * "... <= 0" could only ever detect exact equality. */
+		uint64_t out_used = (uint64_t) out_processed + (out_size - stream->avail_out);
+
+		if (out_used >= compressed_size)
+			return COMPRESS_OUT_BUFFER_OVERFLOW;
+	}
+
+	return ret;
+
+}
+
+/* Compress the input data into the output buffer where the input buffer and
+ * output buffer are randomly segmented to test state information for the
+ * compression*/
+int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+			uint32_t * compressed_size, uint32_t flush_type)
+{
+	int ret = IGZIP_COMP_OK;
+	uint8_t *in_buf = NULL, *out_buf = NULL;
+	uint32_t in_size = 0, out_size = 0;
+	uint32_t in_processed = 0, out_processed = 0;
+	struct isal_zstream stream;
+	struct isal_zstate *state = &stream.internal_state;
+	uint32_t loop_count = 0;
+
+#ifdef VERBOSE
+	printf("Starting Compress Multi Pass\n");
+#endif
+
+	create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+	isal_deflate_init(&stream);
+
+	if (hufftables != NULL)
+		stream.hufftables = hufftables;
+
+	if (state->state != ZSTATE_NEW_HDR)
+		return COMPRESS_INCORRECT_STATE;
+
+	stream.flush = flush_type;
+	stream.end_of_stream = 0;
+
+	/* These are set here to allow the loop to run correctly */
+	stream.avail_in = 0;
+	stream.avail_out = 0;
+
+	while (1) {
+		loop_count++;
+
+		/* Setup in buffer for next round of compression */
+		if (stream.avail_in == 0) {
+			if (flush_type == NO_FLUSH || state->state ==
ZSTATE_NEW_HDR) {
+				/* Randomly choose size of the next in buffer */
+				in_size = rand() % (data_size + 1);
+
+				/* Limit size of buffer to the input that is left; the
+				 * chunk that reaches the end of the data also marks
+				 * the end of the stream */
+				if (in_size >= data_size - in_processed) {
+					in_size = data_size - in_processed;
+					stream.end_of_stream = 1;
+				}
+
+				if (in_size != 0) {
+					if (in_buf != NULL) {
+						free(in_buf);
+						in_buf = NULL;
+					}
+
+					in_buf = malloc(in_size);
+					if (in_buf == NULL) {
+						ret = MALLOC_FAILED;
+						break;
+					}
+					memcpy(in_buf, data + in_processed, in_size);
+					in_processed += in_size;
+
+					stream.avail_in = in_size;
+					stream.next_in = in_buf;
+				}
+			}
+		}
+
+		/* Setup out buffer for next round of compression */
+		if (stream.avail_out == 0) {
+			/* Save compressed data into compressed_buf */
+			if (out_buf != NULL) {
+				memcpy(compressed_buf + out_processed, out_buf,
+				       out_size - stream.avail_out);
+				out_processed += out_size - stream.avail_out;
+			}
+
+			/* Randomly choose size of the next out buffer */
+			out_size = rand() % (*compressed_size + 1);
+
+			/* Limit size of buffer to the output space that is left */
+			if (out_size > *compressed_size - out_processed)
+				out_size = *compressed_size - out_processed;
+
+			if (out_size != 0) {
+				if (out_buf != NULL) {
+					free(out_buf);
+					out_buf = NULL;
+				}
+
+				out_buf = malloc(out_size);
+				if (out_buf == NULL) {
+					ret = MALLOC_FAILED;
+					break;
+				}
+
+				stream.avail_out = out_size;
+				stream.next_out = out_buf;
+			}
+		}
+
+		ret =
+		    isal_deflate_with_checks(&stream, data_size, *compressed_size, in_buf,
+					     in_size, in_processed, out_buf, out_size,
+					     out_processed);
+
+		if (ret) {
+			if (ret == COMPRESS_OUT_BUFFER_OVERFLOW
+			    || ret == COMPRESS_INCORRECT_STATE)
+				memcpy(compressed_buf + out_processed, out_buf, out_size);
+			break;
+		}
+
+		/* Check if the compression is completed; the whole current out
+		 * buffer is copied, total_out gives the number of valid bytes */
+		if (state->state == ZSTATE_END) {
+			memcpy(compressed_buf + out_processed, out_buf, out_size);
+			*compressed_size = stream.total_out;
+			break;
+		}
+
+	}
+
+	if (in_buf != NULL)
+		free(in_buf);
+	if (out_buf != NULL)
+		free(out_buf);
+
+	if (ret == COMPRESS_OUT_BUFFER_OVERFLOW && flush_type == SYNC_FLUSH
+	    && loop_count >= MAX_LOOPS)
+		ret = COMPRESS_LOOP_COUNT_OVERFLOW;
+
+	return ret;
+
+}
+
+/* Compress the input data into the outbuffer in one call to isal_deflate
+ *
+ * data/data_size: input to compress
+ * compressed_buf: output buffer
+ * compressed_size: in: capacity of compressed_buf; out: stream.total_out,
+ *                  written only when the stream reached ZSTATE_END
+ * flush_type: value stored in stream.flush
+ *
+ * Returns COMPRESS_INCORRECT_STATE if the freshly initialized stream is not
+ * in ZSTATE_NEW_HDR, otherwise the isal_deflate_with_checks result; that
+ * result is replaced by COMPRESS_OUT_BUFFER_OVERFLOW when a SYNC_FLUSH stream
+ * did not finish and fewer than 16 bytes of output space remain.
+ */
+int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+			 uint32_t * compressed_size, uint32_t flush_type)
+{
+	int ret = IGZIP_COMP_OK;
+	struct isal_zstream stream;
+	struct isal_zstate *state = &stream.internal_state;
+
+#ifdef VERBOSE
+	printf("Starting Compress Single Pass\n");
+#endif
+
+	create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+	isal_deflate_init(&stream);
+
+	if (hufftables != NULL)
+		stream.hufftables = hufftables;
+
+	if (state->state != ZSTATE_NEW_HDR)
+		return COMPRESS_INCORRECT_STATE;
+
+	stream.flush = flush_type;
+	stream.avail_in = data_size;
+	stream.next_in = data;
+	stream.avail_out = *compressed_size;
+	stream.next_out = compressed_buf;
+	stream.end_of_stream = 1;
+
+	ret =
+	    isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size,
+				     data_size, compressed_buf, *compressed_size, 0);
+
+	/* Check if the compression is completed */
+	if (state->state == ZSTATE_END)
+		*compressed_size = stream.total_out;
+	else if (flush_type == SYNC_FLUSH && stream.avail_out < 16)
+		ret = COMPRESS_OUT_BUFFER_OVERFLOW;
+
+	return ret;
+
+}
+
+/* Statelessly compress the input buffer into the output buffer.
+ * The stream structure is filled with random data before it is initialized. */
+int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+		       uint32_t * compressed_size)
+{
+	int ret = IGZIP_COMP_OK;
+	struct isal_zstream stream;
+
+	create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+	isal_deflate_init(&stream);
+
+	if (hufftables != NULL)
+		stream.hufftables = hufftables;
+
+	stream.avail_in = data_size;
+	stream.end_of_stream = 1;
+	stream.next_in = data;
+	stream.flush = NO_FLUSH;
+
+	stream.avail_out = *compressed_size;
+	stream.next_out = compressed_buf;
+
+	ret =
isal_deflate_stateless(&stream);
+
+	/* verify the stream: the pointers, totals and remaining counts must
+	 * agree with the buffers that were handed in */
+	if (stream.next_in - data != stream.total_in ||
+	    stream.total_in + stream.avail_in != data_size)
+		return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR;
+
+	if (stream.next_out - compressed_buf != stream.total_out ||
+	    stream.total_out + stream.avail_out != *compressed_size)
+		return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR;
+
+	if (ret != IGZIP_COMP_OK) {
+		if (ret == STATELESS_OVERFLOW)
+			return COMPRESS_OUT_BUFFER_OVERFLOW;
+		else
+			return COMPRESS_GENERAL_ERROR;
+	}
+
+	if (!stream.end_of_stream) {
+		return COMPRESS_END_OF_STREAM_NOT_SET;
+	}
+
+	if (stream.avail_in != 0)
+		return COMPRESS_ALL_INPUT_FAIL;
+
+	*compressed_size = stream.total_out;
+
+	return ret;
+
+}
+
+/* Compress the input data into the output buffer where the input buffer
+ * is randomly segmented to test for independence of blocks in full flush
+ * compression.
+ *
+ * Every time the stream is in ZSTATE_NEW_HDR a new randomly sized piece of
+ * data is copied into a freshly allocated in_buf and out_buf is set to the
+ * current output position. When the stream is back in ZSTATE_NEW_HDR or has
+ * reached ZSTATE_END, only the output produced since out_buf is inflated
+ * against only that piece of input. Of the inflate_check results only
+ * INFLATE_INVALID_LOOK_BACK_DISTANCE stops the test; any other result is
+ * discarded here.
+ *
+ * compressed_size: in: capacity of compressed_buf; out: stream.total_out once
+ *                  ZSTATE_END is reached
+ */
+int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+			uint32_t * compressed_size)
+{
+	int ret = IGZIP_COMP_OK;
+	uint8_t *in_buf = NULL, *out_buf = compressed_buf;
+	uint32_t in_size = 0;
+	uint32_t in_processed = 00;
+	struct isal_zstream stream;
+	struct isal_zstate *state = &stream.internal_state;
+	uint32_t loop_count = 0;
+
+#ifdef VERBOSE
+	printf("Starting Compress Full Flush\n");
+#endif
+
+	create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+	isal_deflate_init(&stream);
+
+	if (hufftables != NULL)
+		stream.hufftables = hufftables;
+
+	if (state->state != ZSTATE_NEW_HDR)
+		return COMPRESS_INCORRECT_STATE;
+
+	stream.flush = FULL_FLUSH;
+	stream.end_of_stream = 0;
+	stream.avail_out = *compressed_size;
+	stream.next_out = compressed_buf;
+	stream.total_out = 0;
+
+	while (1) {
+		loop_count++;
+
+		/* Setup in buffer for next round of compression */
+		if (state->state == ZSTATE_NEW_HDR) {
+			/* Randomly choose size of the next in buffer */
+			in_size = rand() % (data_size + 1);
+
+			/* Limit size of buffer to the input that is left; the
+			 * chunk that reaches the end of the data also marks the
+			 * end of the stream */
+			if (in_size >= data_size - in_processed) {
+				in_size = data_size - in_processed;
+				stream.end_of_stream = 1;
+			}
+
+			stream.avail_in = in_size;
+
+			if (in_size != 0) {
+				if (in_buf != NULL) {
+					free(in_buf);
+					in_buf = NULL;
+				}
+
+				in_buf = malloc(in_size);
+				if (in_buf == NULL) {
+					ret = MALLOC_FAILED;
+					break;
+				}
+				memcpy(in_buf, data + in_processed, in_size);
+				in_processed += in_size;
+
+				stream.next_in = in_buf;
+			}
+
+			out_buf = stream.next_out;
+		}
+
+		ret = isal_deflate(&stream);
+
+		if (ret)
+			break;
+
+		/* Verify that blocks are independent */
+		if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_END) {
+			ret =
+			    inflate_check(out_buf, stream.next_out - out_buf, in_buf, in_size);
+
+			if (ret == INFLATE_INVALID_LOOK_BACK_DISTANCE)
+				break;
+			else
+				ret = 0;
+		}
+
+		/* Check if the compression is completed */
+		if (state->state == ZSTATE_END) {
+			*compressed_size = stream.total_out;
+			break;
+		}
+
+	}
+
+	if (in_buf != NULL)
+		free(in_buf);
+
+	if (ret == COMPRESS_OUT_BUFFER_OVERFLOW && loop_count >= MAX_LOOPS)
+		ret = COMPRESS_LOOP_COUNT_OVERFLOW;
+
+	return ret;
+
+}
+
+/* Compress the input buffer into the output buffer, but switch the flush type in
+ * the middle of the compression to test what happens */
+int compress_swap_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+			uint32_t * compressed_size, uint32_t flush_type)
+{
+	int ret = IGZIP_COMP_OK;
+	struct isal_zstream stream;
+	struct isal_zstate *state = &stream.internal_state;
+	uint32_t partial_size;
+
+#ifdef VERBOSE
+	printf("Starting Compress Swap Flush\n");
+#endif
+
+	isal_deflate_init(&stream);
+
+	if (hufftables != NULL)
+		stream.hufftables = hufftables;
+
+	if (state->state != ZSTATE_NEW_HDR)
+		return COMPRESS_INCORRECT_STATE;
+
+	partial_size = rand() % (data_size + 1);
+
+	stream.flush = flush_type;
+	stream.avail_in = partial_size;
+	stream.next_in = data;
+	stream.avail_out = *compressed_size;
+	stream.next_out = compressed_buf;
+
stream.end_of_stream = 0;
+
+	ret =
+	    isal_deflate_with_checks(&stream, data_size, *compressed_size, data, partial_size,
+				     partial_size, compressed_buf, *compressed_size, 0);
+
+	if (ret)
+		return ret;
+
+	flush_type = rand() % 3;
+
+	stream.flush = flush_type;
+	stream.avail_in = data_size - partial_size;
+	stream.next_in = data + partial_size;
+	stream.end_of_stream = 1;
+
+	ret =
+	    isal_deflate_with_checks(&stream, data_size, *compressed_size, data + partial_size,
+				     data_size - partial_size, data_size, compressed_buf,
+				     *compressed_size, 0);
+
+	if (ret == COMPRESS_GENERAL_ERROR)
+		return INVALID_FLUSH_ERROR;
+
+	*compressed_size = stream.total_out;
+
+	return ret;
+}
+
+/* Test deflate_stateless
+ *
+ * Works on a private copy of in_data and runs three cases: an output buffer
+ * of 2 * in_size plus header/trailer room, an output buffer sized for stored
+ * (type 0) blocks, and a randomly undersized output buffer that must be
+ * reported as COMPRESS_OUT_BUFFER_OVERFLOW.
+ * Returns 0 on success, MALLOC_FAILED, the first failing compress/inflate
+ * code, or OVERFLOW_TEST_ERROR. Every path releases both buffers.
+ */
+int test_compress_stateless(uint8_t * in_data, uint32_t in_size)
+{
+	int ret = IGZIP_COMP_OK;
+	uint32_t z_size, overflow;
+	uint8_t *z_buf = NULL;
+	uint8_t *in_buf = NULL;
+
+	if (in_size != 0) {
+		in_buf = malloc(in_size);
+
+		if (in_buf == NULL)
+			return MALLOC_FAILED;
+
+		memcpy(in_buf, in_data, in_size);
+	}
+
+	/* Test non-overflow case where a type 0 block is not written */
+	z_size = 2 * in_size + hdr_bytes + trl_bytes;
+
+	z_buf = malloc(z_size);
+
+	if (z_buf == NULL) {
+		free(in_buf);
+		return MALLOC_FAILED;
+	}
+
+	create_rand_repeat_data(z_buf, z_size);
+
+	ret = compress_stateless(in_buf, in_size, z_buf, &z_size);
+
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+#ifdef VERBOSE
+	if (ret) {
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+	}
+#endif
+	if (z_buf != NULL) {
+		free(z_buf);
+		z_buf = NULL;
+	}
+	print_error(ret);
+	if (ret) {
+		free(in_buf);
+		return ret;
+	}
+
+	/*Test non-overflow case where a type 0 block is possible to be written */
+	z_size =
+	    TYPE0_HDR_SIZE * ((in_size + TYPE0_MAX_SIZE - 1) / TYPE0_MAX_SIZE) + in_size +
+	    gzip_extra_bytes;
+
+	if (z_size == gzip_extra_bytes)
+		z_size += TYPE0_HDR_SIZE;
+
+	if (z_size < 8)
+		z_size = 8;
+
+	z_buf = malloc(z_size);
+
+	if (z_buf == NULL) {
+		free(in_buf);
+		return MALLOC_FAILED;
+	}
+
+	create_rand_repeat_data(z_buf, z_size);
+
+	ret = compress_stateless(in_buf, in_size, z_buf, &z_size);
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+#ifdef VERBOSE
+	if (ret) {
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+	}
+#endif
+
+	if (!ret) {
+		free(z_buf);
+		z_buf = NULL;
+
+		/* Test random overflow case */
+		z_size = rand() % z_size;
+
+		/* The bitwise AND yields a value that is never larger than
+		 * in_size and cannot divide by zero when in_size is 0 */
+		if (z_size > in_size)
+			z_size = rand() & in_size;
+
+		if (z_size > 0) {
+			z_buf = malloc(z_size);
+
+			if (z_buf == NULL) {
+				free(in_buf);
+				return MALLOC_FAILED;
+			}
+		}
+
+		overflow = compress_stateless(in_buf, in_size, z_buf, &z_size);
+
+		if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+#ifdef VERBOSE
+			printf("overflow error = %d\n", overflow);
+			print_error(overflow);
+			if (overflow == 0) {
+				overflow = inflate_check(z_buf, z_size, in_buf, in_size);
+				printf("inflate ret = %d\n", overflow);
+				print_error(overflow);
+			}
+			printf("Compressed array: ");
+			print_uint8_t(z_buf, z_size);
+			printf("\n");
+			printf("Data: ");
+			print_uint8_t(in_buf, in_size);
+#endif
+			ret = OVERFLOW_TEST_ERROR;
+		}
+	}
+
+	print_error(ret);
+
+	free(z_buf);
+	free(in_buf);
+
+	return ret;
+}
+
+/* Test deflate
+ *
+ * Runs single pass and multi pass compression of in_buf with flush_type into
+ * an amply sized buffer and inflates the result, then repeats single pass
+ * (and for NO_FLUSH also multi pass) with a randomly undersized buffer, where
+ * either COMPRESS_OUT_BUFFER_OVERFLOW or a correct result is accepted.
+ * Returns the bitwise OR of the result codes of all steps (0 on success).
+ */
+int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
+{
+	int ret = IGZIP_COMP_OK, fin_ret = IGZIP_COMP_OK;
+	uint32_t overflow = 0;
+	uint32_t z_size, z_size_max, z_compressed_size;
+	uint8_t *z_buf = NULL;
+
+	/* Test a non overflow case */
+	if (flush_type == NO_FLUSH)
+		z_size_max = 2 * in_size + hdr_bytes + trl_bytes + 2;
+	else if (flush_type == SYNC_FLUSH || flush_type == FULL_FLUSH)
+		z_size_max = 2 * in_size + MAX_LOOPS * (hdr_bytes + trl_bytes + 5);
+	else {
+		printf("Invalid Flush Parameter\n");
+		return COMPRESS_GENERAL_ERROR;
+	}
+
+	z_size = z_size_max;
+
+	z_buf = malloc(z_size);
+	if (z_buf == NULL) {
+		print_error(MALLOC_FAILED);
+		return MALLOC_FAILED;
+	}
+	create_rand_repeat_data(z_buf, z_size_max);
+
+	ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type);
+
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+	if (ret) {
+#ifdef VERBOSE
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+#endif
+		printf("Failed on compress single pass\n");
+		print_error(ret);
+	}
+
+	fin_ret |= ret;
+
+	/* Remember the single pass size as the bound for the overflow tests */
+	z_compressed_size = z_size;
+	z_size = z_size_max;
+	create_rand_repeat_data(z_buf, z_size_max);
+
+	ret = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type);
+
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+	if (ret) {
+#ifdef VERBOSE
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+#endif
+		printf("Failed on compress multi pass\n");
+		print_error(ret);
+	}
+
+	fin_ret |= ret;
+
+	ret = 0;
+
+	/* Test random overflow case */
+	if (flush_type == SYNC_FLUSH && z_compressed_size > in_size)
+		z_compressed_size = in_size + 1;
+
+	z_size = rand() % z_compressed_size;
+	create_rand_repeat_data(z_buf, z_size_max);
+
+	overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type);
+
+	if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+		if (overflow == 0)
+			ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+		/* Rarely single pass overflow will compresses data
+		 * better than the initial run. This is to stop that
+		 * case from erroring.
+		 */
+		if (overflow != 0 || ret != 0) {
+#ifdef VERBOSE
+			printf("overflow error = %d\n", overflow);
+			print_error(overflow);
+			printf("inflate ret = %d\n", ret);
+			print_error(ret);
+
+			printf("Compressed array: ");
+			print_uint8_t(z_buf, z_size);
+			printf("\n");
+			printf("Data: ");
+			print_uint8_t(in_buf, in_size);
+#endif
+			printf("Failed on compress single pass overflow\n");
+			print_error(ret);
+			ret = OVERFLOW_TEST_ERROR;
+		}
+	}
+
+	fin_ret |= ret;
+
+	if (flush_type == NO_FLUSH) {
+		create_rand_repeat_data(z_buf, z_size_max);
+
+		overflow = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type);
+
+		if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+			if (overflow == 0)
+				ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+			/* Rarely multi pass overflow will compresses data
+			 * better than the initial run. This is to stop that
+			 * case from erroring */
+			if (overflow != 0 || ret != 0) {
+#ifdef VERBOSE
+				printf("overflow error = %d\n", overflow);
+				print_error(overflow);
+				printf("inflate ret = %d\n", ret);
+				print_error(ret);
+
+				printf("Compressed array: ");
+				print_uint8_t(z_buf, z_size);
+				printf("\n");
+				printf("Data: ");
+				print_uint8_t(in_buf, in_size);
+#endif
+				printf("Failed on compress multi pass overflow\n");
+				print_error(ret);
+				ret = OVERFLOW_TEST_ERROR;
+			}
+		}
+		fin_ret |= ret;
+	}
+
+	free(z_buf);
+
+	return fin_ret;
+}
+
+/* Test swapping flush types in the middle of compression
+ *
+ * First passes a flush value of 3 or more, which must make
+ * compress_single_pass return COMPRESS_GENERAL_ERROR, then compresses with
+ * compress_swap_flush and inflates the result.
+ * Returns the bitwise OR of both result codes (0 on success), or
+ * MALLOC_FAILED.
+ */
+int test_flush(uint8_t * in_buf, uint32_t in_size)
+{
+	int fin_ret = IGZIP_COMP_OK, ret;
+	uint32_t z_size, flush_type = 0;
+	uint8_t *z_buf = NULL;
+
+	z_size = 2 * in_size + 2 * (hdr_bytes + trl_bytes) + 8;
+
+	z_buf = malloc(z_size);
+
+	if (z_buf == NULL)
+		return MALLOC_FAILED;
+
+	create_rand_repeat_data(z_buf, z_size);
+
+	/* Draw until the value is outside the flush types 0..2 */
+	while (flush_type < 3)
+		flush_type = rand();
+
+	/* Test invalid flush */
+	ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type);
+
+	if (ret == COMPRESS_GENERAL_ERROR)
+		ret = 0;
+	else {
+		printf("Failed when passing invalid flush parameter\n");
+		ret = INVALID_FLUSH_ERROR;
+	}
+
+	fin_ret |= ret;
+	print_error(ret);
+
+	create_rand_repeat_data(z_buf, z_size);
+
+	/* Test swapping flush type */
+	ret = compress_swap_flush(in_buf, in_size, z_buf, &z_size, rand() % 3);
+
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+	if (ret) {
+#ifdef VERBOSE
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+#endif
+		printf("Failed on swapping flush type\n");
+		print_error(ret);
+	}
+
+	fin_ret |= ret;
+	print_error(ret);
+
+	/* The output buffer was never released before */
+	free(z_buf);
+
+	return fin_ret;
+}
+
+/* Test there are no length distance pairs across full flushes
+ * Returns 0 on success, MALLOC_FAILED, or the failing compress/inflate code. */
+int test_full_flush(uint8_t * in_buf, uint32_t in_size)
+{
+	int ret = IGZIP_COMP_OK;
+	uint32_t z_size;
+	uint8_t *z_buf = NULL;
+
+	z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + trl_bytes + 5);
+
+	z_buf = malloc(z_size);
+	if (z_buf == NULL) {
+		print_error(MALLOC_FAILED);
+		return MALLOC_FAILED;
+	}
+
+	create_rand_repeat_data(z_buf, z_size);
+
+	ret = compress_full_flush(in_buf, in_size, z_buf, &z_size);
+
+	if (!ret)
+		ret = inflate_check(z_buf, z_size, in_buf, in_size);
+
+	if (ret) {
+#ifdef VERBOSE
+		printf("Compressed array: ");
+		print_uint8_t(z_buf, z_size);
+		printf("\n");
+		printf("Data: ");
+		print_uint8_t(in_buf, in_size);
+#endif
+		printf("Failed on compress full flush\n");
+		print_error(ret);
+	}
+
+	free(z_buf);
+
+	return ret;
+}
+
+/* Return the size of the open file f as reported by ftell at end of file;
+ * the current file position is restored before returning. */
+int get_filesize(FILE * f)
+{
+	int curr, end;
+
+	curr = ftell(f);	/* Save current position */
+	fseek(f, 0L, SEEK_END);
+	end = ftell(f);
+	fseek(f, curr, SEEK_SET);	/* Restore position */
+	return end;
+}
+
+/* Run multiple compression tests on data stored in a file */
+int test_compress_file(char *file_name)
+{
+	int ret = IGZIP_COMP_OK;
+	uint32_t in_size;
+	uint8_t *in_buf = NULL;
+	FILE *in_file = NULL;
+
+	in_file = fopen(file_name, "rb");
+	if (!in_file)
+		return FILE_READ_FAILED;
+
+	in_size
= get_filesize(in_file); + if (in_size != 0) { + in_buf = malloc(in_size); + if (in_buf == NULL) + return MALLOC_FAILED; + fread(in_buf, 1, in_size, in_file); + } + + ret |= test_compress_stateless(in_buf, in_size); + ret |= test_compress(in_buf, in_size, NO_FLUSH); + ret |= test_compress(in_buf, in_size, SYNC_FLUSH); + ret |= test_compress(in_buf, in_size, FULL_FLUSH); + ret |= test_flush(in_buf, in_size); + + if (ret) + printf("Failed on file %s\n", file_name); + + if (in_buf != NULL) + free(in_buf); + + return ret; +} + +int create_custom_hufftables(struct isal_hufftables *hufftables_custom, int argc, char *argv[]) +{ + long int file_length; + uint8_t *stream = NULL; + struct isal_huff_histogram histogram; + FILE *file; + + memset(&histogram, 0, sizeof(histogram)); + + while (argc > 1) { + printf("Processing %s\n", argv[argc - 1]); + file = fopen(argv[argc - 1], "r"); + if (file == NULL) { + printf("Error opening file\n"); + return 1; + } + fseek(file, 0, SEEK_END); + file_length = ftell(file); + fseek(file, 0, SEEK_SET); + file_length -= ftell(file); + + if (file_length > 0) { + stream = malloc(file_length); + if (stream == NULL) { + printf("Failed to allocate memory to read in file\n"); + fclose(file); + return 1; + } + } + + fread(stream, 1, file_length, file); + + if (ferror(file)) { + printf("Error occurred when reading file"); + fclose(file); + free(stream); + return 1; + } + + /* Create a histogram of frequency of symbols found in stream to + * generate the huffman tree.*/ + isal_update_histogram(stream, file_length, &histogram); + + fclose(file); + free(stream); + argc--; + } + + return isal_create_hufftables(hufftables_custom, &histogram); + +} + +int main(int argc, char *argv[]) +{ + int i = 0, ret = 0, fin_ret = 0; + uint32_t in_size = 0, offset = 0; + uint8_t *in_buf; + struct isal_hufftables hufftables_custom; + +#ifndef VERBOSE + setbuf(stdout, NULL); +#endif + + printf("Window Size: %d K\n", HIST_SIZE); + printf("Test Seed : %d\n", TEST_SEED); + 
printf("Randoms : %d\n", RANDOMS); + srand(TEST_SEED); + + if (argc > 1) { + ret = create_custom_hufftables(&hufftables_custom, argc, argv); + if (ret == 0) + hufftables = &hufftables_custom; + else { + printf("Failed to generate custom hufftable"); + return -1; + } + } + + in_buf = malloc(IBUF_SIZE); + + if (in_buf == NULL) { + fprintf(stderr, "Can't allocate in_buf memory\n"); + return -1; + } + + /* Zero the buffer only once the allocation is known to have succeeded */ + memset(in_buf, 0, IBUF_SIZE); + + if (argc > 1) { + printf("igzip_rand_test files: "); + + for (i = 1; i < argc; i++) { + ret |= test_compress_file(argv[i]); + if (ret) + return ret; + } + + printf("................"); + printf("%s\n", ret ? "Fail" : "Pass"); + fin_ret |= ret; + } + + printf("igzip_rand_test stateless: "); + + ret = test_compress_stateless((uint8_t *) str1, sizeof(str1)); + if (ret) + return ret; + + ret |= test_compress_stateless((uint8_t *) str2, sizeof(str2)); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress_stateless(in_buf, in_size); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + + if (ret) + return ret; + } + + for (i = 0; i < RANDOMS / 16; i++) { + create_rand_repeat_data(in_buf, PAGE_SIZE); + ret |= test_compress_stateless(in_buf, PAGE_SIZE); // good for efence + } + + fin_ret |= ret; + + printf("%s\n", ret ?
"Fail" : "Pass"); + + printf("igzip_rand_test NO_FLUSH: "); + + ret = test_compress((uint8_t *) str1, sizeof(str1), NO_FLUSH); + if (ret) + return ret; + + ret |= test_compress((uint8_t *) str2, sizeof(str2), NO_FLUSH); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress(in_buf, in_size, NO_FLUSH); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? "Fail" : "Pass"); + + printf("igzip_rand_test SYNC_FLUSH: "); + + ret = test_compress((uint8_t *) str1, sizeof(str1), SYNC_FLUSH); + if (ret) + return ret; + + ret |= test_compress((uint8_t *) str2, sizeof(str2), SYNC_FLUSH); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress(in_buf, in_size, SYNC_FLUSH); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? 
"Fail" : "Pass"); + + printf("igzip_rand_test FULL_FLUSH: "); + + ret = test_compress((uint8_t *) str1, sizeof(str1), FULL_FLUSH); + if (ret) + return ret; + + ret |= test_compress((uint8_t *) str2, sizeof(str2), FULL_FLUSH); + if (ret) + return ret; + + for (i = 0; i < RANDOMS; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_compress(in_buf, in_size, FULL_FLUSH); + + in_buf -= offset; + + if (i % (RANDOMS / 16) == 0) + printf("."); + if (ret) + return ret; + } + +#ifdef DEFLATE + for (i = 0; i < RANDOMS / 8; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_full_flush(in_buf, in_size); + + in_buf -= offset; + + if (ret) + return ret; + } +#endif + + fin_ret |= ret; + + printf("%s\n", ret ? "Fail" : "Pass"); + + printf("igzip_rand_test Change Flush: "); + + ret = test_flush((uint8_t *) str1, sizeof(str1)); + if (ret) + return ret; + + ret |= test_flush((uint8_t *) str2, sizeof(str2)); + if (ret) + return ret; + + for (i = 0; i < RANDOMS / 4; i++) { + in_size = rand() % (IBUF_SIZE + 1); + offset = rand() % (IBUF_SIZE + 1 - in_size); + in_buf += offset; + + create_rand_repeat_data(in_buf, in_size); + + ret |= test_flush(in_buf, in_size); + + in_buf -= offset; + + if (i % ((RANDOMS / 4) / 16) == 0) + printf("."); + if (ret) + return ret; + } + + fin_ret |= ret; + + printf("%s\n", ret ? "Fail" : "Pass"); + + printf("igzip rand test finished: %s\n", + fin_ret ? 
"Some tests failed" : "All tests passed"); + + return fin_ret != IGZIP_COMP_OK; +} diff --git a/igzip/igzip_stateless.asm b/igzip/igzip_stateless.asm new file mode 100644 index 0000000..5946145 --- /dev/null +++ b/igzip/igzip_stateless.asm @@ -0,0 +1,644 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%include "options.asm" + +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "bitbuf2.asm" +%include "huffman.asm" +%include "igzip_compare_types.asm" +%include "reg_sizes.asm" + +%include "stdmac.asm" + +%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds +%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary + +%ifdef DEBUG +%macro MARK 1 +global %1 +%1: +%endm +%else +%macro MARK 1 +%endm +%endif + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define tmp2 rcx +%define hash2 rcx + +%define curr_data rax +%define code rax +%define tmp5 rax + +%define tmp4 rbx +%define dist rbx +%define code2 rbx + +%define hash rdx +%define len rdx +%define code_len3 rdx + +%define tmp1 rsi +%define code_len2 rsi + +%define file_start rdi + +%define m_bit_count rbp + +%define curr_data2 r8 +%define len2 r8 +%define tmp6 r8 + +%define m_bits r9 + +%define f_i r10 + +%define m_out_buf r11 + +%define f_end_i r12 +%define dist2 r12 +%define tmp7 r12 +%define code4 r12 + +%define tmp3 r13 +%define code3 r13 + +%define stream r14 + +%define hufftables r15 + +;; GPR r8 & r15 can be used + +%define xtmp0 xmm0 ; tmp +%define xtmp1 xmm1 ; tmp + +%define ytmp0 ymm0 ; tmp +%define ytmp1 ymm1 ; tmp + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +blen_mem_offset equ 0 ; local variable (8 bytes) +f_end_i_mem_offset equ 8 +gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes) +xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) +stack_size equ 2*8 + 8*8 + 4*16 + 8 
+;;; 8 because stack address is odd multiple of 8 after a function call and +;;; we want it aligned to 16 bytes + +; void isal_deflate_body_stateless ( isal_zstream *stream ) +; arg 1: rcx: addr of stream +global isal_deflate_body_stateless_ %+ ARCH +isal_deflate_body_stateless_ %+ ARCH %+ : +%ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi +%endif + + ;; do nothing if (avail_in == 0) + cmp dword [rcx + _avail_in], 0 + jne skip1 + ret +skip1: + +%ifdef ALIGN_STACK + push rbp + mov rbp, rsp + sub rsp, stack_size + and rsp, ~15 +%else + sub rsp, stack_size +%endif + + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rsi + mov [rsp + gpr_save_mem_offset + 2*8], rdi + mov [rsp + gpr_save_mem_offset + 3*8], rbp + mov [rsp + gpr_save_mem_offset + 4*8], r12 + mov [rsp + gpr_save_mem_offset + 5*8], r13 + mov [rsp + gpr_save_mem_offset + 6*8], r14 + mov [rsp + gpr_save_mem_offset + 7*8], r15 + + mov stream, rcx + mov dword [stream + _internal_state_has_eob], 0 + + ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); + mov m_out_buf, [stream + _next_out] + mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf + mov tmp1 %+ d, [stream + _avail_out] + add tmp1, m_out_buf + sub tmp1, SLOP + +skip_SLOP: + mov [stream + _internal_state_bitbuf_m_out_end], tmp1 + + mov m_bits, [stream + _internal_state_bitbuf_m_bits] + mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count] + mov hufftables, [stream + _hufftables] + ; state->b_bytes_valid = stream->avail_in; + mov f_end_i %+ d, [stream + _avail_in] + mov [stream + _internal_state_b_bytes_valid], f_end_i %+ d + + mov f_i, 0 + mov file_start, [stream + _next_in] + mov [stream + _internal_state_file_start], file_start + + ; f_end_i -= LA; + sub f_end_i, LA_STATELESS + mov [rsp + f_end_i_mem_offset], f_end_i + ; if (f_end_i <= 0) continue; + cmp f_end_i, 0 + jle end_loop_2 + + ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { +MARK __stateless_compute_hash_ %+ ARCH + mov 
curr_data %+ d, [file_start + f_i] + + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja end + + ;; Encode first byte in the stream as a literal + compute_hash hash, curr_data + and hash %+ d, HASH_MASK + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + jmp write_lit_bits + + align 16 + +loop2: + shr curr_data2, 8 + xor hash2 %+ d, hash2 %+ d + crc32 hash2 %+ d, curr_data2 %+ d + + ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + and hash %+ d, HASH_MASK + and hash2 %+ d, HASH_MASK + + ; if (state->bitbuf.is_full()) { + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja end + + xor dist, dist + xor dist2, dist2 + xor tmp3, tmp3 + + lea tmp1, [file_start + f_i] + lea tmp6, [tmp1 - 1] + + mov dist %+ w, f_i %+ w + sub dist %+ w, word [stream + _internal_state_head + 2 * hash] + + ; state->head[hash] = (uint16_t) f_i; + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + inc f_i + + mov dist2 %+ w, f_i %+ w + sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2] + dec dist2 + + ; state->head[hash2] = (uint16_t) f_i; + mov [stream + _internal_state_head + 2 * hash2], f_i %+ w + + mov tmp2, tmp1 + sub tmp2, dist + dec dist + + ; if ((dist-1) < (D-1)) { + cmp dist %+ d, (D-1) + cmovae tmp2, tmp6 + cmovae dist, tmp3 + inc dist + + cmp dist2 %+ d, (D-1) + cmovae dist2, tmp3 + inc dist2 + +MARK __stateless_compare_ %+ ARCH + ; len = compare258(state->file_start + f_i, + ; state->file_start + f_i - dist); + + ;; Specutively load distance code (except for when large windows are used) + get_packed_dist_code dist, code2, hufftables + + ;; Check for long len/dist match (>7) with first literal + mov len, [tmp1] + xor len, [tmp2] + jz compare_loop + +%ifdef USE_HSWNI + blsmsk tmp3, len + or tmp3, 0xFFFFFF +%endif + + lea tmp1, [file_start + f_i] + mov tmp2, tmp1 + sub tmp2, dist2 + + ;; Specutively load distance code (except for when 
large windows are used) + get_packed_dist_code dist2, code4, hufftables + + ;; Check for len/dist match (>7) with second literal + mov len2, [tmp1] + xor len2, [tmp2] + jz compare_loop2 + +%ifdef USE_HSWNI + ;; Check for len/dist match for first literal + test tmp3, len2 + jz len_dist_lit_huffman_pre + + cmp tmp3, 0xFFFFFF + je encode_2_literals + jmp len_dist_huffman_pre + + +MARK __stateless_len_dist_lit_huffman_ %+ ARCH +len_dist_lit_huffman_pre: + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, code_len3, hufftables +%else + ;; Specutively load the code for the first literal + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, rcx, hufftables + + ;; Check for len/dist match for first literal + test len, 0xFFFFFF + jz len_dist_huffman_pre + + ;; Specutively load the code for the second literal + shr curr_data, 8 + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + + shl code2, cl + or code2, code3 + add code_len2, rcx + + ;; Check for len/dist match for second literal + test len2, 0xFFFFFF + jnz write_lit_bits + +MARK __stateless_len_dist_lit_huffman_ %+ ARCH +len_dist_lit_huffman_pre: + mov code_len3, rcx +%endif + bsf len2, len2 + shr len2, 3 + + +len_dist_lit_huffman: +%ifndef LONGER_HUFFTABLE + mov tmp4, dist2 + get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx +%else + unpack_dist_code code4, code_len2 +%endif + get_len_code len2, code, rcx, hufftables ;; rcx is code_len + +%ifdef USE_HSWNI + shlx code4, code4, rcx +%else + shl code4, cl +%endif + or code4, code + add code_len2, rcx + + mov rcx, code_len3 + +%ifdef USE_HSWNI + shlx code4, code4, rcx +%else + shl code4, cl +%endif + or code4, code3 + add code_len2, rcx + + mov code2, code4 + ;; Setup for updating hash + lea tmp3, [f_i + 1] ; tmp3 <= k + add f_i, len2 + + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + mov tmp5 %+ d, [file_start + tmp3] + mov tmp7, tmp5 + shr tmp7, 8 + + compute_hash hash, tmp5 + and hash %+ d, HASH_MASK + 
+ ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + + add tmp3,1 + + jmp update_hash_for_symbol + ;; encode as dist/len + +MARK __stateless_len_dist_huffman_ %+ ARCH +len_dist_huffman_pre: + bsf len, len + shr len, 3 + +len_dist_huffman: + dec f_i + + ; get_dist_code(dist, &code2, &code_len2); +%ifndef LONGER_HUFFTABLE + mov tmp3, dist ; since code2 and dist are rbx + get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx +%else + unpack_dist_code code2, code_len2 +%endif + ; get_len_code(len, &code, &code_len); + get_len_code len, code, rcx, hufftables ;; rcx is code_len + + ; code2 <<= code_len + ; code2 |= code + ; code_len2 += code_len +%ifdef USE_HSWNI + shlx code2, code2, rcx +%else + shl code2, cl +%endif + or code2, code + add code_len2, rcx + + ;; Setup for updateing hash + lea tmp3, [f_i + 2] ; tmp3 <= k + add f_i, len + mov tmp7 %+ d, [file_start + tmp3] + +MARK __stateless_update_hash_for_symbol_ %+ ARCH +update_hash_for_symbol: + mov curr_data %+ d, [file_start + f_i] + mov curr_data2, curr_data + compute_hash hash, curr_data +%ifdef LIMIT_HASH_UPDATE + ; only update hash twice, first hash was already calculated. 
+ + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + compute_hash hash2, tmp7 + and hash2 %+ d, HASH_MASK + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + +%else +loop3: + ; hash = compute_hash(state->file_start + k) & HASH_MASK; + mov tmp7 %+ d, [file_start + tmp3] + compute_hash hash2, tmp7 + and hash2 %+ d, HASH_MASK + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + add tmp3,1 + cmp tmp3, f_i + jl loop3 +%endif + + +MARK __stateless_write_len_dist_bits_ %+ ARCH + mov f_end_i, [rsp + f_end_i_mem_offset] + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ; continue + cmp f_i, f_end_i + jl loop2 + jmp end_loop_2 + + +MARK __stateless_write_lit_bits_ %+ ARCH +%ifdef USE_HSWNI +encode_2_literals: + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, rcx, hufftables + + shr curr_data, 8 + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + + ;; Calculate code associated with both literals + shlx code2, code2, rcx + or code2, code3 + add code_len2, rcx +%endif +write_lit_bits: + mov f_end_i, [rsp + f_end_i_mem_offset] + add f_i, 1 + mov curr_data %+ d, [file_start + f_i] + mov curr_data2, curr_data + + compute_hash hash, curr_data + + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ; continue + cmp f_i, f_end_i + jl loop2 + +MARK __stateless_end_loops_ %+ ARCH +end_loop_2: + ;; Handle the last bytes (at most LA_statless bytes) + add f_end_i, LA_STATELESS - LAST_BYTES_COUNT + cmp f_i, f_end_i + jge end_loop_2_finish + +loop2_finish: + ;; Check for space in out buffer + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja end + + mov curr_data %+ d, [file_start + f_i] + compute_hash hash, curr_data + and hash %+ d, HASH_MASK + + ;; Calculate possible distance for length/dist pair. 
+ xor dist, dist + mov dist %+ w, f_i %+ w + sub dist %+ w, word [stream + _internal_state_head + 2 * hash] + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + ;; Check if look back distance is valid (the dec is to handle when dist = 0) + dec dist + cmp dist %+ d, (D-1) + jae encode_literal_finish + inc dist + + ;; Check if look back distance is a match + lea tmp6, [f_end_i + LAST_BYTES_COUNT] + sub tmp6, f_i + lea tmp1, [file_start + f_i] + mov tmp2, tmp1 + sub tmp2, dist + compare tmp6, tmp1, tmp2, len, tmp3 + + ;; Limit len to maximum value of 258 + mov tmp2, 258 + cmp len, 258 + cmova len, tmp2 + cmp len, SHORTEST_MATCH + jb encode_literal_finish + + ;; Encode len/dist pair +%ifndef LONGER_HUFFTABLE + mov tmp3, dist + get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx +%else + get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx +%endif + get_len_code len, code, rcx, hufftables ;; rcx is code_len + + ;; Combine length and distance code for writing it out +%ifdef USE_HSWNI + shlx code2, code2, rcx +%else + shl code2, cl +%endif + or code2, code + add code_len2, rcx + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ;; Setup for next loop + add f_i, len + cmp f_i, f_end_i + jl loop2_finish + jmp end_loop_2_finish + +encode_literal_finish: + ;; Encode literal + and curr_data %+ d, 0xFF + get_lit_code curr_data, code2, code_len2, hufftables + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + ;; Setup for next loop + add f_i, 1 + cmp f_i, f_end_i + jl loop2_finish +end_loop_2_finish: + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja end + + ;; Check if any bytes left (at most LAST_BYTES_COUNT bytes) + add f_end_i, LAST_BYTES_COUNT + cmp f_i, f_end_i + jz write_eob + + ;; Handle encoding last few bytes by encoding them as literals + xor curr_data, curr_data +final_bytes: + movzx curr_data, byte [file_start + f_i] + get_lit_code curr_data, code2, code_len2, 
hufftables + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja end + + inc f_i + cmp f_i, f_end_i + jl final_bytes + +write_eob: + ;; Write out end of block + get_lit_code 256, code2, code_len2, hufftables + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + mov dword [stream + _internal_state_has_eob], 1 + +end: + ;; update input buffer + add [stream + _total_in], f_i %+ d + add [stream + _next_in], f_i + sub [stream + _avail_in], f_i %+ d + + ;; update output buffer + mov [stream + _next_out], m_out_buf + sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] + sub [stream + _avail_out], m_out_buf %+ d + add [stream + _total_out], m_out_buf %+ d + + mov [stream + _internal_state_bitbuf_m_bits], m_bits + mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d + + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rsi, [rsp + gpr_save_mem_offset + 1*8] + mov rdi, [rsp + gpr_save_mem_offset + 2*8] + mov rbp, [rsp + gpr_save_mem_offset + 3*8] + mov r12, [rsp + gpr_save_mem_offset + 4*8] + mov r13, [rsp + gpr_save_mem_offset + 5*8] + mov r14, [rsp + gpr_save_mem_offset + 6*8] + mov r15, [rsp + gpr_save_mem_offset + 7*8] + +%ifndef ALIGN_STACK + add rsp, stack_size +%else + mov rsp, rbp + pop rbp +%endif + ret + +MARK __stateless_compare_loops_ %+ ARCH +compare_loop: +%if (COMPARE_TYPE == 1) + compare250 tmp1, tmp2, len, tmp3 +%elif (COMPARE_TYPE == 2) + compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1 +%elif (COMPARE_TYPE == 3) + compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1 +%else + %error Unknown Compare type COMPARE_TYPE + % error +%endif + jmp len_dist_huffman + +compare_loop2: +%if (COMPARE_TYPE == 1) + compare250 tmp1, tmp2, len2, tmp3 +%elif (COMPARE_TYPE == 2) + compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1 +%elif (COMPARE_TYPE == 3) + compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1 +%else +%error Unknown Compare type COMPARE_TYPE + % 
error +%endif + and curr_data, 0xff + get_lit_code curr_data, code3, code_len3, hufftables + jmp len_dist_lit_huffman + +section .data + align 4 +const_D: dq D diff --git a/igzip/igzip_stateless_01.asm b/igzip/igzip_stateless_01.asm new file mode 100644 index 0000000..83ed1ba --- /dev/null +++ b/igzip/igzip_stateless_01.asm @@ -0,0 +1,7 @@ +%define ARCH 01 + +%ifndef COMPARE_TYPE +%define COMPARE_TYPE 1 +%endif + +%include "igzip_stateless.asm" diff --git a/igzip/igzip_stateless_04.asm b/igzip/igzip_stateless_04.asm new file mode 100644 index 0000000..39d8ef5 --- /dev/null +++ b/igzip/igzip_stateless_04.asm @@ -0,0 +1,8 @@ +%define ARCH 04 +%define USE_HSWNI + +%ifndef COMPARE_TYPE +%define COMPARE_TYPE 3 +%endif + +%include "igzip_stateless.asm" diff --git a/igzip/igzip_stateless_base.c b/igzip/igzip_stateless_base.c new file mode 100644 index 0000000..63fa578 --- /dev/null +++ b/igzip/igzip_stateless_base.c @@ -0,0 +1,151 @@ +#include +#include "igzip_lib.h" +#include "huffman.h" +#include "huff_codes.h" +#include "bitbuf2.h" + +static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state, + uint8_t * end_in, uint8_t * start_in) +{ + uint32_t count; + stream->avail_in = end_in - stream->next_in; + stream->total_in += stream->next_in - start_in; + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + +} + +void isal_deflate_body_stateless_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *start_in, *end_in, *end, *next_hash; + uint16_t match_length; + uint32_t dist; + uint64_t code, code_len, code2, code_len2, i; + struct isal_zstate *state = &stream->internal_state; + uint16_t *last_seen = state->head; + + if (stream->avail_in == 0) + return; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + start_in = stream->next_in; + end_in = stream->next_in + stream->avail_in; + + while (stream->next_in < end_in 
- 3) { + if (is_full(&state->bitbuf)) { + update_state(stream, state, end_in, start_in); + return; + } + + literal = *(uint32_t *) stream->next_in; + hash = compute_hash(literal) & HASH_MASK; + dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) stream->next_in; + + if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) { /* The -1 are to handle the case when dist = 0 */ + match_length = + compare258(stream->next_in - dist, stream->next_in, + end_in - stream->next_in); + + if (match_length >= SHORTEST_MATCH) { + next_hash = stream->next_in; +#ifdef LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + if (end > end_in - 3) + end = end_in - 3; + next_hash++; + for (; next_hash < end; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH_MASK; + last_seen[hash] = (uint64_t) next_hash; + } + + get_len_code(stream->hufftables, match_length, &code, + &code_len); + get_dist_code(stream->hufftables, dist, &code2, &code_len2); + + code |= code2 << code_len; + code_len += code_len2; + + write_bits(&state->bitbuf, code, code_len); + + stream->next_in += match_length; + + continue; + } + } + + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + stream->next_in++; + } + + if (is_full(&state->bitbuf)) { + update_state(stream, state, end_in, start_in); + return; + } + + /* Assemble the 0-3 leftover bytes individually (low byte first); a 4-byte load at end_in - 4 would read before the input buffer when avail_in < 4 */ + literal = 0; + for (i = 0; stream->next_in + i < end_in; i++) + literal |= (uint32_t) stream->next_in[i] << (8 * i); + + hash = compute_hash(literal) & HASH_MASK; + dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF; + + if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) { + match_length = + compare258(stream->next_in - dist, stream->next_in, + end_in - stream->next_in); + if (match_length >= SHORTEST_MATCH) { + get_len_code(stream->hufftables, match_length, &code, &code_len); +
get_dist_code(stream->hufftables, dist, &code2, &code_len2); + code |= code2 << code_len; + code_len += code_len2; + write_bits(&state->bitbuf, code, code_len); + stream->next_in += 3; + + if (is_full(&state->bitbuf)) { + update_state(stream, state, end_in, start_in); + return; + } + + get_lit_code(stream->hufftables, 256, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + + if (is_full(&state->bitbuf)) { + update_state(stream, state, end_in, start_in); + return; + } + + state->has_eob = 1; + update_state(stream, state, end_in, start_in); + return; + } + } + + while (stream->next_in < end_in) { + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + stream->next_in++; + + if (is_full(&state->bitbuf)) { + update_state(stream, state, end_in, start_in); + return; + } + literal >>= 8; + } + + get_lit_code(stream->hufftables, 256, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + + state->has_eob = 1; + update_state(stream, state, end_in, start_in); + return; +} diff --git a/igzip/igzip_stateless_file_perf.c b/igzip/igzip_stateless_file_perf.c new file mode 100644 index 0000000..d82615a --- /dev/null +++ b/igzip/igzip_stateless_file_perf.c @@ -0,0 +1,155 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define BUF_SIZE 1024 +#define MIN_TEST_LOOPS 10 +#ifndef RUN_MEM_SIZE +# define RUN_MEM_SIZE 5000000000 +#endif + +struct isal_zstream stream; + +int get_filesize(FILE * f) +{ + int curr, end; + + curr = ftell(f); /* Save current position */ + fseek(f, 0L, SEEK_END); + end = ftell(f); + fseek(f, curr, SEEK_SET); /* Restore position */ + return end; +} + +int main(int argc, char *argv[]) +{ + FILE *in, *out = NULL; + unsigned char *inbuf, *outbuf; + int i, infile_size, iterations, outbuf_size; + + if (argc > 3 || argc < 2) { + fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n" + "\t - Runs multiple iterations of igzip on a file to " + "get more accurate time results.\n"); + exit(0); + } + in = fopen(argv[1], "rb"); + if (!in) { + fprintf(stderr, "Can't open %s for reading\n", argv[1]); + exit(0); + } + if (argc > 2) { + out = fopen(argv[2], "wb"); + if 
(!out) { + fprintf(stderr, "Can't open %s for writing\n", argv[2]); + exit(0); + } + printf("outfile=%s\n", argv[2]); + } + printf("Window Size: %d K\n", HIST_SIZE); + printf("igzip_file_perf: \n"); + fflush(0); + /* Allocate space for entire input file and output + * (assuming some possible expansion on output size) + */ + infile_size = get_filesize(in); + + if (infile_size != 0) { + outbuf_size = infile_size * 1.07; + iterations = RUN_MEM_SIZE / infile_size; + } else { + outbuf_size = BUF_SIZE; + iterations = MIN_TEST_LOOPS; + } + if (iterations < MIN_TEST_LOOPS) + iterations = MIN_TEST_LOOPS; + + inbuf = malloc(infile_size); + if (inbuf == NULL) { + fprintf(stderr, "Can't allocate input buffer memory\n"); + exit(0); + } + outbuf = malloc(outbuf_size); + if (outbuf == NULL) { + fprintf(stderr, "Can't allocate output buffer memory\n"); + exit(0); + } + + printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations); + /* Read complete input file into buffer */ + stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in); + if (stream.avail_in != infile_size) { + fprintf(stderr, "Couldn't fit all of input file into buffer\n"); + exit(0); + } + + struct perf start, stop; + perf_start(&start); + + for (i = 0; i < iterations; i++) { + isal_deflate_init(&stream); + stream.end_of_stream = 1; /* Do the entire file at once */ + stream.flush = NO_FLUSH; + stream.next_in = inbuf; + stream.avail_in = infile_size; + stream.next_out = outbuf; + stream.avail_out = outbuf_size; + isal_deflate_stateless(&stream); + if (stream.avail_in != 0) + break; + } + perf_stop(&stop); + + if (stream.avail_in != 0) { + fprintf(stderr, "Could not compress all of inbuf\n"); + exit(0); + } + + printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1], + infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size); + + printf("igzip_file: "); + perf_print(stop, start, (long long)infile_size * i); + + if (argc > 2 && out) { + printf("writing %s\n", argv[2]); 
+ fwrite(outbuf, 1, stream.total_out, out); + fclose(out); + } + + fclose(in); + printf("End of igzip_file_perf\n\n"); + fflush(0); + return 0; +} diff --git a/igzip/igzip_sync_flush_example.c b/igzip/igzip_sync_flush_example.c new file mode 100644 index 0000000..a020c85 --- /dev/null +++ b/igzip/igzip_sync_flush_example.c @@ -0,0 +1,86 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" + +#define BUF_SIZE 8 * 1024 + +struct isal_zstream stream; + +int main(int argc, char *argv[]) +{ + uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE]; + FILE *in, *out; + + if (argc != 3) { + fprintf(stderr, "Usage: igzip_sync_flush_example infile outfile\n"); + exit(0); + } + in = fopen(argv[1], "rb"); + if (!in) { + fprintf(stderr, "Can't open %s for reading\n", argv[1]); + exit(0); + } + out = fopen(argv[2], "wb"); + if (!out) { + fprintf(stderr, "Can't open %s for writing\n", argv[2]); + exit(0); + } + + printf("igzip_sync_flush_example\nWindow Size: %d K\n", HIST_SIZE); + fflush(0); + + isal_deflate_init(&stream); + stream.end_of_stream = 0; + stream.flush = SYNC_FLUSH; + + do { + if (stream.internal_state.state == ZSTATE_NEW_HDR) { + stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in); + stream.end_of_stream = feof(in); + stream.next_in = inbuf; + } + do { + stream.avail_out = BUF_SIZE; + stream.next_out = outbuf; + isal_deflate(&stream); + fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out); + } while (stream.avail_out == 0); + + } while (stream.internal_state.state != ZSTATE_END); + + fclose(out); + fclose(in); + + printf("End of igzip_sync_flush_example\n\n"); + return 0; +} diff --git a/igzip/igzip_sync_flush_file_perf.c b/igzip/igzip_sync_flush_file_perf.c new file mode 100644 index 0000000..4e256c3 --- /dev/null +++ b/igzip/igzip_sync_flush_file_perf.c @@ -0,0 +1,163 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define BUF_SIZE 1024 +#define MIN_TEST_LOOPS 100 +#ifndef RUN_MEM_SIZE +# define RUN_MEM_SIZE 500000000 +#endif + +struct isal_zstream stream; + +int get_filesize(FILE * f) +{ + int curr, end; + + curr = ftell(f); /* Save current position */ + fseek(f, 0L, SEEK_END); + end = ftell(f); + fseek(f, curr, SEEK_SET); /* Restore position */ + return end; +} + +int main(int argc, char *argv[]) +{ + FILE *in, *out = NULL; + unsigned char *inbuf, *outbuf; + int i, infile_size, iterations, outbuf_size; + + if (argc > 3 || argc < 2) { + fprintf(stderr, "Usage: igzip_sync_flush_file_perf infile [outfile]\n" + "\t - Runs multiple iterations of igzip on a file to get more accurate time results.\n"); + exit(0); + } + in = fopen(argv[1], "rb"); + if (!in) { + fprintf(stderr, "Can't open %s for reading\n", argv[1]); + exit(0); + } + if (argc > 2) { + out = fopen(argv[2], "wb"); + if (!out) { + fprintf(stderr, "Can't open %s for writing\n", argv[2]); + exit(0); + } + printf("outfile=%s\n", argv[2]); + } + printf("Window Size: %d K\n", HIST_SIZE); + printf("igzip_sync_flush_file_perf: \n"); + fflush(0); + /* Allocate space for entire input file and + * output (assuming 1:1 max output size) + */ + infile_size = get_filesize(in); + + if (infile_size != 0) { + outbuf_size = infile_size; + iterations = RUN_MEM_SIZE / infile_size; + } else { + outbuf_size = BUF_SIZE; + iterations = MIN_TEST_LOOPS; + } + if (iterations < MIN_TEST_LOOPS) + iterations = MIN_TEST_LOOPS; + + inbuf = malloc(infile_size); + if (inbuf == NULL) { + fprintf(stderr, "Can't allocate input buffer memory\n"); + exit(0); + } + outbuf = malloc(outbuf_size); + if (outbuf == NULL) { + fprintf(stderr, "Can't allocate output buffer memory\n"); + exit(0); + } + + printf("igzip_sync_flush_file_perf: %s %d iterations\n", argv[1], iterations); + /* Read complete input file into 
buffer */ + stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in); + if (stream.avail_in != infile_size) { + fprintf(stderr, "Couldn't fit all of input file into buffer\n"); + exit(0); + } + + struct perf start, stop; + perf_start(&start); + + for (i = 0; i < iterations; i++) { + isal_deflate_init(&stream); + stream.end_of_stream = 0; + stream.flush = SYNC_FLUSH; + stream.next_in = inbuf; + stream.avail_in = infile_size / 2; + stream.next_out = outbuf; + stream.avail_out = outbuf_size / 2; + isal_deflate(&stream); + if (infile_size == 0) + continue; + stream.avail_in = infile_size - infile_size / 2; + stream.end_of_stream = 1; + stream.next_in = inbuf + stream.total_in; + stream.flush = SYNC_FLUSH; + stream.avail_out = infile_size - outbuf_size / 2; + stream.next_out = outbuf + stream.total_out; + isal_deflate(&stream); + if (stream.avail_in != 0) + break; + } + perf_stop(&stop); + + if (stream.avail_in != 0) { + fprintf(stderr, "Could not compress all of inbuf\n"); + exit(0); + } + + printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1], + infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size); + + printf("igzip_file: "); + perf_print(stop, start, (long long)infile_size * i); + + if (argc > 2 && out) { + printf("writing %s\n", argv[2]); + fwrite(outbuf, 1, stream.total_out, out); + fclose(out); + } + + fclose(in); + printf("End of igzip_sync_flush_file_perf\n\n"); + fflush(0); + return 0; +} diff --git a/igzip/igzip_sync_flush_perf.c b/igzip/igzip_sync_flush_perf.c new file mode 100644 index 0000000..53a6dbe --- /dev/null +++ b/igzip/igzip_sync_flush_perf.c @@ -0,0 +1,96 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define TEST_LEN (1024*1024) +#define IBUF_SIZE (1024*1024) +#define OBUF_SIZE (1024*1024) + +#define TEST_LOOPS 400 +#define TEST_TYPE_STR "_warm" + +void create_data(unsigned char *data, int size) +{ + char c = 'a'; + while (size--) + *data++ = c = c < 'z' ? 
c + 1 : 'a'; +} + +int main(int argc, char *argv[]) +{ + int i = 1; + struct isal_zstream stream; + unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE]; + struct perf start, stop; + + create_data(inbuf, TEST_LEN); + printf("Window Size: %d K\n", HIST_SIZE); + printf("igzip_sync_flush_perf: \n"); + fflush(0); + + perf_start(&start); + + for (i = 0; i < TEST_LOOPS; i++) { + isal_deflate_init(&stream); + + stream.avail_in = TEST_LEN; + if (i == (TEST_LOOPS - 1)) + stream.end_of_stream = 1; + else + stream.end_of_stream = 0; + stream.next_in = inbuf; + stream.flush = SYNC_FLUSH; + + do { + stream.avail_out = OBUF_SIZE; + stream.next_out = zbuf; + isal_deflate(&stream); + } while (stream.avail_out == 0); + + } + + perf_stop(&stop); + + printf("igzip_sync_flush_perf" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)(TEST_LEN) * (i)); + + if (!stream.end_of_stream) { + printf("error: compression test could not fit into allocated buffers\n"); + return -1; + } + printf("End of igzip_sync_flush_perf\n\n"); + fflush(0); + return 0; +} diff --git a/igzip/lz0a_const.asm b/igzip/lz0a_const.asm new file mode 100644 index 0000000..4d95739 --- /dev/null +++ b/igzip/lz0a_const.asm @@ -0,0 +1,44 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%assign K 1024 +%assign D HIST_SIZE * K ;; Amount of history +%assign LA 17 * 16 ;; Max look-ahead: 272 bytes, a multiple of 16 (not of 32) +%assign BSIZE 2*HIST_SIZE*K + LA ;; Nominal buffer size + +;; Constants for stateless compression +%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds +%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer + +%assign HASH_SIZE D +%assign HASH_MASK (HASH_SIZE - 1) + +%assign SHORTEST_MATCH 3 + +%assign SLOP 8 diff --git a/igzip/options.asm b/igzip/options.asm new file mode 100644 index 0000000..d86a41f --- /dev/null +++ b/igzip/options.asm @@ -0,0 +1,87 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +default rel + +%ifndef __OPTIONS_ASM__ +%define __OPTIONS_ASM__ + +%ifndef IGZIP_USE_GZIP_FORMAT +%define DEFLATE +%endif + +; Options:dir +; m - reschedule mem reads +; e b - bitbuff style +; t s x - compare style +; h - limit hash updates +; l - use longer huffman table +; f - fix cache read + +%ifdef LARGE_WINDOW +%define HIST_SIZE 32 +%else +%define HIST_SIZE 8 +%endif + +%ifdef USE_BITBUFB +%elifdef USE_BITBUF8 +%elifdef USE_BITBUF_ELSE +%else +; bit buffer types +; BITBUFB: (b) Always write data +%define USE_BITBUFB +%endif + +; (h) limit hash update +%define LIMIT_HASH_UPDATE + +; (l) longer huffman table +%define LONGER_HUFFTABLE + +; (f) fix cache read problem +%define FIX_CACHE_READ + +%if (HIST_SIZE > 8) +%undef LONGER_HUFFTABLE +%endif + +%define IGZIP_MAX_DEF_HDR_SIZE 328 + +%ifidn __OUTPUT_FORMAT__, elf64 +%ifndef __NASM_VER__ +%define WRT_OPT wrt ..sym +%else +%define WRT_OPT +%endif +%else +%define WRT_OPT +%endif + +%endif ; ifndef __OPTIONS_ASM__ diff --git a/igzip/repeated_char_result.h b/igzip/repeated_char_result.h new file mode 100644 index 0000000..60a5fc1 --- /dev/null +++ b/igzip/repeated_char_result.h @@ -0,0 +1,68 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef _IGZIP_REPEATED_8K_CHAR_RESULT_H_ +#define _IGZIP_REPEATED_8K_CHAR_RESULT_H_ + +/* The code for the literal being encoded */ +#define CODE_LIT 0x1 +#define CODE_LIT_LENGTH 0x2 + +/* The code for repeat 10. The Length includes the distance code length*/ +#define CODE_10 0x3 +#define CODE_10_LENGTH 0x4 + +/* The code for repeat 115-130. The Length includes the distance code length*/ +#define CODE_280 0x0f +#define CODE_280_LENGTH 0x4 +#define CODE_280_TOTAL_LENGTH CODE_280_LENGTH + 4 + 1 + +/* Code representing the end of block. 
+ */ +#define END_OF_BLOCK 0x7 +#define END_OF_BLOCK_LEN 0x4 + +/* MIN_REPEAT_LEN currently optimizes storage space, another possibility is to + * find the size which optimizes speed instead.*/ +#define MIN_REPEAT_LEN 4*1024 + +#define HEADER_LENGTH 16 + +/* Maximum length of the portion of the header represented by repeat lengths + * smaller than 258 */ +#define MAX_FIXUP_CODE_LENGTH 8 + + +/* Headers for constant 0x00 and 0xFF blocks + * This also contains the first literal character. */ +const uint32_t repeated_char_header[2][5] = { + { 0x0121c0ec, 0xc30c0000, 0x7d57fab0, 0x49270938}, /* Deflate header for 0x00 */ + { 0x0121c0ec, 0xc30c0000, 0x7baaff30, 0x49270938} /* Deflate header for 0xFF */ + +}; + +#endif /*_IGZIP_REPEATED_8K_CHAR_RESULT_H_*/ diff --git a/igzip/stdmac.asm b/igzip/stdmac.asm new file mode 100644 index 0000000..a4c2b6f --- /dev/null +++ b/igzip/stdmac.asm @@ -0,0 +1,207 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +;; internal macro used by push_all +;; push args L to R +%macro push_all_ 1-* +%xdefine _PUSH_ALL_REGS_COUNT_ %0 +%rep %0 + push %1 + %rotate 1 +%endrep +%endmacro + +;; internal macro used by pop_all +;; pop args R to L +%macro pop_all_ 1-* +%rep %0 + %rotate -1 + pop %1 +%endrep +%endmacro + +%xdefine _PUSH_ALL_REGS_COUNT_ 0 +%xdefine _ALLOC_STACK_VAL_ 0 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; STACK_OFFSET +;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; PUSH_ALL reg1, reg2, ... 
+;; push args L to R, remember regs for pop_all +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro PUSH_ALL 1+ +%xdefine _PUSH_ALL_REGS_ %1 + push_all_ %1 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; POP_ALL +;; pop args from prev "push_all" R to L +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro POP_ALL 0 + pop_all_ _PUSH_ALL_REGS_ +%xdefine _PUSH_ALL_REGS_COUNT_ 0 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALLOC_STACK n +;; subtract n from the stack pointer and remember the value for restore_stack +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ALLOC_STACK 1 +%xdefine _ALLOC_STACK_VAL_ %1 + sub rsp, %1 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RESTORE_STACK +;; add n to the stack pointer, where n is the arg to the previous alloc_stack +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro RESTORE_STACK 0 + add rsp, _ALLOC_STACK_VAL_ +%xdefine _ALLOC_STACK_VAL_ 0 +%endmacro + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NOPN n +;; Create n bytes of NOP, using nops of up to 8 bytes each +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro NOPN 1 + + %assign %%i %1 + %rep 200 + %if (%%i < 9) + nopn %%i + %exitrep + %else + nopn 8 + %assign %%i (%%i - 8) + %endif + %endrep +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; nopn n +;; Create n bytes of NOP, where n is between 1 and 9 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro nopn 1 +%if (%1 == 1) + nop +%elif (%1 == 2) + db 0x66 + nop +%elif (%1 == 3) + db 0x0F + db 0x1F + db 0x00 +%elif (%1 == 4) + db 0x0F + db 0x1F + db 0x40 + db 0x00 +%elif (%1 == 5) + db 0x0F + db 0x1F + db 0x44 + db 0x00 + db 0x00 +%elif (%1 == 6) + db 0x66 + db 0x0F + db 0x1F + db 0x44 + db 0x00 + db 0x00 +%elif (%1 == 7) + db
0x0F + db 0x1F + db 0x80 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%elif (%1 == 8) + db 0x0F + db 0x1F + db 0x84 + db 0x00 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%elif (%1 == 9) + db 0x66 + db 0x0F + db 0x1F + db 0x84 + db 0x00 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%else +%error Invalid value to nopn +%endif +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rolx64 dst, src, amount +;; Emulate a rolx instruction using rorx, assuming data 64 bits wide +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro rolx64 3 + rorx %1, %2, (64-%3) +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rolx32 dst, src, amount +;; Emulate a rolx instruction using rorx, assuming data 32 bits wide +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro rolx32 3 + rorx %1, %2, (32-%3) +%endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Define a function void ssc(uint64_t x) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro DEF_SSC 0 +global ssc +ssc: + mov rax, rbx + mov rbx, rcx + db 0x64 + db 0x67 + nop + mov rbx, rax + ret +%endm diff --git a/include/igzip_lib.h b/include/igzip_lib.h new file mode 100644 index 0000000..1dd930c --- /dev/null +++ b/include/igzip_lib.h @@ -0,0 +1,371 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _IGZIP_H +#define _IGZIP_H + +/** + * @file igzip_lib.h + * + * @brief This file defines the igzip compression interface, a high performance + * deflate compression interface for storage applications. + * + * Deflate is a widely used compression standard that can be used standalone, it + * also forms the basis of gzip and zlib compression formats. Igzip supports the + * following flush features: + * + * - No Flush: The default method where no flush is performed. + * + * - Sync flush: whereby isal_deflate() finishes the current deflate block at + * the end of each input buffer. The deflate block is byte aligned by + * appending an empty stored block. + * + * - Full flush: whereby isal_deflate() finishes and aligns the deflate block as + * in sync flush but also ensures that subsequent block's history does not + * look back beyond this point and new blocks are fully independent. 
+ * + * Igzip's default configuration is: + * + * - 8K window size + * + * This option can be overridden to enable: + * + * - 32K window size, by adding \#define LARGE_WINDOW 1 in igzip_lib.h and + * \%define LARGE_WINDOW in options.asm, or via the command line with + * @verbatim gmake D="-D LARGE_WINDOW" @endverbatim on Linux and FreeBSD, or + * with @verbatim nmake -f Makefile.nmake D="-D LARGE_WINDOW" @endverbatim on + * Windows. + * + * KNOWN ISSUES: + * - If building the code on Windows with the 32K window enabled, the + * /LARGEADDRESSAWARE:NO link option must be added. + * - The 32K window isn't supported when used in a shared library. + * + */ +#include +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Options:dir +// m - reschedule mem reads +// e b - bitbuff style +// t s x - compare style +// h - limit hash updates +// l - use longer huffman table +// f - fix cache read + +#if defined(LARGE_WINDOW) +# define HIST_SIZE 32 +#else +# define HIST_SIZE 8 +#endif + +/* bit buffer types + * BITBUF8: (e) Always write 8 bytes of data + * BITBUFB: (b) Always write data + */ +#if !(defined(USE_BITBUFB) || defined(USE_BITBUF8) || defined(USE_BITBUF_ELSE)) +# define USE_BITBUFB +#endif + +/* compare types + * 1: ( ) original + * 2: (t) with CMOV + * 3: (s) with sttni + * 4: (x) with xmm / pmovbmsk + * 5: (y) with ymm / pmovbmsk (32-bytes at a time) + */ +# define LIMIT_HASH_UPDATE + +/* (l) longer huffman table */ +#define LONGER_HUFFTABLE + +/* (f) fix cache read problem */ +#define FIX_CACHE_READ + +#if (HIST_SIZE > 8) +# undef LONGER_HUFFTABLE +#endif + +#define IGZIP_K 1024 +#define IGZIP_D (HIST_SIZE * IGZIP_K) /* Amount of history */ +#define IGZIP_LA (17 * 16) /* Max look-ahead: 272 bytes, a multiple of 16 (not of 32) */ +#define BSIZE (2*IGZIP_D + IGZIP_LA) /* Nominal buffer size */ + +#define HASH_SIZE IGZIP_D +#define HASH_MASK (HASH_SIZE - 1) + +#define SHORTEST_MATCH 3 + +#define IGZIP_MAX_DEF_HDR_SIZE 328 + +#ifdef LONGER_HUFFTABLE +enum
{DIST_TABLE_SIZE = 8*1024}; + +/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */ +enum { DECODE_OFFSET = 26 }; +#else +enum {DIST_TABLE_SIZE = 1024}; +/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */ +enum { DECODE_OFFSET = 20 }; +#endif +enum {LEN_TABLE_SIZE = 256}; +enum {LIT_TABLE_SIZE = 257}; + +#define IGZIP_LIT_LEN 286 +#define IGZIP_DIST_LEN 30 + +/* Flush Flags */ +#define NO_FLUSH 0 /* Default */ +#define SYNC_FLUSH 1 +#define FULL_FLUSH 2 +#define FINISH_FLUSH 0 /* Deprecated */ + +/* Return values */ +#define COMP_OK 0 +#define INVALID_FLUSH -7 +#define INVALID_PARAM -8 +#define STATELESS_OVERFLOW -1 +#define DEFLATE_HDR_LEN 3 +/** + * @enum isal_zstate + * @brief Compression State please note ZSTATE_TRL only applies for GZIP compression + */ + + +/* When the state is set to ZSTATE_NEW_HDR or TMP_ZSTATE_NEW_HEADER, the + * hufftable being used for compression may be swapped + */ +enum isal_zstate_state { + ZSTATE_NEW_HDR, //!< Header to be written + ZSTATE_HDR, //!< Header state + ZSTATE_BODY, //!< Body state + ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer + ZSTATE_SYNC_FLUSH, //!< Write sync flush block + ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf + ZSTATE_TRL, //!< Trailer state + ZSTATE_END, //!< End state + ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written + ZSTATE_TMP_HDR, //!< Temporary Header state + ZSTATE_TMP_BODY, //!< Temporary Body state + ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer + ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block + ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf + ZSTATE_TMP_TRL, //!< Temporary Trailer state + ZSTATE_TMP_END //!< Temporary End state +}; + +/* Offset used to switch between TMP states and non-tmp states */ +#define TMP_OFFSET_SIZE ZSTATE_TMP_HDR - ZSTATE_HDR + +struct isal_huff_histogram { + uint64_t lit_len_histogram[IGZIP_LIT_LEN]; + uint64_t dist_histogram[IGZIP_DIST_LEN]; +}; + +/** @brief Holds Bit Buffer information*/ +struct BitBuf2 { + 
uint64_t m_bits; //!< bits in the bit buffer + uint32_t m_bit_count; //!< number of valid bits in the bit buffer + uint8_t *m_out_buf; //!< current index of buffer to write to + uint8_t *m_out_end; //!< end of buffer to write to + uint8_t *m_out_start; //!< start of buffer to write to +}; + +/* Variable prefixes: + * b_ : Measured wrt the start of the buffer + * f_ : Measured wrt the start of the file (aka file_start) + */ + +/** @brief Holds the internal state information for input and output compression streams*/ +struct isal_zstate { + uint32_t b_bytes_valid; //!< number of bytes of valid data in buffer + uint32_t b_bytes_processed; //!< keeps track of the number of bytes processed in isal_zstate.buffer + uint8_t *file_start; //!< pointer to where file would logically start + DECLARE_ALIGNED(uint32_t crc[16], 16); //!< actually 4 128-bit integers + struct BitBuf2 bitbuf; //!< Bit Buffer + enum isal_zstate_state state; //!< Current state in processing the data stream + uint32_t count; //!< used for partial header/trailer writes + uint8_t tmp_out_buff[16]; //!< temporary array + uint32_t tmp_out_start; //!< temporary variable + uint32_t tmp_out_end; //!< temporary variable + uint32_t last_flush; //!< keeps track of last submitted flush + uint32_t has_gzip_hdr; //!< keeps track of if the gzip header has been written. 
+ uint32_t has_eob; //!< keeps track of eob on the last deflate block + uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) + uint32_t left_over; //!< keeps track of overflow bytes + + + + DECLARE_ALIGNED(uint8_t buffer[BSIZE + 16], 32); //!< Internal buffer + + DECLARE_ALIGNED(uint16_t head[HASH_SIZE], 16); //!< Hash array + +}; + +/** @brief Holds the huffman tree used to huffman encode the input stream **/ +struct isal_hufftables { + + uint8_t deflate_hdr[IGZIP_MAX_DEF_HDR_SIZE]; //!< deflate huffman tree header + uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_huff_hdr + uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header + uint32_t dist_table[DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code + uint32_t len_table[LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code + uint16_t lit_table[LIT_TABLE_SIZE]; //!< literal code + uint8_t lit_table_sizes[LIT_TABLE_SIZE]; //!< literal code length + uint16_t dcodes[30 - DECODE_OFFSET]; //!< distance code + uint8_t dcodes_sizes[30 - DECODE_OFFSET]; //!< distance code length + +}; + +/** @brief Holds stream information*/ +struct isal_zstream { + uint8_t *next_in; //!< Next input byte + uint32_t avail_in; //!< number of bytes available at next_in + uint32_t total_in; //!< total number of bytes read so far + + uint8_t *next_out; //!< Next output byte + uint32_t avail_out; //!< number of bytes available at next_out + uint32_t total_out; //!< total number of bytes written so far + + struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing + uint32_t end_of_stream; //!< non-zero if this is the last input buffer + uint32_t flush; //!< Flush type can be NO_FLUSH or SYNC_FLUSH + + struct isal_zstate internal_state; //!< Internal state for this stream +}; + + +/** + * @brief Updates histograms to include the symbols found in the input + * stream. 
Since this function only updates the histograms, it can be called on + * multiple streams to get a histogram better representing the desired data + * set. When first using histogram it must be initialized by zeroing the + * structure. + * + * @param in_stream: Input stream of data. + * @param length: The length of start_stream. + * @param histogram: The returned histogram of lit/len/dist symbols. + */ +void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_histogram * histogram); + + +/** + * @brief Creates a custom huffman code for the given histograms in which + * every literal and repeat length is assigned a code and all possible lookback + * distances are assigned a code. + * + * @param hufftables: the output structure containing the huffman code + * @param lit_histogram: histogram containing frequency of literal symbols and + * repeat lengths + * @param dist_histogram: histogram containing frequency of of lookback distances + * @returns Returns a non zero value if an invalid huffman code was created. + */ +int isal_create_hufftables(struct isal_hufftables * hufftables, + struct isal_huff_histogram * histogram); + +/** + * @brief Creates a custom huffman code for the given histograms like + * isal_create_hufftables() except literals with 0 frequency in the histogram + * are not assigned a code + * + * @param hufftables: the output structure containing the huffman code + * @param lit_histogram: histogram containing frequency of literal symbols and + * repeat lengths + * @param dist_histogram: histogram containing frequency of of lookback distances + * @returns Returns a non zero value if an invalid huffman code was created. + */ +int isal_create_hufftables_subset(struct isal_hufftables * hufftables, + struct isal_huff_histogram * histogram); + +/** + * @brief Initialize compression stream data structure + * + * @param stream Structure holding state information on the compression streams. 
+ * @returns none + */ +void isal_deflate_init(struct isal_zstream *stream); + + +/** + * @brief Fast data (deflate) compression for storage applications. + * + * On entry to isal_deflate(), next_in points to an input buffer and avail_in + * indicates the length of that buffer. Similarly next_out points to an empty + * output buffer and avail_out indicates the size of that buffer. + * + * The fields total_in and total_out start at 0 and are updated by + * isal_deflate(). These reflect the total number of bytes read or written so far. + * + * The call to isal_deflate() will take data from the input buffer (updating + * next_in, avail_in and write a compressed stream to the output buffer + * (updating next_out and avail_out). The function returns when either the input + * buffer is empty or the output buffer is full. + * + * When the last input buffer is passed in, signaled by setting the + * end_of_stream, the routine will complete compression at the end of the input + * buffer, as long as the output buffer is big enough. + * + * The equivalent of the zlib FLUSH_SYNC operation is currently supported. + * Flush types can be NO_FLUSH or SYNC_FLUSH. Default flush type is NO_FLUSH. + * If SYNC_FLUSH is selected each input buffer is compressed and byte aligned + * with a type 0 block appended to the end. Switching between NO_FLUSH and + * SYNC_FLUSH is supported to select after which input buffer a SYNC_FLUSH is + * performed. + * + * @param stream Structure holding state information on the compression streams. + * @return COMP_OK (if everything is ok), + * INVALID_FLUSH (if an invalid FLUSH is selected), + */ +int isal_deflate(struct isal_zstream *stream); + + +/** + * @brief Fast data (deflate) stateless compression for storage applications. + * + * Stateless (one shot) compression routine with a similar interface to + * isal_deflate() but operates on entire input buffer at one time. Parameter + * avail_out must be large enough to fit the entire compressed output. 
Max + * expansion is limited to the input size plus the header size of a stored/raw + * block. + * + * @param stream Structure holding state information on the compression streams. + * @return COMP_OK (if everything is ok), + * STATELESS_OVERFLOW (if output buffer will not fit output). + */ +int isal_deflate_stateless(struct isal_zstream *stream); + + +#ifdef __cplusplus +} +#endif +#endif /* ifndef _IGZIP_H */ diff --git a/isa-l.def b/isa-l.def index 22e4b30..4cd1fa2 100644 --- a/isa-l.def +++ b/isa-l.def @@ -75,3 +75,9 @@ crc32_iscsi @71 crc16_t10dif_base @72 crc32_ieee_base @73 crc32_iscsi_base @74 +isal_deflate_stateless @75 +isal_deflate @76 +isal_deflate_init @77 +isal_update_histogram @78 +isal_create_hufftables @79 +isal_create_hufftables_subset @80