igzip: implement igzip two pass
Change-Id: I9564b2da251a02197b39cab5f141e7aff1ae8439 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
43d1029b81
commit
01dfbcc484
@ -32,16 +32,25 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
|
||||
igzip/igzip_body_02.asm \
|
||||
igzip/igzip_body_04.asm \
|
||||
igzip/igzip_finish.asm \
|
||||
igzip/igzip_icf_body_01.asm \
|
||||
igzip/igzip_icf_body_02.asm \
|
||||
igzip/igzip_icf_body_04.asm \
|
||||
igzip/igzip_icf_finish.asm \
|
||||
igzip/crc_data.asm \
|
||||
igzip/rfc1951_lookup.asm \
|
||||
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
|
||||
igzip/igzip_multibinary.asm \
|
||||
igzip/igzip_base.c \
|
||||
igzip/igzip_icf_base.c \
|
||||
igzip/igzip_update_histogram_01.asm \
|
||||
igzip/igzip_update_histogram_04.asm \
|
||||
igzip/igzip_decode_block_stateless_01.asm \
|
||||
igzip/igzip_decode_block_stateless_04.asm \
|
||||
igzip/igzip_inflate_multibinary.asm
|
||||
igzip/igzip_inflate_multibinary.asm \
|
||||
igzip/flatten_ll.c \
|
||||
igzip/encode_df.c \
|
||||
igzip/encode_df_04.asm \
|
||||
igzip/proc_heap.asm
|
||||
|
||||
src_include += -I $(srcdir)/igzip
|
||||
extern_hdrs += include/igzip_lib.h
|
||||
@ -55,11 +64,12 @@ perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
|
||||
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
|
||||
other_tests += igzip/igzip_semi_dyn_file_perf
|
||||
|
||||
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
igzip/inflate_data_structs.asm \
|
||||
igzip/igzip_body.asm igzip/igzip_finish.asm \
|
||||
igzip/lz0a_const.asm igzip/options.asm igzip/stdmac.asm igzip/igzip_compare_types.asm \
|
||||
igzip/bitbuf2.h igzip/repeated_char_result.h \
|
||||
igzip/bitbuf2.h \
|
||||
igzip/repeated_char_result.h \
|
||||
igzip/igzip_body.asm \
|
||||
igzip/igzip_update_histogram.asm \
|
||||
igzip/huffman.asm \
|
||||
@ -67,8 +77,13 @@ other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
include/multibinary.asm \
|
||||
include/test.h \
|
||||
igzip/huffman.h \
|
||||
igzip/igzip_level_buf_structs.h \
|
||||
igzip/igzip_decode_block_stateless.asm \
|
||||
igzip/inflate_std_vects.h
|
||||
igzip/inflate_std_vects.h \
|
||||
igzip/flatten_ll.h \
|
||||
igzip/encode_df.h \
|
||||
igzip/encode_df_asm.asm\
|
||||
igzip/heap_macros.asm
|
||||
|
||||
examples += igzip/igzip_example igzip/igzip_sync_flush_example
|
||||
|
||||
|
@ -203,3 +203,10 @@
|
||||
; code2 is clobbered, rcx is clobbered
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro write_dword 2
|
||||
%define %%data %1d
|
||||
%define %%addr %2
|
||||
movnti [%%addr], %%data
|
||||
add %%addr, 4
|
||||
%endm
|
||||
|
@ -102,19 +102,28 @@ static inline uint32_t buffer_used(struct BitBuf2 *me)
|
||||
return (uint32_t)(me->m_out_buf - me->m_out_start);
|
||||
}
|
||||
|
||||
static inline uint32_t buffer_bits_used(struct BitBuf2 *me)
|
||||
{
|
||||
return (8 * (uint32_t)(me->m_out_buf - me->m_out_start) + me->m_bit_count);
|
||||
}
|
||||
|
||||
static inline void flush_bits(struct BitBuf2 *me)
|
||||
{
|
||||
uint32_t bits;
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
bits = me->m_bit_count & ~7;
|
||||
me->m_bit_count -= bits;
|
||||
me->m_out_buf += bits/8;
|
||||
me->m_bits >>= bits;
|
||||
|
||||
}
|
||||
|
||||
static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
|
||||
{
|
||||
/* Checks if bitbuf has num_bits extra space and flushes the bytes in
|
||||
* the bitbuf if it doesn't. */
|
||||
uint32_t bytes;
|
||||
if (63 - me->m_bit_count < num_bits) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
bytes = me->m_bit_count / 8;
|
||||
me->m_out_buf += bytes;
|
||||
bytes *= 8;
|
||||
me->m_bit_count -= bytes;
|
||||
me->m_bits >>= bytes;
|
||||
}
|
||||
if (63 - me->m_bit_count < num_bits)
|
||||
flush_bits(me);
|
||||
}
|
||||
|
||||
static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
|
||||
@ -136,16 +145,10 @@ static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
|
||||
}
|
||||
#elif defined(USE_BITBUFB) /* Write bits always */
|
||||
/* Assumes there is space to fit code into m_bits. */
|
||||
uint32_t bits;
|
||||
me->m_bits |= code << me->m_bit_count;
|
||||
me->m_bit_count += count;
|
||||
if (me->m_bit_count >= 8) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
bits = me->m_bit_count & ~7;
|
||||
me->m_bit_count -= bits;
|
||||
me->m_out_buf += bits/8;
|
||||
me->m_bits >>= bits;
|
||||
}
|
||||
if (me->m_bit_count >= 8)
|
||||
flush_bits(me);
|
||||
#else /* USE_BITBUF_ELSE */
|
||||
check_space(me, count);
|
||||
write_bits_unsafe(me, code, count);
|
||||
|
@ -67,6 +67,52 @@ FIELD _m_out_start, 8, 8
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
START_FIELDS ;; isal_mod_hist
|
||||
|
||||
;; name size align
|
||||
FIELD _d_hist, 30*2, 2
|
||||
FIELD _ll_hist, 513*2, 2
|
||||
|
||||
%assign _isal_mod_hist_size _FIELD_OFFSET
|
||||
%assign _isal_mod_hist_align _STRUCT_ALIGN
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define HUFF_CODE_SIZE 4
|
||||
|
||||
START_FIELDS ;; hufftables_icf
|
||||
|
||||
;; name size align
|
||||
FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
|
||||
FIELD _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
|
||||
|
||||
%assign _hufftables_icf_size _FIELD_OFFSET
|
||||
%assign _hufftables_icf_align _STRUCT_ALIGN
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define DEF_MAX_HDR_SIZE 328
|
||||
START_FIELDS ;; level_2_buf
|
||||
|
||||
;; name size align
|
||||
FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align
|
||||
FIELD _deflate_hdr_buf_used, 8, 8
|
||||
FIELD _deflate_hdr_buf, DEF_MAX_HDR_SIZE, 1
|
||||
FIELD _icf_buf_next, 8, 8
|
||||
FIELD _icf_buf_avail_out, 8, 8
|
||||
FIELD _icf_buf_start, 0, 0
|
||||
|
||||
%assign _level_2_buf_size _FIELD_OFFSET
|
||||
%assign _level_2_buf_align _STRUCT_ALIGN
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
START_FIELDS ;; isal_zstate
|
||||
|
||||
;; name size align
|
||||
@ -82,7 +128,8 @@ FIELD _tmp_out_start, 4, 4
|
||||
FIELD _tmp_out_end, 4, 4
|
||||
FIELD _has_eob, 4, 4
|
||||
FIELD _has_eob_hdr, 4, 4
|
||||
FIELD _has_hist, 4, 4
|
||||
FIELD _has_hist, 4, 4
|
||||
FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align
|
||||
FIELD _buffer, BSIZE, 32
|
||||
FIELD _head, IGZIP_HASH_SIZE*2, 16
|
||||
|
||||
@ -95,6 +142,8 @@ _bitbuf_m_out_buf equ _bitbuf+_m_out_buf
|
||||
_bitbuf_m_out_end equ _bitbuf+_m_out_end
|
||||
_bitbuf_m_out_start equ _bitbuf+_m_out_start
|
||||
|
||||
_hist_lit_len equ _hist+_ll_hist
|
||||
_hist_dist equ _hist+_d_hist
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@ -109,6 +158,9 @@ FIELD _next_out, 8, 8
|
||||
FIELD _avail_out, 4, 4
|
||||
FIELD _total_out, 4, 4
|
||||
FIELD _hufftables, 8, 8
|
||||
FIELD _level, 4, 4
|
||||
FIELD _level_buf_size, 4, 4
|
||||
FIELD _level_buf, 8, 8
|
||||
FIELD _end_of_stream, 4, 4
|
||||
FIELD _flush, 4, 4
|
||||
FIELD _gzip_flag, 4, 4
|
||||
@ -137,16 +189,22 @@ _internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count
|
||||
_internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf
|
||||
_internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end
|
||||
_internal_state_bitbuf_m_out_start equ _internal_state+_bitbuf_m_out_start
|
||||
_internal_state_hist_lit_len equ _internal_state+_hist_lit_len
|
||||
_internal_state_hist_dist equ _internal_state+_hist_dist
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
ZSTATE_HDR equ 1
|
||||
ZSTATE_BODY equ 2
|
||||
ZSTATE_FLUSH_READ_BUFFER equ 3
|
||||
ZSTATE_SYNC_FLUSH equ 4
|
||||
ZSTATE_TRL equ 6
|
||||
;; Internal States
|
||||
ZSTATE_NEW_HDR equ 0
|
||||
ZSTATE_HDR equ (ZSTATE_NEW_HDR + 1)
|
||||
ZSTATE_CREATE_HDR equ (ZSTATE_HDR + 1)
|
||||
ZSTATE_BODY equ (ZSTATE_CREATE_HDR + 1)
|
||||
ZSTATE_FLUSH_READ_BUFFER equ (ZSTATE_BODY + 1)
|
||||
ZSTATE_FLUSH_ICF_BUFFER equ (ZSTATE_FLUSH_READ_BUFFER + 1)
|
||||
ZSTATE_SYNC_FLUSH equ (ZSTATE_FLUSH_ICF_BUFFER + 1)
|
||||
ZSTATE_FLUSH_WRITE_BUFFER equ (ZSTATE_SYNC_FLUSH + 1)
|
||||
ZSTATE_TRL equ (ZSTATE_FLUSH_WRITE_BUFFER + 1)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
36
igzip/encode_df.c
Normal file
36
igzip/encode_df.c
Normal file
@ -0,0 +1,36 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#else
|
||||
# include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include "encode_df.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
|
||||
struct deflate_icf *end_in, struct BitBuf2 *bb,
|
||||
struct hufftables_icf *hufftables)
|
||||
{
|
||||
struct huff_code lsym, dsym;
|
||||
|
||||
while (next_in < end_in && !is_full(bb)) {
|
||||
lsym = hufftables->lit_len_table[next_in->lit_len];
|
||||
dsym = hufftables->dist_table[next_in->lit_dist];
|
||||
|
||||
// insert ll code, dist_code, and extra_bits
|
||||
write_bits_unsafe(bb, lsym.code_and_extra, lsym.length);
|
||||
write_bits_unsafe(bb, dsym.code, dsym.length);
|
||||
write_bits_unsafe(bb, next_in->dist_extra, dsym.extra_bit_count);
|
||||
flush_bits(bb);
|
||||
|
||||
next_in++;
|
||||
}
|
||||
|
||||
return next_in;
|
||||
}
|
19
igzip/encode_df.h
Normal file
19
igzip/encode_df.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef ENCODE_DF_H
|
||||
#define ENCODE_DF_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "huff_codes.h"
|
||||
|
||||
/* Deflate Intermediate Compression Format */
|
||||
#define ICF_DIST_OFFSET 14
|
||||
#define NULL_DIST_SYM 30
|
||||
|
||||
struct deflate_icf {
|
||||
uint32_t lit_len:ICF_DIST_OFFSET;
|
||||
uint32_t lit_dist:5;
|
||||
uint32_t dist_extra:32 - 5 - ICF_DIST_OFFSET;
|
||||
};
|
||||
|
||||
struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,
|
||||
struct BitBuf2 *bb, struct hufftables_icf * hufftables);
|
||||
#endif
|
4
igzip/encode_df_04.asm
Normal file
4
igzip/encode_df_04.asm
Normal file
@ -0,0 +1,4 @@
|
||||
%define ARCH 04
|
||||
%define USE_HSWNI
|
||||
|
||||
%include "encode_df_asm.asm"
|
169
igzip/encode_df_asm.asm
Normal file
169
igzip/encode_df_asm.asm
Normal file
@ -0,0 +1,169 @@
|
||||
%include "reg_sizes.asm"
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "stdmac.asm"
|
||||
|
||||
; tree entry is 4 bytes:
|
||||
; lit/len tree (513 entries)
|
||||
; | 3 | 2 | 1 | 0 |
|
||||
; | len | code |
|
||||
;
|
||||
; dist tree
|
||||
; | 3 | 2 | 1 | 0 |
|
||||
; |eblen:codlen| code |
|
||||
|
||||
; token format:
|
||||
; DIST_OFFSET:0 : lit/len
|
||||
; 31:(DIST_OFFSET + 5) : dist Extra Bits
|
||||
; (DIST_OFFSET + 5):DIST_OFFSET : dist code
|
||||
; lit/len: 0-256 (literal)
|
||||
; 257-512 (len + 254)
|
||||
|
||||
; returns final token pointer
|
||||
; equal to token_end if successful
|
||||
; uint32_t* encode_df(uint32_t *token_start, uint32_t *token_end,
|
||||
; BitBuf *bb, uint32_t *trees);
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define ARG1 rcx
|
||||
%define ARG2 rdx
|
||||
%define ARG3 r8
|
||||
%define ARG4 r9
|
||||
%define TMP1 rsi
|
||||
%define TMP2 rdi
|
||||
%define ll_tree ARG4
|
||||
%define ptr r11
|
||||
%else
|
||||
; Linux
|
||||
%define ARG1 rdi
|
||||
%define ARG2 rsi
|
||||
%define ARG3 rdx
|
||||
%define ARG4 rcx
|
||||
%define TMP1 r8
|
||||
%define TMP2 r9
|
||||
%define ll_tree r11 ; ARG4
|
||||
%define ptr ARG1 ; r11
|
||||
%endif
|
||||
|
||||
%define in_buf_end ARG2
|
||||
%define bb ARG3
|
||||
%define out_buf bb
|
||||
; bit_count is rcx
|
||||
%define bits rax
|
||||
%define data r12
|
||||
%define tmp rbx
|
||||
%define sym TMP1
|
||||
%define dsym TMP2
|
||||
%define len dsym
|
||||
%define tmp2 r10
|
||||
%define end_ptr rbp
|
||||
%define dist_tree ll_tree + 4*513
|
||||
|
||||
global encode_deflate_icf_ %+ ARCH
|
||||
encode_deflate_icf_ %+ ARCH:
|
||||
push rbx
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
push rsi
|
||||
push rdi
|
||||
%endif
|
||||
push r12
|
||||
push rbp
|
||||
push bb
|
||||
|
||||
; free up rcx
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
mov ptr, ARG1
|
||||
%else
|
||||
mov ll_tree, ARG4
|
||||
%endif
|
||||
|
||||
mov bits, [bb + _m_bits]
|
||||
mov ecx, [bb + _m_bit_count]
|
||||
mov end_ptr, [bb + _m_out_end]
|
||||
mov out_buf, [bb + _m_out_buf] ; clobbers bb
|
||||
|
||||
.start_loop:
|
||||
mov DWORD(data), [ptr]
|
||||
|
||||
cmp out_buf, end_ptr
|
||||
ja .overflow
|
||||
|
||||
mov sym, data
|
||||
and sym, 0x3FF ; sym has ll_code
|
||||
mov DWORD(sym), [ll_tree + sym * 4]
|
||||
|
||||
; look up dist sym
|
||||
mov dsym, data
|
||||
shr dsym, DIST_OFFSET
|
||||
and dsym, 0x1F
|
||||
mov DWORD(dsym), [dist_tree + dsym * 4]
|
||||
|
||||
; insert LL code
|
||||
; sym: 31:24 length; 23:0 code
|
||||
mov tmp2, sym
|
||||
and sym, 0xFFFFFF
|
||||
shl sym, cl
|
||||
shr tmp2, 24
|
||||
or bits, sym
|
||||
add rcx, tmp2
|
||||
|
||||
; insert dist code
|
||||
movzx tmp, WORD(dsym)
|
||||
shl tmp, cl
|
||||
or bits, tmp
|
||||
mov tmp, dsym
|
||||
shr tmp, 24
|
||||
add rcx, tmp
|
||||
|
||||
; insert dist extra bits
|
||||
shr data, EXTRA_BITS_OFFSET
|
||||
add ptr, 4
|
||||
shl data, cl
|
||||
or bits, data
|
||||
shr dsym, 16
|
||||
and dsym, 0xFF
|
||||
add rcx, dsym
|
||||
|
||||
; empty bits
|
||||
mov [out_buf], bits
|
||||
mov tmp, rcx
|
||||
shr tmp, 3 ; byte count
|
||||
add out_buf, tmp
|
||||
mov tmp, rcx
|
||||
and rcx, ~7
|
||||
shr bits, cl
|
||||
mov rcx, tmp
|
||||
and rcx, 7
|
||||
|
||||
cmp ptr, in_buf_end
|
||||
jb .start_loop
|
||||
|
||||
;.end:
|
||||
; ; empty bits
|
||||
; mov [out_buf], bits
|
||||
; mov tmp, rcx
|
||||
; shr tmp, 3 ; byte count
|
||||
; add out_buf, tmp
|
||||
; mov tmp, rcx
|
||||
; and rcx, ~7
|
||||
; shr bits, cl
|
||||
; mov rcx, tmp
|
||||
; and rcx, 7
|
||||
|
||||
.overflow:
|
||||
pop TMP1 ; TMP1 now points to bb
|
||||
mov [TMP1 + _m_bits], bits
|
||||
mov [TMP1 + _m_bit_count], ecx
|
||||
mov [TMP1 + _m_out_buf], out_buf
|
||||
|
||||
pop rbp
|
||||
pop r12
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
pop rdi
|
||||
pop rsi
|
||||
%endif
|
||||
pop rbx
|
||||
|
||||
mov rax, ptr
|
||||
|
||||
ret
|
41
igzip/flatten_ll.c
Normal file
41
igzip/flatten_ll.c
Normal file
@ -0,0 +1,41 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "flatten_ll.h"
|
||||
|
||||
/* Fold the expanded length entries of a literal/length histogram in place.
 *
 * Entries 0..264 are left untouched. Starting at entry 265, consecutive source
 * entries are summed into entries 265..284: four destinations for each group
 * width of 2, 4, 8, 16 and 32 source entries (248 sources, entries 265..512).
 * Entry 512 is then moved out of entry 284 into its own entry 285.
 *
 * Sums are kept in 16 bits and wrap modulo 65536. Entries above 285 keep their
 * old contents.
 */
void flatten_ll(uint16_t * ll_hist)
{
	/* The read cursor never falls behind the write index, so folding in place is safe. */
	const uint16_t *src = ll_hist + 265;
	uint32_t sym = 265;
	uint32_t width, group, n;

	for (width = 2; width <= 32; width *= 2) {
		for (group = 0; group < 4; group++) {
			uint16_t total = 0;

			for (n = 0; n < width; n++)
				total = (uint16_t) (total + *src++);
			ll_hist[sym++] = total;
		}
	}

	/* The last source entry gets a destination of its own. */
	ll_hist[284] = (uint16_t) (ll_hist[284] - ll_hist[512]);
	ll_hist[285] = ll_hist[512];
}
|
3
igzip/flatten_ll.h
Normal file
3
igzip/flatten_ll.h
Normal file
@ -0,0 +1,3 @@
|
||||
#include <stdint.h>
|
||||
|
||||
void flatten_ll(uint16_t *ll_hist);
|
69
igzip/heap_macros.asm
Normal file
69
igzip/heap_macros.asm
Normal file
@ -0,0 +1,69 @@
|
||||
; heapify2 heap, heap_size, i, child, tmp1, tmp2, tmpd
;
; Sift-down loop on a heap of qword keys: starting at position i, repeatedly
; pick the smaller (unsigned) of the two children and swap it with the parent
; until the parent is <= that child or the child position exceeds heap_size.
; On exit i holds the final position; child, tmp1, tmp2 and tmpd are clobbered.
;
; NOTE(review): loads and stores use [heap + child] / [heap + child + 8]
; (unscaled, 8 bytes apart) while the "second child" position is formed with
; child + 1. Confirm whether i/child/heap_size are meant to be byte offsets or
; element indices before using this macro; the sibling macro "heapify" uses
; element indices with *8 scaling throughout.
%macro heapify2 7
%define %%heap		%1 ; qword ptr
%define %%heap_size	%2 ; dword
%define %%i		%3 ; dword
%define %%child		%4 ; dword
%define %%tmp1		%5 ; qword
%define %%tmp2		%6 ; qword
%define %%tmpd		%7 ; dword
	align 16
%%heapify1:
	lea	%%child, [%%i + %%i]		; first child position = 2 * i
	cmp	%%child, %%heap_size
	ja	%%end_heapify1			; no children: done
	mov	%%tmp1, [%%heap + %%child]	; tmp1 = first child
	mov	%%tmpd, %%child			; remember first child position
	mov	%%tmp2, [%%heap + %%child + 8]	; tmp2 = second child
	lea	%%child, [%%child + 1]
	; mov/lea leave the flags alone, so ZF is still from "cmp child, heap_size":
	; when the first child is the last element, ignore the second child.
	cmove	%%tmp2, %%tmp1
	cmp	%%tmp1, %%tmp2
	cmovbe	%%child, %%tmpd
	cmovbe	%%tmp2, %%tmp1
	; child is correct, %%tmp2 = heap[child]
	mov	%%tmp1, [%%heap + %%i]
	cmp	%%tmp1, %%tmp2
	jbe	%%end_heapify1			; parent already <= smaller child
	; swap parent and child, then continue from the child
	mov	[%%heap + %%i], %%tmp2
	mov	[%%heap + %%child], %%tmp1
	mov	%%i, %%child
	jmp	%%heapify1
%%end_heapify1:
%endm
|
||||
|
||||
; heapify heap, heap_size, i, child, tmp1, tmp2, tmpd, tmp3
;
; Sift-down loop on a heap of qword keys addressed as [heap + index*8]:
; starting at index i, swap the parent with its smaller (unsigned) child until
; the parent is <= that child or 2*i exceeds heap_size. On exit i holds the
; final index; child, tmp1, tmp2, tmpd and tmp3 are clobbered.
;
; heap[child + 1] is read even when child == heap_size; presumably the caller
; keeps a sentinel value in that slot -- TODO confirm.
%macro heapify 8
%define %%heap		%1 ; qword ptr
%define %%heap_size	%2 ; qword
%define %%i		%3 ; qword
%define %%child		%4 ; qword
%define %%tmp1		%5 ; qword
%define %%tmp2		%6 ; qword
%define %%tmpd		%7 ; qword
%define %%tmp3		%8
	align 16
%%sift_down:
	lea	%%child, [%%i + %%i]		; child = 2 * i
	cmp	%%child, %%heap_size
	ja	%%sift_done			; no children: done
	mov	%%tmp1, [%%heap + %%child*8]	; tmp1 = heap[child]
	mov	%%tmp2, [%%heap + %%child*8 + 8] ; tmp2 = heap[child + 1]
	mov	%%tmp3, [%%heap + %%i*8]	; tmp3 = heap[i]
	lea	%%tmpd, [%%child + 1]		; tmpd = child + 1

	; select the smaller child: child index in %%child, its key in %%tmp1
	cmp	%%tmp2, %%tmp1
	cmovb	%%child, %%tmpd
	cmovb	%%tmp1, %%tmp2

	cmp	%%tmp3, %%tmp1
	jbe	%%sift_done			; parent already <= smaller child

	; swap heap[i] and heap[child], then continue from child
	mov	[%%heap + %%i*8], %%tmp1
	mov	[%%heap + %%child*8], %%tmp3
	mov	%%i, %%child
	jmp	%%sift_down
%%sift_done:
%endm
|
1040
igzip/huff_codes.c
1040
igzip/huff_codes.c
File diff suppressed because it is too large
Load Diff
@ -79,81 +79,57 @@
|
||||
#define HASH_MASK (IGZIP_HASH_SIZE - 1)
|
||||
#define SHORTEST_MATCH 4
|
||||
|
||||
#define LENGTH_BITS 5
|
||||
#define FREQ_SHIFT 16
|
||||
#define FREQ_MASK_HI (0xFFFFFFFFFFFF0000)
|
||||
#define DEPTH_SHIFT 24
|
||||
#define DEPTH_MASK 0x7F
|
||||
#define DEPTH_MASK_HI (DEPTH_MASK << DEPTH_SHIFT)
|
||||
#define DEPTH_1 (1 << DEPTH_SHIFT)
|
||||
#define HEAP_TREE_SIZE (3*MAX_HISTHEAP_SIZE + 1)
|
||||
#define HEAP_TREE_NODE_START (HEAP_TREE_SIZE-1)
|
||||
#define MAX_BL_CODE_LEN 7
|
||||
|
||||
/**
|
||||
* @brief Structure used to store huffman codes
|
||||
*/
|
||||
struct huff_code {
|
||||
uint16_t code;
|
||||
uint8_t length;
|
||||
union {
|
||||
struct {
|
||||
uint16_t code;
|
||||
uint8_t extra_bit_count;
|
||||
uint8_t length;
|
||||
};
|
||||
struct {
|
||||
uint32_t code_and_extra:24;
|
||||
uint8_t length2;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Binary tree used to store and create a huffman tree.
|
||||
*/
|
||||
struct huff_tree {
|
||||
uint16_t value;
|
||||
uint64_t frequency;
|
||||
struct huff_tree *left;
|
||||
struct huff_tree *right;
|
||||
struct tree_node {
|
||||
uint32_t child;
|
||||
uint32_t depth;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Nodes in a doubly linked list.
|
||||
*/
|
||||
struct linked_list_node {
|
||||
uint16_t value;
|
||||
struct linked_list_node *next;
|
||||
struct linked_list_node *previous;
|
||||
struct heap_tree {
|
||||
union {
|
||||
uint64_t heap[HEAP_TREE_SIZE];
|
||||
uint64_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
|
||||
struct tree_node tree[HEAP_TREE_SIZE];
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This structure is a doubly linked list.
|
||||
*/
|
||||
struct linked_list {
|
||||
uint64_t length;
|
||||
struct linked_list_node *start;
|
||||
struct linked_list_node *end;
|
||||
struct rl_code {
|
||||
uint8_t code;
|
||||
uint8_t extra_bits;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This is a binary minheap structure which stores huffman trees.
|
||||
* @details The huffman trees are sorted by the frequency of the root.
|
||||
* The structure is represented in a fixed sized array.
|
||||
*/
|
||||
struct histheap {
|
||||
struct huff_tree tree[MAX_HISTHEAP_SIZE];
|
||||
uint16_t size;
|
||||
struct hufftables_icf {
|
||||
struct huff_code lit_len_table[513];
|
||||
struct huff_code dist_table[31];
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Inserts a hufftree into a histheap.
|
||||
* @param element: the hufftree to be inserted
|
||||
* @param heap: the heap which element is being inserted into.
|
||||
* @requires This function assumes the heap has enough allocated space.
|
||||
* @returns Returns the index in heap of the inserted element
|
||||
*/
|
||||
int heap_push(struct huff_tree element, struct histheap *heap);
|
||||
|
||||
/**
|
||||
* @brief Removes the top element from the heap and returns it.
|
||||
*/
|
||||
struct huff_tree heap_pop(struct histheap *heap);
|
||||
|
||||
/**
|
||||
* @brief Removes the first element from list and returns it.
|
||||
*/
|
||||
struct linked_list_node *pop_from_front(struct linked_list *list);
|
||||
|
||||
/**
|
||||
* @brief Adds new_element to the front of list.
|
||||
*/
|
||||
void append_to_front(struct linked_list *list, struct linked_list_node *new_element);
|
||||
|
||||
/**
|
||||
* @brief Adds new_element to the end of list.
|
||||
*/
|
||||
void append_to_back(struct linked_list *list, struct linked_list_node *new_element);
|
||||
|
||||
/**
|
||||
* @brief Returns the deflate symbol value for a repeat length.
|
||||
*/
|
||||
@ -164,68 +140,6 @@ uint32_t convert_length_to_len_sym(uint32_t length);
|
||||
*/
|
||||
uint32_t convert_dist_to_dist_sym(uint32_t dist);
|
||||
|
||||
/**
|
||||
* Constructs a huffman tree on tree_array which only uses elements with non-zero frequency.
|
||||
* @requires Assumes there will be at least two symbols in the produced tree.
|
||||
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
|
||||
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
|
||||
* size elements of the array are the leaf elements in the huffman tree.
|
||||
* @param histogram: a histogram of the frequency of elements in tree_array.
|
||||
* @param size: the number of leaf elements in the huffman tree.
|
||||
*/
|
||||
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
|
||||
uint64_t * histogram, uint32_t size);
|
||||
|
||||
/**
|
||||
* @brief Construct a huffman tree on tree_array which uses every symbol.
|
||||
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
|
||||
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
|
||||
* @param size elements of the array are the leaf elements in the huffman tree.
|
||||
* @param histogram: a histogram of the frequency of elements in tree_array.
|
||||
* @param size: the number of leaf elements in the huffman tree.
|
||||
*/
|
||||
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
|
||||
uint32_t size);
|
||||
|
||||
/**
|
||||
* @brief Creates a deflate compliant huffman tree with maximum depth max_depth.
|
||||
* @details The huffman tree is represented as a lookup table.
|
||||
* @param huff_lookup_table: The output lookup table.
|
||||
* @param table_length: The length of table.
|
||||
* @param root: the input huffman tree the created tree is based on.
|
||||
* @param max_depth: maximum depth the huffman tree can have
|
||||
* @returns Returns 0 if sucessful and returns 1 otherwise.
|
||||
*/
|
||||
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
|
||||
struct huff_tree root, uint8_t max_depth);
|
||||
|
||||
/**
|
||||
* @brief Determines the code length for every value in a huffmant tree.
|
||||
* @param huff_lookup_table: An output lookup table used to store the code lengths
|
||||
* @param corresponding to the possible values
|
||||
* @param count: An output histogram representing code length versus number of occurences.
|
||||
* @param current_node: A node of the huffman tree being analyzed currently.
|
||||
* @param current_depth: The depth of the current node in the huffman tree.
|
||||
* @returns Returns 0 if sucessful and returns 1 otherwise.
|
||||
*/
|
||||
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
|
||||
struct huff_tree root, uint8_t max_depth);
|
||||
|
||||
/**
|
||||
* @brief Creates an array of linked lists.
|
||||
* @detail Each linked list contains all the elements with codes of a given length for
|
||||
* lengths less than 16, and an list for all elements with codes at least 16. These lists
|
||||
* are sorted by frequency from least frequent to most frequent within any given code length.
|
||||
* @param depth_array: depth_array[i] is a linked list of elements with code length i
|
||||
* @param linked_lists: An input structure the linked lists in depth array are built on.
|
||||
* @param current_node: the current node being visited in a huffman tree
|
||||
* @param current_depth: the depth of current_node in a huffman tree
|
||||
*/
|
||||
void huffman_tree_traversal(struct linked_list *depth_array,
|
||||
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
|
||||
uint8_t max_depth, struct huff_tree current_node,
|
||||
uint16_t current_depth);
|
||||
|
||||
/**
|
||||
* @brief Determines the code each element of a deflate compliant huffman tree and stores
|
||||
* it in a lookup table
|
||||
@ -234,10 +148,7 @@ void huffman_tree_traversal(struct linked_list *depth_array,
|
||||
* @param table_length: The length of table.
|
||||
* @param count: a histogram representing the number of occurrences of codes of a given length
|
||||
*/
|
||||
void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count);
|
||||
|
||||
/* Reverse the first length bits in bits and returns that value */
|
||||
uint16_t bit_reverse(uint16_t bits, uint8_t length);
|
||||
uint32_t set_huff_codes(struct huff_code *table, int table_length, uint32_t * count);
|
||||
|
||||
/**
|
||||
* @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files.
|
||||
@ -263,32 +174,8 @@ uint16_t valid_dist_huff_table(struct huff_code *huff_code_table);
|
||||
* @param end_of_block: Value determining whether end of block header is produced or not;
|
||||
* 0 corresponds to not end of block and all other inputs correspond to end of block.
|
||||
*/
|
||||
int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table,
|
||||
struct huff_code *dist_huff_table, uint32_t end_of_block);
|
||||
|
||||
/**
|
||||
* @brief Creates a run length encoded reprsentation of huff_table.
|
||||
* @details Also creates a histogram representing the frequency of each symbols
|
||||
* @returns Returns the number of symbols written into huffman_rep.
|
||||
* @param huffman_rep: The output run length encoded version of huff_table.
|
||||
* @param histogram: The output histogram of frequencies of elements in huffman_rep.
|
||||
* @param extra_bits: An output table storing extra bits associated with huffman_rep.
|
||||
* @param huff_table: The input huffman_table or concatonation of huffman_tables.
|
||||
* @parma len: The length of huff_table.
|
||||
*/
|
||||
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
|
||||
uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len);
|
||||
|
||||
/**
|
||||
* @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep.
|
||||
* @param huffman_rep: pointer to array containing the output huffman_rep.
|
||||
* @param histogram: histogram of elements seen in huffman_rep.
|
||||
* @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep.
|
||||
* @param huff_table: a concatenated list of huffman lookup tables.
|
||||
* @param current_index: The next spot elements will be written in huffman_rep.
|
||||
*/
|
||||
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
|
||||
uint16_t last_code, uint16_t run_length, uint16_t current_index);
|
||||
int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep, uint32_t length,
|
||||
uint64_t * histogram, uint32_t hlit, uint32_t hdist, uint32_t end_of_block);
|
||||
|
||||
/**
|
||||
* @brief Creates the header for run length encoded huffman trees.
|
||||
@ -303,10 +190,10 @@ uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t *
|
||||
* @param hlit: Length of literal/length table minus 257.
|
||||
* @param hdist: Length of distance table minus 1.
|
||||
*/
|
||||
int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table,
|
||||
uint16_t * huffman_rep, uint16_t * extra_bits,
|
||||
uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen,
|
||||
uint32_t hlit, uint32_t hdist);
|
||||
int create_huffman_header(struct BitBuf2 *header_bitbuf, struct huff_code *lookup_table,
|
||||
struct rl_code * huffman_rep, uint16_t huffman_rep_length,
|
||||
uint32_t end_of_block, uint32_t hclen, uint32_t hlit,
|
||||
uint32_t hdist);
|
||||
|
||||
/**
|
||||
* @brief Creates a two table representation of huffman codes.
|
||||
@ -348,4 +235,18 @@ void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
|
||||
*/
|
||||
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
|
||||
struct huff_code *dist_hufftable);
|
||||
|
||||
/**
|
||||
* @brief Creates a representation of the huffman code from a histogram used to
|
||||
* decompress the intermediate compression format.
|
||||
*
|
||||
* @param bb: bitbuf structure where the huffman code header is written
|
||||
* @param hufftables: output huffman code representation
|
||||
* @param hist: histogram used to generate the huffman code
|
||||
* @param end_of_block: flag whether this is the final huffman code
|
||||
*/
|
||||
void
|
||||
create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf * hufftables,
|
||||
struct isal_mod_hist *hist, uint32_t end_of_block);
|
||||
|
||||
#endif
|
||||
|
@ -168,6 +168,49 @@
|
||||
%endif
|
||||
|
||||
|
||||
; Macros for doing Huffman Encoding
|
||||
|
||||
; Assumes (dist != 0)
|
||||
; Uses RCX, clobbers dist
|
||||
; void compute_dist_icf_code dist, code, tmp1
|
||||
%macro compute_dist_icf_code 3
|
||||
%define %%dist %1 ; IN, clobbered
|
||||
%define %%distq %1
|
||||
%define %%code %2 ; OUT
|
||||
%define %%tmp1 %3 ; scratch register handed to BZHI
|
||||
|
||||
bsr rcx, %%dist ; rcx = msb = bsr(dist)
|
||||
dec rcx ; rcx = num_extra_bits = msb - 1
|
||||
BZHI %%code, %%dist, rcx, %%tmp1 ; presumably code = low num_extra_bits bits of dist (BZHI macro defined elsewhere - confirm)
|
||||
SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
|
||||
lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
|
||||
shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET ; pre-shift extra bits; get_dist_icf_code later shifts the whole value by DIST_OFFSET
|
||||
add %%code, %%dist ; code = extra_bits | sym
|
||||
|
||||
%endm
|
||||
|
||||
; Uses RCX, clobbers dist
|
||||
; get_dist_icf_code dist, code, tmp1
|
||||
%macro get_dist_icf_code 3
|
||||
%define %%dist %1 ; 32-bit IN, clobbered
|
||||
%define %%distq %1 ; 64-bit IN, clobbered
|
||||
%define %%code %2 ; 32-bit OUT
|
||||
%define %%tmp1 %3 ; scratch register forwarded to compute_dist_icf_code
|
||||
|
||||
cmp %%dist, 1
|
||||
jg %%do_compute ; signed compare: only dist > 1 needs the computed mapping
|
||||
|
||||
%ifnidn %%code, %%dist
|
||||
mov %%code, %%dist ; dist <= 1 is used directly as the code
|
||||
%endif
|
||||
jmp %%done
|
||||
%%do_compute:
|
||||
compute_dist_icf_code %%distq, %%code, %%tmp1
|
||||
%%done:
|
||||
shl %%code, DIST_OFFSET ; shift the result up by DIST_OFFSET
|
||||
%endm
|
||||
|
||||
|
||||
; "len" can be same register as "length"
|
||||
; get_len_code length, code, len
|
||||
%macro get_len_code 4
|
||||
|
@ -126,6 +126,48 @@ static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit
|
||||
*len = hufftables->lit_table_sizes[lit];
|
||||
}
|
||||
|
||||
/**
 * @brief Maps a match distance onto its distance symbol and extra-bit value
 * for the intermediate compression format.
 *
 * Only called from get_dist_icf_code() for dist > 2; smaller distances are
 * handled there directly.
 *
 * @param dist: match distance (expected to be greater than 2).
 * @param code: output distance symbol, asserted to be below 30.
 * @param extra_bits: output value of the extra bits belonging to the symbol.
 */
static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
{
	uint32_t msb;
	uint32_t num_extra_bits;

	dist -= 1;
	/* NOTE(review): the arithmetic below only yields valid symbols if bsr()
	 * returns the 1-based index of the highest set bit - confirm. */
	msb = bsr(dist);
	/* msb - 2 is unsigned: any msb below 2 would wrap around and turn the
	 * shifts below into out-of-range shifts, so reject it here. */
	assert(msb >= 2);
	num_extra_bits = msb - 2;
	*extra_bits = dist & ((1u << num_extra_bits) - 1);
	dist >>= num_extra_bits;
	*code = dist + 2 * num_extra_bits;
	assert(*code < 30);
}
|
||||
|
||||
/**
 * @brief Produces the distance symbol and extra-bit value for a match
 * distance in the range [1, 32768].
 *
 * Distances 1 and 2 map straight to symbols 0 and 1 with no extra bits;
 * every larger distance is delegated to compute_dist_icf_code().
 */
static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
{
	assert(dist >= 1);
	assert(dist <= 32768);

	if (dist > 2) {
		compute_dist_icf_code(dist, code, extra_bits);
		return;
	}

	/* Trivial case: the symbol is the distance minus one. */
	*code = dist - 1;
	*extra_bits = 0;
}
|
||||
|
||||
/**
 * @brief Converts a match length (3..258) into its intermediate-format
 * literal/length code, which is the length offset by 254.
 */
static inline void get_len_icf_code(uint32_t length, uint32_t *code)
{
	/* Offset that places match lengths after the literal/EOB codes. */
	const uint32_t len_code_bias = 254;

	assert(length >= 3);
	assert(length <= 258);

	*code = len_code_bias + length;
}
|
||||
|
||||
/**
 * @brief Converts a literal (0..255) or the value 256 into its
 * intermediate-format code; the code is the value itself.
 */
static inline void get_lit_icf_code(uint32_t lit, uint32_t *code)
{
	assert(lit <= 256);

	/* Identity mapping. */
	code[0] = lit;
}
|
||||
|
||||
/**
|
||||
* @brief Returns a hash of the first 3 bytes of input data.
|
||||
*/
|
||||
|
268
igzip/igzip.c
268
igzip/igzip.c
@ -41,10 +41,15 @@
|
||||
#define NON_EMPTY_BLOCK_SIZE 6
|
||||
/* Parenthesized so the sum stays a single operand wherever the macro expands. */
#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE)
|
||||
|
||||
#define MAX_TOKENS (16 * 1024)
|
||||
|
||||
#include "huffman.h"
|
||||
#include "bitbuf2.h"
|
||||
#include "igzip_lib.h"
|
||||
#include "repeated_char_result.h"
|
||||
#include "huff_codes.h"
|
||||
#include "encode_df.h"
|
||||
#include "igzip_level_buf_structs.h"
|
||||
|
||||
extern const uint8_t gzip_hdr[];
|
||||
extern const uint32_t gzip_hdr_bytes;
|
||||
@ -59,6 +64,7 @@ static int write_stored_block_stateless(struct isal_zstream *stream, uint32_t st
|
||||
uint32_t crc32);
|
||||
|
||||
static int write_gzip_header_stateless(struct isal_zstream *stream);
|
||||
static void write_gzip_header(struct isal_zstream *stream);
|
||||
static int write_deflate_header_stateless(struct isal_zstream *stream);
|
||||
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream);
|
||||
|
||||
@ -70,11 +76,15 @@ unsigned int detect_repeated_char(uint8_t * buf, uint32_t size);
|
||||
void isal_deflate_body(struct isal_zstream *stream);
|
||||
void isal_deflate_finish(struct isal_zstream *stream);
|
||||
|
||||
void isal_deflate_icf_body(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish(struct isal_zstream *stream);
|
||||
/*****************************************************************/
|
||||
|
||||
/* Forward declarations */
|
||||
static inline void reset_match_history(struct isal_zstream *stream);
|
||||
void write_header(struct isal_zstream *stream);
|
||||
void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
|
||||
uint32_t deflate_hdr_count, uint32_t extra_bits_count, uint32_t next_state,
|
||||
uint32_t toggle_end_of_stream);
|
||||
void write_deflate_header(struct isal_zstream *stream);
|
||||
void write_trailer(struct isal_zstream *stream);
|
||||
|
||||
@ -157,13 +167,122 @@ static void flush_write_buffer(struct isal_zstream *stream)
|
||||
}
|
||||
}
|
||||
|
||||
static void flush_icf_block(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct BitBuf2 *write_buf = &state->bitbuf;
|
||||
struct deflate_icf *icf_buf_encoded_next;
|
||||
|
||||
set_buf(write_buf, stream->next_out, stream->avail_out);
|
||||
|
||||
#if defined (USE_BITBUF8) || (USE_BITBUF_ELSE)
|
||||
if (!is_full(write_buf))
|
||||
flush_bits(write_buf);
|
||||
#endif
|
||||
|
||||
icf_buf_encoded_next = encode_deflate_icf(level_buf->icf_buf_start + state->count,
|
||||
level_buf->icf_buf_next, write_buf,
|
||||
&level_buf->encode_tables);
|
||||
|
||||
state->count = icf_buf_encoded_next - level_buf->icf_buf_start;
|
||||
stream->next_out = buffer_ptr(write_buf);
|
||||
stream->total_out += buffer_used(write_buf);
|
||||
stream->avail_out -= buffer_used(write_buf);
|
||||
|
||||
if (level_buf->icf_buf_next <= icf_buf_encoded_next) {
|
||||
state->count = 0;
|
||||
if (stream->avail_in == 0 && stream->end_of_stream)
|
||||
state->state = ZSTATE_TRL;
|
||||
else if (stream->avail_in == 0 && stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_SYNC_FLUSH;
|
||||
else
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
}
|
||||
}
|
||||
|
||||
static void init_new_icf_block(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
|
||||
if (stream->level_buf_size >=
|
||||
sizeof(struct level_2_buf) + 100 * sizeof(struct deflate_icf)) {
|
||||
level_buf->icf_buf_next = level_buf->icf_buf_start;
|
||||
level_buf->icf_buf_avail_out =
|
||||
stream->level_buf_size - sizeof(struct level_2_buf) -
|
||||
sizeof(struct deflate_icf);
|
||||
memset(&state->hist, 0, sizeof(struct isal_mod_hist));
|
||||
state->state = ZSTATE_BODY;
|
||||
}
|
||||
}
|
||||
|
||||
static void create_icf_block_hdr(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct BitBuf2 *write_buf = &state->bitbuf;
|
||||
struct BitBuf2 write_buf_tmp;
|
||||
uint32_t out_size = stream->avail_out;
|
||||
uint8_t *end_out = stream->next_out + out_size;
|
||||
/* Write EOB in icf_buf */
|
||||
state->hist.ll_hist[256] = 1;
|
||||
level_buf->icf_buf_next->lit_len = 0x100;
|
||||
level_buf->icf_buf_next->lit_dist = NULL_DIST_SYM;
|
||||
level_buf->icf_buf_next->dist_extra = 0;
|
||||
level_buf->icf_buf_next++;
|
||||
|
||||
state->has_eob_hdr = stream->end_of_stream && !stream->avail_in;
|
||||
if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) {
|
||||
/* Determine whether this is the final block */
|
||||
|
||||
if (stream->gzip_flag == IGZIP_GZIP)
|
||||
write_gzip_header_stateless(stream);
|
||||
|
||||
set_buf(write_buf, stream->next_out, stream->avail_out);
|
||||
|
||||
create_hufftables_icf(write_buf, &level_buf->encode_tables, &state->hist,
|
||||
state->has_eob_hdr);
|
||||
state->state = ZSTATE_FLUSH_ICF_BUFFER;
|
||||
stream->next_out = buffer_ptr(write_buf);
|
||||
stream->total_out += buffer_used(write_buf);
|
||||
stream->avail_out -= buffer_used(write_buf);
|
||||
} else {
|
||||
/* Start writing into temporary buffer */
|
||||
write_buf_tmp.m_bits = write_buf->m_bits;
|
||||
write_buf_tmp.m_bit_count = write_buf->m_bit_count;
|
||||
|
||||
write_buf->m_bits = 0;
|
||||
write_buf->m_bit_count = 0;
|
||||
|
||||
set_buf(&write_buf_tmp, level_buf->deflate_hdr, ISAL_DEF_MAX_HDR_SIZE);
|
||||
|
||||
create_hufftables_icf(&write_buf_tmp, &level_buf->encode_tables,
|
||||
&state->hist, state->has_eob_hdr);
|
||||
|
||||
level_buf->deflate_hdr_count = buffer_used(&write_buf_tmp);
|
||||
level_buf->deflate_hdr_extra_bits = write_buf_tmp.m_bit_count;
|
||||
flush(&write_buf_tmp);
|
||||
|
||||
state->state = ZSTATE_HDR;
|
||||
}
|
||||
}
|
||||
|
||||
static void isal_deflate_pass(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct isal_hufftables *hufftables = stream->hufftables;
|
||||
uint8_t *start_in = stream->next_in;
|
||||
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
|
||||
write_header(stream);
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
|
||||
if (state->count == 0)
|
||||
/* Assume the final header is being written since the header
|
||||
* stored in hufftables is the final header. */
|
||||
state->has_eob_hdr = 1;
|
||||
write_header(stream, hufftables->deflate_hdr, hufftables->deflate_hdr_count,
|
||||
hufftables->deflate_hdr_extra_bits, ZSTATE_BODY,
|
||||
!stream->end_of_stream);
|
||||
}
|
||||
|
||||
if (state->state == ZSTATE_BODY)
|
||||
isal_deflate_body(stream);
|
||||
@ -184,6 +303,52 @@ static void isal_deflate_pass(struct isal_zstream *stream)
|
||||
write_trailer(stream);
|
||||
}
|
||||
|
||||
static void isal_deflate_icf_pass(struct isal_zstream *stream)
|
||||
{
|
||||
uint8_t *start_in = stream->next_in;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
|
||||
do {
|
||||
if (state->state == ZSTATE_NEW_HDR)
|
||||
init_new_icf_block(stream);
|
||||
|
||||
if (state->state == ZSTATE_BODY)
|
||||
isal_deflate_icf_body(stream);
|
||||
|
||||
if (state->state == ZSTATE_FLUSH_READ_BUFFER)
|
||||
isal_deflate_icf_finish(stream);
|
||||
|
||||
if (state->state == ZSTATE_CREATE_HDR)
|
||||
create_icf_block_hdr(stream);
|
||||
|
||||
if (state->state == ZSTATE_HDR)
|
||||
/* Note that the header may be prepended by the
|
||||
* remaining bits in the previous block, as such the
|
||||
* toggle header flag cannot be used */
|
||||
write_header(stream, level_buf->deflate_hdr,
|
||||
level_buf->deflate_hdr_count,
|
||||
level_buf->deflate_hdr_extra_bits,
|
||||
ZSTATE_FLUSH_ICF_BUFFER, 0);
|
||||
|
||||
if (state->state == ZSTATE_FLUSH_ICF_BUFFER)
|
||||
flush_icf_block(stream);
|
||||
|
||||
} while (state->state == ZSTATE_NEW_HDR);
|
||||
|
||||
if (state->state == ZSTATE_SYNC_FLUSH)
|
||||
sync_flush(stream);
|
||||
|
||||
if (state->state == ZSTATE_FLUSH_WRITE_BUFFER)
|
||||
flush_write_buffer(stream);
|
||||
|
||||
if (stream->gzip_flag)
|
||||
state->crc = crc32_gzip(state->crc, start_in, stream->next_in - start_in);
|
||||
|
||||
if (state->state == ZSTATE_TRL)
|
||||
write_trailer(stream);
|
||||
}
|
||||
|
||||
static void isal_deflate_int(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
@ -209,7 +374,10 @@ static void isal_deflate_int(struct isal_zstream *stream)
|
||||
}
|
||||
assert(state->tmp_out_start == state->tmp_out_end);
|
||||
|
||||
isal_deflate_pass(stream);
|
||||
if (stream->level == 0)
|
||||
isal_deflate_pass(stream);
|
||||
else
|
||||
isal_deflate_icf_pass(stream);
|
||||
|
||||
/* Fill temporary output buffer then complete filling output buffer */
|
||||
if (stream->avail_out > 0 && stream->avail_out < 8 && state->state != ZSTATE_NEW_HDR) {
|
||||
@ -225,7 +393,10 @@ static void isal_deflate_int(struct isal_zstream *stream)
|
||||
stream->avail_out = sizeof(state->tmp_out_buff);
|
||||
stream->total_out = 0;
|
||||
|
||||
isal_deflate_pass(stream);
|
||||
if (stream->level == 0)
|
||||
isal_deflate_pass(stream);
|
||||
else
|
||||
isal_deflate_icf_pass(stream);
|
||||
|
||||
state->tmp_out_start = 0;
|
||||
state->tmp_out_end = stream->total_out;
|
||||
@ -304,8 +475,8 @@ static void write_constant_compressed_stateless(struct isal_zstream *stream,
|
||||
|
||||
if (rep_extra >= 230) {
|
||||
write_bits(&state->bitbuf,
|
||||
CODE_280 | ((rep_extra / 2 - 115) << CODE_280_LENGTH),
|
||||
CODE_280_TOTAL_LENGTH);
|
||||
CODE_280 | ((rep_extra / 2 - 115) <<
|
||||
CODE_280_LENGTH), CODE_280_TOTAL_LENGTH);
|
||||
rep_extra -= rep_extra / 2;
|
||||
}
|
||||
|
||||
@ -377,16 +548,34 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream)
|
||||
write_constant_compressed_stateless(stream, repeat_length);
|
||||
}
|
||||
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
|
||||
write_deflate_header_unaligned_stateless(stream);
|
||||
if (stream->level == 0) {
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
|
||||
write_deflate_header_unaligned_stateless(stream);
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
reset_match_history(stream);
|
||||
}
|
||||
|
||||
state->file_start = stream->next_in - stream->total_in;
|
||||
isal_deflate_pass(stream);
|
||||
|
||||
} else if (stream->level == 1) {
|
||||
if (stream->level_buf == NULL || stream->level_buf_size < ISAL_DEF_LVL1_MIN) {
|
||||
/* Default to internal buffer if invalid size is supplied */
|
||||
stream->level_buf = state->buffer;
|
||||
stream->level_buf_size = sizeof(state->buffer);
|
||||
}
|
||||
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
|
||||
return STATELESS_OVERFLOW;
|
||||
reset_match_history(stream);
|
||||
|
||||
reset_match_history(stream);
|
||||
}
|
||||
state->count = 0;
|
||||
state->file_start = stream->next_in - stream->total_in;
|
||||
isal_deflate_icf_pass(stream);
|
||||
|
||||
state->file_start = stream->next_in - stream->total_in;
|
||||
isal_deflate_pass(stream);
|
||||
} else
|
||||
return ISAL_INVALID_LEVEL;
|
||||
|
||||
if (state->state == ZSTATE_END
|
||||
|| (state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH))
|
||||
@ -481,7 +670,11 @@ void isal_deflate_init(struct isal_zstream *stream)
|
||||
stream->total_in = 0;
|
||||
stream->total_out = 0;
|
||||
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
|
||||
stream->flush = 0;
|
||||
stream->level = 0;
|
||||
stream->level_buf = NULL;
|
||||
stream->level_buf_size = 0;
|
||||
stream->end_of_stream = 0;
|
||||
stream->flush = NO_FLUSH;
|
||||
stream->gzip_flag = 0;
|
||||
|
||||
state->b_bytes_valid = 0;
|
||||
@ -536,8 +729,11 @@ void isal_deflate_stateless_init(struct isal_zstream *stream)
|
||||
stream->total_in = 0;
|
||||
stream->total_out = 0;
|
||||
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
|
||||
stream->flush = NO_FLUSH;
|
||||
stream->level = 0;
|
||||
stream->level_buf = NULL;
|
||||
stream->level_buf_size = 0;
|
||||
stream->end_of_stream = 0;
|
||||
stream->flush = NO_FLUSH;
|
||||
stream->gzip_flag = 0;
|
||||
stream->internal_state.state = ZSTATE_NEW_HDR;
|
||||
return;
|
||||
@ -581,6 +777,9 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH)
|
||||
return INVALID_FLUSH;
|
||||
|
||||
if (stream->level != 0 && stream->level != 1)
|
||||
return ISAL_INVALID_LEVEL;
|
||||
|
||||
if (avail_in == 0)
|
||||
stored_len = STORED_BLK_HDR_BZ;
|
||||
else
|
||||
@ -890,13 +1089,14 @@ static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream)
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
void write_header(struct isal_zstream *stream)
|
||||
/* Toggle end of stream only works when deflate header is aligned */
|
||||
void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
|
||||
uint32_t deflate_hdr_count, uint32_t extra_bits_count, uint32_t next_state,
|
||||
uint32_t toggle_end_of_stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct isal_hufftables *hufftables = stream->hufftables;
|
||||
uint64_t hdr_extra_bits = hufftables->deflate_hdr[hufftables->deflate_hdr_count];
|
||||
uint32_t hdr_extra_bits = deflate_hdr[deflate_hdr_count];
|
||||
uint32_t count;
|
||||
|
||||
state->state = ZSTATE_HDR;
|
||||
|
||||
if (state->bitbuf.m_bit_count != 0) {
|
||||
@ -913,19 +1113,18 @@ void write_header(struct isal_zstream *stream)
|
||||
if (stream->gzip_flag == IGZIP_GZIP)
|
||||
write_gzip_header(stream);
|
||||
|
||||
count = hufftables->deflate_hdr_count - state->count;
|
||||
count = deflate_hdr_count - state->count;
|
||||
|
||||
if (count != 0) {
|
||||
if (count > stream->avail_out)
|
||||
count = stream->avail_out;
|
||||
|
||||
memcpy(stream->next_out, hufftables->deflate_hdr + state->count, count);
|
||||
memcpy(stream->next_out, deflate_hdr + state->count, count);
|
||||
|
||||
if (state->count == 0 && count > 0) {
|
||||
if (!stream->end_of_stream)
|
||||
*stream->next_out -= 1;
|
||||
else
|
||||
state->has_eob_hdr = 1;
|
||||
if (toggle_end_of_stream && state->count == 0 && count > 0) {
|
||||
/* Assumes the final block bit is the first bit */
|
||||
*stream->next_out ^= 1;
|
||||
state->has_eob_hdr = !state->has_eob_hdr;
|
||||
}
|
||||
|
||||
stream->next_out += count;
|
||||
@ -933,21 +1132,20 @@ void write_header(struct isal_zstream *stream)
|
||||
stream->total_out += count;
|
||||
state->count += count;
|
||||
|
||||
count = hufftables->deflate_hdr_count - state->count;
|
||||
} else if (hufftables->deflate_hdr_count == 0) {
|
||||
if (!stream->end_of_stream)
|
||||
hdr_extra_bits -= 1;
|
||||
else
|
||||
state->has_eob_hdr = 1;
|
||||
count = deflate_hdr_count - state->count;
|
||||
} else if (toggle_end_of_stream && deflate_hdr_count == 0) {
|
||||
/* Assumes the final block bit is the first bit */
|
||||
hdr_extra_bits ^= 1;
|
||||
state->has_eob_hdr = !state->has_eob_hdr;
|
||||
}
|
||||
|
||||
if ((count == 0) && (stream->avail_out >= 8)) {
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
write_bits(&state->bitbuf, hdr_extra_bits, hufftables->deflate_hdr_extra_bits);
|
||||
write_bits(&state->bitbuf, hdr_extra_bits, extra_bits_count);
|
||||
|
||||
state->state = ZSTATE_BODY;
|
||||
state->state = next_state;
|
||||
state->count = 0;
|
||||
|
||||
count = buffer_used(&state->bitbuf);
|
||||
|
223
igzip/igzip_icf_base.c
Normal file
223
igzip/igzip_icf_base.c
Normal file
@ -0,0 +1,223 @@
|
||||
#include <stdint.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "huffman.h"
|
||||
#include "huff_codes.h"
|
||||
#include "encode_df.h"
|
||||
#include "igzip_level_buf_structs.h"
|
||||
|
||||
static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
|
||||
uint32_t lit_dist, uint32_t extra_bits)
|
||||
{
|
||||
icf->lit_len = lit_len;
|
||||
icf->lit_dist = lit_dist;
|
||||
icf->dist_extra = extra_bits;
|
||||
}
|
||||
|
||||
static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
|
||||
uint8_t * next_in, uint8_t * end_in,
|
||||
struct deflate_icf *start_out, struct deflate_icf *next_out,
|
||||
struct deflate_icf *end_out)
|
||||
{
|
||||
stream->next_in = next_in;
|
||||
stream->total_in += next_in - start_in;
|
||||
stream->avail_in = end_in - next_in;
|
||||
|
||||
((struct level_2_buf *)stream->level_buf)->icf_buf_next = next_out;
|
||||
((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
struct deflate_icf *start_out, *next_out, *end_out;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint32_t code, code2, extra_bits;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *last_seen = state->head;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
return;
|
||||
}
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out =
|
||||
start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - state->file_start);
|
||||
|
||||
/* The -1 are to handle the case when dist = 0 */
|
||||
if (dist - 1 < IGZIP_HIST_SIZE - 1) {
|
||||
assert(dist != 0);
|
||||
|
||||
match_length = compare258(next_in - dist, next_in, 258);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - state->file_start);
|
||||
}
|
||||
|
||||
get_len_icf_code(match_length, &code);
|
||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
||||
|
||||
state->hist.ll_hist[code]++;
|
||||
state->hist.d_hist[code2]++;
|
||||
|
||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
||||
next_out++;
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
state->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
}
|
||||
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
||||
|
||||
assert(stream->avail_in <= ISAL_LOOK_AHEAD);
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
struct deflate_icf *start_out, *next_out, *end_out;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint32_t code, code2, extra_bits;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *last_seen = state->head;
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out = start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
while (next_in < end_in - 3) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - state->file_start);
|
||||
|
||||
if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */
|
||||
match_length = compare258(next_in - dist, next_in, end_in - next_in);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - state->file_start);
|
||||
}
|
||||
|
||||
get_len_icf_code(match_length, &code);
|
||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
||||
|
||||
state->hist.ll_hist[code]++;
|
||||
state->hist.d_hist[code2]++;
|
||||
|
||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
||||
|
||||
next_out++;
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
state->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
while (next_in < end_in) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *next_in;
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
state->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
if (next_in == end_in) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
}
|
||||
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
||||
|
||||
return;
|
||||
}
|
550
igzip/igzip_icf_body.asm
Normal file
550
igzip/igzip_icf_body.asm
Normal file
@ -0,0 +1,550 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%include "options.asm"
|
||||
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
|
||||
%ifdef DEBUG
|
||||
%macro MARK 1
|
||||
global %1
|
||||
%1:
|
||||
%endm
|
||||
%else
|
||||
%macro MARK 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define tmp2 rcx
|
||||
%define hash2 rcx
|
||||
|
||||
%define curr_data rax
|
||||
%define code rax
|
||||
%define tmp5 rax
|
||||
|
||||
%define tmp4 rbx
|
||||
%define dist rbx
|
||||
%define code2 rbx
|
||||
|
||||
%define hash rdx
|
||||
%define len rdx
|
||||
%define code_len3 rdx
|
||||
%define tmp8 rdx
|
||||
|
||||
%define tmp1 rsi
|
||||
%define code_len2 rsi
|
||||
%define code5 rsi
|
||||
|
||||
%define file_start rdi
|
||||
|
||||
%define curr_data2 r8
|
||||
%define len2 r8
|
||||
%define tmp6 r8
|
||||
|
||||
%define f_i r10
|
||||
|
||||
%define m_out_buf r11
|
||||
|
||||
%define f_end_i r12
|
||||
%define dist2 r12
|
||||
%define tmp7 r12
|
||||
%define code4 r12
|
||||
|
||||
%define tmp3 r13
|
||||
%define code3 r13
|
||||
|
||||
%define stream r14
|
||||
|
||||
%define hufftables r15
|
||||
|
||||
;; GPR r8 & r15 can be used
|
||||
|
||||
%define xtmp0 xmm0 ; tmp
|
||||
%define xtmp1 xmm1 ; tmp
|
||||
%define xhash xmm2
|
||||
%define xmask xmm3
|
||||
%define xdata xmm4
|
||||
|
||||
%define ytmp0 ymm0 ; tmp
|
||||
%define ytmp1 ymm1 ; tmp
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
m_out_end equ 0 ; local variable (8 bytes)
|
||||
m_out_start equ 8
|
||||
f_end_i_mem_offset equ 16
|
||||
gpr_save_mem_offset equ 24 ; gpr save area (8*8 bytes)
|
||||
xmm_save_mem_offset equ 24 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||
stack_size equ 3*8 + 8*8 + 4*16
|
||||
;;; 8 because stack address is odd multiple of 8 after a function call and
|
||||
;;; we want it aligned to 16 bytes
|
||||
|
||||
; void isal_deflate_icf_body ( isal_zstream *stream )
|
||||
; arg 1: addr of stream, expected in rcx (for elf64 output it arrives in rdi and is copied to rcx below)
|
||||
global isal_deflate_icf_body_ %+ ARCH
|
||||
isal_deflate_icf_body_ %+ ARCH %+ :
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
|
||||
;; do nothing if (avail_in == 0)
|
||||
cmp dword [rcx + _avail_in], 0
|
||||
jne skip1
|
||||
|
||||
;; Set stream's next state
|
||||
mov rdx, ZSTATE_FLUSH_READ_BUFFER
|
||||
mov rax, ZSTATE_CREATE_HDR
|
||||
cmp dword [rcx + _end_of_stream], 0
|
||||
cmovne rax, rdx
|
||||
cmp dword [rcx + _flush], _NO_FLUSH
|
||||
cmovne rax, rdx
|
||||
mov dword [rcx + _internal_state_state], eax
|
||||
ret
|
||||
skip1:
|
||||
|
||||
%ifdef ALIGN_STACK
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
sub rsp, stack_size
|
||||
and rsp, ~15
|
||||
%else
|
||||
sub rsp, stack_size
|
||||
%endif
|
||||
|
||||
mov [rsp + gpr_save_mem_offset + 0*8], rbx
|
||||
mov [rsp + gpr_save_mem_offset + 1*8], rsi
|
||||
mov [rsp + gpr_save_mem_offset + 2*8], rdi
|
||||
mov [rsp + gpr_save_mem_offset + 3*8], rbp
|
||||
mov [rsp + gpr_save_mem_offset + 4*8], r12
|
||||
mov [rsp + gpr_save_mem_offset + 5*8], r13
|
||||
mov [rsp + gpr_save_mem_offset + 6*8], r14
|
||||
mov [rsp + gpr_save_mem_offset + 7*8], r15
|
||||
|
||||
mov stream, rcx
|
||||
mov dword [stream + _internal_state_has_eob], 0
|
||||
|
||||
MOVDQU xmask, [mask]
|
||||
|
||||
; m_out_buf = level_buf->icf_buf_next; m_out_end = m_out_buf + level_buf->icf_buf_avail_out - SLOP
|
||||
mov tmp1, [stream + _level_buf]
|
||||
mov m_out_buf, [tmp1 + _icf_buf_next]
|
||||
|
||||
mov [rsp + m_out_start], m_out_buf
|
||||
mov tmp1, [tmp1 + _icf_buf_avail_out]
|
||||
add tmp1, m_out_buf
|
||||
sub tmp1, SLOP
|
||||
|
||||
mov [rsp + m_out_end], tmp1
|
||||
|
||||
mov hufftables, [stream + _hufftables]
|
||||
|
||||
mov file_start, [stream + _next_in]
|
||||
|
||||
mov f_i %+ d, dword [stream + _total_in]
|
||||
sub file_start, f_i
|
||||
|
||||
mov f_end_i %+ d, [stream + _avail_in]
|
||||
add f_end_i, f_i
|
||||
|
||||
; f_end_i -= LA;
|
||||
sub f_end_i, LA
|
||||
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
; if (f_end_i <= f_i) goto input_end;
|
||||
|
||||
cmp f_end_i, f_i
|
||||
jle input_end
|
||||
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
MARK __body_compute_hash_ %+ ARCH
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov tmp3, curr_data
|
||||
mov tmp6, curr_data
|
||||
|
||||
compute_hash hash, curr_data
|
||||
|
||||
shr tmp3, 8
|
||||
compute_hash hash2, tmp3
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
|
||||
cmp dword [stream + _internal_state_has_hist], 0
|
||||
je write_first_byte
|
||||
|
||||
jmp loop2
|
||||
align 16
|
||||
|
||||
loop2:
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja output_end
|
||||
|
||||
xor dist, dist
|
||||
xor dist2, dist2
|
||||
xor tmp3, tmp3
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
|
||||
mov dist %+ w, f_i %+ w
|
||||
dec dist
|
||||
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
inc f_i
|
||||
|
||||
MOVQ tmp6, xdata
|
||||
shr tmp5, 16
|
||||
mov tmp8, tmp5
|
||||
compute_hash tmp6, tmp5
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
dec dist2
|
||||
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
|
||||
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
|
||||
|
||||
; if ((dist-1) < (D-1)) {
|
||||
and dist %+ d, (D-1)
|
||||
neg dist
|
||||
|
||||
shr tmp8, 8
|
||||
compute_hash tmp2, tmp8
|
||||
|
||||
and dist2 %+ d, (D-1)
|
||||
neg dist2
|
||||
|
||||
MARK __body_compare_ %+ ARCH
|
||||
;; Check for long len/dist match (>7) with first literal
|
||||
MOVQ len, xdata
|
||||
mov curr_data, len
|
||||
PSRLDQ xdata, 1
|
||||
xor len, [tmp1 + dist - 1]
|
||||
jz compare_loop
|
||||
|
||||
MOVD xhash, tmp6 %+ d
|
||||
PINSRD xhash, tmp2 %+ d, 1
|
||||
PAND xhash, xhash, xmask
|
||||
|
||||
;; Check for len/dist match (>7) with second literal
|
||||
MOVQ len2, xdata
|
||||
xor len2, [tmp1 + dist2]
|
||||
jz compare_loop2
|
||||
|
||||
;; Check for len/dist match for first literal
|
||||
test len %+ d, 0xFFFFFFFF
|
||||
jz len_dist_huffman_pre
|
||||
|
||||
;; Check for len/dist match for second literal
|
||||
test len2 %+ d, 0xFFFFFFFF
|
||||
jnz write_lit_bits
|
||||
|
||||
MARK __body_len_dist_lit_huffman_ %+ ARCH
|
||||
len_dist_lit_huffman_pre:
|
||||
bsf len2, len2
|
||||
shr len2, 3
|
||||
|
||||
len_dist_lit_huffman:
|
||||
neg dist2
|
||||
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp4, dist2
|
||||
get_dist_icf_code tmp4, code4, tmp1 ;; clobbers dist, rcx
|
||||
%else
|
||||
get_dist_icf_code dist2, code4, tmp1
|
||||
%endif
|
||||
|
||||
movzx code5, curr_data %+ b
|
||||
;; get_len_code
|
||||
add f_i, len2
|
||||
neg len2
|
||||
|
||||
MOVQ tmp5, xdata
|
||||
shr tmp5, 24
|
||||
compute_hash tmp4, tmp5
|
||||
and tmp4, HASH_MASK
|
||||
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
|
||||
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
|
||||
MOVD hash %+ d, xhash
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
compute_hash hash, curr_data
|
||||
|
||||
add tmp3,1
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
|
||||
neg len2
|
||||
add len2, 254
|
||||
or code4, len2
|
||||
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*code5]
|
||||
or code5, LIT
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*len2]
|
||||
write_dword code5, m_out_buf
|
||||
write_dword code4, m_out_buf
|
||||
shr code4, DIST_OFFSET
|
||||
and code4, 0x1F
|
||||
inc word [stream + _internal_state_hist_dist + 2*code4]
|
||||
|
||||
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
|
||||
mov curr_data2, curr_data
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
jmp input_end
|
||||
;; encode as dist/len
|
||||
|
||||
MARK __body_len_dist_huffman_ %+ ARCH
|
||||
len_dist_huffman_pre:
|
||||
bsf len, len
|
||||
shr len, 3
|
||||
|
||||
len_dist_huffman:
|
||||
dec f_i
|
||||
neg dist
|
||||
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp3, dist ; since code2 and dist are rbx
|
||||
get_dist_icf_code tmp3, code2, tmp1 ;; clobbers dist, rcx
|
||||
%else
|
||||
get_dist_icf_code dist, code2, tmp1
|
||||
%endif
|
||||
|
||||
; get_len_code(len, &code, &code_len);
|
||||
lea code5, [len + 254]
|
||||
|
||||
or code2, code5
|
||||
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 2] ; tmp3 <= k
|
||||
add f_i, len
|
||||
|
||||
MOVD hash %+ d, xhash
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
add tmp3,1
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*code5]
|
||||
write_dword code2, m_out_buf
|
||||
shr code2, DIST_OFFSET
|
||||
and code2, 0x1F
|
||||
inc word [stream + _internal_state_hist_dist + 2*code2]
|
||||
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
jmp input_end
|
||||
|
||||
MARK __body_write_lit_bits_ %+ ARCH
|
||||
write_lit_bits:
|
||||
movzx code3, curr_data %+ b
|
||||
shr curr_data, 8
|
||||
and curr_data, 0xff
|
||||
mov code2, curr_data
|
||||
|
||||
MOVDQU xdata, [file_start + f_i + 1]
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
add f_i, 1
|
||||
mov curr_data, [file_start + f_i]
|
||||
|
||||
MOVD hash %+ d, xhash
|
||||
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*code3]
|
||||
or code3, LIT
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*code2]
|
||||
or code2, LIT
|
||||
write_dword code3, m_out_buf
|
||||
write_dword code2, m_out_buf
|
||||
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
|
||||
;; Reached once f_i >= f_end_i (f_end_i is the end of input minus LA).
;; Selects the stream's next state:
;;   ZSTATE_FLUSH_READ_BUFFER if end_of_stream != 0 or flush != _NO_FLUSH,
;;   ZSTATE_BODY otherwise,
;; then falls into the common exit path at "end".
input_end:
|
||||
mov tmp1, ZSTATE_FLUSH_READ_BUFFER
|
||||
;; default next state when no flush / end of stream is pending
mov tmp5, ZSTATE_BODY
|
||||
cmp dword [stream + _end_of_stream], 0
|
||||
cmovne tmp5, tmp1
|
||||
cmp dword [stream + _flush], _NO_FLUSH
|
||||
cmovne tmp5, tmp1
|
||||
mov dword [stream + _internal_state_state], tmp5 %+ d
|
||||
jmp end
|
||||
|
||||
output_end:
|
||||
mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
|
||||
|
||||
;; Common exit path: write the input/output bookkeeping back to the stream and
;; the level buffer, restore the registers saved in the prologue and return.
end:
|
||||
;; update input buffer
|
||||
;; undo the earlier "f_end_i -= LA" so f_end_i is the real end of input again
add f_end_i, LA
|
||||
;; total_in = f_i
mov [stream + _total_in], f_i %+ d
|
||||
;; next_in = file_start + f_i
add file_start, f_i
|
||||
mov [stream + _next_in], file_start
|
||||
;; avail_in = f_end_i - f_i
sub f_end_i, f_i
|
||||
mov [stream + _avail_in], f_end_i %+ d
|
||||
|
||||
;; update output buffer
|
||||
mov tmp1, [stream + _level_buf]
|
||||
;; icf_buf_next = current output pointer
mov [tmp1 + _icf_buf_next], m_out_buf
|
||||
;; m_out_buf now holds the number of bytes written during this call
sub m_out_buf, [rsp + m_out_start]
|
||||
;; icf_buf_avail_out is read as a qword in the prologue, so subtract with the
;; full 64-bit register; a 32-bit subtract would not propagate a borrow into
;; the upper half of the field.
sub [tmp1 + _icf_buf_avail_out], m_out_buf
|
||||
|
||||
;; restore the registers saved in the prologue
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
|
||||
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
|
||||
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
|
||||
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
|
||||
mov r12, [rsp + gpr_save_mem_offset + 4*8]
|
||||
mov r13, [rsp + gpr_save_mem_offset + 5*8]
|
||||
mov r14, [rsp + gpr_save_mem_offset + 6*8]
|
||||
mov r15, [rsp + gpr_save_mem_offset + 7*8]
|
||||
|
||||
%ifndef ALIGN_STACK
|
||||
add rsp, stack_size
|
||||
%else
|
||||
;; rbp (just reloaded from its save slot) holds the rsp value captured in the
;; prologue before the stack was aligned
mov rsp, rbp
|
||||
pop rbp
|
||||
%endif
|
||||
ret
|
||||
|
||||
MARK __body_compare_loops_ %+ ARCH
|
||||
compare_loop:
|
||||
MOVD xhash, tmp6 %+ d
|
||||
PINSRD xhash, tmp2 %+ d, 1
|
||||
PAND xhash, xhash, xmask
|
||||
lea tmp2, [tmp1 + dist - 1]
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
jmp len_dist_huffman
|
||||
|
||||
compare_loop2:
|
||||
lea tmp2, [tmp1 + dist2]
|
||||
add tmp1, 1
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len2, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
jmp len_dist_lit_huffman
|
||||
|
||||
MARK __write_first_byte_ %+ ARCH
|
||||
;; Entered from the prologue when internal_state.has_hist == 0: the byte at f_i
;; is emitted as a literal without any match search, and has_hist is set so
;; later calls go straight to loop2.
;; On entry hash/hash2 hold the masked hashes computed in the prologue from the
;; data at f_i and from that data shifted right by 8 bits; tmp6 holds a copy of
;; the same 8 bytes of data.
write_first_byte:
|
||||
;; stop if the output pointer is already past m_out_end
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja output_end
|
||||
|
||||
mov dword [stream + _internal_state_has_hist], 1
|
||||
|
||||
;; head[hash] = (uint16_t) f_i
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
;; shift the hash pipeline by one byte: the old hash2 becomes hash, and a new
;; hash2 is computed from the data shifted right by 16 bits
mov hash, hash2
|
||||
shr tmp6, 16
|
||||
compute_hash hash2, tmp6
|
||||
|
||||
;; literal = low byte of curr_data; count it in the lit/len histogram and tag
;; it with LIT before writing it out
and curr_data, 0xff
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*curr_data]
|
||||
or curr_data, LIT
|
||||
|
||||
;; NOTE(review): write_dword is defined elsewhere; presumably it stores the
;; dword at m_out_buf and advances m_out_buf - confirm
write_dword curr_data, m_out_buf
|
||||
;; advance one input byte and reload the data used by loop2
MOVDQU xdata, [file_start + f_i + 1]
|
||||
add f_i, 1
|
||||
mov curr_data, [file_start + f_i]
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
;; continue with the main loop while f_i < f_end_i, otherwise finish the call
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl loop2
|
||||
jmp input_end
|
||||
|
||||
section .data
|
||||
align 16
|
||||
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
|
||||
const_D: dq D
|
7
igzip/igzip_icf_body_01.asm
Normal file
7
igzip/igzip_icf_body_01.asm
Normal file
@ -0,0 +1,7 @@
|
||||
;; Wrapper that assembles igzip_icf_body.asm as the "01" variant.
;; ARCH is pasted onto the exported symbol name (isal_deflate_icf_body_ %+ ARCH).
%define ARCH 01
|
||||
|
||||
;; COMPARE_TYPE 2 selects the compare250_x macro (xmm temporaries) in
;; igzip_icf_body.asm, unless the build already defined COMPARE_TYPE.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
7
igzip/igzip_icf_body_02.asm
Normal file
7
igzip/igzip_icf_body_02.asm
Normal file
@ -0,0 +1,7 @@
|
||||
;; Wrapper that assembles igzip_icf_body.asm as the "02" variant.
;; ARCH is pasted onto the exported symbol name (isal_deflate_icf_body_ %+ ARCH).
%define ARCH 02
|
||||
|
||||
;; COMPARE_TYPE 2 selects the compare250_x macro (xmm temporaries) in
;; igzip_icf_body.asm, unless the build already defined COMPARE_TYPE.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
8
igzip/igzip_icf_body_04.asm
Normal file
8
igzip/igzip_icf_body_04.asm
Normal file
@ -0,0 +1,8 @@
|
||||
;; Wrapper that assembles igzip_icf_body.asm as the "04" variant.
;; ARCH is pasted onto the exported symbol name (isal_deflate_icf_body_ %+ ARCH).
%define ARCH 04
|
||||
;; NOTE(review): USE_HSWNI is not referenced directly in igzip_icf_body.asm;
;; presumably it is consumed by one of the included macro files - confirm
%define USE_HSWNI
|
||||
|
||||
;; COMPARE_TYPE 3 selects the compare250_y macro (ymm temporaries) in
;; igzip_icf_body.asm, unless the build already defined COMPARE_TYPE.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 3
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
299
igzip/igzip_icf_finish.asm
Normal file
299
igzip/igzip_icf_finish.asm
Normal file
@ -0,0 +1,299 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define curr_data rax
|
||||
%define tmp1 rax
|
||||
|
||||
%define f_index rbx
|
||||
%define code rbx
|
||||
%define tmp4 rbx
|
||||
%define tmp5 rbx
|
||||
%define tmp6 rbx
|
||||
|
||||
%define tmp2 rcx
|
||||
%define hash rcx
|
||||
|
||||
%define tmp3 rdx
|
||||
|
||||
%define stream rsi
|
||||
|
||||
%define f_i rdi
|
||||
|
||||
%define code_len2 rbp
|
||||
|
||||
%define m_out_buf r8
|
||||
|
||||
%define dist r10
|
||||
|
||||
%define code2 r12
|
||||
%define f_end_i r12
|
||||
|
||||
%define file_start r13
|
||||
|
||||
%define len r14
|
||||
|
||||
%define hufftables r15
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
f_end_i_mem_offset equ 0 ; local variable (8 bytes)
|
||||
m_out_end equ 8
|
||||
m_out_start equ 16
|
||||
stack_size equ 32
|
||||
; void isal_deflate_icf_finish ( isal_zstream *stream )
|
||||
; arg 1: addr of stream, expected in rcx (for elf64 output it arrives in rdi and is copied to rcx below)
|
||||
global isal_deflate_icf_finish_01
|
||||
isal_deflate_icf_finish_01:
|
||||
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
|
||||
sub rsp, stack_size
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
|
||||
mov stream, rcx
|
||||
|
||||
; m_out_buf = level_buf->icf_buf_next; m_out_end = m_out_buf + level_buf->icf_buf_avail_out - 4
|
||||
mov tmp1, [stream + _level_buf]
|
||||
mov m_out_buf, [tmp1 + _icf_buf_next]
|
||||
mov [rsp + m_out_start], m_out_buf
|
||||
mov tmp1, [tmp1 + _icf_buf_avail_out]
|
||||
add tmp1, m_out_buf
|
||||
sub tmp1, 4
|
||||
|
||||
mov [rsp + m_out_end], tmp1
|
||||
|
||||
mov hufftables, [stream + _hufftables]
|
||||
|
||||
mov file_start, [stream + _next_in]
|
||||
|
||||
mov f_i %+ d, dword [stream + _total_in]
|
||||
sub file_start, f_i
|
||||
|
||||
mov f_end_i %+ d, dword [stream + _avail_in]
|
||||
add f_end_i, f_i
|
||||
|
||||
sub f_end_i, LAST_BYTES_COUNT
|
||||
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
cmp f_i, f_end_i
|
||||
jge end_loop_2
|
||||
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
|
||||
cmp dword [stream + _internal_state_has_hist], 0
|
||||
jne skip_write_first_byte
|
||||
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
mov dword [stream + _internal_state_has_hist], 1
|
||||
jmp encode_literal
|
||||
|
||||
skip_write_first_byte:
|
||||
|
||||
loop2:
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||
|
||||
; state->head[hash] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
; dist = f_i - f_index; // mod 64k
|
||||
mov dist %+ d, f_i %+ d
|
||||
sub dist %+ d, f_index %+ d
|
||||
and dist %+ d, 0xFFFF
|
||||
|
||||
; if ((dist-1) < (D-1)) {
|
||||
mov tmp1 %+ d, dist %+ d
|
||||
sub tmp1 %+ d, 1
|
||||
cmp tmp1 %+ d, (D-1)
|
||||
jae encode_literal
|
||||
|
||||
; len = f_end_i - f_i;
|
||||
mov tmp4, [rsp + f_end_i_mem_offset]
|
||||
sub tmp4, f_i
|
||||
add tmp4, LAST_BYTES_COUNT
|
||||
|
||||
; if (len > 258) len = 258;
|
||||
cmp tmp4, 258
|
||||
cmovg tmp4, [c258]
|
||||
|
||||
; len = compare(state->file_start + f_i,
|
||||
; state->file_start + f_i - dist, len);
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
compare tmp4, tmp1, tmp2, len, tmp3
|
||||
|
||||
; if (len >= SHORTEST_MATCH) {
|
||||
cmp len, SHORTEST_MATCH
|
||||
jb encode_literal
|
||||
|
||||
;; encode as dist/len
|
||||
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
dec dist
|
||||
get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx
|
||||
|
||||
;; get_len_code
|
||||
lea code, [len + 254]
|
||||
|
||||
or code2, code
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*code]
|
||||
|
||||
; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
|
||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
add f_i, len
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jae skip_hash_update
|
||||
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
skip_hash_update:
|
||||
write_dword code2, m_out_buf
|
||||
shr code2, DIST_OFFSET
|
||||
and code2, 0x1F
|
||||
inc word [stream + _internal_state_hist_dist + 2*code2]
|
||||
; continue
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
encode_literal:
|
||||
; emit state->file_start[f_i] as a literal: count it in the lit/len histogram, tag it with LIT, write the dword
|
||||
movzx tmp5, byte [file_start + f_i]
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*tmp5]
|
||||
or tmp5, LIT
|
||||
write_dword tmp5, m_out_buf
|
||||
; continue
|
||||
add f_i, 1
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl loop2
|
||||
|
||||
end_loop_2:
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
add f_end_i, LAST_BYTES_COUNT
|
||||
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
|
||||
cmp f_i, f_end_i
|
||||
jge input_end
|
||||
|
||||
xor tmp5, tmp5
|
||||
final_bytes:
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja out_end
|
||||
|
||||
movzx tmp5, byte [file_start + f_i]
|
||||
inc word [stream + _internal_state_hist_lit_len + 2*tmp5]
|
||||
or tmp5, LIT
|
||||
write_dword tmp5, m_out_buf
|
||||
|
||||
inc f_i
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl final_bytes
|
||||
|
||||
;; All input has been consumed (f_i >= f_end_i).
;; If end_of_stream != 0 or flush != _NO_FLUSH, fall through to out_end, which
;; sets the next state to ZSTATE_CREATE_HDR; otherwise jump to "end" and leave
;; the state field untouched.
input_end:
|
||||
cmp dword [stream + _end_of_stream], 0
|
||||
jne out_end
|
||||
cmp dword [stream + _flush], _NO_FLUSH
|
||||
jne out_end
|
||||
jmp end
|
||||
|
||||
;; Also reached from final_bytes when the output pointer is past m_out_end.
out_end:
|
||||
mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
|
||||
end:
|
||||
;; Update input buffer
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
mov [stream + _total_in], f_i %+ d
|
||||
add file_start, f_i
|
||||
mov [stream + _next_in], file_start
|
||||
sub f_end_i, f_i
|
||||
mov [stream + _avail_in], f_end_i %+ d
|
||||
|
||||
;; Update output buffer
|
||||
mov tmp1, [stream + _level_buf]
|
||||
mov [tmp1 + _icf_buf_next], m_out_buf
|
||||
|
||||
; len = m_out_buf - m_out_start;  (bytes written to the icf buffer during this call)
|
||||
sub m_out_buf, [rsp + m_out_start]
|
||||
|
||||
; level_buf->icf_buf_avail_out -= len;
|
||||
sub [tmp1 + _icf_buf_avail_out], m_out_buf
|
||||
|
||||
add rsp, stack_size
|
||||
POP_ALL
|
||||
ret
|
||||
|
||||
section .data
|
||||
align 4
|
||||
c258: dq 258
|
16
igzip/igzip_level_buf_structs.h
Normal file
16
igzip/igzip_level_buf_structs.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef IGZIP_LEVEL_BUF_STRUCTS_H
|
||||
#define IGZIP_LEVEL_BUF_STRUCTS_H
|
||||
|
||||
#include "huff_codes.h"
|
||||
#include "encode_df.h"
|
||||
|
||||
/*
 * Layout of the level buffer reached through stream->level_buf by the icf
 * body/finish assembly routines. The assembly addresses icf_buf_next and
 * icf_buf_avail_out by offset, so field order and sizes must not change
 * without updating the matching assembly definitions.
 */
struct level_2_buf {
|
||||
/* NOTE(review): presumably the Huffman tables used to encode the ICF records - confirm */
struct hufftables_icf encode_tables;
|
||||
/* NOTE(review): presumably the length of deflate_hdr; units not visible here - confirm */
uint32_t deflate_hdr_count;
|
||||
/* NOTE(review): presumably the number of header bits beyond deflate_hdr_count - confirm */
uint32_t deflate_hdr_extra_bits;
|
||||
/* Storage for a deflate block header, at most ISAL_DEF_MAX_HDR_SIZE bytes */
uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
|
||||
/* Next write position in the ICF buffer; the icf routines load it on entry
 * and store the advanced pointer back on exit */
struct deflate_icf *icf_buf_next;
|
||||
/* Remaining ICF buffer space in bytes: the icf routines add it to
 * icf_buf_next as a byte offset and subtract the bytes they wrote */
uint64_t icf_buf_avail_out;
|
||||
/* Zero-length trailing array (GNU extension) marking the start of the ICF
 * records. NOTE(review): the C99 spelling would be a flexible array member
 * "icf_buf_start[]"; confirm no user embeds this struct before changing */
struct deflate_icf icf_buf_start[0];
|
||||
};
|
||||
#endif
|
@ -45,10 +45,21 @@ extern isal_deflate_body_04
|
||||
extern isal_deflate_finish_base
|
||||
extern isal_deflate_finish_01
|
||||
|
||||
|
||||
extern isal_deflate_icf_body_base
|
||||
extern isal_deflate_icf_body_01
|
||||
extern isal_deflate_icf_body_02
|
||||
extern isal_deflate_icf_body_04
|
||||
extern isal_deflate_icf_finish_base
|
||||
extern isal_deflate_icf_finish_01
|
||||
|
||||
extern isal_update_histogram_base
|
||||
extern isal_update_histogram_01
|
||||
extern isal_update_histogram_04
|
||||
|
||||
extern encode_deflate_icf_base
|
||||
extern encode_deflate_icf_04
|
||||
|
||||
extern crc32_gzip_base
|
||||
extern crc32_gzip_01
|
||||
|
||||
@ -61,8 +72,16 @@ mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body
|
||||
mbin_interface isal_deflate_finish
|
||||
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
||||
|
||||
mbin_interface isal_deflate_icf_body
|
||||
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_01, isal_deflate_icf_body_02, isal_deflate_icf_body_04
|
||||
mbin_interface isal_deflate_icf_finish
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish, isal_deflate_icf_finish_base, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01
|
||||
|
||||
mbin_interface isal_update_histogram
|
||||
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
|
||||
|
||||
mbin_interface encode_deflate_icf
|
||||
mbin_dispatch_init5 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04
|
||||
|
||||
mbin_interface crc32_gzip
|
||||
mbin_dispatch_init5 crc32_gzip, crc32_gzip_base, crc32_gzip_base, crc32_gzip_01, crc32_gzip_01
|
||||
|
@ -676,7 +676,8 @@ void set_random_hufftable(struct isal_zstream *stream)
|
||||
* output buffer are randomly segmented to test state information for the
|
||||
* compression*/
|
||||
int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
|
||||
uint32_t level)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
uint8_t *in_buf = NULL, *out_buf = NULL;
|
||||
@ -685,6 +686,8 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
struct isal_zstream stream;
|
||||
struct isal_zstate *state = &stream.internal_state;
|
||||
uint32_t loop_count = 0;
|
||||
uint32_t level_buf_size;
|
||||
uint8_t *level_buf = NULL;
|
||||
|
||||
#ifdef VERBOSE
|
||||
printf("Starting Compress Multi Pass\n");
|
||||
@ -704,6 +707,15 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
stream.avail_in = 0;
|
||||
stream.avail_out = 0;
|
||||
stream.gzip_flag = gzip_flag;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
loop_count++;
|
||||
@ -797,6 +809,8 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
|
||||
}
|
||||
|
||||
if (level_buf != NULL)
|
||||
free(level_buf);
|
||||
if (in_buf != NULL)
|
||||
free(in_buf);
|
||||
if (out_buf != NULL)
|
||||
@ -812,11 +826,14 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
|
||||
/* Compress the input data into the outbuffer in one call to isal_deflate */
|
||||
int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
|
||||
uint32_t level)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
struct isal_zstream stream;
|
||||
struct isal_zstate *state = &stream.internal_state;
|
||||
uint32_t level_buf_size;
|
||||
uint8_t *level_buf = NULL;
|
||||
|
||||
#ifdef VERBOSE
|
||||
printf("Starting Compress Single Pass\n");
|
||||
@ -838,11 +855,23 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
|
||||
stream.next_out = compressed_buf;
|
||||
stream.end_of_stream = 1;
|
||||
stream.gzip_flag = gzip_flag;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
|
||||
ret =
|
||||
isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size,
|
||||
data_size, compressed_buf, *compressed_size, 0);
|
||||
|
||||
if (level_buf != NULL)
|
||||
free(level_buf);
|
||||
|
||||
/* Check if the compression is completed */
|
||||
if (state->state == ZSTATE_END)
|
||||
*compressed_size = stream.total_out;
|
||||
@ -855,10 +884,13 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
|
||||
|
||||
/* Statelessly compress the input buffer into the output buffer */
|
||||
int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
|
||||
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
|
||||
uint32_t level)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
struct isal_zstream stream;
|
||||
uint32_t level_buf_size;
|
||||
uint8_t *level_buf = NULL;
|
||||
|
||||
create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
|
||||
|
||||
@ -874,9 +906,23 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
|
||||
stream.avail_out = *compressed_size;
|
||||
stream.next_out = compressed_buf;
|
||||
stream.gzip_flag = gzip_flag;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE;
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
}
|
||||
|
||||
ret = isal_deflate_stateless(&stream);
|
||||
|
||||
if (level_buf != NULL)
|
||||
free(level_buf);
|
||||
|
||||
/* verify the stream */
|
||||
if (stream.next_in - data != stream.total_in ||
|
||||
stream.total_in + stream.avail_in != data_size)
|
||||
@ -911,11 +957,11 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
|
||||
|
||||
/* Statelessly compress the input buffer into the output buffer */
|
||||
int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
|
||||
uint32_t * compressed_size)
|
||||
uint32_t * compressed_size, uint32_t level)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
uint8_t *in_buf = NULL, *out_buf = compressed_buf;
|
||||
uint32_t in_size = 0;
|
||||
uint8_t *in_buf = NULL, *level_buf = NULL, *out_buf = compressed_buf;
|
||||
uint32_t in_size = 0, level_buf_size;
|
||||
uint32_t in_processed = 00;
|
||||
struct isal_zstream stream;
|
||||
uint32_t loop_count = 0;
|
||||
@ -932,6 +978,17 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
|
||||
stream.end_of_stream = 0;
|
||||
stream.avail_out = *compressed_size;
|
||||
stream.next_out = compressed_buf;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE;
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
loop_count++;
|
||||
@ -992,6 +1049,9 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
|
||||
|
||||
}
|
||||
|
||||
if (level_buf != NULL)
|
||||
free(level_buf);
|
||||
|
||||
if (in_buf != NULL)
|
||||
free(in_buf);
|
||||
|
||||
@ -1006,11 +1066,11 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
|
||||
* is randomly segmented to test for independence of blocks in full flush
|
||||
* compression*/
|
||||
int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
|
||||
uint32_t * compressed_size, uint32_t gzip_flag)
|
||||
uint32_t * compressed_size, uint32_t gzip_flag, uint32_t level)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
uint8_t *in_buf = NULL, *out_buf = compressed_buf;
|
||||
uint32_t in_size = 0;
|
||||
uint8_t *in_buf = NULL, *out_buf = compressed_buf, *level_buf = NULL;
|
||||
uint32_t in_size = 0, level_buf_size;
|
||||
uint32_t in_processed = 00;
|
||||
struct isal_zstream stream;
|
||||
struct isal_zstate *state = &stream.internal_state;
|
||||
@ -1033,6 +1093,17 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
stream.next_out = compressed_buf;
|
||||
stream.total_out = 0;
|
||||
stream.gzip_flag = gzip_flag;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
loop_count++;
|
||||
@ -1098,6 +1169,9 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
|
||||
}
|
||||
|
||||
if (level_buf != NULL)
|
||||
free(level_buf);
|
||||
|
||||
if (in_buf != NULL)
|
||||
free(in_buf);
|
||||
|
||||
@ -1173,11 +1247,12 @@ int compress_swap_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_type)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
uint32_t z_size, overflow, gzip_flag;
|
||||
uint32_t z_size, overflow, gzip_flag, level;
|
||||
uint8_t *z_buf = NULL;
|
||||
uint8_t *in_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 3;
|
||||
level = rand() % 2;
|
||||
|
||||
if (in_size != 0) {
|
||||
in_buf = malloc(in_size);
|
||||
@ -1203,7 +1278,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
/* If flush type is invalid */
|
||||
if (flush_type != NO_FLUSH && flush_type != FULL_FLUSH) {
|
||||
ret =
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag,
|
||||
level);
|
||||
|
||||
if (ret != INVALID_FLUSH_ERROR)
|
||||
print_error(ret);
|
||||
@ -1220,14 +1296,15 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
}
|
||||
|
||||
/* Else test valid flush type */
|
||||
ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
ret =
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
|
||||
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
|
||||
|
||||
#ifdef VERBOSE
|
||||
if (ret) {
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1261,12 +1338,13 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
|
||||
create_rand_repeat_data(z_buf, z_size);
|
||||
|
||||
ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
ret =
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
|
||||
#ifdef VERBOSE
|
||||
if (ret) {
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1292,7 +1370,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
}
|
||||
|
||||
overflow =
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag,
|
||||
level);
|
||||
|
||||
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
|
||||
#ifdef VERBOSE
|
||||
@ -1304,7 +1383,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
printf("inflate ret = %d\n", overflow);
|
||||
print_error(overflow);
|
||||
}
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level,
|
||||
gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1338,7 +1418,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
create_rand_repeat_data(z_buf, z_size);
|
||||
|
||||
/* Else test valid flush type */
|
||||
ret = compress_stateless_full_flush(in_buf, in_size, z_buf, &z_size);
|
||||
ret = compress_stateless_full_flush(in_buf, in_size, z_buf, &z_size, level);
|
||||
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, 0);
|
||||
@ -1348,7 +1428,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
print_error(ret);
|
||||
#ifdef VERBOSE
|
||||
if (ret) {
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level,
|
||||
gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1369,7 +1450,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK, fin_ret = IGZIP_COMP_OK;
|
||||
uint32_t overflow = 0, gzip_flag;
|
||||
uint32_t overflow = 0, gzip_flag, level;
|
||||
uint32_t z_size, z_size_max, z_compressed_size;
|
||||
uint8_t *z_buf = NULL;
|
||||
|
||||
@ -1384,6 +1465,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
}
|
||||
|
||||
gzip_flag = rand() % 3;
|
||||
level = rand() % 2;
|
||||
if (gzip_flag)
|
||||
z_size_max += gzip_extra_bytes;
|
||||
|
||||
@ -1396,14 +1478,15 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
}
|
||||
create_rand_repeat_data(z_buf, z_size);
|
||||
|
||||
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
|
||||
gzip_flag, level);
|
||||
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
|
||||
|
||||
if (ret) {
|
||||
#ifdef VERBOSE
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1419,14 +1502,15 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
z_size = z_size_max;
|
||||
create_rand_repeat_data(z_buf, z_size_max);
|
||||
|
||||
ret = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
ret =
|
||||
compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
|
||||
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
|
||||
|
||||
if (ret) {
|
||||
#ifdef VERBOSE
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1447,8 +1531,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
z_size = rand() % z_compressed_size;
|
||||
create_rand_repeat_data(z_buf, z_size_max);
|
||||
|
||||
overflow =
|
||||
compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
|
||||
gzip_flag, level);
|
||||
|
||||
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
|
||||
if (overflow == 0)
|
||||
@ -1464,7 +1548,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
printf("inflate ret = %d\n", ret);
|
||||
print_error(overflow);
|
||||
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level,
|
||||
gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1483,7 +1568,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
|
||||
overflow =
|
||||
compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type,
|
||||
gzip_flag);
|
||||
gzip_flag, level);
|
||||
|
||||
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
|
||||
if (overflow == 0)
|
||||
@ -1499,7 +1584,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
printf("inflate ret = %d\n", ret);
|
||||
print_error(overflow);
|
||||
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ",
|
||||
level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1522,10 +1608,12 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
int test_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
{
|
||||
int fin_ret = IGZIP_COMP_OK, ret;
|
||||
uint32_t z_size, flush_type = 0, gzip_flag;
|
||||
uint32_t z_size, flush_type = 0, gzip_flag, level;
|
||||
uint8_t *z_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 3;
|
||||
level = rand() % 2;
|
||||
|
||||
z_size = 2 * in_size + 2 * hdr_bytes + 8;
|
||||
if (gzip_flag)
|
||||
z_size += gzip_extra_bytes;
|
||||
@ -1540,7 +1628,8 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
flush_type = rand();
|
||||
|
||||
/* Test invalid flush */
|
||||
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
|
||||
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
|
||||
gzip_flag, level);
|
||||
|
||||
if (ret == COMPRESS_GENERAL_ERROR)
|
||||
ret = 0;
|
||||
@ -1562,7 +1651,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
|
||||
if (ret) {
|
||||
#ifdef VERBOSE
|
||||
printf("Compressed array: ");
|
||||
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
|
||||
print_uint8_t(z_buf, z_size);
|
||||
printf("\n");
|
||||
printf("Data: ");
|
||||
@ -1582,10 +1671,11 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
int test_full_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
{
|
||||
int ret = IGZIP_COMP_OK;
|
||||
uint32_t z_size, gzip_flag;
|
||||
uint32_t z_size, gzip_flag, level;
|
||||
uint8_t *z_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 3;
|
||||
level = rand() % 2;
|
||||
z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
|
||||
|
||||
if (gzip_flag)
|
||||
@ -1599,7 +1689,7 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
|
||||
create_rand_repeat_data(z_buf, z_size);
|
||||
|
||||
ret = compress_full_flush(in_buf, in_size, z_buf, &z_size, gzip_flag);
|
||||
ret = compress_full_flush(in_buf, in_size, z_buf, &z_size, gzip_flag, level);
|
||||
|
||||
if (!ret)
|
||||
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
|
||||
|
@ -27,6 +27,8 @@
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
|
||||
%assign K 1024
|
||||
%assign D IGZIP_HIST_SIZE ;; Amount of history
|
||||
%assign LA 18 * 16 ;; Max look-ahead, rounded up to 32 byte boundary
|
||||
@ -45,3 +47,7 @@
|
||||
%assign SHORTEST_MATCH 4
|
||||
|
||||
%assign SLOP 8
|
||||
|
||||
%define DIST_OFFSET 14
|
||||
%define EXTRA_BITS_OFFSET (DIST_OFFSET + 5)
|
||||
%define LIT (0x1E << DIST_OFFSET)
|
||||
|
89
igzip/proc_heap.asm
Normal file
89
igzip/proc_heap.asm
Normal file
@ -0,0 +1,89 @@
|
||||
; returns modified node_ptr
|
||||
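; uint32_t build_huff_tree(uint64_t *heap, uint64_t heap_size, uint64_t node_ptr);
; (argument widths assumed from the 64-bit registers used below -- confirm against the C declaration)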
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
%include "heap_macros.asm"
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define heap rcx ; pointer, 64-bit
|
||||
%define heap_size rdx
|
||||
%define arg3 r8
|
||||
%define child rsi
|
||||
%define tmp32 rdi
|
||||
%else
|
||||
%define heap rdi
|
||||
%define heap_size rsi
|
||||
%define arg3 rdx
|
||||
%define child rcx
|
||||
%define tmp32 rdx
|
||||
%endif
|
||||
|
||||
%define node_ptr rax
|
||||
%define h1 r8
|
||||
%define h2 r9
|
||||
%define h3 r10
|
||||
%define i r11
|
||||
%define tmp2 r12
|
||||
|
||||
; build_huff_tree(heap, heap_size, arg3)
;   heap      - base of an array of qwords, indexed from 1 (the root is at heap + 1*8)
;   heap_size - number of entries currently in the heap
;   arg3      - initial value of node_ptr
; Each pass of .main_loop takes the two smallest entries (h1 and h2), writes the
; low 16 bits of each to heap[node_ptr] and heap[node_ptr - 1], and puts a
; combined entry back at the root. The loop runs while heap_size > 1.
; The final node_ptr is left in rax.
; heapify comes from heap_macros.asm and is not visible here; it is presumed to
; sift the entry at the given index down to restore heap order -- confirm.
global build_huff_tree
|
||||
build_huff_tree:
|
||||
; Save the scratch registers named by the child / tmp32 macros, plus r12 (tmp2).
push child
|
||||
push tmp32
|
||||
push r12
|
||||
; mov node_ptr, 3*286
|
||||
; node_ptr starts from the caller-supplied third argument; the line above is the
; earlier hard-coded start value. arg3 must be read here, before h1 (win64: r8)
; or tmp32 (other formats: rdx) overwrite the register it lives in.
mov node_ptr, arg3 ;;;@@@@
|
||||
.main_loop:
|
||||
; REMOVE_MIN64(heap, heap_size, h1);
|
||||
; h2 = last entry, h1 = root (the minimum). The vacated last slot is set to -1,
; the heap shrinks by one, and the old last entry becomes the new root.
mov h2, [heap + heap_size*8]
|
||||
mov h1, [heap + 1*8]
|
||||
mov qword [heap + heap_size*8], -1
|
||||
dec heap_size
|
||||
mov [heap + 1*8], h2
|
||||
|
||||
|
||||
mov i, 1
|
||||
heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
|
||||
|
||||
|
||||
; h2 = new root (next smallest entry); h3 = h1 + h2.
mov h2, [heap + 1*8]
|
||||
lea h3, [h1 + h2]
|
||||
; Only the low 16 bits (the word sub-register) of h1 and h2 are stored,
; at heap[node_ptr] and heap[node_ptr - 1].
mov [heap + node_ptr*8], h1 %+ w
|
||||
mov [heap + node_ptr*8 - 8], h2 %+ w
|
||||
|
||||
|
||||
; Clear the low 16 bits of the sum and put node_ptr there instead, then move
; node_ptr down by the two slots just written.
and h3, 0XFFFF0000 ; sign extends to FFFFFFFFFFFF0000
|
||||
or h3, node_ptr
|
||||
sub node_ptr, 2
|
||||
|
||||
|
||||
; replace_min64(heap, heap_size, h3)
|
||||
; The combined entry overwrites the root (which still holds h2) and is sifted.
mov [heap + 1*8], h3
|
||||
mov i, 1
|
||||
heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
|
||||
|
||||
|
||||
; Unsigned compare: keep looping while more than one entry remains.
cmp heap_size, 1
|
||||
ja .main_loop
|
||||
|
||||
|
||||
; Store the low 16 bits of the last remaining entry at heap[node_ptr].
mov h1, [heap + 1*8]
|
||||
mov [heap + node_ptr*8], h1 %+ w
|
||||
|
||||
|
||||
; NOTE(review): the pops name rdi/rsi literally while the pushes used the
; child/tmp32 macros. On win64 (child = rsi, tmp32 = rdi) this restores both
; registers. On other formats (child = rcx, tmp32 = rdx) the stack stays
; balanced, but rdi/rsi receive the saved rdx/rcx values.
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
ret
|
||||
|
||||
align 32
|
||||
; build_heap_asm(heap, heap_size)
;   heap      - base of an array of qwords, indexed from 1
;   heap_size - number of entries in the array
; Calls heapify for every index from heap_size/2 down to 1.
; heapify comes from heap_macros.asm and is not visible here; it is presumed to
; sift the entry at the given index down to restore heap order -- confirm.
global build_heap_asm
|
||||
build_heap_asm:
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
; Write -1 into the slot just past the last entry (heap[heap_size + 1]).
mov qword [heap + heap_size*8 + 8], -1
|
||||
; i = heap_size / 2
mov i, heap_size
|
||||
shr i, 1
|
||||
.loop:
|
||||
; heapify is given a copy of the index in h1, so i itself stays as the loop
; counter (presumably because the macro modifies its index argument).
mov h1, i
|
||||
heapify heap, heap_size, h1, child, h2, h3, tmp32, tmp2
|
||||
; NOTE(review): the body runs before the counter is tested. With heap_size < 2,
; i starts at 0, dec wraps it to -1 and jnz is taken, so callers appear to be
; required to pass heap_size >= 2.
dec i
|
||||
jnz .loop
|
||||
|
||||
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
ret
|
@ -150,6 +150,7 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
|
||||
#define INVALID_PARAM -8
|
||||
#define STATELESS_OVERFLOW -1
|
||||
#define ISAL_INVALID_OPERATION -9
|
||||
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
|
||||
|
||||
/**
|
||||
* @enum isal_zstate_state
|
||||
@ -163,16 +164,20 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
|
||||
enum isal_zstate_state {
|
||||
ZSTATE_NEW_HDR, //!< Header to be written
|
||||
ZSTATE_HDR, //!< Header state
|
||||
ZSTATE_CREATE_HDR, //!< Header to be created
|
||||
ZSTATE_BODY, //!< Body state
|
||||
ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
|
||||
ZSTATE_FLUSH_ICF_BUFFER,
|
||||
ZSTATE_SYNC_FLUSH, //!< Write sync flush block
|
||||
ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
|
||||
ZSTATE_TRL, //!< Trailer state
|
||||
ZSTATE_END, //!< End state
|
||||
ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written
|
||||
ZSTATE_TMP_HDR, //!< Temporary Header state
|
||||
ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state
|
||||
ZSTATE_TMP_BODY, //!< Temporary Body state
|
||||
ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
|
||||
ZSTATE_TMP_FLUSH_ICF_BUFFER,
|
||||
ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block
|
||||
ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
|
||||
ZSTATE_TMP_TRL, //!< Temporary Trailer state
|
||||
@ -206,7 +211,6 @@ enum isal_block_state {
|
||||
#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
|
||||
#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
/* Compression structures */
|
||||
/******************************************************************************/
|
||||
@ -217,6 +221,20 @@ struct isal_huff_histogram {
|
||||
uint16_t hash_table[IGZIP_HASH_SIZE]; //!< Tmp space used as a hash table
|
||||
};
|
||||
|
||||
/* Pair of histograms with 16-bit counters; embedded in struct isal_zstate as
 * the `hist` member. */
struct isal_mod_hist {
|
||||
/* 30 counters -- presumably one per deflate distance code; TODO confirm */
uint16_t d_hist[30];
|
||||
/* 513 counters -- presumably literal/length symbols; TODO confirm the index layout */
uint16_t ll_hist[513];
|
||||
};
|
||||
|
||||
/* Data sizes for level specific data options */
|
||||
/* Fixed part of the level 1 buffer requirement, in bytes. The replacement list
 * is parenthesized so the macro expands as a single value inside larger
 * expressions such as division or modulo. */
#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K) /* has to be at least sizeof(struct level_2_buf) */
|
||||
#define ISAL_DEF_LVL1_TOKEN_SIZE 4
|
||||
#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
|
||||
|
||||
/** @brief Holds Bit Buffer information*/
|
||||
struct BitBuf2 {
|
||||
uint64_t m_bits; //!< bits in the bit buffer
|
||||
@ -247,6 +265,8 @@ struct isal_zstate {
|
||||
uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
||||
uint32_t has_hist; //!< flag to track if there is match history
|
||||
|
||||
struct isal_mod_hist hist;
|
||||
|
||||
DECLARE_ALIGNED(uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD], 32); //!< Internal buffer
|
||||
DECLARE_ALIGNED(uint16_t head[IGZIP_HASH_SIZE], 16); //!< Hash array
|
||||
|
||||
@ -278,6 +298,9 @@ struct isal_zstream {
|
||||
uint32_t total_out; //!< total number of bytes written so far
|
||||
|
||||
struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
|
||||
uint32_t level; //!< Compression level to use
|
||||
uint32_t level_buf_size; //!< Size of level_buf
|
||||
uint8_t * level_buf; //!< User allocated buffer required for different compression levels
|
||||
uint32_t end_of_stream; //!< non-zero if this is the last input buffer
|
||||
uint32_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH
|
||||
uint32_t gzip_flag; //!< Indicate if gzip compression is to be performed
|
||||
|
Loading…
Reference in New Issue
Block a user