igzip: implement igzip two pass

Change-Id: I9564b2da251a02197b39cab5f141e7aff1ae8439
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2016-10-03 14:50:52 -07:00 committed by Greg Tucker
parent 43d1029b81
commit 01dfbcc484
28 changed files with 2747 additions and 748 deletions

View File

@ -32,16 +32,25 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
igzip/igzip_body_02.asm \
igzip/igzip_body_04.asm \
igzip/igzip_finish.asm \
igzip/igzip_icf_body_01.asm \
igzip/igzip_icf_body_02.asm \
igzip/igzip_icf_body_04.asm \
igzip/igzip_icf_finish.asm \
igzip/crc_data.asm \
igzip/rfc1951_lookup.asm \
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
igzip/igzip_multibinary.asm \
igzip/igzip_base.c \
igzip/igzip_icf_base.c \
igzip/igzip_update_histogram_01.asm \
igzip/igzip_update_histogram_04.asm \
igzip/igzip_decode_block_stateless_01.asm \
igzip/igzip_decode_block_stateless_04.asm \
igzip/igzip_inflate_multibinary.asm
igzip/igzip_inflate_multibinary.asm \
igzip/flatten_ll.c \
igzip/encode_df.c \
igzip/encode_df_04.asm \
igzip/proc_heap.asm
src_include += -I $(srcdir)/igzip
extern_hdrs += include/igzip_lib.h
@ -55,11 +64,12 @@ perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
other_tests += igzip/igzip_semi_dyn_file_perf
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
igzip/inflate_data_structs.asm \
igzip/igzip_body.asm igzip/igzip_finish.asm \
igzip/lz0a_const.asm igzip/options.asm igzip/stdmac.asm igzip/igzip_compare_types.asm \
igzip/bitbuf2.h igzip/repeated_char_result.h \
igzip/bitbuf2.h \
igzip/repeated_char_result.h \
igzip/igzip_body.asm \
igzip/igzip_update_histogram.asm \
igzip/huffman.asm \
@ -67,8 +77,13 @@ other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
include/multibinary.asm \
include/test.h \
igzip/huffman.h \
igzip/igzip_level_buf_structs.h \
igzip/igzip_decode_block_stateless.asm \
igzip/inflate_std_vects.h
igzip/inflate_std_vects.h \
igzip/flatten_ll.h \
igzip/encode_df.h \
igzip/encode_df_asm.asm\
igzip/heap_macros.asm
examples += igzip/igzip_example igzip/igzip_sync_flush_example

View File

@ -203,3 +203,10 @@
; code2 is clobbered, rcx is clobbered
%endif
%endm
; write_dword data, addr
;   data: register name; the macro pastes a trailing 'd' onto it (%1d) to
;         select its 32-bit form, so it must be a register whose dword alias
;         is spelled that way (e.g. r8 -> r8d)
;   addr: register holding the destination address; advanced by 4 on exit
; Stores the 32-bit value to [addr] with a non-temporal store (movnti).
%macro write_dword 2
%define %%data %1d
%define %%addr %2
movnti [%%addr], %%data
add %%addr, 4
%endm

View File

@ -102,19 +102,28 @@ static inline uint32_t buffer_used(struct BitBuf2 *me)
return (uint32_t)(me->m_out_buf - me->m_out_start);
}
/* Returns the total number of bits accounted for by the bit buffer: eight
 * per byte between m_out_start and m_out_buf, plus the m_bit_count bits
 * still held in the accumulator. */
static inline uint32_t buffer_bits_used(struct BitBuf2 *me)
{
	const uint32_t bytes_out = (uint32_t)(me->m_out_buf - me->m_out_start);

	return (bytes_out * 8 + me->m_bit_count);
}
/* Writes the accumulator to the output position with a streaming 64-bit
 * store, then consumes every complete byte of it: the output pointer moves
 * past those bytes and only the remaining (< 8) bits stay in m_bits.
 * The store always covers 8 bytes starting at m_out_buf. */
static inline void flush_bits(struct BitBuf2 *me)
{
	/* Pending bit count rounded down to a whole number of bytes. */
	const uint32_t whole_bits = me->m_bit_count & ~7;

	_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);

	me->m_out_buf += whole_bits / 8;
	me->m_bits >>= whole_bits;
	me->m_bit_count -= whole_bits;
}
static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
{
/* Checks if bitbuf has num_bits extra space and flushes the bytes in
* the bitbuf if it doesn't. */
uint32_t bytes;
if (63 - me->m_bit_count < num_bits) {
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
bytes = me->m_bit_count / 8;
me->m_out_buf += bytes;
bytes *= 8;
me->m_bit_count -= bytes;
me->m_bits >>= bytes;
}
if (63 - me->m_bit_count < num_bits)
flush_bits(me);
}
static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
@ -136,16 +145,10 @@ static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
}
#elif defined(USE_BITBUFB) /* Write bits always */
/* Assumes there is space to fit code into m_bits. */
uint32_t bits;
me->m_bits |= code << me->m_bit_count;
me->m_bit_count += count;
if (me->m_bit_count >= 8) {
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
bits = me->m_bit_count & ~7;
me->m_bit_count -= bits;
me->m_out_buf += bits/8;
me->m_bits >>= bits;
}
if (me->m_bit_count >= 8)
flush_bits(me);
#else /* USE_BITBUF_ELSE */
check_space(me, count);
write_bits_unsafe(me, code, count);

View File

@ -67,6 +67,52 @@ FIELD _m_out_start, 8, 8
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for the isal_mod_hist histogram structure.
;;   _d_hist:  30 entries of 2 bytes each
;;   _ll_hist: 513 entries of 2 bytes each
;; _isal_mod_hist_size / _isal_mod_hist_align capture the resulting total
;; size and alignment so other layouts can embed this structure.
START_FIELDS ;; isal_mod_hist
;; name size align
FIELD _d_hist, 30*2, 2
FIELD _ll_hist, 513*2, 2
%assign _isal_mod_hist_size _FIELD_OFFSET
%assign _isal_mod_hist_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for the hufftables_icf structure: two tables of
;; HUFF_CODE_SIZE-byte (4-byte) entries.
;;   _lit_len_table: 513 entries
;;   _dist_table:    31 entries
;; _hufftables_icf_size / _hufftables_icf_align capture the resulting total
;; size and alignment so other layouts can embed this structure.
%define HUFF_CODE_SIZE 4
START_FIELDS ;; hufftables_icf
;; name size align
FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
FIELD _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
%assign _hufftables_icf_size _FIELD_OFFSET
%assign _hufftables_icf_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for the level_2_buf structure.
;;   _encode_tables:        embedded hufftables_icf
;;   _deflate_hdr_buf_used: 8 bytes of bookkeeping for the header buffer
;;   _deflate_hdr_buf:      DEF_MAX_HDR_SIZE (328) bytes of header storage
;;   _icf_buf_next:         8-byte field
;;   _icf_buf_avail_out:    8-byte field
;;   _icf_buf_start:        zero-sized marker for the data that follows the
;;                          fixed fields
;; NOTE(review): _icf_buf_start is declared with size 0 and alignment 0;
;; the FIELD macro is not visible here -- confirm it computes a sensible
;; offset for an alignment of 0.
;; NOTE(review): presumably this must mirror the C struct level_2_buf
;; field for field -- verify against igzip_level_buf_structs.h.
%define DEF_MAX_HDR_SIZE 328
START_FIELDS ;; level_2_buf
;; name size align
FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align
FIELD _deflate_hdr_buf_used, 8, 8
FIELD _deflate_hdr_buf, DEF_MAX_HDR_SIZE, 1
FIELD _icf_buf_next, 8, 8
FIELD _icf_buf_avail_out, 8, 8
FIELD _icf_buf_start, 0, 0
%assign _level_2_buf_size _FIELD_OFFSET
%assign _level_2_buf_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
START_FIELDS ;; isal_zstate
;; name size align
@ -82,7 +128,8 @@ FIELD _tmp_out_start, 4, 4
FIELD _tmp_out_end, 4, 4
FIELD _has_eob, 4, 4
FIELD _has_eob_hdr, 4, 4
FIELD _has_hist, 4, 4
FIELD _has_hist, 4, 4
FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align
FIELD _buffer, BSIZE, 32
FIELD _head, IGZIP_HASH_SIZE*2, 16
@ -95,6 +142,8 @@ _bitbuf_m_out_buf equ _bitbuf+_m_out_buf
_bitbuf_m_out_end equ _bitbuf+_m_out_end
_bitbuf_m_out_start equ _bitbuf+_m_out_start
_hist_lit_len equ _hist+_ll_hist
_hist_dist equ _hist+_d_hist
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -109,6 +158,9 @@ FIELD _next_out, 8, 8
FIELD _avail_out, 4, 4
FIELD _total_out, 4, 4
FIELD _hufftables, 8, 8
FIELD _level, 4, 4
FIELD _level_buf_size, 4, 4
FIELD _level_buf, 8, 8
FIELD _end_of_stream, 4, 4
FIELD _flush, 4, 4
FIELD _gzip_flag, 4, 4
@ -137,16 +189,22 @@ _internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count
_internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf
_internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end
_internal_state_bitbuf_m_out_start equ _internal_state+_bitbuf_m_out_start
_internal_state_hist_lit_len equ _internal_state+_hist_lit_len
_internal_state_hist_dist equ _internal_state+_hist_dist
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
ZSTATE_HDR equ 1
ZSTATE_BODY equ 2
ZSTATE_FLUSH_READ_BUFFER equ 3
ZSTATE_SYNC_FLUSH equ 4
ZSTATE_TRL equ 6
;; Internal States
ZSTATE_NEW_HDR equ 0
ZSTATE_HDR equ (ZSTATE_NEW_HDR + 1)
ZSTATE_CREATE_HDR equ (ZSTATE_HDR + 1)
ZSTATE_BODY equ (ZSTATE_CREATE_HDR + 1)
ZSTATE_FLUSH_READ_BUFFER equ (ZSTATE_BODY + 1)
ZSTATE_FLUSH_ICF_BUFFER equ (ZSTATE_FLUSH_READ_BUFFER + 1)
ZSTATE_SYNC_FLUSH equ (ZSTATE_FLUSH_ICF_BUFFER + 1)
ZSTATE_FLUSH_WRITE_BUFFER equ (ZSTATE_SYNC_FLUSH + 1)
ZSTATE_TRL equ (ZSTATE_FLUSH_WRITE_BUFFER + 1)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

36
igzip/encode_df.c Normal file
View File

@ -0,0 +1,36 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <assert.h>
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
#include "encode_df.h"
#include "bitbuf2.h"
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
struct deflate_icf *end_in, struct BitBuf2 *bb,
struct hufftables_icf *hufftables)
{
struct huff_code lsym, dsym;
while (next_in < end_in && !is_full(bb)) {
lsym = hufftables->lit_len_table[next_in->lit_len];
dsym = hufftables->dist_table[next_in->lit_dist];
// insert ll code, dist_code, and extra_bits
write_bits_unsafe(bb, lsym.code_and_extra, lsym.length);
write_bits_unsafe(bb, dsym.code, dsym.length);
write_bits_unsafe(bb, next_in->dist_extra, dsym.extra_bit_count);
flush_bits(bb);
next_in++;
}
return next_in;
}

19
igzip/encode_df.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef ENCODE_DF_H
#define ENCODE_DF_H
#include <stdint.h>
#include "huff_codes.h"
/* Deflate Intermediate Compression Format */
#define ICF_DIST_OFFSET 14
#define NULL_DIST_SYM 30
/* One 32-bit intermediate-format token, packed as bit-fields:
 *   lit_len    - ICF_DIST_OFFSET (14) bits: literal/length symbol
 *   lit_dist   - 5 bits: distance symbol
 *   dist_extra - the remaining 32 - 5 - ICF_DIST_OFFSET (13) bits:
 *                extra bits for the distance symbol
 */
struct deflate_icf {
uint32_t lit_len:ICF_DIST_OFFSET;
uint32_t lit_dist:5;
uint32_t dist_extra:32 - 5 - ICF_DIST_OFFSET;
};
struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,
struct BitBuf2 *bb, struct hufftables_icf * hufftables);
#endif

4
igzip/encode_df_04.asm Normal file
View File

@ -0,0 +1,4 @@
%define ARCH 04
%define USE_HSWNI
%include "encode_df_asm.asm"

169
igzip/encode_df_asm.asm Normal file
View File

@ -0,0 +1,169 @@
%include "reg_sizes.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "stdmac.asm"
; tree entry is 4 bytes:
; lit/len tree (513 entries)
; | 3 | 2 | 1 | 0 |
; | len | code |
;
; dist tree
; | 3 | 2 | 1 | 0 |
; |eblen:codlen| code |
; token format:
; DIST_OFFSET:0 : lit/len
; 31:(DIST_OFFSET + 5) : dist Extra Bits
; (DIST_OFFSET + 5):DIST_OFFSET : dist code
; lit/len: 0-255 (literal), 256 (end of block)
; 257-512 (match length + 254)
; returns final token pointer
; equal to token_end if successful
; uint32_t* encode_deflate_icf_<ARCH>(uint32_t *token_start, uint32_t *token_end,
; BitBuf2 *bb, uint32_t *trees);
%ifidn __OUTPUT_FORMAT__, win64
%define ARG1 rcx
%define ARG2 rdx
%define ARG3 r8
%define ARG4 r9
%define TMP1 rsi
%define TMP2 rdi
%define ll_tree ARG4
%define ptr r11
%else
; Linux
%define ARG1 rdi
%define ARG2 rsi
%define ARG3 rdx
%define ARG4 rcx
%define TMP1 r8
%define TMP2 r9
%define ll_tree r11 ; ARG4
%define ptr ARG1 ; r11
%endif
%define in_buf_end ARG2
%define bb ARG3
%define out_buf bb
; bit_count is rcx
%define bits rax
%define data r12
%define tmp rbx
%define sym TMP1
%define dsym TMP2
%define len dsym
%define tmp2 r10
%define end_ptr rbp
%define dist_tree ll_tree + 4*513
; encode_deflate_icf_<ARCH>(token_start, token_end, bb, trees)
;
; Encodes 32-bit tokens from [token_start, token_end) into the bit buffer
; described by bb, using the lit/len table at trees and the distance table
; at trees + 4*513. Returns (in rax) the pointer to the first token that
; was not encoded.
;
; Register use inside the loop:
;   bits (rax)    - bit accumulator, loaded from / stored back to bb._m_bits
;   rcx           - pending bit count (cl is the shift amount)
;   out_buf       - current output pointer (reuses the bb register)
;   end_ptr (rbp) - bb._m_out_end
;
; The loop is bottom-tested: one token is always loaded, and it is encoded
; unless out_buf is already past end_ptr, even if token_start == token_end
; on entry.
global encode_deflate_icf_ %+ ARCH
encode_deflate_icf_ %+ ARCH:
; save the registers this routine overwrites
push rbx
%ifidn __OUTPUT_FORMAT__, win64
push rsi
push rdi
%endif
push r12
push rbp
; keep the bb pointer on the stack: its register is reused as out_buf
push bb
; free up rcx
; (win64: rcx is ARG1, moved to ptr; otherwise rcx is ARG4, moved to ll_tree)
%ifidn __OUTPUT_FORMAT__, win64
mov ptr, ARG1
%else
mov ll_tree, ARG4
%endif
; load the bit-buffer state into registers
mov bits, [bb + _m_bits]
mov ecx, [bb + _m_bit_count]
mov end_ptr, [bb + _m_out_end]
mov out_buf, [bb + _m_out_buf] ; clobbers bb
.start_loop:
mov DWORD(data), [ptr]
; stop (without consuming this token) once the output pointer is past the end
cmp out_buf, end_ptr
ja .overflow
; look up lit/len sym
mov sym, data
and sym, 0x3FF ; sym has ll_code
mov DWORD(sym), [ll_tree + sym * 4]
; look up dist sym
mov dsym, data
shr dsym, DIST_OFFSET
and dsym, 0x1F
mov DWORD(dsym), [dist_tree + dsym * 4]
; insert LL code
; sym: 31:24 length; 23:0 code
mov tmp2, sym
and sym, 0xFFFFFF
shl sym, cl
shr tmp2, 24
or bits, sym
add rcx, tmp2
; insert dist code
; dsym: 31:24 code length; 23:16 extra bit count; 15:0 code
movzx tmp, WORD(dsym)
shl tmp, cl
or bits, tmp
mov tmp, dsym
shr tmp, 24
add rcx, tmp
; insert dist extra bits
shr data, EXTRA_BITS_OFFSET
add ptr, 4
shl data, cl
or bits, data
shr dsym, 16
and dsym, 0xFF
add rcx, dsym
; empty bits
; store all 8 accumulator bytes, advance out_buf by the number of whole
; bytes, and keep only the leftover (< 8) bits in bits / rcx
mov [out_buf], bits
mov tmp, rcx
shr tmp, 3 ; byte count
add out_buf, tmp
mov tmp, rcx
and rcx, ~7
shr bits, cl
mov rcx, tmp
and rcx, 7
cmp ptr, in_buf_end
jb .start_loop
;.end:
; ; empty bits
; mov [out_buf], bits
; mov tmp, rcx
; shr tmp, 3 ; byte count
; add out_buf, tmp
; mov tmp, rcx
; and rcx, ~7
; shr bits, cl
; mov rcx, tmp
; and rcx, 7
.overflow:
; write the bit-buffer state back through the saved bb pointer
pop TMP1 ; TMP1 now points to bb
mov [TMP1 + _m_bits], bits
mov [TMP1 + _m_bit_count], ecx
mov [TMP1 + _m_out_buf], out_buf
pop rbp
pop r12
%ifidn __OUTPUT_FORMAT__, win64
pop rdi
pop rsi
%endif
pop rbx
; return the pointer to the next unencoded token
mov rax, ptr
ret

41
igzip/flatten_ll.c Normal file
View File

@ -0,0 +1,41 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "flatten_ll.h"
/*
 * Collapses, in place, a histogram indexed by (match length + 254) into one
 * indexed by deflate length symbol.
 *
 * Entries below 265 are left untouched. Starting at entry 265, runs of
 * consecutive input entries are summed into single output entries: four
 * outputs each covering 2 inputs, then four covering 4, four covering 8,
 * four covering 16 and four covering 32. That consumes entries 265..512 and
 * produces entries 265..284. Finally the count in entry 512 is split out of
 * entry 284 into its own entry 285.
 *
 * Sums are stored as uint16_t and therefore wrap modulo 2^16. Entries above
 * 285 keep whatever they held before the call.
 */
void flatten_ll(uint16_t * ll_hist)
{
	/* Reads always stay at or ahead of the slot being written, so the
	 * merge can safely run in place. */
	const uint16_t *src = ll_hist + 265;
	uint32_t out_sym = 265;
	uint32_t width, group, k;

	for (width = 2; width <= 32; width <<= 1) {
		for (group = 0; group < 4; group++) {
			uint16_t sum = 0;

			for (k = 0; k < width; k++)
				sum += *src++;
			ll_hist[out_sym++] = sum;
		}
	}

	/* Entry 512 gets an output slot of its own. */
	ll_hist[284] -= ll_hist[512];
	ll_hist[285] = ll_hist[512];
}

3
igzip/flatten_ll.h Normal file
View File

@ -0,0 +1,3 @@
#include <stdint.h>
void flatten_ll(uint16_t *ll_hist);

69
igzip/heap_macros.asm Normal file
View File

@ -0,0 +1,69 @@
; heapify2 heap, heap_size, i, child, tmp1, tmp2, tmpd
;
; Sift-down loop: repeatedly compares the element at i with the smaller of
; its two children (at child = 2*i and the following slot) and swaps them
; until the element at i is <= its smaller child or child > heap_size.
; All comparisons are unsigned. Clobbers i, child, tmp1, tmp2, tmpd and the
; flags.
;
; Fixed here: the sibling load had an unbalanced ')' in its address
; expression and the exit label lacked its ':', so the macro could not be
; assembled when instantiated.
;
; NOTE(review): the loads use unscaled offsets ([heap + child]) while the
; sibling is selected with child + 1 but loaded from + 8; the index units
; look inconsistent. Confirm the intended addressing before using this
; macro (the 8-argument heapify macro scales indices by 8 throughout).
%macro heapify2 7
%define %%heap %1 ; qword ptr
%define %%heap_size %2 ; dword
%define %%i %3 ; dword
%define %%child %4 ; dword
%define %%tmp1 %5 ; qword
%define %%tmp2 %6 ; qword
%define %%tmpd %7 ; dword
align 16
%%heapify1:
lea %%child, [%%i + %%i]
cmp %%child, %%heap_size
ja %%end_heapify1
; mov/lea leave the flags alone, so the cmove below still sees the result
; of the cmp above (equal means child is the last element)
mov %%tmp1, [%%heap + %%child]
mov %%tmpd, %%child
mov %%tmp2, [%%heap + %%child + 8]
lea %%child, [%%child + 1]
cmove %%tmp2, %%tmp1
cmp %%tmp1, %%tmp2
cmovbe %%child, %%tmpd
cmovbe %%tmp2, %%tmp1
; child is correct, %%tmp2 = heap[child]
mov %%tmp1, [%%heap + %%i]
cmp %%tmp1, %%tmp2
jbe %%end_heapify1
; swap i and child
mov [%%heap + %%i], %%tmp2
mov [%%heap + %%child], %%tmp1
mov %%i, %%child
jmp %%heapify1
%%end_heapify1:
%endm
; heapify heap, heap_size, i, child, tmp1, tmp2, tmpd, tmp3
;
; Sift-down loop over an array of 8-byte elements addressed as
; [heap + index*8]. Each iteration computes child = 2*i, exits if
; child > heap_size, picks the smaller (unsigned) of heap[child] and
; heap[child + 1], and swaps it with heap[i] unless heap[i] is already
; <= that child. After a swap, i takes the child's index and the loop
; repeats.
;
; heap[child + 1] is loaded even when child == heap_size, so the slot just
; past the last element is read (presumably callers keep a sentinel there
; -- TODO confirm).
; Clobbers i, child, tmp1, tmp2, tmpd, tmp3 and the flags.
%macro heapify 8
%define %%heap %1 ; qword ptr
%define %%heap_size %2 ; qword
%define %%i %3 ; qword
%define %%child %4 ; qword
%define %%tmp1 %5 ; qword
%define %%tmp2 %6 ; qword
%define %%tmpd %7 ; qword
%define %%tmp3 %8
align 16
%%heapify1:
lea %%child, [%%i + %%i]
; mov %%child, %%i
; add %%child, %%child
cmp %%child, %%heap_size
ja %%end_heapify1
; tmp1 = heap[child], tmp2 = heap[child + 1], tmp3 = heap[i]
mov %%tmp1, [%%heap + %%child*8]
mov %%tmp2, [%%heap + %%child*8 + 8]
mov %%tmp3, [%%heap + %%i*8]
; tmpd = child + 1; selected below if that sibling is strictly smaller
mov %%tmpd, %%child
add %%tmpd, 1
cmp %%tmp2, %%tmp1
cmovb %%child, %%tmpd
cmovb %%tmp1, %%tmp2
; child is correct, tmp1 = heap[child]
cmp %%tmp3, %%tmp1
jbe %%end_heapify1
; swap i and child
mov [%%heap + %%i*8], %%tmp1
mov [%%heap + %%child*8], %%tmp3
mov %%i, %%child
jmp %%heapify1
%%end_heapify1:
%endm

File diff suppressed because it is too large Load Diff

View File

@ -79,81 +79,57 @@
#define HASH_MASK (IGZIP_HASH_SIZE - 1)
#define SHORTEST_MATCH 4
#define LENGTH_BITS 5
#define FREQ_SHIFT 16
#define FREQ_MASK_HI (0xFFFFFFFFFFFF0000)
#define DEPTH_SHIFT 24
#define DEPTH_MASK 0x7F
#define DEPTH_MASK_HI (DEPTH_MASK << DEPTH_SHIFT)
#define DEPTH_1 (1 << DEPTH_SHIFT)
#define HEAP_TREE_SIZE (3*MAX_HISTHEAP_SIZE + 1)
#define HEAP_TREE_NODE_START (HEAP_TREE_SIZE-1)
#define MAX_BL_CODE_LEN 7
/**
* @brief Structure used to store huffman codes
*/
struct huff_code {
uint16_t code;
uint8_t length;
union {
struct {
uint16_t code;
uint8_t extra_bit_count;
uint8_t length;
};
struct {
uint32_t code_and_extra:24;
uint8_t length2;
};
};
};
/**
* @brief Binary tree used to store and create a huffman tree.
*/
struct huff_tree {
uint16_t value;
uint64_t frequency;
struct huff_tree *left;
struct huff_tree *right;
struct tree_node {
uint32_t child;
uint32_t depth;
};
/**
* @brief Nodes in a doubly linked list.
*/
struct linked_list_node {
uint16_t value;
struct linked_list_node *next;
struct linked_list_node *previous;
struct heap_tree {
union {
uint64_t heap[HEAP_TREE_SIZE];
uint64_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
struct tree_node tree[HEAP_TREE_SIZE];
};
};
/**
* @brief This structure is a doubly linked list.
*/
struct linked_list {
uint64_t length;
struct linked_list_node *start;
struct linked_list_node *end;
struct rl_code {
uint8_t code;
uint8_t extra_bits;
};
/**
* @brief This is a binary minheap structure which stores huffman trees.
* @details The huffman trees are sorted by the frequency of the root.
* The structure is represented in a fixed sized array.
*/
struct histheap {
struct huff_tree tree[MAX_HISTHEAP_SIZE];
uint16_t size;
struct hufftables_icf {
struct huff_code lit_len_table[513];
struct huff_code dist_table[31];
};
/**
* @brief Inserts a hufftree into a histheap.
* @param element: the hufftree to be inserted
* @param heap: the heap which element is being inserted into.
* @requires This function assumes the heap has enough allocated space.
* @returns Returns the index in heap of the inserted element
*/
int heap_push(struct huff_tree element, struct histheap *heap);
/**
* @brief Removes the top element from the heap and returns it.
*/
struct huff_tree heap_pop(struct histheap *heap);
/**
* @brief Removes the first element from list and returns it.
*/
struct linked_list_node *pop_from_front(struct linked_list *list);
/**
* @brief Adds new_element to the front of list.
*/
void append_to_front(struct linked_list *list, struct linked_list_node *new_element);
/**
* @brief Adds new_element to the end of list.
*/
void append_to_back(struct linked_list *list, struct linked_list_node *new_element);
/**
* @brief Returns the deflate symbol value for a repeat length.
*/
@ -164,68 +140,6 @@ uint32_t convert_length_to_len_sym(uint32_t length);
*/
uint32_t convert_dist_to_dist_sym(uint32_t dist);
/**
* Constructs a huffman tree on tree_array which only uses elements with non-zero frequency.
* @requires Assumes there will be at least two symbols in the produced tree.
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
* size elements of the array are the leaf elements in the huffman tree.
* @param histogram: a histogram of the frequency of elements in tree_array.
* @param size: the number of leaf elements in the huffman tree.
*/
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
uint64_t * histogram, uint32_t size);
/**
* @brief Construct a huffman tree on tree_array which uses every symbol.
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
* @param size elements of the array are the leaf elements in the huffman tree.
* @param histogram: a histogram of the frequency of elements in tree_array.
* @param size: the number of leaf elements in the huffman tree.
*/
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
uint32_t size);
/**
* @brief Creates a deflate compliant huffman tree with maximum depth max_depth.
* @details The huffman tree is represented as a lookup table.
* @param huff_lookup_table: The output lookup table.
* @param table_length: The length of table.
* @param root: the input huffman tree the created tree is based on.
* @param max_depth: maximum depth the huffman tree can have
* @returns Returns 0 if sucessful and returns 1 otherwise.
*/
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
struct huff_tree root, uint8_t max_depth);
/**
* @brief Determines the code length for every value in a huffmant tree.
* @param huff_lookup_table: An output lookup table used to store the code lengths
* @param corresponding to the possible values
* @param count: An output histogram representing code length versus number of occurences.
* @param current_node: A node of the huffman tree being analyzed currently.
* @param current_depth: The depth of the current node in the huffman tree.
* @returns Returns 0 if sucessful and returns 1 otherwise.
*/
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
struct huff_tree root, uint8_t max_depth);
/**
* @brief Creates an array of linked lists.
* @detail Each linked list contains all the elements with codes of a given length for
* lengths less than 16, and an list for all elements with codes at least 16. These lists
* are sorted by frequency from least frequent to most frequent within any given code length.
* @param depth_array: depth_array[i] is a linked list of elements with code length i
* @param linked_lists: An input structure the linked lists in depth array are built on.
* @param current_node: the current node being visited in a huffman tree
* @param current_depth: the depth of current_node in a huffman tree
*/
void huffman_tree_traversal(struct linked_list *depth_array,
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
uint8_t max_depth, struct huff_tree current_node,
uint16_t current_depth);
/**
* @brief Determines the code each element of a deflate compliant huffman tree and stores
* it in a lookup table
@ -234,10 +148,7 @@ void huffman_tree_traversal(struct linked_list *depth_array,
* @param table_length: The length of table.
* @param count: a histogram representing the number of occurences of codes of a given length
*/
void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count);
/* Reverse the first length bits in bits and returns that value */
uint16_t bit_reverse(uint16_t bits, uint8_t length);
uint32_t set_huff_codes(struct huff_code *table, int table_length, uint32_t * count);
/**
* @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files.
@ -263,32 +174,8 @@ uint16_t valid_dist_huff_table(struct huff_code *huff_code_table);
* @param end_of_block: Value determining whether end of block header is produced or not;
* 0 corresponds to not end of block and all other inputs correspond to end of block.
*/
int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table,
struct huff_code *dist_huff_table, uint32_t end_of_block);
/**
* @brief Creates a run length encoded reprsentation of huff_table.
* @details Also creates a histogram representing the frequency of each symbols
* @returns Returns the number of symbols written into huffman_rep.
* @param huffman_rep: The output run length encoded version of huff_table.
* @param histogram: The output histogram of frequencies of elements in huffman_rep.
* @param extra_bits: An output table storing extra bits associated with huffman_rep.
* @param huff_table: The input huffman_table or concatonation of huffman_tables.
* @parma len: The length of huff_table.
*/
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len);
/**
* @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep.
* @param huffman_rep: pointer to array containing the output huffman_rep.
* @param histogram: histogram of elements seen in huffman_rep.
* @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep.
* @param huff_table: a concatenated list of huffman lookup tables.
* @param current_index: The next spot elements will be written in huffman_rep.
*/
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
uint16_t last_code, uint16_t run_length, uint16_t current_index);
int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep, uint32_t length,
uint64_t * histogram, uint32_t hlit, uint32_t hdist, uint32_t end_of_block);
/**
* @brief Creates the header for run length encoded huffman trees.
@ -303,10 +190,10 @@ uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t *
* @param hlit: Length of literal/length table minus 257.
* @parm hdist: Length of distance table minus 1.
*/
int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table,
uint16_t * huffman_rep, uint16_t * extra_bits,
uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen,
uint32_t hlit, uint32_t hdist);
int create_huffman_header(struct BitBuf2 *header_bitbuf, struct huff_code *lookup_table,
struct rl_code * huffman_rep, uint16_t huffman_rep_length,
uint32_t end_of_block, uint32_t hclen, uint32_t hlit,
uint32_t hdist);
/**
* @brief Creates a two table representation of huffman codes.
@ -348,4 +235,18 @@ void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
*/
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
struct huff_code *dist_hufftable);
/**
* @brief Creates a representation of the huffman code from a histogram, used to
* encode the intermediate compression format.
*
* @param bb: bitbuf structure where the huffman code header is written
* @param hufftables: output huffman code representation
* @param hist: histogram used to generate the huffman code
* @param end_of_block: flag whether this is the final huffman code
*/
void
create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf * hufftables,
struct isal_mod_hist *hist, uint32_t end_of_block);
#endif

View File

@ -168,6 +168,49 @@
%endif
; Macros for doing Huffman Encoding
; Assumes (dist != 0)
; Uses RCX, clobbers dist
; compute_dist_icf_code dist, code, tmp1
;   dist: IN, clobbered
;   code: OUT, holds sym + (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET))
;   tmp1: scratch, passed through to the BZHI macro
; With msb = bsr(dist), num_extra_bits = msb - 1; the low num_extra_bits
; bits of dist are the extra bits and sym = (dist >> num_extra_bits) +
; 2*num_extra_bits.
%macro compute_dist_icf_code 3
%define %%dist %1 ; IN, clobbered
%define %%distq %1
%define %%code %2 ; OUT
%define %%tmp1 %3
bsr rcx, %%dist ; ecx = msb = bsr(dist)
dec rcx ; ecx = num_extra_bits = msb - 1
BZHI %%code, %%dist, rcx, %%tmp1 ; code = low num_extra_bits bits of dist
SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2
shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET
add %%code, %%dist ; code = extra_bits | sym
%endm
; Uses RCX, clobbers dist
; get_dist_icf_code dist, code, tmp1
;   dist: IN, clobbered
;   code: OUT, the distance symbol and extra bits shifted left by
;         DIST_OFFSET
;   tmp1: scratch for compute_dist_icf_code
; Values <= 1 (signed compare) are used directly as the symbol with no
; extra bits; larger values go through compute_dist_icf_code.
; NOTE(review): presumably callers pass (distance - 1), mirroring the C
; helper where distances 1 and 2 map to symbols 0 and 1 -- confirm.
%macro get_dist_icf_code 3
%define %%dist %1 ; 32-bit IN, clobbered
%define %%distq %1 ; 64-bit IN, clobbered
%define %%code %2 ; 32-bit OUT
%define %%tmp1 %3
cmp %%dist, 1
jg %%do_compute
%ifnidn %%code, %%dist
mov %%code, %%dist
%endif
jmp %%done
%%do_compute:
compute_dist_icf_code %%distq, %%code, %%tmp1
%%done:
shl %%code, DIST_OFFSET
%endm
; "len" can be same register as "length"
; get_len_code length, code, len
%macro get_len_code 4

View File

@ -126,6 +126,48 @@ static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit
*len = hufftables->lit_table_sizes[lit];
}
/**
 * @brief Splits a match distance into a distance symbol and its extra bits.
 * @param dist: match distance.
 * @param code: output distance symbol; asserted to be below 30.
 * @param extra_bits: output value of the extra bits for that symbol.
 *
 * NOTE(review): the extra-bit count is msb - 2, which wraps if bsr()
 * returns less than 2, yet the assert only requires msb >= 1. The caller
 * visible here only passes dist > 2; confirm against bsr()'s definition.
 */
static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
{
	const uint32_t biased = dist - 1;
	const uint32_t top_bit = bsr(biased);
	uint32_t extra_count;

	assert(top_bit >= 1);
	extra_count = top_bit - 2;

	/* low extra_count bits are the extra bits; the rest picks the symbol */
	*extra_bits = biased & ((1 << extra_count) - 1);
	*code = (biased >> extra_count) + 2 * extra_count;
	assert(*code < 30);
}
/**
 * @brief Returns the distance symbol and extra bits for a match distance.
 * @param dist: match distance, asserted to be in [1, 32768].
 * @param code: output distance symbol.
 * @param extra_bits: output value of the extra bits for that symbol.
 */
static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
{
	assert(dist >= 1);
	assert(dist <= 32768);

	if (dist > 2) {
		compute_dist_icf_code(dist, code, extra_bits);
		return;
	}

	/* distances 1 and 2 map straight to symbols 0 and 1, no extra bits */
	*code = dist - 1;
	*extra_bits = 0;
}
/**
 * @brief Returns the intermediate-format symbol for a match length.
 * @param length: match length, asserted to be in [3, 258].
 * @param code: output symbol, equal to length + 254 (257..512).
 */
static inline void get_len_icf_code(uint32_t length, uint32_t *code)
{
	/* offset between a match length and its symbol */
	enum { LEN_TO_SYM_BIAS = 254 };

	assert(length >= 3);
	assert(length <= 258);

	*code = LEN_TO_SYM_BIAS + length;
}
/**
 * @brief Returns the intermediate-format symbol for a literal.
 * @param lit: literal value, asserted to be at most 256.
 * @param code: output symbol; identical to lit.
 */
static inline void get_lit_icf_code(uint32_t lit, uint32_t *code)
{
	assert(lit <= 256);

	/* literals are their own symbols */
	code[0] = lit;
}
/**
* @brief Returns a hash of the first 3 bytes of input data.
*/

View File

@ -41,10 +41,15 @@
#define NON_EMPTY_BLOCK_SIZE 6
#define MAX_SYNC_FLUSH_SIZE NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE
#define MAX_TOKENS (16 * 1024)
#include "huffman.h"
#include "bitbuf2.h"
#include "igzip_lib.h"
#include "repeated_char_result.h"
#include "huff_codes.h"
#include "encode_df.h"
#include "igzip_level_buf_structs.h"
extern const uint8_t gzip_hdr[];
extern const uint32_t gzip_hdr_bytes;
@ -59,6 +64,7 @@ static int write_stored_block_stateless(struct isal_zstream *stream, uint32_t st
uint32_t crc32);
static int write_gzip_header_stateless(struct isal_zstream *stream);
static void write_gzip_header(struct isal_zstream *stream);
static int write_deflate_header_stateless(struct isal_zstream *stream);
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream);
@ -70,11 +76,15 @@ unsigned int detect_repeated_char(uint8_t * buf, uint32_t size);
void isal_deflate_body(struct isal_zstream *stream);
void isal_deflate_finish(struct isal_zstream *stream);
void isal_deflate_icf_body(struct isal_zstream *stream);
void isal_deflate_icf_finish(struct isal_zstream *stream);
/*****************************************************************/
/* Forward declarations */
static inline void reset_match_history(struct isal_zstream *stream);
void write_header(struct isal_zstream *stream);
void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
uint32_t deflate_hdr_count, uint32_t extra_bits_count, uint32_t next_state,
uint32_t toggle_end_of_stream);
void write_deflate_header(struct isal_zstream *stream);
void write_trailer(struct isal_zstream *stream);
@ -157,13 +167,122 @@ static void flush_write_buffer(struct isal_zstream *stream)
}
}
/*
 * Encodes buffered intermediate-format tokens into the stream's output.
 *
 * Points the bit buffer at stream->next_out / avail_out, encodes tokens
 * from icf_buf_start + state->count up to icf_buf_next with the level
 * buffer's encode tables, and then advances next_out / total_out /
 * avail_out by the number of bytes produced. state->count records how many
 * tokens have been encoded so a later call can resume.
 *
 * Once every token has been encoded, count is reset and the next state is
 * chosen: ZSTATE_TRL when the input is exhausted at end of stream,
 * ZSTATE_SYNC_FLUSH when the input is exhausted and a flush was requested,
 * otherwise ZSTATE_NEW_HDR. If tokens remain, the state is left unchanged.
 */
static void flush_icf_block(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
	struct BitBuf2 *write_buf = &state->bitbuf;
	struct deflate_icf *icf_buf_encoded_next;

	set_buf(write_buf, stream->next_out, stream->avail_out);

	/* Both operands need the defined() operator: a bare macro name in #if
	 * evaluates to 0 when undefined and fails to preprocess when the macro
	 * is defined with an empty body. */
#if defined(USE_BITBUF8) || defined(USE_BITBUF_ELSE)
	if (!is_full(write_buf))
		flush_bits(write_buf);
#endif

	icf_buf_encoded_next = encode_deflate_icf(level_buf->icf_buf_start + state->count,
						  level_buf->icf_buf_next, write_buf,
						  &level_buf->encode_tables);

	/* remember how far encoding got so a later call can resume */
	state->count = icf_buf_encoded_next - level_buf->icf_buf_start;
	stream->next_out = buffer_ptr(write_buf);
	stream->total_out += buffer_used(write_buf);
	stream->avail_out -= buffer_used(write_buf);

	if (level_buf->icf_buf_next <= icf_buf_encoded_next) {
		/* whole block encoded: pick what comes next */
		state->count = 0;
		if (stream->avail_in == 0 && stream->end_of_stream)
			state->state = ZSTATE_TRL;
		else if (stream->avail_in == 0 && stream->flush != NO_FLUSH)
			state->state = ZSTATE_SYNC_FLUSH;
		else
			state->state = ZSTATE_NEW_HDR;
	}
}
/*
 * Prepares the level buffer for a new block of intermediate-format tokens.
 *
 * Does nothing unless level_buf_size can hold struct level_2_buf plus 100
 * tokens. Otherwise it rewinds the token write pointer, sets the available
 * token space to the buffer size minus the fixed structure and one token,
 * clears the histogram and moves the state machine to ZSTATE_BODY.
 */
static void init_new_icf_block(struct isal_zstream *stream)
{
	struct level_2_buf *lvl = (struct level_2_buf *)stream->level_buf;
	struct isal_zstate *zstate = &stream->internal_state;

	/* buffer too small: leave everything, including the state, untouched */
	if (stream->level_buf_size <
	    sizeof(struct level_2_buf) + 100 * sizeof(struct deflate_icf))
		return;

	memset(&zstate->hist, 0, sizeof(struct isal_mod_hist));

	lvl->icf_buf_next = lvl->icf_buf_start;
	lvl->icf_buf_avail_out =
	    stream->level_buf_size - sizeof(struct level_2_buf) -
	    sizeof(struct deflate_icf);

	zstate->state = ZSTATE_BODY;
}
/*
 * Finishes the current block of intermediate-format tokens and produces its
 * deflate header.
 *
 * An end-of-block token is appended to the token buffer, then the encode
 * tables and the header are generated from the histogram by
 * create_hufftables_icf(). When at least ISAL_DEF_MAX_HDR_SIZE bytes of
 * output are available the header is written straight to the output and the
 * state becomes ZSTATE_FLUSH_ICF_BUFFER. Otherwise it is written into
 * level_buf->deflate_hdr and the state becomes ZSTATE_HDR.
 */
static void create_icf_block_hdr(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
struct BitBuf2 *write_buf = &state->bitbuf;
struct BitBuf2 write_buf_tmp;
uint32_t out_size = stream->avail_out;
uint8_t *end_out = stream->next_out + out_size;
/* Write EOB in icf_buf */
/* The EOB histogram entry is assigned 1, not incremented. */
state->hist.ll_hist[256] = 1;
level_buf->icf_buf_next->lit_len = 0x100;
level_buf->icf_buf_next->lit_dist = NULL_DIST_SYM;
level_buf->icf_buf_next->dist_extra = 0;
level_buf->icf_buf_next++;
/* This is the final block when the stream is ending and no input is left */
state->has_eob_hdr = stream->end_of_stream && !stream->avail_in;
if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) {
/* Enough output space: emit the gzip header when gzip_flag is
 * IGZIP_GZIP, then write the block header directly to the output */
if (stream->gzip_flag == IGZIP_GZIP)
write_gzip_header_stateless(stream);
set_buf(write_buf, stream->next_out, stream->avail_out);
create_hufftables_icf(write_buf, &level_buf->encode_tables, &state->hist,
state->has_eob_hdr);
state->state = ZSTATE_FLUSH_ICF_BUFFER;
stream->next_out = buffer_ptr(write_buf);
stream->total_out += buffer_used(write_buf);
stream->avail_out -= buffer_used(write_buf);
} else {
/* Start writing into temporary buffer */
/* The pending bits move from the stream bit buffer into the temporary
 * one, so the stored header begins with them */
write_buf_tmp.m_bits = write_buf->m_bits;
write_buf_tmp.m_bit_count = write_buf->m_bit_count;
write_buf->m_bits = 0;
write_buf->m_bit_count = 0;
set_buf(&write_buf_tmp, level_buf->deflate_hdr, ISAL_DEF_MAX_HDR_SIZE);
create_hufftables_icf(&write_buf_tmp, &level_buf->encode_tables,
&state->hist, state->has_eob_hdr);
/* Record the byte count and the leftover bit count before the final
 * flush */
level_buf->deflate_hdr_count = buffer_used(&write_buf_tmp);
level_buf->deflate_hdr_extra_bits = write_buf_tmp.m_bit_count;
flush(&write_buf_tmp);
state->state = ZSTATE_HDR;
}
}
static void isal_deflate_pass(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct isal_hufftables *hufftables = stream->hufftables;
uint8_t *start_in = stream->next_in;
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
write_header(stream);
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
if (state->count == 0)
/* Assume the final header is being written since the header
* stored in hufftables is the final header. */
state->has_eob_hdr = 1;
write_header(stream, hufftables->deflate_hdr, hufftables->deflate_hdr_count,
hufftables->deflate_hdr_extra_bits, ZSTATE_BODY,
!stream->end_of_stream);
}
if (state->state == ZSTATE_BODY)
isal_deflate_body(stream);
@ -184,6 +303,52 @@ static void isal_deflate_pass(struct isal_zstream *stream)
write_trailer(stream);
}
/*
 * Run one pass of the two-pass (ICF) compression state machine.
 *
 * Each loop iteration steps through the per-block states in order: start a
 * new block, collect ICF symbols from the input, drain the input tail, build
 * the block header, write a staged header, and encode the buffered symbols.
 * The loop repeats while a finished block leaves the machine in
 * ZSTATE_NEW_HDR. Sync flush, write-buffer flush, gzip CRC update and trailer
 * are handled once after the loop.
 */
static void isal_deflate_icf_pass(struct isal_zstream *stream)
{
	uint8_t *start_in = stream->next_in;
	struct isal_zstate *state = &stream->internal_state;
	struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;

	do {
		if (state->state == ZSTATE_NEW_HDR) {
			init_new_icf_block(stream);

			/* init_new_icf_block() leaves the state untouched
			 * when level_buf is too small. No later step handles
			 * ZSTATE_NEW_HDR, so without this check the loop
			 * would spin forever without making progress. */
			if (state->state == ZSTATE_NEW_HDR)
				break;
		}

		if (state->state == ZSTATE_BODY)
			isal_deflate_icf_body(stream);

		if (state->state == ZSTATE_FLUSH_READ_BUFFER)
			isal_deflate_icf_finish(stream);

		if (state->state == ZSTATE_CREATE_HDR)
			create_icf_block_hdr(stream);

		if (state->state == ZSTATE_HDR)
			/* Note that the header may be prepended by the
			 * remaining bits in the previous block, as such the
			 * toggle header flag cannot be used */
			write_header(stream, level_buf->deflate_hdr,
				     level_buf->deflate_hdr_count,
				     level_buf->deflate_hdr_extra_bits,
				     ZSTATE_FLUSH_ICF_BUFFER, 0);

		if (state->state == ZSTATE_FLUSH_ICF_BUFFER)
			flush_icf_block(stream);

	} while (state->state == ZSTATE_NEW_HDR);

	if (state->state == ZSTATE_SYNC_FLUSH)
		sync_flush(stream);

	if (state->state == ZSTATE_FLUSH_WRITE_BUFFER)
		flush_write_buffer(stream);

	/* Fold the input consumed by this pass into the running gzip CRC */
	if (stream->gzip_flag)
		state->crc = crc32_gzip(state->crc, start_in, stream->next_in - start_in);

	if (state->state == ZSTATE_TRL)
		write_trailer(stream);
}
static void isal_deflate_int(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
@ -209,7 +374,10 @@ static void isal_deflate_int(struct isal_zstream *stream)
}
assert(state->tmp_out_start == state->tmp_out_end);
isal_deflate_pass(stream);
if (stream->level == 0)
isal_deflate_pass(stream);
else
isal_deflate_icf_pass(stream);
/* Fill temporary output buffer then complete filling output buffer */
if (stream->avail_out > 0 && stream->avail_out < 8 && state->state != ZSTATE_NEW_HDR) {
@ -225,7 +393,10 @@ static void isal_deflate_int(struct isal_zstream *stream)
stream->avail_out = sizeof(state->tmp_out_buff);
stream->total_out = 0;
isal_deflate_pass(stream);
if (stream->level == 0)
isal_deflate_pass(stream);
else
isal_deflate_icf_pass(stream);
state->tmp_out_start = 0;
state->tmp_out_end = stream->total_out;
@ -304,8 +475,8 @@ static void write_constant_compressed_stateless(struct isal_zstream *stream,
if (rep_extra >= 230) {
write_bits(&state->bitbuf,
CODE_280 | ((rep_extra / 2 - 115) << CODE_280_LENGTH),
CODE_280_TOTAL_LENGTH);
CODE_280 | ((rep_extra / 2 - 115) <<
CODE_280_LENGTH), CODE_280_TOTAL_LENGTH);
rep_extra -= rep_extra / 2;
}
@ -377,16 +548,34 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream)
write_constant_compressed_stateless(stream, repeat_length);
}
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
write_deflate_header_unaligned_stateless(stream);
if (stream->level == 0) {
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
write_deflate_header_unaligned_stateless(stream);
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
return STATELESS_OVERFLOW;
reset_match_history(stream);
}
state->file_start = stream->next_in - stream->total_in;
isal_deflate_pass(stream);
} else if (stream->level == 1) {
if (stream->level_buf == NULL || stream->level_buf_size < ISAL_DEF_LVL1_MIN) {
/* Default to internal buffer if invalid size is supplied */
stream->level_buf = state->buffer;
stream->level_buf_size = sizeof(state->buffer);
}
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
return STATELESS_OVERFLOW;
reset_match_history(stream);
reset_match_history(stream);
}
state->count = 0;
state->file_start = stream->next_in - stream->total_in;
isal_deflate_icf_pass(stream);
state->file_start = stream->next_in - stream->total_in;
isal_deflate_pass(stream);
} else
return ISAL_INVALID_LEVEL;
if (state->state == ZSTATE_END
|| (state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH))
@ -481,7 +670,11 @@ void isal_deflate_init(struct isal_zstream *stream)
stream->total_in = 0;
stream->total_out = 0;
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
stream->flush = 0;
stream->level = 0;
stream->level_buf = NULL;
stream->level_buf_size = 0;
stream->end_of_stream = 0;
stream->flush = NO_FLUSH;
stream->gzip_flag = 0;
state->b_bytes_valid = 0;
@ -536,8 +729,11 @@ void isal_deflate_stateless_init(struct isal_zstream *stream)
stream->total_in = 0;
stream->total_out = 0;
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
stream->flush = NO_FLUSH;
stream->level = 0;
stream->level_buf = NULL;
stream->level_buf_size = 0;
stream->end_of_stream = 0;
stream->flush = NO_FLUSH;
stream->gzip_flag = 0;
stream->internal_state.state = ZSTATE_NEW_HDR;
return;
@ -581,6 +777,9 @@ int isal_deflate_stateless(struct isal_zstream *stream)
if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH)
return INVALID_FLUSH;
if (stream->level != 0 && stream->level != 1)
return ISAL_INVALID_LEVEL;
if (avail_in == 0)
stored_len = STORED_BLK_HDR_BZ;
else
@ -890,13 +1089,14 @@ static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream)
return COMP_OK;
}
void write_header(struct isal_zstream *stream)
/* Toggle end of stream only works when deflate header is aligned */
void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
uint32_t deflate_hdr_count, uint32_t extra_bits_count, uint32_t next_state,
uint32_t toggle_end_of_stream)
{
struct isal_zstate *state = &stream->internal_state;
struct isal_hufftables *hufftables = stream->hufftables;
uint64_t hdr_extra_bits = hufftables->deflate_hdr[hufftables->deflate_hdr_count];
uint32_t hdr_extra_bits = deflate_hdr[deflate_hdr_count];
uint32_t count;
state->state = ZSTATE_HDR;
if (state->bitbuf.m_bit_count != 0) {
@ -913,19 +1113,18 @@ void write_header(struct isal_zstream *stream)
if (stream->gzip_flag == IGZIP_GZIP)
write_gzip_header(stream);
count = hufftables->deflate_hdr_count - state->count;
count = deflate_hdr_count - state->count;
if (count != 0) {
if (count > stream->avail_out)
count = stream->avail_out;
memcpy(stream->next_out, hufftables->deflate_hdr + state->count, count);
memcpy(stream->next_out, deflate_hdr + state->count, count);
if (state->count == 0 && count > 0) {
if (!stream->end_of_stream)
*stream->next_out -= 1;
else
state->has_eob_hdr = 1;
if (toggle_end_of_stream && state->count == 0 && count > 0) {
/* Assumes the final block bit is the first bit */
*stream->next_out ^= 1;
state->has_eob_hdr = !state->has_eob_hdr;
}
stream->next_out += count;
@ -933,21 +1132,20 @@ void write_header(struct isal_zstream *stream)
stream->total_out += count;
state->count += count;
count = hufftables->deflate_hdr_count - state->count;
} else if (hufftables->deflate_hdr_count == 0) {
if (!stream->end_of_stream)
hdr_extra_bits -= 1;
else
state->has_eob_hdr = 1;
count = deflate_hdr_count - state->count;
} else if (toggle_end_of_stream && deflate_hdr_count == 0) {
/* Assumes the final block bit is the first bit */
hdr_extra_bits ^= 1;
state->has_eob_hdr = !state->has_eob_hdr;
}
if ((count == 0) && (stream->avail_out >= 8)) {
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
write_bits(&state->bitbuf, hdr_extra_bits, hufftables->deflate_hdr_extra_bits);
write_bits(&state->bitbuf, hdr_extra_bits, extra_bits_count);
state->state = ZSTATE_BODY;
state->state = next_state;
state->count = 0;
count = buffer_used(&state->bitbuf);

223
igzip/igzip_icf_base.c Normal file
View File

@ -0,0 +1,223 @@
#include <stdint.h>
#include "igzip_lib.h"
#include "huffman.h"
#include "huff_codes.h"
#include "encode_df.h"
#include "igzip_level_buf_structs.h"
/*
 * Store one intermediate compression format (ICF) entry.
 *
 * icf        - entry to fill in
 * lit_len    - value stored in the lit_len field
 * lit_dist   - value stored in the lit_dist field
 * extra_bits - value stored in the dist_extra field
 */
static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
				     uint32_t lit_dist, uint32_t extra_bits)
{
	struct deflate_icf *entry = icf;

	entry->dist_extra = extra_bits;
	entry->lit_dist = lit_dist;
	entry->lit_len = lit_len;
}
static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
uint8_t * next_in, uint8_t * end_in,
struct deflate_icf *start_out, struct deflate_icf *next_out,
struct deflate_icf *end_out)
{
stream->next_in = next_in;
stream->total_in += next_in - start_in;
stream->avail_in = end_in - next_in;
((struct level_2_buf *)stream->level_buf)->icf_buf_next = next_out;
((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
}
void isal_deflate_icf_body_base(struct isal_zstream *stream)
{
uint32_t literal, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
struct deflate_icf *start_out, *next_out, *end_out;
uint16_t match_length;
uint32_t dist;
uint32_t code, code2, extra_bits;
struct isal_zstate *state = &stream->internal_state;
uint16_t *last_seen = state->head;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
end_out =
start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
while (next_in < end_in - ISAL_LOOK_AHEAD) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - state->file_start);
/* The -1 are to handle the case when dist = 0 */
if (dist - 1 < IGZIP_HIST_SIZE - 1) {
assert(dist != 0);
match_length = compare258(next_in - dist, next_in, 258);
if (match_length >= SHORTEST_MATCH) {
next_hash = next_in;
#ifdef ISAL_LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
last_seen[hash] =
(uint64_t) (next_hash - state->file_start);
}
get_len_icf_code(match_length, &code);
get_dist_icf_code(dist, &code2, &extra_bits);
state->hist.ll_hist[code]++;
state->hist.d_hist[code2]++;
write_deflate_icf(next_out, code, code2, extra_bits);
next_out++;
next_in += match_length;
continue;
}
}
get_lit_icf_code(literal & 0xFF, &code);
state->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
assert(stream->avail_in <= ISAL_LOOK_AHEAD);
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
void isal_deflate_icf_finish_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
struct deflate_icf *start_out, *next_out, *end_out;
uint16_t match_length;
uint32_t dist;
uint32_t code, code2, extra_bits;
struct isal_zstate *state = &stream->internal_state;
uint16_t *last_seen = state->head;
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
end_out = start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
while (next_in < end_in - 3) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - state->file_start);
if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */
match_length = compare258(next_in - dist, next_in, end_in - next_in);
if (match_length >= SHORTEST_MATCH) {
next_hash = next_in;
#ifdef ISAL_LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
last_seen[hash] =
(uint64_t) (next_hash - state->file_start);
}
get_len_icf_code(match_length, &code);
get_dist_icf_code(dist, &code2, &extra_bits);
state->hist.ll_hist[code]++;
state->hist.d_hist[code2]++;
write_deflate_icf(next_out, code, code2, extra_bits);
next_out++;
next_in += match_length;
continue;
}
}
get_lit_icf_code(literal & 0xFF, &code);
state->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
while (next_in < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *next_in;
get_lit_icf_code(literal & 0xFF, &code);
state->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
if (next_in == end_in) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_CREATE_HDR;
}
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
return;
}

550
igzip/igzip_icf_body.asm Normal file
View File

@ -0,0 +1,550 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "reg_sizes.asm"
%include "stdmac.asm"
%ifdef DEBUG
%macro MARK 1
global %1
%1:
%endm
%else
%macro MARK 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define tmp2 rcx
%define hash2 rcx
%define curr_data rax
%define code rax
%define tmp5 rax
%define tmp4 rbx
%define dist rbx
%define code2 rbx
%define hash rdx
%define len rdx
%define code_len3 rdx
%define tmp8 rdx
%define tmp1 rsi
%define code_len2 rsi
%define code5 rsi
%define file_start rdi
%define curr_data2 r8
%define len2 r8
%define tmp6 r8
%define f_i r10
%define m_out_buf r11
%define f_end_i r12
%define dist2 r12
%define tmp7 r12
%define code4 r12
%define tmp3 r13
%define code3 r13
%define stream r14
%define hufftables r15
;; GPR r8 & r15 can be used
%define xtmp0 xmm0 ; tmp
%define xtmp1 xmm1 ; tmp
%define xhash xmm2
%define xmask xmm3
%define xdata xmm4
%define ytmp0 ymm0 ; tmp
%define ytmp1 ymm1 ; tmp
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
m_out_end equ 0 ; local variable (8 bytes)
m_out_start equ 8
f_end_i_mem_offset equ 16
gpr_save_mem_offset equ 24 ; gpr save area (8*8 bytes)
xmm_save_mem_offset equ 24 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
stack_size equ 3*8 + 8*8 + 4*16
;;; 8 because stack address is odd multiple of 8 after a function call and
;;; we want it aligned to 16 bytes
; void isal_deflate_icf_body ( isal_zstream *stream )
; arg 1: rcx: addr of stream
global isal_deflate_icf_body_ %+ ARCH
isal_deflate_icf_body_ %+ ARCH %+ :
%ifidn __OUTPUT_FORMAT__, elf64
	;; elf64 builds receive the stream pointer in rdi; the rest of the
	;; routine expects it in rcx
	mov	rcx, rdi
%endif

	;; do nothing if (avail_in == 0)
	cmp	dword [rcx + _avail_in], 0
	jne	skip1

	;; Set stream's next state.
	;; With no input, stay in ZSTATE_BODY unless end_of_stream or a flush
	;; is requested. This matches both the input_end path of this routine
	;; and isal_deflate_icf_body_base. Selecting ZSTATE_CREATE_HDR here
	;; would close a block on every call that has no input.
	mov	rdx, ZSTATE_FLUSH_READ_BUFFER
	mov	rax, ZSTATE_BODY
	cmp	dword [rcx + _end_of_stream], 0
	cmovne	rax, rdx
	cmp	dword [rcx + _flush], _NO_FLUSH
	cmovne	rax, rdx
	mov	dword [rcx + _internal_state_state], eax
	ret
;; Main path (avail_in != 0): set up the stack frame, save registers and load
;; the input/output cursors before entering the compression loop.
skip1:
;; Reserve the local frame; with ALIGN_STACK, rsp is also rounded down to a
;; 16-byte boundary and the original rsp is kept in rbp.
%ifdef ALIGN_STACK
push rbp
mov rbp, rsp
sub rsp, stack_size
and rsp, ~15
%else
sub rsp, stack_size
%endif
;; Save the general purpose registers restored at the end of the routine
mov [rsp + gpr_save_mem_offset + 0*8], rbx
mov [rsp + gpr_save_mem_offset + 1*8], rsi
mov [rsp + gpr_save_mem_offset + 2*8], rdi
mov [rsp + gpr_save_mem_offset + 3*8], rbp
mov [rsp + gpr_save_mem_offset + 4*8], r12
mov [rsp + gpr_save_mem_offset + 5*8], r13
mov [rsp + gpr_save_mem_offset + 6*8], r14
mov [rsp + gpr_save_mem_offset + 7*8], r15
mov stream, rcx
mov dword [stream + _internal_state_has_eob], 0
;; xmask = HASH_MASK replicated in every dword (see "mask" in the data section)
MOVDQU xmask, [mask]
; m_out_buf = level_buf->icf_buf_next; remember it as the output start
mov tmp1, [stream + _level_buf]
mov m_out_buf, [tmp1 + _icf_buf_next]
mov [rsp + m_out_start], m_out_buf
; m_out_end = icf_buf_next + icf_buf_avail_out - SLOP
mov tmp1, [tmp1 + _icf_buf_avail_out]
add tmp1, m_out_buf
sub tmp1, SLOP
mov [rsp + m_out_end], tmp1
mov hufftables, [stream + _hufftables]
; file_start = next_in - total_in, f_i = total_in
; so [file_start + f_i] addresses the current input byte
mov file_start, [stream + _next_in]
mov f_i %+ d, dword [stream + _total_in]
sub file_start, f_i
; f_end_i = total_in + avail_in
mov f_end_i %+ d, [stream + _avail_in]
add f_end_i, f_i
; f_end_i -= LA;
sub f_end_i, LA
mov [rsp + f_end_i_mem_offset], f_end_i
; if (f_end_i <= f_i) goto input_end (no more than LA bytes of input left)
cmp f_end_i, f_i
jle input_end
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
MARK __body_compute_hash_ %+ ARCH
;; Load the input at f_i and precompute the hashes for positions f_i (hash)
;; and f_i + 1 (hash2)
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov tmp3, curr_data
mov tmp6, curr_data
compute_hash hash, curr_data
shr tmp3, 8
compute_hash hash2, tmp3
and hash, HASH_MASK
and hash2, HASH_MASK
;; When has_hist is 0 the first byte is handled by write_first_byte
cmp dword [stream + _internal_state_has_hist], 0
je write_first_byte
jmp loop2
align 16
loop2:
; if (m_out_buf > m_out_end) goto output_end (icf output buffer is full)
cmp m_out_buf, [rsp + m_out_end]
ja output_end
xor dist, dist
xor dist2, dist2
xor tmp3, tmp3
lea tmp1, [file_start + f_i]
mov dist %+ w, f_i %+ w
dec dist
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
inc f_i
MOVQ tmp6, xdata
shr tmp5, 16
mov tmp8, tmp5
compute_hash tmp6, tmp5
mov dist2 %+ w, f_i %+ w
dec dist2
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
; if ((dist-1) < (D-1)) {
and dist %+ d, (D-1)
neg dist
shr tmp8, 8
compute_hash tmp2, tmp8
and dist2 %+ d, (D-1)
neg dist2
MARK __body_compare_ %+ ARCH
;; Check for long len/dist match (>7) with first literal
MOVQ len, xdata
mov curr_data, len
PSRLDQ xdata, 1
xor len, [tmp1 + dist - 1]
jz compare_loop
MOVD xhash, tmp6 %+ d
PINSRD xhash, tmp2 %+ d, 1
PAND xhash, xhash, xmask
;; Check for len/dist match (>7) with second literal
MOVQ len2, xdata
xor len2, [tmp1 + dist2]
jz compare_loop2
;; Check for len/dist match for first literal
test len %+ d, 0xFFFFFFFF
jz len_dist_huffman_pre
;; Check for len/dist match for second literal
test len2 %+ d, 0xFFFFFFFF
jnz write_lit_bits
MARK __body_len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
bsf len2, len2
shr len2, 3
len_dist_lit_huffman:
neg dist2
%ifndef LONGER_HUFFTABLE
mov tmp4, dist2
get_dist_icf_code tmp4, code4, tmp1 ;; clobbers dist, rcx
%else
get_dist_icf_code dist2, code4, tmp1
%endif
movzx code5, curr_data %+ b
;; get_len_code
add f_i, len2
neg len2
MOVQ tmp5, xdata
shr tmp5, 24
compute_hash tmp4, tmp5
and tmp4, HASH_MASK
;; Setup for updating hash
lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
MOVD hash %+ d, xhash
PEXTRD hash2 %+ d, xhash, 1
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
compute_hash hash, curr_data
add tmp3,1
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
add tmp3, 1
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
neg len2
add len2, 254
or code4, len2
inc word [stream + _internal_state_hist_lit_len + 2*code5]
or code5, LIT
inc word [stream + _internal_state_hist_lit_len + 2*len2]
write_dword code5, m_out_buf
write_dword code4, m_out_buf
shr code4, DIST_OFFSET
and code4, 0x1F
inc word [stream + _internal_state_hist_dist + 2*code4]
mov f_end_i, [rsp + f_end_i_mem_offset]
mov curr_data2, curr_data
shr curr_data2, 8
compute_hash hash2, curr_data2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; continue
cmp f_i, f_end_i
jl loop2
jmp input_end
;; encode as dist/len
MARK __body_len_dist_huffman_ %+ ARCH
len_dist_huffman_pre:
bsf len, len
shr len, 3
len_dist_huffman:
dec f_i
neg dist
; get_dist_code(dist, &code2, &code_len2);
%ifndef LONGER_HUFFTABLE
mov tmp3, dist ; since code2 and dist are rbx
get_dist_icf_code tmp3, code2, tmp1 ;; clobbers dist, rcx
%else
get_dist_icf_code dist, code2, tmp1
%endif
; get_len_code(len, &code, &code_len);
lea code5, [len + 254]
or code2, code5
;; Setup for updating hash
lea tmp3, [f_i + 2] ; tmp3 <= k
add f_i, len
MOVD hash %+ d, xhash
PEXTRD hash2 %+ d, xhash, 1
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3,1
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
inc word [stream + _internal_state_hist_lit_len + 2*code5]
write_dword code2, m_out_buf
shr code2, DIST_OFFSET
and code2, 0x1F
inc word [stream + _internal_state_hist_dist + 2*code2]
mov f_end_i, [rsp + f_end_i_mem_offset]
shr curr_data2, 8
compute_hash hash2, curr_data2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; continue
cmp f_i, f_end_i
jl loop2
jmp input_end
MARK __body_write_lit_bits_ %+ ARCH
write_lit_bits:
movzx code3, curr_data %+ b
shr curr_data, 8
and curr_data, 0xff
mov code2, curr_data
MOVDQU xdata, [file_start + f_i + 1]
mov f_end_i, [rsp + f_end_i_mem_offset]
add f_i, 1
mov curr_data, [file_start + f_i]
MOVD hash %+ d, xhash
inc word [stream + _internal_state_hist_lit_len + 2*code3]
or code3, LIT
inc word [stream + _internal_state_hist_lit_len + 2*code2]
or code2, LIT
write_dword code3, m_out_buf
write_dword code2, m_out_buf
PEXTRD hash2 %+ d, xhash, 1
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; continue
cmp f_i, f_end_i
jl loop2
;; Input exhausted (at most LA bytes left): choose the next state.
;; ZSTATE_FLUSH_READ_BUFFER if end_of_stream is set or flush != _NO_FLUSH,
;; otherwise remain in ZSTATE_BODY.
input_end:
mov tmp1, ZSTATE_FLUSH_READ_BUFFER
mov tmp5, ZSTATE_BODY
cmp dword [stream + _end_of_stream], 0
cmovne tmp5, tmp1
cmp dword [stream + _flush], _NO_FLUSH
cmovne tmp5, tmp1
mov dword [stream + _internal_state_state], tmp5 %+ d
jmp end
;; ICF output buffer full: the block has to be closed
output_end:
mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
;; Common exit: write the cursors back to the stream and restore registers
end:
;; update input buffer
;; undo the "f_end_i -= LA" from the setup so f_end_i is the true input end
add f_end_i, LA
mov [stream + _total_in], f_i %+ d
add file_start, f_i
mov [stream + _next_in], file_start
sub f_end_i, f_i
mov [stream + _avail_in], f_end_i %+ d
;; update output buffer
;; icf_buf_next = m_out_buf; icf_buf_avail_out -= (m_out_buf - m_out_start),
;; i.e. the amount written by this call, measured in bytes
mov tmp1, [stream + _level_buf]
mov [tmp1 + _icf_buf_next], m_out_buf
sub m_out_buf, [rsp + m_out_start]
sub [tmp1 + _icf_buf_avail_out], m_out_buf %+ d
;; restore the registers saved on entry
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
mov r12, [rsp + gpr_save_mem_offset + 4*8]
mov r13, [rsp + gpr_save_mem_offset + 5*8]
mov r14, [rsp + gpr_save_mem_offset + 6*8]
mov r15, [rsp + gpr_save_mem_offset + 7*8]
;; release the frame; with ALIGN_STACK the original rsp comes back from rbp
%ifndef ALIGN_STACK
add rsp, stack_size
%else
mov rsp, rbp
pop rbp
%endif
ret
MARK __body_compare_loops_ %+ ARCH
compare_loop:
MOVD xhash, tmp6 %+ d
PINSRD xhash, tmp2 %+ d, 1
PAND xhash, xhash, xmask
lea tmp2, [tmp1 + dist - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
jmp len_dist_huffman
compare_loop2:
lea tmp2, [tmp1 + dist2]
add tmp1, 1
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
jmp len_dist_lit_huffman
MARK __write_first_byte_ %+ ARCH
write_first_byte:
cmp m_out_buf, [rsp + m_out_end]
ja output_end
mov dword [stream + _internal_state_has_hist], 1
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
mov hash, hash2
shr tmp6, 16
compute_hash hash2, tmp6
and curr_data, 0xff
inc word [stream + _internal_state_hist_lit_len + 2*curr_data]
or curr_data, LIT
write_dword curr_data, m_out_buf
MOVDQU xdata, [file_start + f_i + 1]
add f_i, 1
mov curr_data, [file_start + f_i]
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
jmp input_end
section .data
align 16
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
const_D: dq D

View File

@ -0,0 +1,7 @@
%define ARCH 01
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 2
%endif
%include "igzip_icf_body.asm"

View File

@ -0,0 +1,7 @@
%define ARCH 02
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 2
%endif
%include "igzip_icf_body.asm"

View File

@ -0,0 +1,8 @@
%define ARCH 04
%define USE_HSWNI
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 3
%endif
%include "igzip_icf_body.asm"

299
igzip/igzip_icf_finish.asm Normal file
View File

@ -0,0 +1,299 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "stdmac.asm"
%include "reg_sizes.asm"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Register map for this file.
; Several symbolic names are aliases of the same physical register, so two
; names bound to one register can never hold live values at the same time:
;   rax: curr_data, tmp1
;   rbx: f_index, code, tmp4, tmp5, tmp6
;   rcx: tmp2, hash
;   r12: code2, f_end_i
; Because f_end_i shares r12 with code2, the loop bound is also kept in a
; stack slot (f_end_i_mem_offset) and compared from memory.
%define curr_data rax
%define tmp1 rax
%define f_index rbx
%define code rbx
%define tmp4 rbx
%define tmp5 rbx
%define tmp6 rbx
%define tmp2 rcx
%define hash rcx
%define tmp3 rdx
%define stream rsi
%define f_i rdi
%define code_len2 rbp
%define m_out_buf r8
%define dist r10
%define code2 r12
%define f_end_i r12
%define file_start r13
%define len r14
%define hufftables r15
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame layout, as offsets from rsp after "sub rsp, stack_size".
; Three 8-byte slots are named; stack_size reserves 32 bytes in total.
f_end_i_mem_offset equ 0 ; local variable (8 bytes)
; address compared against m_out_buf to detect a full output buffer
m_out_end equ 8
; value of m_out_buf on entry, used to compute how much was written
m_out_start equ 16
stack_size equ 32
; void isal_deflate_icf_finish ( isal_zstream *stream )
; arg 1: rcx: addr of stream (when assembled for elf64 the argument is first
;        copied from rdi into rcx)
;
; Consumes the input still described by the stream (next_in / avail_in) and
; appends one dword token per literal or match at level_buf->icf_buf_next,
; while counting symbols in the internal-state histograms.
;
; The main loop stops LAST_BYTES_COUNT bytes before the end of the input;
; whatever is left after it is emitted as plain literals (final_bytes).
;
; On exit the stream's total_in / next_in / avail_in and the level buffer's
; icf_buf_next / icf_buf_avail_out are updated. The internal state is set to
; ZSTATE_CREATE_HDR when the token buffer filled up, or when all input was
; consumed and either end_of_stream is non-zero or flush is not _NO_FLUSH.
global isal_deflate_icf_finish_01
isal_deflate_icf_finish_01:
; PUSH_ALL / POP_ALL are macros defined outside this view; presumably they
; save and restore the listed registers.
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
sub rsp, stack_size
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
mov stream, rcx
; m_out_buf = level_buf->icf_buf_next, remembered in m_out_start;
; m_out_end = m_out_buf + level_buf->icf_buf_avail_out - 4, i.e. the highest
; address at which one more dword still fits
mov tmp1, [stream + _level_buf]
mov m_out_buf, [tmp1 + _icf_buf_next]
mov [rsp + m_out_start], m_out_buf
mov tmp1, [tmp1 + _icf_buf_avail_out]
add tmp1, m_out_buf
sub tmp1, 4
mov [rsp + m_out_end], tmp1
; NOTE(review): hufftables is loaded here but no later instruction visible in
; this routine names it; confirm whether one of the macros relies on it.
mov hufftables, [stream + _hufftables]
; file_start = next_in - total_in, so [file_start + f_i] addresses the byte at
; absolute input position f_i (f_i starts at total_in)
mov file_start, [stream + _next_in]
mov f_i %+ d, dword [stream + _total_in]
sub file_start, f_i
; f_end_i = total_in + avail_in - LAST_BYTES_COUNT (bound of the main loop)
mov f_end_i %+ d, dword [stream + _avail_in]
add f_end_i, f_i
sub f_end_i, LAST_BYTES_COUNT
mov [rsp + f_end_i_mem_offset], f_end_i
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
cmp f_i, f_end_i
jge end_loop_2
mov curr_data %+ d, [file_start + f_i]
; When no history exists yet (has_hist == 0) the first position is only
; recorded in the hash table and its byte is emitted as a literal; no match
; lookup is attempted for it.
cmp dword [stream + _internal_state_has_hist], 0
jne skip_write_first_byte
cmp m_out_buf, [rsp + m_out_end]
ja end_loop_2
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
mov dword [stream + _internal_state_has_hist], 1
jmp encode_literal
skip_write_first_byte:
loop2:
; leave the loop when the token buffer has no room for another dword
cmp m_out_buf, [rsp + m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
; state->head[hash] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
; dist = f_i - f_index; // mod 64k
mov dist %+ d, f_i %+ d
sub dist %+ d, f_index %+ d
and dist %+ d, 0xFFFF
; A match is only attempted when (dist-1) < (D-1) as an unsigned compare,
; i.e. 1 <= dist <= D-1; dist == 0 wraps to a huge value and is rejected.
mov tmp1 %+ d, dist %+ d
sub tmp1 %+ d, 1
cmp tmp1 %+ d, (D-1)
jae encode_literal
; tmp4 = number of input bytes left from f_i to the true end of input
; (the stored bound excludes LAST_BYTES_COUNT, so it is added back)
mov tmp4, [rsp + f_end_i_mem_offset]
sub tmp4, f_i
add tmp4, LAST_BYTES_COUNT
; if (len > 258) len = 258;  (cmov takes no immediate, hence the c258 constant)
cmp tmp4, 258
cmovg tmp4, [c258]
; len = compare(state->file_start + f_i,
; state->file_start + f_i - dist, len);
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist
compare tmp4, tmp1, tmp2, len, tmp3
; if (len >= SHORTEST_MATCH) {
cmp len, SHORTEST_MATCH
jb encode_literal
;; encode as dist/len
; code2 = distance part of the token, computed from dist - 1
dec dist
get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx
;; length symbol is len + 254; it is merged into the token and counted in
;; the literal/length histogram
lea code, [len + 254]
or code2, code
inc word [stream + _internal_state_hist_lit_len + 2*code]
; Advance past the match. tmp3 = first position after the match start.
lea tmp3, [f_i + 1] ; tmp3 <= k
add f_i, len
; skip the hash-table updates when the match reached the main-loop bound
cmp f_i, [rsp + f_end_i_mem_offset]
jae skip_hash_update
; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
skip_hash_update:
; emit the match token (write_dword is defined outside this view; presumably
; it stores the dword and advances m_out_buf)
write_dword code2, m_out_buf
; extract the 5-bit distance symbol back out of the token and count it
shr code2, DIST_OFFSET
and code2, 0x1F
inc word [stream + _internal_state_hist_dist + 2*code2]
; continue
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
jmp end_loop_2
encode_literal:
; Literal token: count the byte value in the literal/length histogram, then
; emit (byte | LIT).
movzx tmp5, byte [file_start + f_i]
inc word [stream + _internal_state_hist_lit_len + 2*tmp5]
or tmp5, LIT
write_dword tmp5, m_out_buf
; continue
add f_i, 1
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
end_loop_2:
; restore the true end of input (undo the LAST_BYTES_COUNT adjustment) and
; store it back, since the exit code reads it from the stack slot
mov f_end_i, [rsp + f_end_i_mem_offset]
add f_end_i, LAST_BYTES_COUNT
mov [rsp + f_end_i_mem_offset], f_end_i
; nothing left to emit when f_i has already reached the end of input
cmp f_i, f_end_i
jge input_end
xor tmp5, tmp5
final_bytes:
; remaining tail bytes are emitted as literals, one token each, as long as
; the token buffer has room
cmp m_out_buf, [rsp + m_out_end]
ja out_end
movzx tmp5, byte [file_start + f_i]
inc word [stream + _internal_state_hist_lit_len + 2*tmp5]
or tmp5, LIT
write_dword tmp5, m_out_buf
inc f_i
cmp f_i, [rsp + f_end_i_mem_offset]
jl final_bytes
input_end:
; all input consumed: change state only if this was the last input buffer or
; a flush was requested
cmp dword [stream + _end_of_stream], 0
jne out_end
cmp dword [stream + _flush], _NO_FLUSH
jne out_end
jmp end
out_end:
mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
end:
;; Update input buffer
mov f_end_i, [rsp + f_end_i_mem_offset]
mov [stream + _total_in], f_i %+ d
add file_start, f_i
mov [stream + _next_in], file_start
sub f_end_i, f_i
mov [stream + _avail_in], f_end_i %+ d
;; Update output buffer
mov tmp1, [stream + _level_buf]
mov [tmp1 + _icf_buf_next], m_out_buf
; m_out_buf = number of bytes written during this call
sub m_out_buf, [rsp + m_out_start]
; level_buf->icf_buf_avail_out -= bytes written
sub [tmp1 + _icf_buf_avail_out], m_out_buf
add rsp, stack_size
POP_ALL
ret
section .data
; 258 as a memory operand for the cmovg that clamps the match length.
; NOTE(review): the constant is a qword but is only aligned to 4 bytes.
align 4
c258: dq 258

View File

@ -0,0 +1,16 @@
#ifndef IGZIP_LEVEL_BUF_STRUCTS_H
#define IGZIP_LEVEL_BUF_STRUCTS_H
#include "huff_codes.h"
#include "encode_df.h"
/*
 * Working storage kept in the user-supplied level buffer.
 *
 * NOTE(review): presumably the assembly reaches icf_buf_next and
 * icf_buf_avail_out through fixed offsets (_icf_buf_next,
 * _icf_buf_avail_out), so the member order and sizes must stay in step with
 * the assembly-side structure definitions - confirm before reordering.
 */
struct level_2_buf {
/* Huffman tables used when encoding the intermediate tokens */
struct hufftables_icf encode_tables;
/* NOTE(review): presumably size of the stored header and its trailing bit count - confirm units */
uint32_t deflate_hdr_count;
uint32_t deflate_hdr_extra_bits;
/* storage for the generated deflate block header */
uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
/* next free position for an intermediate token */
struct deflate_icf *icf_buf_next;
/* space remaining for tokens; the icf finish assembly treats it as a byte count */
uint64_t icf_buf_avail_out;
/* start of token storage; zero-length array (a compiler extension) that adds no size to the struct */
struct deflate_icf icf_buf_start[0];
};
#endif

View File

@ -45,10 +45,21 @@ extern isal_deflate_body_04
extern isal_deflate_finish_base
extern isal_deflate_finish_01
extern isal_deflate_icf_body_base
extern isal_deflate_icf_body_01
extern isal_deflate_icf_body_02
extern isal_deflate_icf_body_04
extern isal_deflate_icf_finish_base
extern isal_deflate_icf_finish_01
extern isal_update_histogram_base
extern isal_update_histogram_01
extern isal_update_histogram_04
extern encode_deflate_icf_base
extern encode_deflate_icf_04
extern crc32_gzip_base
extern crc32_gzip_01
@ -61,8 +72,16 @@ mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body
mbin_interface isal_deflate_finish
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
mbin_interface isal_deflate_icf_body
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_01, isal_deflate_icf_body_02, isal_deflate_icf_body_04
mbin_interface isal_deflate_icf_finish
mbin_dispatch_init5 isal_deflate_icf_finish, isal_deflate_icf_finish_base, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01
mbin_interface isal_update_histogram
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
mbin_interface encode_deflate_icf
mbin_dispatch_init5 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04
mbin_interface crc32_gzip
mbin_dispatch_init5 crc32_gzip, crc32_gzip_base, crc32_gzip_base, crc32_gzip_01, crc32_gzip_01

View File

@ -676,7 +676,8 @@ void set_random_hufftable(struct isal_zstream *stream)
* output buffer are randomly segmented to test state information for the
* compression*/
int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
uint32_t level)
{
int ret = IGZIP_COMP_OK;
uint8_t *in_buf = NULL, *out_buf = NULL;
@ -685,6 +686,8 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
struct isal_zstream stream;
struct isal_zstate *state = &stream.internal_state;
uint32_t loop_count = 0;
uint32_t level_buf_size;
uint8_t *level_buf = NULL;
#ifdef VERBOSE
printf("Starting Compress Multi Pass\n");
@ -704,6 +707,15 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
stream.avail_in = 0;
stream.avail_out = 0;
stream.gzip_flag = gzip_flag;
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
while (1) {
loop_count++;
@ -797,6 +809,8 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
}
if (level_buf != NULL)
free(level_buf);
if (in_buf != NULL)
free(in_buf);
if (out_buf != NULL)
@ -812,11 +826,14 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
/* Compress the input data into the outbuffer in one call to isal_deflate */
int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
uint32_t level)
{
int ret = IGZIP_COMP_OK;
struct isal_zstream stream;
struct isal_zstate *state = &stream.internal_state;
uint32_t level_buf_size;
uint8_t *level_buf = NULL;
#ifdef VERBOSE
printf("Starting Compress Single Pass\n");
@ -838,11 +855,23 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
stream.next_out = compressed_buf;
stream.end_of_stream = 1;
stream.gzip_flag = gzip_flag;
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
ret =
isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size,
data_size, compressed_buf, *compressed_size, 0);
if (level_buf != NULL)
free(level_buf);
/* Check if the compression is completed */
if (state->state == ZSTATE_END)
*compressed_size = stream.total_out;
@ -855,10 +884,13 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
/* Statelessly compress the input buffer into the output buffer */
int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag)
uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
uint32_t level)
{
int ret = IGZIP_COMP_OK;
struct isal_zstream stream;
uint32_t level_buf_size;
uint8_t *level_buf = NULL;
create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
@ -874,9 +906,23 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
stream.avail_out = *compressed_size;
stream.next_out = compressed_buf;
stream.gzip_flag = gzip_flag;
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE;
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
}
ret = isal_deflate_stateless(&stream);
if (level_buf != NULL)
free(level_buf);
/* verify the stream */
if (stream.next_in - data != stream.total_in ||
stream.total_in + stream.avail_in != data_size)
@ -911,11 +957,11 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
/* Statelessly compress the input buffer into the output buffer */
int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
uint32_t * compressed_size)
uint32_t * compressed_size, uint32_t level)
{
int ret = IGZIP_COMP_OK;
uint8_t *in_buf = NULL, *out_buf = compressed_buf;
uint32_t in_size = 0;
uint8_t *in_buf = NULL, *level_buf = NULL, *out_buf = compressed_buf;
uint32_t in_size = 0, level_buf_size;
uint32_t in_processed = 00;
struct isal_zstream stream;
uint32_t loop_count = 0;
@ -932,6 +978,17 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
stream.end_of_stream = 0;
stream.avail_out = *compressed_size;
stream.next_out = compressed_buf;
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE;
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
}
while (1) {
loop_count++;
@ -992,6 +1049,9 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
}
if (level_buf != NULL)
free(level_buf);
if (in_buf != NULL)
free(in_buf);
@ -1006,11 +1066,11 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
* is randomly segmented to test for independence of blocks in full flush
* compression*/
int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
uint32_t * compressed_size, uint32_t gzip_flag)
uint32_t * compressed_size, uint32_t gzip_flag, uint32_t level)
{
int ret = IGZIP_COMP_OK;
uint8_t *in_buf = NULL, *out_buf = compressed_buf;
uint32_t in_size = 0;
uint8_t *in_buf = NULL, *out_buf = compressed_buf, *level_buf = NULL;
uint32_t in_size = 0, level_buf_size;
uint32_t in_processed = 00;
struct isal_zstream stream;
struct isal_zstate *state = &stream.internal_state;
@ -1033,6 +1093,17 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
stream.next_out = compressed_buf;
stream.total_out = 0;
stream.gzip_flag = gzip_flag;
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
}
while (1) {
loop_count++;
@ -1098,6 +1169,9 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
}
if (level_buf != NULL)
free(level_buf);
if (in_buf != NULL)
free(in_buf);
@ -1173,11 +1247,12 @@ int compress_swap_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_type)
{
int ret = IGZIP_COMP_OK;
uint32_t z_size, overflow, gzip_flag;
uint32_t z_size, overflow, gzip_flag, level;
uint8_t *z_buf = NULL;
uint8_t *in_buf = NULL;
gzip_flag = rand() % 3;
level = rand() % 2;
if (in_size != 0) {
in_buf = malloc(in_size);
@ -1203,7 +1278,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
/* If flush type is invalid */
if (flush_type != NO_FLUSH && flush_type != FULL_FLUSH) {
ret =
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag,
level);
if (ret != INVALID_FLUSH_ERROR)
print_error(ret);
@ -1220,14 +1296,15 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
}
/* Else test valid flush type */
ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
ret =
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
#ifdef VERBOSE
if (ret) {
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1261,12 +1338,13 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
create_rand_repeat_data(z_buf, z_size);
ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
ret =
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
#ifdef VERBOSE
if (ret) {
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1292,7 +1370,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
}
overflow =
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag,
level);
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
#ifdef VERBOSE
@ -1304,7 +1383,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
printf("inflate ret = %d\n", overflow);
print_error(overflow);
}
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level,
gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1338,7 +1418,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
create_rand_repeat_data(z_buf, z_size);
/* Else test valid flush type */
ret = compress_stateless_full_flush(in_buf, in_size, z_buf, &z_size);
ret = compress_stateless_full_flush(in_buf, in_size, z_buf, &z_size, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, 0);
@ -1348,7 +1428,8 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
print_error(ret);
#ifdef VERBOSE
if (ret) {
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level,
gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1369,7 +1450,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
{
int ret = IGZIP_COMP_OK, fin_ret = IGZIP_COMP_OK;
uint32_t overflow = 0, gzip_flag;
uint32_t overflow = 0, gzip_flag, level;
uint32_t z_size, z_size_max, z_compressed_size;
uint8_t *z_buf = NULL;
@ -1384,6 +1465,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
}
gzip_flag = rand() % 3;
level = rand() % 2;
if (gzip_flag)
z_size_max += gzip_extra_bytes;
@ -1396,14 +1478,15 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
}
create_rand_repeat_data(z_buf, z_size);
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
gzip_flag, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
if (ret) {
#ifdef VERBOSE
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1419,14 +1502,15 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
z_size = z_size_max;
create_rand_repeat_data(z_buf, z_size_max);
ret = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
ret =
compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);
if (ret) {
#ifdef VERBOSE
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1447,8 +1531,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
z_size = rand() % z_compressed_size;
create_rand_repeat_data(z_buf, z_size_max);
overflow =
compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
gzip_flag, level);
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
if (overflow == 0)
@ -1464,7 +1548,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
printf("inflate ret = %d\n", ret);
print_error(overflow);
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level,
gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1483,7 +1568,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
overflow =
compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type,
gzip_flag);
gzip_flag, level);
if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
if (overflow == 0)
@ -1499,7 +1584,8 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
printf("inflate ret = %d\n", ret);
print_error(overflow);
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ",
level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1522,10 +1608,12 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
int test_flush(uint8_t * in_buf, uint32_t in_size)
{
int fin_ret = IGZIP_COMP_OK, ret;
uint32_t z_size, flush_type = 0, gzip_flag;
uint32_t z_size, flush_type = 0, gzip_flag, level;
uint8_t *z_buf = NULL;
gzip_flag = rand() % 3;
level = rand() % 2;
z_size = 2 * in_size + 2 * hdr_bytes + 8;
if (gzip_flag)
z_size += gzip_extra_bytes;
@ -1540,7 +1628,8 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
flush_type = rand();
/* Test invalid flush */
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag);
ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
gzip_flag, level);
if (ret == COMPRESS_GENERAL_ERROR)
ret = 0;
@ -1562,7 +1651,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
if (ret) {
#ifdef VERBOSE
printf("Compressed array: ");
printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag);
print_uint8_t(z_buf, z_size);
printf("\n");
printf("Data: ");
@ -1582,10 +1671,11 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
int test_full_flush(uint8_t * in_buf, uint32_t in_size)
{
int ret = IGZIP_COMP_OK;
uint32_t z_size, gzip_flag;
uint32_t z_size, gzip_flag, level;
uint8_t *z_buf = NULL;
gzip_flag = rand() % 3;
level = rand() % 2;
z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
if (gzip_flag)
@ -1599,7 +1689,7 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size)
create_rand_repeat_data(z_buf, z_size);
ret = compress_full_flush(in_buf, in_size, z_buf, &z_size, gzip_flag);
ret = compress_full_flush(in_buf, in_size, z_buf, &z_size, gzip_flag, level);
if (!ret)
ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag);

View File

@ -27,6 +27,8 @@
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%assign K 1024
%assign D IGZIP_HIST_SIZE ;; Amount of history
%assign LA 18 * 16 ;; Max look-ahead, rounded up to 32 byte boundary
@ -45,3 +47,7 @@
%assign SHORTEST_MATCH 4
%assign SLOP 8
%define DIST_OFFSET 14
%define EXTRA_BITS_OFFSET (DIST_OFFSET + 5)
%define LIT (0x1E << DIST_OFFSET)

89
igzip/proc_heap.asm Normal file
View File

@ -0,0 +1,89 @@
; Heap helpers used for Huffman tree construction.
; Exported symbols in this file: build_huff_tree and build_heap_asm.
; build_huff_tree leaves the modified node_ptr in rax (the return register).
; NOTE(review): the original header here named the prototype
;   uint32_t proc_heap(uint64_t *heap, uint32_t heap_size);
; which matches neither exported symbol, and build_huff_tree also reads a third
; argument (arg3). Confirm the C prototypes; heap_size is used as a full
; 64-bit index below.
%include "reg_sizes.asm"
%include "heap_macros.asm"
; Argument registers and scratch registers differ per calling convention.
%ifidn __OUTPUT_FORMAT__, win64
%define heap rcx ; pointer, 64-bit
%define heap_size rdx
%define arg3 r8
%define child rsi
%define tmp32 rdi
%else
%define heap rdi
%define heap_size rsi
%define arg3 rdx
%define child rcx
; in this branch tmp32 is the same register as arg3 (rdx), so arg3 must be
; consumed before tmp32 is used as scratch
%define tmp32 rdx
%endif
; registers common to both conventions
%define node_ptr rax
%define h1 r8
%define h2 r9
%define h3 r10
%define i r11
%define tmp2 r12
; build_huff_tree(heap, heap_size, node_ptr)
;   heap      : base of an array of 64-bit entries, used 1-based (heap[1] is
;               the minimum)
;   heap_size : number of entries currently in the heap
;   node_ptr  : third argument (arg3); index, in 8-byte units from heap, of
;               the slot where the next pair of children is recorded
; Returns the final node_ptr in rax.
;
; Each iteration removes the two smallest entries, stores the low 16 bits of
; each at [heap + node_ptr*8] and [heap + node_ptr*8 - 8], and re-inserts a
; combined entry whose upper bits are the sum of the two entries' upper bits
; and whose low bits are node_ptr. node_ptr then moves down by 2.
; The loop body always runs once before heap_size is tested.
; heapify is a macro from heap_macros.asm; presumably it sifts the entry at
; the given index down to restore heap order.
global build_huff_tree
build_huff_tree:
; save the scratch registers (callee-saved rsi/rdi on win64) and r12 (tmp2)
push child
push tmp32
push r12
; arg3 must be copied before tmp32 is used: on non-win64 both are rdx
mov node_ptr, arg3
.main_loop:
; REMOVE_MIN64(heap, heap_size, h1);
; h1 = minimum; the last entry moves to the root, its old slot becomes an
; all-ones sentinel, and the root is sifted down
mov h2, [heap + heap_size*8]
mov h1, [heap + 1*8]
mov qword [heap + heap_size*8], -1
dec heap_size
mov [heap + 1*8], h2
mov i, 1
heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
; h2 = new minimum (left in place; it is overwritten by the merged entry)
mov h2, [heap + 1*8]
lea h3, [h1 + h2]
; record the low 16 bits of both removed entries as the children
mov [heap + node_ptr*8], h1 %+ w
mov [heap + node_ptr*8 - 8], h2 %+ w
; keep only the summed upper bits, then tag the entry with node_ptr
and h3, 0XFFFF0000 ; sign extends to FFFFFFFFFFFF0000
or h3, node_ptr
sub node_ptr, 2
; replace_min64(heap, heap_size, h3)
mov [heap + 1*8], h3
mov i, 1
heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
cmp heap_size, 1
ja .main_loop
; store the low 16 bits of the last remaining entry
mov h1, [heap + 1*8]
mov [heap + node_ptr*8], h1 %+ w
; restore exactly the registers pushed on entry, in reverse order. The
; previous epilogue popped into rdi/rsi by name, which only matched the
; pushes on win64.
pop r12
pop tmp32
pop child
ret
align 32
; build_heap_asm(heap, heap_size)
;   heap      : base of an array of 64-bit entries, used 1-based
;   heap_size : number of entries
; Writes an all-ones sentinel just past the last entry, then calls heapify on
; every index from heap_size/2 down to 1.
; heapify is a macro from heap_macros.asm; presumably it sifts the entry at
; the given index down to restore heap order.
global build_heap_asm
build_heap_asm:
push rsi
push rdi
push r12
mov qword [heap + heap_size*8 + 8], -1
mov i, heap_size
shr i, 1
; With fewer than two entries there is nothing to order. Without this guard
; i would start at 0 and the dec/jnz below would wrap instead of terminating.
jz .done
.loop:
; heapify gets a copy of the index so the loop counter i is not disturbed
mov h1, i
heapify heap, heap_size, h1, child, h2, h3, tmp32, tmp2
dec i
jnz .loop
.done:
pop r12
pop rdi
pop rsi
ret

View File

@ -150,6 +150,7 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
#define INVALID_PARAM -8
#define STATELESS_OVERFLOW -1
#define ISAL_INVALID_OPERATION -9
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
/**
* @enum isal_zstate_state
@ -163,16 +164,20 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
enum isal_zstate_state {
ZSTATE_NEW_HDR, //!< Header to be written
ZSTATE_HDR, //!< Header state
ZSTATE_CREATE_HDR, //!< Header to be created
ZSTATE_BODY, //!< Body state
ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
ZSTATE_FLUSH_ICF_BUFFER,
ZSTATE_SYNC_FLUSH, //!< Write sync flush block
ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
ZSTATE_TRL, //!< Trailer state
ZSTATE_END, //!< End state
ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written
ZSTATE_TMP_HDR, //!< Temporary Header state
ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state
ZSTATE_TMP_BODY, //!< Temporary Body state
ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
ZSTATE_TMP_FLUSH_ICF_BUFFER,
ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block
ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
ZSTATE_TMP_TRL, //!< Temporary Trailer state
@ -206,7 +211,6 @@ enum isal_block_state {
#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
/******************************************************************************/
/* Compression structures */
/******************************************************************************/
@ -217,6 +221,20 @@ struct isal_huff_histogram {
uint16_t hash_table[IGZIP_HASH_SIZE]; //!< Tmp space used as a hash table
};
/* Symbol counters with 16-bit entries.
 * NOTE(review): presumably these are the tables the ICF assembly increments
 * through _internal_state_hist_dist and _internal_state_hist_lit_len; there
 * the literal/length index is either a byte value or (match length + 254) -
 * confirm the offset mapping. */
struct isal_mod_hist {
uint16_t d_hist[30]; //!< 30 counters, presumably one per distance symbol
uint16_t ll_hist[513]; //!< 513 counters, presumably literal/length symbols
};
/* Data sizes for level specific data options.
 * Every size is ISAL_DEF_LVL1_REQ (fixed part) plus room for a number of
 * tokens of ISAL_DEF_LVL1_TOKEN_SIZE bytes each.
 * All expansions are fully parenthesized so they can be used safely inside
 * larger expressions (e.g. next to '/', '%' or '*'). */
#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K) /* has to be at least sizeof(struct level_2_buf) */
#define ISAL_DEF_LVL1_TOKEN_SIZE 4
#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K)
#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K)
#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K)
#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K)
#define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
/** @brief Holds Bit Buffer information*/
struct BitBuf2 {
uint64_t m_bits; //!< bits in the bit buffer
@ -247,6 +265,8 @@ struct isal_zstate {
uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
uint32_t has_hist; //!< flag to track if there is match history
struct isal_mod_hist hist;
DECLARE_ALIGNED(uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD], 32); //!< Internal buffer
DECLARE_ALIGNED(uint16_t head[IGZIP_HASH_SIZE], 16); //!< Hash array
@ -278,6 +298,9 @@ struct isal_zstream {
uint32_t total_out; //!< total number of bytes written so far
struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
uint32_t level; //!< Compression level to use
uint32_t level_buf_size; //!< Size of level_buf
uint8_t * level_buf; //!< User allocated buffer required for different compression levels
uint32_t end_of_stream; //!< non-zero if this is the last input buffer
uint32_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH
uint32_t gzip_flag; //!< Indicate if gzip compression is to be performed