From bbc886cf019657c591c2364adceebfc97066a96a Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Mon, 13 Jun 2016 11:20:26 -0700 Subject: [PATCH] igzip: Modify igzip to ignore matches which are shorter than 4 Signed-off-by: Roy Oursler Reviewed-by: Greg Tucker --- igzip/huffman.asm | 1 - igzip/huffman.h | 1 - igzip/igzip_body.asm | 54 +++++++------------------------------ igzip/igzip_finish.asm | 8 +++--- igzip/igzip_stateless.asm | 56 +++++++-------------------------------- igzip/lz0a_const.asm | 2 +- include/igzip_lib.h | 2 +- 7 files changed, 25 insertions(+), 99 deletions(-) diff --git a/igzip/huffman.asm b/igzip/huffman.asm index fbe402c..678bd8f 100644 --- a/igzip/huffman.asm +++ b/igzip/huffman.asm @@ -202,7 +202,6 @@ %define %%result %1d ; 32-bit reg %define %%data %2d ; 32-bit reg (low byte not clobbered) - and %%data, 0x00FFFFFF xor %%result, %%result crc32 %%result, %%data %endm diff --git a/igzip/huffman.h b/igzip/huffman.h index eda23c4..64814b8 100644 --- a/igzip/huffman.h +++ b/igzip/huffman.h @@ -138,7 +138,6 @@ static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit */ static inline uint32_t compute_hash(uint32_t data) { - data &= 0x00FFFFFF; #ifdef __SSE4_2__ return _mm_crc32_u32(0, data); diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm index a4a3516..b2f5c23 100644 --- a/igzip/igzip_body.asm +++ b/igzip/igzip_body.asm @@ -334,7 +334,7 @@ skip_move_zero: jge end_loop_2 MARK __misc_compute_hash_lookup_ %+ ARCH - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] cmp dword [rsp + empty_buffer_flag], 0 jne write_first_byte @@ -348,8 +348,7 @@ MARK __misc_compute_hash_lookup_ %+ ARCH loop2: shr curr_data2, 8 - xor hash2 %+ d, hash2 %+ d - crc32 hash2 %+ d, curr_data2 %+ d + compute_hash hash2, curr_data2 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; and hash %+ d, HASH_MASK @@ -407,11 +406,6 @@ MARK __compare_ %+ ARCH xor len, [tmp2] jz compare_loop -%ifdef USE_HSWNI - blsmsk tmp3, len - or tmp3, 0xFFFFFF -%endif - lea tmp1, [file_start + f_i] mov tmp2, tmp1 sub tmp2, dist2 @@ -424,27 +418,12 @@ MARK __compare_ %+ ARCH xor len2, [tmp2] jz compare_loop2 -%ifdef USE_HSWNI - ;; Check for len/dist match for first literal - test tmp3, len2 - jz len_dist_lit_huffman_pre - - cmp tmp3, 0xFFFFFF - je encode_2_literals - jmp len_dist_huffman_pre - - -MARK __len_dist_lit_huffman_ %+ ARCH -len_dist_lit_huffman_pre: - movzx tmp1, curr_data %+ b - get_lit_code tmp1, code3, code_len3, hufftables -%else ;; Specutively load the code for the first literal movzx tmp1, curr_data %+ b get_lit_code tmp1, code3, rcx, hufftables ;; Check for len/dist match for first literal - test len, 0xFFFFFF + test len %+ d, 0xFFFFFFFF jz len_dist_huffman_pre ;; Specutively load the code for the second literal @@ -457,13 +436,12 @@ len_dist_lit_huffman_pre: add code_len2, rcx ;; Check for len/dist match for second literal - test len2, 0xFFFFFF + test len2 %+ d, 0xFFFFFFFF jnz write_lit_bits MARK __len_dist_lit_huffman_ %+ ARCH len_dist_lit_huffman_pre: mov code_len3, rcx -%endif bsf len2, len2 shr len2, 3 @@ -500,7 +478,7 @@ len_dist_lit_huffman: add f_i, len2 ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp5 %+ d, [file_start + tmp3] + mov tmp5, [file_start + tmp3] mov tmp7, tmp5 shr tmp7, 8 @@ -546,11 +524,11 @@ len_dist_huffman: ;; Setup for updateing hash lea tmp3, [f_i + 2] ; tmp3 <= k add f_i, len - mov tmp7 %+ d, [file_start + tmp3] + mov tmp7, [file_start + tmp3] MARK __update_hash_for_symbol_ %+ ARCH update_hash_for_symbol: - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] mov curr_data2, curr_data compute_hash hash, curr_data %ifdef LIMIT_HASH_UPDATE @@ -565,7 +543,7 @@ update_hash_for_symbol: %else loop3: ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp7 %+ d, [file_start + tmp3] + mov tmp7, [file_start + tmp3] compute_hash hash2, tmp7 and hash2 %+ d, HASH_MASK ; state->head[hash] = k; @@ -587,24 +565,10 @@ MARK __write_len_dist_bits_ %+ ARCH MARK __write_lit_bits_ %+ ARCH -%ifdef USE_HSWNI -encode_2_literals: - movzx tmp1, curr_data %+ b - get_lit_code tmp1, code3, rcx, hufftables - - shr curr_data, 8 - and curr_data, 0xff - get_lit_code curr_data, code2, code_len2, hufftables - - ;; Calculate code associated with both literals - shlx code2, code2, rcx - or code2, code3 - add code_len2, rcx -%endif write_lit_bits: mov f_end_i, [rsp + f_end_i_mem_offset] add f_i, 1 - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] mov curr_data2, curr_data compute_hash hash, curr_data diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm index 69b9281..8e24f37 100644 --- a/igzip/igzip_finish.asm +++ b/igzip/igzip_finish.asm @@ -127,7 +127,7 @@ skip_SLOP: cmp f_i, f_end_i jge end_loop_2 - mov tmp1 %+ d, [file_start + f_i] + mov tmp1, [file_start + f_i] loop2: ; if (state->bitbuf.is_full()) { @@ -200,7 +200,7 @@ loop2: ; only update hash twice ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp6 %+ d, [file_start + tmp3] + mov tmp6, [file_start + tmp3] compute_hash hash, tmp6 and hash %+ d, HASH_MASK ; state->head[hash] = k; @@ -209,7 +209,7 @@ loop2: add tmp3, 1 ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp6 %+ d, [file_start + tmp3] + mov tmp6, [file_start + tmp3] compute_hash hash, tmp6 and hash %+ d, HASH_MASK ; state->head[hash] = k; @@ -218,7 +218,7 @@ loop2: %else loop3: ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp6 %+ d, [file_start + tmp3] + mov tmp6, [file_start + tmp3] compute_hash hash, tmp6 and hash %+ d, HASH_MASK ; state->head[hash] = k; diff --git a/igzip/igzip_stateless.asm b/igzip/igzip_stateless.asm index 5946145..d26cc33 100644 --- a/igzip/igzip_stateless.asm +++ b/igzip/igzip_stateless.asm @@ -185,7 +185,7 @@ skip_SLOP: ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { MARK __stateless_compute_hash_ %+ ARCH - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja end @@ -202,8 +202,7 @@ MARK __stateless_compute_hash_ %+ ARCH loop2: shr curr_data2, 8 - xor hash2 %+ d, hash2 %+ d - crc32 hash2 %+ d, curr_data2 %+ d + compute_hash hash2, curr_data2 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; and hash %+ d, HASH_MASK @@ -261,11 +260,6 @@ MARK __stateless_compare_ %+ ARCH xor len, [tmp2] jz compare_loop -%ifdef USE_HSWNI - blsmsk tmp3, len - or tmp3, 0xFFFFFF -%endif - lea tmp1, [file_start + f_i] mov tmp2, tmp1 sub tmp2, dist2 @@ -278,27 +272,12 @@ MARK __stateless_compare_ %+ ARCH xor len2, [tmp2] jz compare_loop2 -%ifdef USE_HSWNI - ;; Check for len/dist match for first literal - test tmp3, len2 - jz len_dist_lit_huffman_pre - - cmp tmp3, 0xFFFFFF - je encode_2_literals - jmp len_dist_huffman_pre - - -MARK __stateless_len_dist_lit_huffman_ %+ ARCH -len_dist_lit_huffman_pre: - movzx tmp1, curr_data %+ b - get_lit_code tmp1, code3, code_len3, hufftables -%else ;; Specutively load the code for the first literal movzx tmp1, curr_data %+ b get_lit_code tmp1, code3, rcx, hufftables ;; Check for len/dist match for first literal - test len, 0xFFFFFF + test len %+ d, 0xFFFFFFFF jz len_dist_huffman_pre ;; Specutively load the code for the second literal @@ -311,13 +290,12 @@ len_dist_lit_huffman_pre: add code_len2, rcx ;; Check for len/dist match for second literal - test len2, 0xFFFFFF + test len2 %+ d, 0xFFFFFFFF jnz write_lit_bits MARK __stateless_len_dist_lit_huffman_ %+ ARCH len_dist_lit_huffman_pre: mov code_len3, rcx -%endif bsf len2, len2 shr len2, 3 @@ -355,7 +333,7 @@ len_dist_lit_huffman: add f_i, len2 ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp5 %+ d, [file_start + tmp3] + mov tmp5, [file_start + tmp3] mov tmp7, tmp5 shr tmp7, 8 @@ -402,11 +380,11 @@ len_dist_huffman: ;; Setup for updateing hash lea tmp3, [f_i + 2] ; tmp3 <= k add f_i, len - mov tmp7 %+ d, [file_start + tmp3] + mov tmp7, [file_start + tmp3] MARK __stateless_update_hash_for_symbol_ %+ ARCH update_hash_for_symbol: - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] mov curr_data2, curr_data compute_hash hash, curr_data %ifdef LIMIT_HASH_UPDATE @@ -421,7 +399,7 @@ update_hash_for_symbol: %else loop3: ; hash = compute_hash(state->file_start + k) & HASH_MASK; - mov tmp7 %+ d, [file_start + tmp3] + mov tmp7, [file_start + tmp3] compute_hash hash2, tmp7 and hash2 %+ d, HASH_MASK ; state->head[hash] = k; @@ -443,24 +421,10 @@ MARK __stateless_write_len_dist_bits_ %+ ARCH MARK __stateless_write_lit_bits_ %+ ARCH -%ifdef USE_HSWNI -encode_2_literals: - movzx tmp1, curr_data %+ b - get_lit_code tmp1, code3, rcx, hufftables - - shr curr_data, 8 - and curr_data, 0xff - get_lit_code curr_data, code2, code_len2, hufftables - - ;; Calculate code associated with both literals - shlx code2, code2, rcx - or code2, code3 - add code_len2, rcx -%endif write_lit_bits: mov f_end_i, [rsp + f_end_i_mem_offset] add f_i, 1 - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] mov curr_data2, curr_data compute_hash hash, curr_data @@ -483,7 +447,7 @@ loop2_finish: cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja end - mov curr_data %+ d, [file_start + f_i] + mov curr_data, [file_start + f_i] compute_hash hash, curr_data and hash %+ d, HASH_MASK diff --git a/igzip/lz0a_const.asm b/igzip/lz0a_const.asm index 4d95739..e788c79 100644 --- a/igzip/lz0a_const.asm +++ b/igzip/lz0a_const.asm @@ -39,6 +39,6 @@ %assign HASH_SIZE D %assign HASH_MASK (HASH_SIZE - 1) -%assign SHORTEST_MATCH 3 +%assign SHORTEST_MATCH 4 %assign SLOP 8 diff --git a/include/igzip_lib.h b/include/igzip_lib.h index 1dd930c..79d86f5 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -124,7 +124,7 @@ extern "C" { #define HASH_SIZE IGZIP_D #define HASH_MASK (HASH_SIZE - 1) -#define SHORTEST_MATCH 3 +#define SHORTEST_MATCH 4 #define IGZIP_MAX_DEF_HDR_SIZE 328