igzip: Load memory into xmm in stateless registers
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com> Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
parent
4d1fe78bfa
commit
cf30138c7b
@ -31,7 +31,9 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
|
||||
igzip/crc_utils_01.asm \
|
||||
igzip/crc_utils_04.asm \
|
||||
igzip/igzip_body_01.asm igzip/igzip_body_04.asm igzip/igzip_finish.asm \
|
||||
igzip/igzip_stateless_01.asm igzip/igzip_stateless_04.asm \
|
||||
igzip/igzip_stateless_01.asm \
|
||||
igzip/igzip_stateless_02.asm \
|
||||
igzip/igzip_stateless_04.asm \
|
||||
igzip/crc_data.asm \
|
||||
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
|
||||
igzip/igzip_multibinary.asm \
|
||||
|
@ -40,6 +40,7 @@ default rel
|
||||
|
||||
extern isal_deflate_body_stateless_base
|
||||
extern isal_deflate_body_stateless_01
|
||||
extern isal_deflate_body_stateless_02
|
||||
extern isal_deflate_body_stateless_04
|
||||
|
||||
extern isal_deflate_body_base
|
||||
@ -66,7 +67,7 @@ mbin_interface isal_deflate_init
|
||||
mbin_dispatch_init5 isal_deflate_init, isal_deflate_init_base, isal_deflate_init_01, isal_deflate_init_01, isal_deflate_init_01
|
||||
|
||||
mbin_interface isal_deflate_body_stateless
|
||||
mbin_dispatch_init5 isal_deflate_body_stateless, isal_deflate_body_stateless_base, isal_deflate_body_stateless_01, isal_deflate_body_stateless_01, isal_deflate_body_stateless_04
|
||||
mbin_dispatch_init5 isal_deflate_body_stateless, isal_deflate_body_stateless_base, isal_deflate_body_stateless_01, isal_deflate_body_stateless_02, isal_deflate_body_stateless_04
|
||||
|
||||
mbin_interface isal_deflate_body
|
||||
mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body_01, isal_deflate_body_01, isal_deflate_body_04
|
||||
|
@ -38,7 +38,7 @@
|
||||
%include "stdmac.asm"
|
||||
|
||||
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
%define LA_STATELESS 280 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
|
||||
%ifdef DEBUG
|
||||
%macro MARK 1
|
||||
@ -105,6 +105,7 @@ global %1
|
||||
%define xtmp1 xmm1 ; tmp
|
||||
%define xhash xmm2
|
||||
%define xmask xmm3
|
||||
%define xdata xmm4
|
||||
|
||||
%define ytmp0 ymm0 ; tmp
|
||||
%define ytmp1 ymm1 ; tmp
|
||||
@ -158,7 +159,7 @@ skip1:
|
||||
mov stream, rcx
|
||||
mov dword [stream + _internal_state_has_eob], 0
|
||||
|
||||
vmovdqu xmask, [mask]
|
||||
MOVDQU xmask, [mask]
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov m_out_buf, [stream + _next_out]
|
||||
@ -209,9 +210,9 @@ MARK __stateless_compute_hash_ %+ ARCH
|
||||
shr tmp6, 8
|
||||
compute_hash hash2, tmp6
|
||||
|
||||
vmovd xhash, hash %+ d
|
||||
vpinsrd xhash, hash2 %+ d, 1
|
||||
vpand xhash, xhash, xmask
|
||||
MOVD xhash, hash %+ d
|
||||
PINSRD xhash, hash2 %+ d, 1
|
||||
PAND xhash, xhash, xmask
|
||||
|
||||
jmp write_lit_bits
|
||||
|
||||
@ -240,7 +241,8 @@ loop2:
|
||||
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
|
||||
dec dist2
|
||||
|
||||
mov tmp8, [file_start + f_i + 1]
|
||||
MOVQ tmp8, xdata
|
||||
shr tmp8, 16
|
||||
mov tmp6, tmp8
|
||||
compute_hash tmp2, tmp8
|
||||
|
||||
@ -258,18 +260,20 @@ loop2:
|
||||
shr tmp6, 8
|
||||
compute_hash tmp3, tmp6
|
||||
|
||||
vmovd xhash, tmp2 %+ d
|
||||
vpinsrd xhash, tmp3 %+ d, 1
|
||||
vpand xhash, xmask
|
||||
MOVD xhash, tmp2 %+ d
|
||||
PINSRD xhash, tmp3 %+ d, 1
|
||||
PAND xhash, xhash, xmask
|
||||
|
||||
MARK __stateless_compare_ %+ ARCH
|
||||
;; Check for long len/dist match (>7) with first literal
|
||||
mov len, [tmp1]
|
||||
MOVQ len, xdata
|
||||
mov curr_data, len
|
||||
PSRLDQ xdata, 1
|
||||
xor len, [tmp1 + dist]
|
||||
jz compare_loop
|
||||
|
||||
;; Check for len/dist match (>7) with second literal
|
||||
mov len2, [tmp1 + 1]
|
||||
MOVQ len2, xdata
|
||||
xor len2, [tmp1 + dist2 + 1]
|
||||
jz compare_loop2
|
||||
|
||||
@ -300,7 +304,6 @@ len_dist_lit_huffman_pre:
|
||||
bsf len2, len2
|
||||
shr len2, 3
|
||||
|
||||
|
||||
len_dist_lit_huffman:
|
||||
neg dist2
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
@ -317,7 +320,8 @@ len_dist_lit_huffman:
|
||||
|
||||
mov rcx, code_len3
|
||||
|
||||
mov tmp5, [file_start + f_i + 3]
|
||||
MOVQ tmp5, xdata
|
||||
shr tmp5, 24
|
||||
compute_hash tmp4, tmp5
|
||||
and tmp4, HASH_MASK
|
||||
|
||||
@ -329,11 +333,12 @@ len_dist_lit_huffman:
|
||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
|
||||
add f_i, len2
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
|
||||
vmovd hash %+ d, xhash
|
||||
vpextrd hash2 %+ d, xhash, 1
|
||||
MOVD hash %+ d, xhash
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
compute_hash hash, curr_data
|
||||
@ -403,12 +408,13 @@ len_dist_huffman:
|
||||
lea tmp3, [f_i + 2] ; tmp3 <= k
|
||||
add f_i, len
|
||||
|
||||
vmovd hash %+ d, xhash
|
||||
vpextrd hash2 %+ d, xhash, 1
|
||||
MOVD hash %+ d, xhash
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
add tmp3,1
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
@ -441,18 +447,18 @@ loop4_done:
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
|
||||
MARK __stateless_write_lit_bits_ %+ ARCH
|
||||
write_lit_bits:
|
||||
MOVDQU xdata, [file_start + f_i + 1]
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
add f_i, 1
|
||||
mov curr_data, [file_start + f_i]
|
||||
|
||||
vmovd hash %+ d, xhash
|
||||
MOVD hash %+ d, xhash
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
vpextrd hash2 %+ d, xhash, 1
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
|
7
igzip/igzip_stateless_02.asm
Normal file
7
igzip/igzip_stateless_02.asm
Normal file
@ -0,0 +1,7 @@
|
||||
%define ARCH 02
|
||||
|
||||
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 1
|
||||
%endif
|
||||
|
||||
%include "igzip_stateless.asm"
|
@ -14,7 +14,7 @@ extern rfc1951_lookup_table
|
||||
_len_to_code_offset equ 0
|
||||
|
||||
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
%define LA_STATELESS 280 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
%define LIT_LEN 286
|
||||
%define DIST_LEN 30
|
||||
%define HIST_ELEM_SIZE 8
|
||||
|
@ -271,3 +271,79 @@ ssc:
|
||||
shl %%dest, cl
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro MOVDQU 2
|
||||
%define %%dest %1
|
||||
%define %%src %2
|
||||
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||
vmovdqu %%dest, %%src
|
||||
%else
|
||||
movdqu %%dest, %%src
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro MOVD 2
|
||||
%define %%dest %1
|
||||
%define %%src %2
|
||||
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
|
||||
vmovd %%dest, %%src
|
||||
%else
|
||||
movd %%dest, %%src
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro MOVQ 2
|
||||
%define %%dest %1
|
||||
%define %%src %2
|
||||
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
|
||||
vmovq %%dest, %%src
|
||||
%else
|
||||
movq %%dest, %%src
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro PINSRD 3
|
||||
%define %%dest %1
|
||||
%define %%src %2
|
||||
%define %%offset %3
|
||||
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||
vpinsrd %%dest, %%src, %%offset
|
||||
%else
|
||||
pinsrd %%dest, %%src, %%offset
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro PEXTRD 3
|
||||
%define %%dest %1
|
||||
%define %%src %2
|
||||
%define %%offset %3
|
||||
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||
vpextrd %%dest, %%src, %%offset
|
||||
%else
|
||||
pextrd %%dest, %%src, %%offset
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro PSRLDQ 2
|
||||
%define %%dest %1
|
||||
%define %%offset %2
|
||||
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||
vpsrldq %%dest, %%offset
|
||||
%else
|
||||
psrldq %%dest, %%offset
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro PAND 3
|
||||
%define %%dest %1
|
||||
%define %%src1 %2
|
||||
%define %%src2 %3
|
||||
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
|
||||
vpand %%dest, %%src1, %%src2
|
||||
%else
|
||||
%ifnidn %%dest, %%src1
|
||||
movdqa %%dest, %%src1
|
||||
%endif
|
||||
pand %%dest, %%src2
|
||||
%endif
|
||||
%endm
|
||||
|
Loading…
Reference in New Issue
Block a user