igzip: Create assembly version of isal_update_histogram
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com> Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
parent
7c91df5e50
commit
31814483c0
@ -36,7 +36,10 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
|
||||
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
|
||||
igzip/igzip_multibinary.asm \
|
||||
igzip/igzip_stateless_base.c \
|
||||
igzip/igzip_base.c
|
||||
igzip/igzip_base.c \
|
||||
igzip/igzip_update_histogram_01.asm \
|
||||
igzip/igzip_update_histogram_04.asm \
|
||||
igzip/rfc1951_lookup.asm
|
||||
|
||||
src_include += -I $(srcdir)/igzip
|
||||
extern_hdrs += include/igzip_lib.h
|
||||
@ -49,7 +52,7 @@ check_tests += igzip/igzip_check
|
||||
|
||||
perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
|
||||
|
||||
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf
|
||||
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
|
||||
|
||||
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
igzip/igzip_buffer_utils_01.asm \
|
||||
@ -59,6 +62,7 @@ other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
igzip/bitbuf2.h igzip/repeated_char_result.h \
|
||||
igzip/igzip_body.asm \
|
||||
igzip/igzip_stateless.asm \
|
||||
igzip/igzip_update_histogram.asm \
|
||||
igzip/huffman.asm \
|
||||
include/reg_sizes.asm \
|
||||
include/multibinary.asm \
|
||||
@ -94,3 +98,5 @@ igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
igzip_igzip_inflate_test_LDFLAGS = -lz
|
||||
igzip_check: igzip_inflate_ref.o
|
||||
igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
igzip_hist_perf: igzip_inflate_ref.o
|
||||
igzip_igzip_hist_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
|
@ -142,8 +142,8 @@ void append_to_back(struct linked_list *list, struct linked_list_node *new_eleme
|
||||
return;
|
||||
}
|
||||
|
||||
void isal_update_histogram(uint8_t * start_stream, int length,
|
||||
struct isal_huff_histogram *histogram)
|
||||
void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
struct isal_huff_histogram *histogram)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *last_seen[HASH_SIZE];
|
||||
|
348
igzip/igzip_hist_perf.c
Normal file
348
igzip/igzip_hist_perf.c
Normal file
@ -0,0 +1,348 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "igzip_lib.h"
#include "test.h"
#include "igzip_inflate_ref.h"
|
||||
|
||||
#define BUF_SIZE 1024
|
||||
#define MIN_TEST_LOOPS 8
|
||||
#ifndef RUN_MEM_SIZE
|
||||
# define RUN_MEM_SIZE 2000000000
|
||||
#endif
|
||||
|
||||
/* Inflates and fills a histogram of lit, len, and dist codes seen in non-type 0 blocks.*/
|
||||
int igzip_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *histogram)
|
||||
{
|
||||
/* The following tables are based on the tables in the deflate standard,
|
||||
* RFC 1951 page 11. */
|
||||
const uint16_t len_start[29] = {
|
||||
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||
0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f,
|
||||
0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73,
|
||||
0x83, 0xa3, 0xc3, 0xe3, 0x102
|
||||
};
|
||||
const uint8_t len_extra_bit_count[29] = {
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
|
||||
0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
|
||||
0x5, 0x5, 0x5, 0x5, 0x0
|
||||
};
|
||||
const uint32_t dist_start[30] = {
|
||||
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
|
||||
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
|
||||
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
|
||||
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001
|
||||
};
|
||||
const uint8_t dist_extra_bit_count[30] = {
|
||||
0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
|
||||
0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
|
||||
0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
|
||||
0xb, 0xb, 0xc, 0xc, 0xd, 0xd
|
||||
};
|
||||
|
||||
uint16_t next_lit, len, nlen;
|
||||
uint8_t next_dist;
|
||||
uint32_t repeat_length;
|
||||
uint32_t look_back_dist;
|
||||
uint32_t tmp;
|
||||
|
||||
memset(histogram, 0, sizeof(struct isal_huff_histogram));
|
||||
while (state->new_block == 0 || state->bfinal == 0) {
|
||||
if (state->new_block != 0) {
|
||||
tmp = read_header(state);
|
||||
|
||||
if (tmp)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
if (state->btype == 0) {
|
||||
/* If the block is uncompressed, update state data accordingly */
|
||||
if (state->in_buffer.avail_in < 4)
|
||||
return END_OF_INPUT;
|
||||
|
||||
len = *(uint16_t *) state->in_buffer.next_in;
|
||||
state->in_buffer.next_in += 2;
|
||||
nlen = *(uint16_t *) state->in_buffer.next_in;
|
||||
state->in_buffer.next_in += 2;
|
||||
|
||||
/* Check if len and nlen match */
|
||||
if (len != (~nlen & 0xffff))
|
||||
return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
|
||||
|
||||
if (state->in_buffer.avail_in < len)
|
||||
len = state->in_buffer.avail_in;
|
||||
else
|
||||
state->new_block = 1;
|
||||
|
||||
state->out_buffer.total_out += len;
|
||||
state->in_buffer.next_in += len;
|
||||
state->in_buffer.avail_in -= len + 4;
|
||||
|
||||
if (state->in_buffer.avail_in == 0 && state->new_block == 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
} else {
|
||||
/* Else decode a huffman encoded block */
|
||||
while (state->new_block == 0) {
|
||||
/* While not at the end of block, decode the next
|
||||
* symbol */
|
||||
next_lit =
|
||||
decode_next(&state->in_buffer, &state->lit_huff_code);
|
||||
|
||||
histogram->lit_len_histogram[next_lit] += 1;
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (next_lit < 256)
|
||||
/* Next symbol is a literal */
|
||||
state->out_buffer.total_out++;
|
||||
|
||||
else if (next_lit == 256)
|
||||
/* Next symbol is end of block */
|
||||
state->new_block = 1;
|
||||
|
||||
else if (next_lit < 286) {
|
||||
/* Next symbol is a repeat length followed by a
|
||||
lookback distance */
|
||||
repeat_length =
|
||||
len_start[next_lit - 257] +
|
||||
inflate_in_read_bits(&state->in_buffer,
|
||||
len_extra_bit_count[next_lit -
|
||||
257]);
|
||||
|
||||
next_dist = decode_next(&state->in_buffer,
|
||||
&state->dist_huff_code);
|
||||
|
||||
histogram->dist_histogram[next_dist] += 1;
|
||||
|
||||
look_back_dist = dist_start[next_dist] +
|
||||
inflate_in_read_bits(&state->in_buffer,
|
||||
dist_extra_bit_count
|
||||
[next_dist]);
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (look_back_dist > state->out_buffer.total_out)
|
||||
return INVALID_LOOK_BACK_DISTANCE;
|
||||
|
||||
state->out_buffer.total_out += repeat_length;
|
||||
|
||||
} else
|
||||
return INVALID_SYMBOL;
|
||||
}
|
||||
}
|
||||
}
|
||||
state->in_buffer.next_in -= state->in_buffer.read_in_length / 8;
|
||||
state->in_buffer.avail_in += state->in_buffer.read_in_length / 8;
|
||||
|
||||
return DECOMPRESSION_FINISHED;
|
||||
}
|
||||
|
||||
/**
 * Reports the total size of an open file and restores its read position.
 *
 * @param f  Seekable stream opened by the caller.
 * @return File length in bytes, or -1 when the stream cannot be measured
 *         or the length does not fit in an int.
 */
int get_filesize(FILE * f)
{
	long saved_pos, end_pos;

	saved_pos = ftell(f);	/* Save current position */
	if (saved_pos < 0)
		return -1;

	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;

	end_pos = ftell(f);

	if (fseek(f, saved_pos, SEEK_SET) != 0)	/* Restore position */
		return -1;

	/* ftell() works in long; refuse sizes the int return type cannot hold
	 * instead of silently truncating them. */
	if (end_pos < 0 || end_pos > INT_MAX)
		return -1;

	return (int)end_pos;
}
|
||||
|
||||
void print_histogram(struct isal_huff_histogram *histogram)
|
||||
{
|
||||
int i;
|
||||
printf("Lit Len histogram");
|
||||
for (i = 0; i < IGZIP_LIT_LEN; i++) {
|
||||
if (i % 16 == 0)
|
||||
printf("\n");
|
||||
else
|
||||
printf(", ");
|
||||
printf("%4lu", histogram->lit_len_histogram[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("Dist histogram");
|
||||
for (i = 0; i < IGZIP_DIST_LEN; i++) {
|
||||
if (i % 16 == 0)
|
||||
printf("\n");
|
||||
else
|
||||
printf(", ");
|
||||
printf("%4lu", histogram->dist_histogram[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void print_diff_histogram(struct isal_huff_histogram *histogram1,
|
||||
struct isal_huff_histogram *histogram2)
|
||||
{
|
||||
int i;
|
||||
double relative_error;
|
||||
printf("Lit Len histogram relative error");
|
||||
for (i = 0; i < IGZIP_LIT_LEN; i++) {
|
||||
if (i % 16 == 0)
|
||||
printf("\n");
|
||||
else
|
||||
printf(", ");
|
||||
|
||||
if (histogram1->lit_len_histogram[i] == histogram2->lit_len_histogram[i]) {
|
||||
printf(" % 4.0f %%", 0.0);
|
||||
} else {
|
||||
relative_error =
|
||||
abs(histogram1->lit_len_histogram[i] -
|
||||
histogram2->lit_len_histogram[i]);
|
||||
relative_error = relative_error / histogram1->lit_len_histogram[i];
|
||||
relative_error = 100.0 * relative_error;
|
||||
printf("~% 4.0f %%", relative_error);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("Dist histogram relative error");
|
||||
for (i = 0; i < IGZIP_DIST_LEN; i++) {
|
||||
if (i % 16 == 0)
|
||||
printf("\n");
|
||||
else
|
||||
printf(", ");
|
||||
|
||||
if (histogram1->dist_histogram[i] == histogram2->dist_histogram[i]) {
|
||||
printf(" % 4.0f %%", 0.0);
|
||||
} else {
|
||||
relative_error =
|
||||
abs(histogram1->dist_histogram[i] - histogram2->dist_histogram[i]);
|
||||
relative_error = relative_error / histogram1->dist_histogram[i];
|
||||
relative_error = 100.0 * relative_error;
|
||||
printf("~% 4.0f %%", relative_error);
|
||||
}
|
||||
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in;
|
||||
unsigned char *inbuf, *outbuf;
|
||||
int i, infile_size, outbuf_size, iterations, avail_in;
|
||||
struct isal_huff_histogram histogram1, histogram2;
|
||||
struct isal_hufftables hufftables_custom;
|
||||
struct isal_zstream stream;
|
||||
struct inflate_state gstream;
|
||||
|
||||
memset(&histogram1, 0, sizeof(histogram1));
|
||||
memset(&histogram2, 0, sizeof(histogram2));
|
||||
|
||||
if (argc > 3 || argc < 2) {
|
||||
fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
|
||||
"\t - Runs multiple iterations of igzip on a file to "
|
||||
"get more accurate time results.\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Allocate space for entire input file and output
|
||||
* (assuming some possible expansion on output size)
|
||||
*/
|
||||
infile_size = get_filesize(in);
|
||||
outbuf_size = 2 * infile_size;
|
||||
|
||||
if (infile_size != 0)
|
||||
iterations = RUN_MEM_SIZE / infile_size;
|
||||
else
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
if (iterations < MIN_TEST_LOOPS)
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
inbuf = malloc(infile_size);
|
||||
outbuf = malloc(outbuf_size);
|
||||
if (inbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate input buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (outbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate output buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
avail_in = fread(inbuf, 1, infile_size, in);
|
||||
if (avail_in != infile_size) {
|
||||
fprintf(stderr, "Couldn't fit all of input file into buffer\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++)
|
||||
isal_update_histogram(inbuf, infile_size, &histogram1);
|
||||
perf_stop(&stop);
|
||||
|
||||
printf(" file %s - in_size=%d iter=%d\n", argv[1], infile_size, i);
|
||||
printf("igzip_file: ");
|
||||
perf_print(stop, start, (long long)infile_size * i);
|
||||
|
||||
memset(&histogram1, 0, sizeof(histogram1));
|
||||
|
||||
isal_update_histogram(inbuf, infile_size, &histogram1);
|
||||
|
||||
isal_create_hufftables(&hufftables_custom, &histogram1);
|
||||
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 1; /* Do the entire file at once */
|
||||
stream.flush = NO_FLUSH;
|
||||
stream.next_in = inbuf;
|
||||
stream.avail_in = infile_size;
|
||||
stream.next_out = outbuf;
|
||||
stream.avail_out = outbuf_size;
|
||||
stream.hufftables = &hufftables_custom;
|
||||
isal_deflate_stateless(&stream);
|
||||
|
||||
igzip_inflate_init(&gstream, outbuf, stream.total_out, NULL, 0);
|
||||
igzip_inflate_hist(&gstream, &histogram2);
|
||||
|
||||
printf("Histogram Error \n");
|
||||
print_diff_histogram(&histogram1, &histogram2);
|
||||
|
||||
fclose(in);
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
@ -51,6 +51,10 @@ extern isal_deflate_finish_01
|
||||
extern get_crc_base
|
||||
extern get_crc_01
|
||||
|
||||
extern isal_update_histogram_base
|
||||
extern isal_update_histogram_01
|
||||
extern isal_update_histogram_04
|
||||
|
||||
extern isal_deflate_init_base
|
||||
extern isal_deflate_init_01
|
||||
|
||||
@ -71,3 +75,6 @@ mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_
|
||||
|
||||
mbin_interface get_crc
|
||||
mbin_dispatch_init5 get_crc, get_crc_base, get_crc_01, get_crc_01, get_crc_01
|
||||
|
||||
mbin_interface isal_update_histogram
|
||||
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
|
||||
|
467
igzip/igzip_update_histogram.asm
Normal file
467
igzip/igzip_update_histogram.asm
Normal file
@ -0,0 +1,467 @@
|
||||
|
||||
%include "options.asm"
|
||||
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
|
||||
extern rfc1951_lookup_table
|
||||
_len_to_code_offset equ 0
|
||||
|
||||
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
%define LIT_LEN 286
|
||||
%define DIST_LEN 30
|
||||
%define HIST_ELEM_SIZE 8
|
||||
|
||||
;;; MARK <name>
;;; With DEBUG defined, declares <name> global and places it as a label at
;;; the point of use; without DEBUG the macro expands to nothing.
%ifdef DEBUG
|
||||
%macro MARK 1
|
||||
global %1
|
||||
%1:
|
||||
%endm
|
||||
%else
|
||||
%macro MARK 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%define file_start rdi
|
||||
%define file_length rsi
|
||||
%define histogram rdx
|
||||
%define rfc_lookup r9
|
||||
%define f_i r10
|
||||
|
||||
%define curr_data rax
|
||||
|
||||
%define tmp2 rcx
|
||||
|
||||
%define dist rbx
|
||||
%define dist_code2 rbx
|
||||
|
||||
%define dist2 r12
|
||||
%define dist_code r12
|
||||
|
||||
%define len rbp
|
||||
%define len_code rbp
|
||||
%define hash3 rbp
|
||||
|
||||
%define curr_data2 r8
|
||||
%define len2 r8
|
||||
|
||||
%define tmp1 r11
|
||||
|
||||
%define tmp3 r13
|
||||
|
||||
%define hash r14
|
||||
|
||||
%define hash2 r15
|
||||
|
||||
%define xtmp0 xmm0
|
||||
%define xtmp1 xmm1
|
||||
|
||||
%define ytmp0 ymm0
|
||||
%define ytmp1 ymm1
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
_eob_count_offset equ 0 ; local variable (8 bytes)
|
||||
f_end_i_mem_offset equ 8
|
||||
gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
|
||||
xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||
stack_size equ 2*8 + 8*8 + 4*16 + 8
|
||||
;;; 8 because stack address is odd multiple of 8 after a function call and
|
||||
;;; we want it aligned to 16 bytes
|
||||
_lit_len_offset equ 0
|
||||
_dist_offset equ (8 * LIT_LEN)
|
||||
_hash_offset equ (_dist_offset + 8 * DIST_LEN)
|
||||
|
||||
;;; len_to_len_code len_code, len, rfc_lookup
;;; Loads the table byte stored at rfc_lookup + _len_to_code_offset + len and
;;; sets bit 8 (0x100) in it, leaving the result in len_code.
%macro len_to_len_code 3
|
||||
%define %%len_code %1 ; Output
|
||||
%define %%len %2 ; Input
|
||||
%define %%rfc_lookup %3
|
||||
movzx %%len_code, byte [%%rfc_lookup + _len_to_code_offset + %%len]
|
||||
or %%len_code, 0x100
|
||||
%endm
|
||||
|
||||
;;; dist_to_dist_code dist_code, dist
;;; Converts a look back distance into its distance code.
;;; With v = dist - 1 and n = index of the highest set bit of v, the result
;;; is (v >> (n - 1)) + 2 * (n - 1); when v <= 1 the result is v itself.
;;; Clobbers rcx and dist
%macro dist_to_dist_code 2
|
||||
%define %%dist_code %1 ; Output code associated with dist
|
||||
%define %%dist_coded %1d
|
||||
%define %%dist %2d ; Input dist
|
||||
dec %%dist
|
||||
mov %%dist_coded, %%dist
|
||||
bsr ecx, %%dist_coded
|
||||
dec ecx
|
||||
SHRX %%dist_code, %%dist_code, rcx
|
||||
lea %%dist_coded, [%%dist_coded + 2*ecx]
|
||||
|
||||
;; The bit-scan based result above is not used for v <= 1; v is the code
cmp %%dist, 1
|
||||
cmovle %%dist_coded, %%dist
|
||||
%endm
|
||||
|
||||
;;; dist_to_dist_code2 dist_code, dist
;;; Same conversion as dist_to_dist_code, but the input register holds
;;; -(dist - 1); it is negated first instead of being decremented.
;;; Clobbers rcx and dist
%macro dist_to_dist_code2 2
|
||||
%define %%dist_code %1 ; Output code associated with dist
|
||||
%define %%dist_coded %1d
|
||||
%define %%dist %2d ; Input -(dist - 1)
|
||||
neg %%dist
|
||||
mov %%dist_coded, %%dist
|
||||
bsr ecx, %%dist_coded
|
||||
dec ecx
|
||||
SHRX %%dist_code, %%dist_code, rcx
|
||||
lea %%dist_coded, [%%dist_coded + 2*ecx]
|
||||
|
||||
;; The bit-scan based result above is not used for (dist - 1) <= 1
cmp %%dist, 1
|
||||
cmovle %%dist_coded, %%dist
|
||||
%endm
|
||||
|
||||
; void isal_update_histogram_<ARCH>(file_start, file_length, histogram)
;   file_start  (rdi) - address of the input bytes
;   file_length (rsi) - number of input bytes
;   histogram   (rdx) - lit/len counts at _lit_len_offset, distance counts at
;                       _dist_offset, 16-bit hash table at _hash_offset
; Looks for repeated byte sequences through the hash table and increments
; the count of every literal, length code and distance code it selects.
; The count of symbol 256 is incremented once per call. A call with
; file_length == 0 returns immediately and changes nothing.
; NOTE(review): file_length is read as the full 64-bit rsi while the C
; prototype declares "int length" - confirm the upper half is always defined.
|
||||
global isal_update_histogram_ %+ ARCH
|
||||
isal_update_histogram_ %+ ARCH %+ :
|
||||
|
||||
;; do nothing if (file_length == 0)
|
||||
cmp file_length, 0
|
||||
jne skip1
|
||||
ret
|
||||
skip1:
|
||||
|
||||
;; Reserve the local frame. With ALIGN_STACK the frame is also rounded down
;; to a 16 byte boundary and rbp keeps the incoming stack pointer.
%ifdef ALIGN_STACK
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
sub rsp, stack_size
|
||||
and rsp, ~15
|
||||
%else
|
||||
sub rsp, stack_size
|
||||
%endif
|
||||
|
||||
;; Save the general purpose registers that are reloaded before returning
mov [rsp + gpr_save_mem_offset + 0*8], rbx
|
||||
mov [rsp + gpr_save_mem_offset + 1*8], rsi
|
||||
mov [rsp + gpr_save_mem_offset + 2*8], rdi
|
||||
mov [rsp + gpr_save_mem_offset + 3*8], rbp
|
||||
mov [rsp + gpr_save_mem_offset + 4*8], r12
|
||||
mov [rsp + gpr_save_mem_offset + 5*8], r13
|
||||
mov [rsp + gpr_save_mem_offset + 6*8], r14
|
||||
mov [rsp + gpr_save_mem_offset + 7*8], r15
|
||||
mov f_i, 0
|
||||
|
||||
;; Keep (count of symbol 256) + 1 on the stack; it is written back to the
;; histogram at the "end" label
mov tmp1, qword [histogram + _lit_len_offset + 8*256]
|
||||
inc tmp1
|
||||
mov [rsp + _eob_count_offset], tmp1
|
||||
|
||||
lea rfc_lookup, [rfc1951_lookup_table]
|
||||
|
||||
;; Init hash_table: every 16-bit entry is set to -(D+1)
|
||||
mov rcx, (HASH_SIZE-1)
|
||||
init_hash_table:
|
||||
mov word [histogram + _hash_offset + 2*rcx], -(D+1)
|
||||
sub rcx, 1
|
||||
jge init_hash_table
|
||||
|
||||
;; Hold back LA_STATELESS bytes of look-ahead for loop2; inputs that are
;; not longer than that go straight to end_loop_2
sub file_length, LA_STATELESS
|
||||
cmp file_length, 0
|
||||
jle end_loop_2
|
||||
|
||||
|
||||
;; Load first literal into histogram
|
||||
mov curr_data, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
|
||||
and curr_data, 0xff
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
|
||||
inc f_i
|
||||
|
||||
;; Setup to begin loop 2
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
;; Main loop: each pass looks at the positions f_i and f_i + 1
loop2:
|
||||
xor dist, dist
|
||||
xor dist2, dist2
|
||||
xor tmp3, tmp3
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
|
||||
;; Load possible look back distances and update hash data
|
||||
mov dist %+ w, f_i %+ w
|
||||
sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
|
||||
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
|
||||
|
||||
add f_i, 1
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
sub dist2 %+ w, word [histogram + _hash_offset + 2 * hash2]
|
||||
mov [histogram + _hash_offset + 2 * hash2], f_i %+ w
|
||||
|
||||
;; Start computing hashes to be used in either the next loop or
|
||||
;; for updating the hash if a match is found
|
||||
mov curr_data2, [file_start + f_i + 1]
|
||||
mov tmp2, curr_data2
|
||||
compute_hash hash, curr_data2
|
||||
|
||||
;; Check if look back distances are valid. Load a junk distance of 1
|
||||
;; if the look back distance is too long for speculative lookups.
|
||||
;; After the neg, dist and dist2 hold -(distance - 1).
sub dist, 1
|
||||
cmp dist %+ d, (D-1)
|
||||
cmovae dist, tmp3
|
||||
neg dist
|
||||
|
||||
sub dist2, 1
|
||||
cmp dist2 %+ d, (D-1)
|
||||
cmovae dist2, tmp3
|
||||
neg dist2
|
||||
|
||||
shr tmp2, 8
|
||||
compute_hash hash2, tmp2
|
||||
|
||||
;; Check for long len/dist matches (>7)
|
||||
mov len, [tmp1]
|
||||
xor len, [tmp1 + dist - 1]
|
||||
jz compare_loop
|
||||
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
mov len2, [tmp1 + 1]
|
||||
xor len2, [tmp1 + dist2]
|
||||
jz compare_loop2
|
||||
|
||||
;; Speculatively load the code for the first literal
|
||||
movzx tmp1, curr_data %+ b
|
||||
shr curr_data, 8
|
||||
|
||||
lea tmp3, [f_i + 1]
|
||||
|
||||
;; Check for len/dist match for first literal
|
||||
;; (the low 4 bytes of the xor result are zero when they all matched)
test len %+ d, 0xFFFFFFFF
|
||||
jz len_dist_huffman_pre
|
||||
|
||||
;; Store first literal
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * tmp1]
|
||||
|
||||
;; Speculatively load the code for the second literal
|
||||
and curr_data, 0xff
|
||||
|
||||
;; Check for len/dist match for second literal
|
||||
test len2 %+ d, 0xFFFFFFFF
|
||||
jnz lit_lit_huffman
|
||||
len_dist_lit_huffman_pre:
|
||||
;; Calculate repeat length
|
||||
;; (index of the lowest differing bit, divided by 8 = matching bytes)
tzcnt len2, len2
|
||||
shr len2, 3
|
||||
|
||||
len_dist_lit_huffman:
|
||||
;; Store updated hashes
|
||||
mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
|
||||
add tmp3,1
|
||||
mov [histogram + _hash_offset + 2 * hash2], tmp3 %+ w
|
||||
|
||||
add f_i, len2
|
||||
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov tmp1, curr_data
|
||||
compute_hash hash, curr_data
|
||||
|
||||
dist_to_dist_code2 dist_code2, dist2
|
||||
|
||||
len_to_len_code len_code, len2, rfc_lookup
|
||||
|
||||
shr tmp1, 8
|
||||
compute_hash hash2, tmp1
|
||||
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
|
||||
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2]
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
;; encode as dist/len
|
||||
|
||||
len_dist_huffman_pre:
|
||||
tzcnt len, len
|
||||
shr len, 3
|
||||
|
||||
len_dist_huffman:
|
||||
mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
|
||||
|
||||
;; f_i was already advanced by one in loop2; undo that before adding len
dec f_i
|
||||
add f_i, len
|
||||
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov tmp1, curr_data
|
||||
compute_hash hash, curr_data
|
||||
|
||||
dist_to_dist_code2 dist_code, dist
|
||||
|
||||
len_to_len_code len_code, len, rfc_lookup
|
||||
|
||||
shr tmp1, 8
|
||||
compute_hash hash2, tmp1
|
||||
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
|
||||
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
;; Neither position matched: count the second literal as well
lit_lit_huffman:
|
||||
add f_i, 1
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
|
||||
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
|
||||
;; Give the look-ahead back, still keeping LAST_BYTES_COUNT bytes in
;; reserve, and process one position at a time in loop2_finish
end_loop_2:
|
||||
add file_length, LA_STATELESS - LAST_BYTES_COUNT
|
||||
cmp f_i, file_length
|
||||
jge final_bytes
|
||||
|
||||
loop2_finish:
|
||||
mov curr_data, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
;; Calculate possible distance for length/dist pair.
|
||||
xor dist, dist
|
||||
mov dist %+ w, f_i %+ w
|
||||
sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
|
||||
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
|
||||
|
||||
;; Check if look back distance is valid (the dec is to handle when dist = 0)
|
||||
dec dist
|
||||
cmp dist %+ d, (D-1)
|
||||
jae encode_literal_finish
|
||||
inc dist
|
||||
|
||||
;; Check if look back distance is a match
|
||||
;; tmp3 = bytes left up to the real end of the input
lea tmp3, [file_length + LAST_BYTES_COUNT]
|
||||
sub tmp3, f_i
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
compare tmp3, tmp1, tmp2, len, tmp3
|
||||
|
||||
;; Limit len to maximum value of 258
|
||||
mov tmp2, 258
|
||||
cmp len, 258
|
||||
cmova len, tmp2
|
||||
cmp len, SHORTEST_MATCH
|
||||
jb encode_literal_finish
|
||||
|
||||
add f_i, len
|
||||
|
||||
len_to_len_code len_code, len, rfc_lookup
|
||||
dist_to_dist_code dist_code, dist
|
||||
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
|
||||
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2_finish
|
||||
jmp final_bytes
|
||||
|
||||
encode_literal_finish:
|
||||
;; Encode literal
|
||||
and curr_data %+ d, 0xFF
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
|
||||
|
||||
;; Setup for next loop
|
||||
add f_i, 1
|
||||
cmp f_i, file_length
|
||||
jl loop2_finish
|
||||
|
||||
;; Count whatever is left, one byte at a time, as literals
final_bytes:
|
||||
add file_length, LAST_BYTES_COUNT
|
||||
final_bytes_loop:
|
||||
cmp f_i, file_length
|
||||
jge end
|
||||
movzx curr_data, byte [file_start + f_i]
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
|
||||
inc f_i
|
||||
jmp final_bytes_loop
|
||||
|
||||
end:
|
||||
;; Handle eob at end of stream
|
||||
mov tmp1, [rsp + _eob_count_offset]
|
||||
mov qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * 256], tmp1
|
||||
|
||||
;; Reload the registers saved on entry and release the frame
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
|
||||
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
|
||||
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
|
||||
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
|
||||
mov r12, [rsp + gpr_save_mem_offset + 4*8]
|
||||
mov r13, [rsp + gpr_save_mem_offset + 5*8]
|
||||
mov r14, [rsp + gpr_save_mem_offset + 6*8]
|
||||
mov r15, [rsp + gpr_save_mem_offset + 7*8]
|
||||
|
||||
%ifndef ALIGN_STACK
|
||||
add rsp, stack_size
|
||||
%else
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
%endif
|
||||
ret
|
||||
|
||||
;; Reached from loop2 when all 8 bytes loaded at the first position equal
;; the 8 bytes at the candidate; the compare250* macro selected by
;; COMPARE_TYPE then produces len (presumably the full match length - its
;; definition is in another file)
compare_loop:
|
||||
and hash %+ d, HASH_MASK
|
||||
lea tmp2, [tmp1 + dist - 1]
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
lea tmp3, [f_i + 1]
|
||||
jmp len_dist_huffman
|
||||
|
||||
;; Same as compare_loop for the second position; the first byte is counted
;; as a literal before joining len_dist_lit_huffman
compare_loop2:
|
||||
add tmp1, 1
|
||||
lea tmp2, [tmp1 + dist2 - 1]
|
||||
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len2, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
and curr_data, 0xff
|
||||
inc qword [histogram + _lit_len_offset + 8 * curr_data]
|
||||
lea tmp3, [f_i + 1]
|
||||
jmp len_dist_lit_huffman
|
||||
|
||||
section .data
|
||||
align 4
|
||||
const_D: dq D
|
||||
const_30: dq 30
|
7
igzip/igzip_update_histogram_01.asm
Normal file
7
igzip/igzip_update_histogram_01.asm
Normal file
@ -0,0 +1,7 @@
|
||||
%define ARCH 01
|
||||
|
||||
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
%include "igzip_update_histogram.asm"
|
8
igzip/igzip_update_histogram_04.asm
Normal file
8
igzip/igzip_update_histogram_04.asm
Normal file
@ -0,0 +1,8 @@
|
||||
%define ARCH 04
|
||||
%define USE_HSWNI
|
||||
|
||||
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 3
|
||||
%endif
|
||||
|
||||
%include "igzip_update_histogram.asm"
|
44
igzip/rfc1951_lookup.asm
Normal file
44
igzip/rfc1951_lookup.asm
Normal file
@ -0,0 +1,44 @@
|
||||
%ifndef RFC1951_LOOKUP
|
||||
%define RFC1951_LOOKUP
|
||||
|
||||
section .data
|
||||
|
||||
align 8
|
||||
|
||||
global rfc1951_lookup_table:data internal
|
||||
rfc1951_lookup_table:
|
||||
len_to_code:
|
||||
db 0x00, 0x00, 0x00
|
||||
db 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
|
||||
db 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
|
||||
db 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
|
||||
db 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
|
||||
db 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
|
||||
db 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
|
||||
db 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
|
||||
db 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
|
||||
db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
|
||||
db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
|
||||
db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
|
||||
db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
|
||||
db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
|
||||
db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
|
||||
db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
|
||||
db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
|
||||
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
|
||||
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
|
||||
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
|
||||
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
|
||||
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
|
||||
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
|
||||
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
|
||||
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
|
||||
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
|
||||
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
|
||||
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
|
||||
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
|
||||
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
|
||||
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
|
||||
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
|
||||
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
|
||||
%endif
|
@ -190,6 +190,7 @@ enum isal_zstate_state {
|
||||
/** @brief Holds the symbol counts gathered by isal_update_histogram */
struct isal_huff_histogram {
|
||||
/* One count per literal/length symbol */
uint64_t lit_len_histogram[IGZIP_LIT_LEN];
|
||||
/* One count per distance symbol */
uint64_t dist_histogram[IGZIP_DIST_LEN];
|
||||
/* Working storage for the assembly isal_update_histogram routines, which
 * keep 16-bit input positions here; it holds no result for the caller */
uint16_t hash_table[HASH_SIZE];
|
||||
};
|
||||
|
||||
/** @brief Holds Bit Buffer information*/
|
||||
|
Loading…
Reference in New Issue
Block a user