Index: third_party/zlib/deflate.c |
diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c |
index 8043e5bd30945f03967ede05b09d26375ecb533b..bd198a3ef09ab76ee196873acac3a85bfceffe2b 100644 |
--- a/third_party/zlib/deflate.c |
+++ b/third_party/zlib/deflate.c |
@@ -85,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); |
local void lm_init OF((deflate_state *s)); |
local void putShortMSB OF((deflate_state *s, uInt b)); |
local void flush_pending OF((z_streamp strm)); |
-local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
+ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
agl
2014/09/23 21:41:40
you need to adjust the spacing here so that the li
|
#ifdef ASMV |
void match_init OF((void)); /* asm code initialization */ |
uInt longest_match OF((deflate_state *s, IPos cur_match, int clas)); |
@@ -98,6 +98,10 @@ local void check_match OF((deflate_state *s, IPos start, IPos match, |
int length)); |
#endif |
+extern void crc_reset(deflate_state *const s); |
+extern void crc_finalize(deflate_state *const s); |
+extern void copy_with_crc(z_streamp strm, Bytef *dst, long size); |
+ |
/* =========================================================================== |
* Local data |
*/ |
@@ -156,14 +160,12 @@ local const config configuration_table[10] = { |
struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ |
#endif |
-/* =========================================================================== |
- * Update a hash value with the given input byte |
- * IN assertion: all calls to to UPDATE_HASH are made with consecutive |
- * input characters, so that a running hash key can be computed from the |
- * previous key instead of complete recalculation each time. |
- */ |
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) |
+#ifdef _MSC_VER /* choose the inline keyword spelling used by the helpers below */ |
+#define INLINE __inline |
+#else |
+#define INLINE inline |
+#endif |
/* =========================================================================== |
* Insert string str in the dictionary and set match_head to the previous head |
@@ -175,17 +177,56 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ |
* input characters and the first MIN_MATCH bytes of str are valid |
* (except for the last MIN_MATCH-1 bytes of the input file). |
*/ |
+#ifdef USE_SSE4_2_CRC_HASH |
+#include "x86.h" |
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) |
+{ |
+ /* Hash 4 bytes at window[str] with the crc32 instruction, link str into head/prev, return the old head. */ |
+ Pos ret; |
+ unsigned val, h = 0; |
+ /* Copy byte-wise: window[str] is a byte buffer and need not be aligned for an unsigned load. */ |
+ zmemcpy((Bytef *)&val, &s->window[str], sizeof(val)); |
+ /* For level >= 6 the top byte of the loaded word is masked off before hashing. */ |
+ if (s->level >= 6) |
+ val &= 0xFFFFFF; |
+ |
+ __asm__ __volatile__ ( |
+ "crc32 %1,%0\n\t" |
+ : "+r" (h) |
+ : "r" (val) |
+ ); |
+ |
+ ret = s->head[h & s->hash_mask]; |
+ s->head[h & s->hash_mask] = str; |
+ s->prev[str & s->w_mask] = ret; |
+ return ret; |
+} |
+#endif |
+ |
+local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) |
+{ /* Portable insert: updates the running hash s->ins_h, makes str the head of that hash slot, returns the old head. */ |
+ Pos ret; |
+ |
+ UPDATE_HASH(s, s->ins_h, str); /* NOTE(review): passes a position, not a byte; assumes UPDATE_HASH is redefined elsewhere to index s->window -- confirm */ |
#ifdef FASTEST |
-#define INSERT_STRING(s, str, match_head) \ |
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ |
- match_head = s->head[s->ins_h], \ |
- s->head[s->ins_h] = (Pos)(str)) |
+ ret = s->head[s->ins_h]; /* FASTEST build: prev[] is not written */ |
#else |
-#define INSERT_STRING(s, str, match_head) \ |
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ |
- match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ |
- s->head[s->ins_h] = (Pos)(str)) |
+ ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; /* old head is also stored in prev[] at str's window slot */ |
#endif |
+ s->head[s->ins_h] = str; /* str becomes the new head for this hash value */ |
+ |
+ return ret; |
+} |
+ |
+local INLINE Pos insert_string(deflate_state *const s, const Pos str) |
+{ /* Use the crc32-based insert when it is compiled in and x86_cpu_has_sse42 is set; otherwise the portable one. */ |
+#ifdef USE_SSE4_2_CRC_HASH |
+ return x86_cpu_has_sse42 ? insert_string_sse(s, str) : insert_string_c(s, str); |
+#else |
+ return insert_string_c(s, str); |
+#endif |
+} |
+ |
/* =========================================================================== |
* Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
@@ -195,6 +236,10 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ |
s->head[s->hash_size-1] = NIL; \ |
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); |
+#ifdef CHECK_SSE2 |
+#include "x86.h" |
+#endif |
+ |
/* ========================================================================= */ |
int ZEXPORT deflateInit_(strm, level, version, stream_size) |
z_streamp strm; |
@@ -219,6 +264,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, |
const char *version; |
int stream_size; |
{ |
+ unsigned window_padding = 0; |
deflate_state *s; |
int wrap = 1; |
static const char my_version[] = ZLIB_VERSION; |
@@ -228,6 +274,10 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, |
* output size for (length,distance) codes is <= 24 bits. |
*/ |
+#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH) |
+ x86_check_features(); |
+#endif |
+ |
if (version == Z_NULL || version[0] != my_version[0] || |
stream_size != sizeof(z_stream)) { |
return Z_VERSION_ERROR; |
@@ -274,12 +324,22 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, |
s->w_size = 1 << s->w_bits; |
s->w_mask = s->w_size - 1; |
+#ifdef USE_SSE4_2_CRC_HASH |
+ if (x86_cpu_has_sse42) |
+ s->hash_bits = 15; |
+ else |
+#endif |
s->hash_bits = memLevel + 7; |
+ |
s->hash_size = 1 << s->hash_bits; |
s->hash_mask = s->hash_size - 1; |
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); |
- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); |
+#ifdef HAVE_PCLMULQDQ |
+ window_padding = 8; |
+#endif |
+ |
+ s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte)); |
s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); |
s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); |
s->class_bitmap = NULL; |
@@ -320,7 +380,6 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) |
deflate_state *s; |
uInt length = dictLength; |
uInt n; |
- IPos hash_head = 0; |
if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || |
strm->state->wrap == 2 || |
@@ -345,11 +404,10 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) |
* call of fill_window. |
*/ |
s->ins_h = s->window[0]; |
- UPDATE_HASH(s, s->ins_h, s->window[1]); |
+ UPDATE_HASH(s, s->ins_h, 2 - MIN_MATCH); |
for (n = 0; n <= length - MIN_MATCH; n++) { |
- INSERT_STRING(s, n, hash_head); |
+ insert_string(s, n); |
} |
- if (hash_head) hash_head = 0; /* to make compiler happy */ |
return Z_OK; |
} |
@@ -613,7 +671,7 @@ int ZEXPORT deflate (strm, flush) |
if (s->status == INIT_STATE) { |
#ifdef GZIP |
if (s->wrap == 2) { |
- strm->adler = crc32(0L, Z_NULL, 0); |
+ crc_reset(s); |
put_byte(s, 31); |
put_byte(s, 139); |
put_byte(s, 8); |
@@ -891,6 +949,7 @@ int ZEXPORT deflate (strm, flush) |
/* Write the trailer */ |
#ifdef GZIP |
if (s->wrap == 2) { |
+ crc_finalize(s); |
put_byte(s, (Byte)(strm->adler & 0xff)); |
put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); |
put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); |
@@ -1013,7 +1072,7 @@ int ZEXPORT deflateCopy (dest, source) |
* allocating a large strm->next_in buffer and copying from it. |
* (See also flush_pending()). |
*/ |
-local int read_buf(strm, buf, size) |
+ZLIB_INTERNAL int read_buf(strm, buf, size) |
z_streamp strm; |
Bytef *buf; |
unsigned size; |
@@ -1025,15 +1084,17 @@ local int read_buf(strm, buf, size) |
strm->avail_in -= len; |
- if (strm->state->wrap == 1) { |
- strm->adler = adler32(strm->adler, strm->next_in, len); |
- } |
#ifdef GZIP |
- else if (strm->state->wrap == 2) { |
- strm->adler = crc32(strm->adler, strm->next_in, len); |
+ if (strm->state->wrap == 2) { |
+ copy_with_crc(strm, buf, len); |
} |
+ else |
#endif |
- zmemcpy(buf, strm->next_in, len); |
+ { |
+ zmemcpy(buf, strm->next_in, len); |
+ if (strm->state->wrap == 1) |
+ strm->adler = adler32(strm->adler, buf, len); |
+ } |
strm->next_in += len; |
strm->total_in += len; |
@@ -1445,10 +1506,31 @@ local void check_match(s, start, match, length) |
* performed for at least two bytes (required for the zip translate_eol |
* option -- not supported here). |
*/ |
-local void fill_window(s) |
+#ifdef HAVE_SSE2 |
+extern void fill_window_sse(deflate_state *s); |
+#endif |
+local void fill_window_c(deflate_state *s); |
+ |
+local void fill_window(deflate_state *s) |
+{ /* Dispatch: call fill_window_sse when HAVE_SSE2 is compiled in, otherwise fall through to fill_window_c. */ |
+#ifdef HAVE_SSE2 |
+#ifdef CHECK_SSE2 |
+ if (x86_cpu_has_sse2) { /* runtime gate; only compiled when CHECK_SSE2 is defined */ |
+#endif |
+ fill_window_sse(s); |
+ return; /* without CHECK_SSE2 this return is unconditional, so the call below is never reached */ |
+#ifdef CHECK_SSE2 |
+ } |
+#endif |
+#endif |
+ |
+ fill_window_c(s); |
+} |
+ |
+local void fill_window_c(s) |
deflate_state *s; |
{ |
- register unsigned n, m; |
+ register unsigned n; |
register Posf *p; |
unsigned more; /* Amount of free space at the end of the window. */ |
uInt wsize = s->w_size; |
@@ -1488,6 +1570,7 @@ local void fill_window(s) |
n = s->hash_size; |
p = &s->head[n]; |
do { |
+ unsigned m; |
m = *--p; |
*p = (Pos)(m >= wsize ? m-wsize : NIL); |
} while (--n); |
@@ -1496,6 +1579,7 @@ local void fill_window(s) |
#ifndef FASTEST |
p = &s->prev[n]; |
do { |
+ unsigned m; |
m = *--p; |
*p = (Pos)(m >= wsize ? m-wsize : NIL); |
/* If n is not on any hash chain, prev[n] is garbage but |
@@ -1544,7 +1628,7 @@ local void fill_window(s) |
/* Initialize the hash value now that we have some input: */ |
if (s->lookahead >= MIN_MATCH) { |
s->ins_h = s->window[s->strstart]; |
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); |
+ UPDATE_HASH(s, s->ins_h, s->strstart+1 - (MIN_MATCH - 1)); |
#if MIN_MATCH != 3 |
Call UPDATE_HASH() MIN_MATCH-3 more times |
#endif |
@@ -1711,7 +1795,7 @@ local block_state deflate_fast(s, flush, clas) |
*/ |
hash_head = NIL; |
if (s->lookahead >= MIN_MATCH) { |
- INSERT_STRING(s, s->strstart, hash_head); |
+ hash_head = insert_string(s, s->strstart); |
} |
/* Find the longest match, discarding those <= prev_length. |
@@ -1742,7 +1826,7 @@ local block_state deflate_fast(s, flush, clas) |
s->match_length--; /* string at strstart already in table */ |
do { |
s->strstart++; |
- INSERT_STRING(s, s->strstart, hash_head); |
+ hash_head = insert_string(s, s->strstart); |
/* strstart never exceeds WSIZE-MAX_MATCH, so there are |
* always MIN_MATCH bytes ahead. |
*/ |
@@ -1754,7 +1838,7 @@ local block_state deflate_fast(s, flush, clas) |
s->strstart += s->match_length; |
s->match_length = 0; |
s->ins_h = s->window[s->strstart]; |
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); |
+ UPDATE_HASH(s, s->ins_h, s->strstart+2 - (MIN_MATCH)); |
#if MIN_MATCH != 3 |
Call UPDATE_HASH() MIN_MATCH-3 more times |
#endif |
@@ -1821,7 +1905,7 @@ local block_state deflate_slow(s, flush, clas) |
*/ |
hash_head = NIL; |
if (s->lookahead >= MIN_MATCH) { |
- INSERT_STRING(s, s->strstart, hash_head); |
+ hash_head = insert_string(s, s->strstart); |
} |
/* Find the longest match, discarding those <= prev_length. |
@@ -1890,7 +1974,7 @@ local block_state deflate_slow(s, flush, clas) |
s->prev_length -= 2; |
do { |
if (++s->strstart <= max_insert) { |
- INSERT_STRING(s, s->strstart, hash_head); |
+ hash_head = insert_string(s, s->strstart); |
} |
} while (--s->prev_length != 0); |
s->match_available = 0; |