| Index: third_party/zlib/deflate.c
|
| diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c
|
| index 8043e5bd30945f03967ede05b09d26375ecb533b..55ec21570a1a99b54bae2ecaefb9ec4a085080e3 100644
|
| --- a/third_party/zlib/deflate.c
|
| +++ b/third_party/zlib/deflate.c
|
| @@ -49,7 +49,10 @@
|
|
|
| /* @(#) $Id$ */
|
|
|
| +#include <assert.h>
|
| +
|
| #include "deflate.h"
|
| +#include "x86.h"
|
|
|
| const char deflate_copyright[] =
|
| " deflate 1.2.5 Copyright 1995-2010 Jean-loup Gailly and Mark Adler ";
|
| @@ -85,7 +88,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush));
|
| local void lm_init OF((deflate_state *s));
|
| local void putShortMSB OF((deflate_state *s, uInt b));
|
| local void flush_pending OF((z_streamp strm));
|
| -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
|
| +
|
| #ifdef ASMV
|
| void match_init OF((void)); /* asm code initialization */
|
| uInt longest_match OF((deflate_state *s, IPos cur_match, int clas));
|
| @@ -98,6 +101,23 @@ local void check_match OF((deflate_state *s, IPos start, IPos match,
|
| int length));
|
| #endif
|
|
|
| +/* For fill_window_sse.c to use */
|
| +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
|
| +
|
| +/* From crc32.c */
|
| +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s);
|
| +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s);
|
| +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
|
| +
|
| +#ifdef _MSC_VER
|
| +#define INLINE __inline
|
| +#else
|
| +#define INLINE inline
|
| +#endif
|
| +
|
| +/* Forward declaration: insert_string_sse() is defined at the end of this file */
|
| +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
|
| +
|
| /* ===========================================================================
|
| * Local data
|
| */
|
| @@ -164,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
|
| */
|
| #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
|
|
|
| -
|
| /* ===========================================================================
|
| * Insert string str in the dictionary and set match_head to the previous head
|
| * of the hash chain (the most recent string with same hash key). Return
|
| @@ -175,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
|
| * input characters and the first MIN_MATCH bytes of str are valid
|
| * (except for the last MIN_MATCH-1 bytes of the input file).
|
| */
|
| +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str)
|
| +{
|
| +    /* Portable insertion: fold window byte str + MIN_MATCH-1 into the
|
| +     * running hash s->ins_h, make str the new head of that hash chain and
|
| +     * hand back the head it replaced.
|
| +     */
|
| +    Pos prev_head;
|
| +
|
| +    UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]);
|
| #ifdef FASTEST
|
| -#define INSERT_STRING(s, str, match_head) \
|
| - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
|
| - match_head = s->head[s->ins_h], \
|
| - s->head[s->ins_h] = (Pos)(str))
|
| +    prev_head = s->head[s->ins_h];
|
| #else
|
| -#define INSERT_STRING(s, str, match_head) \
|
| - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
|
| - match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
|
| - s->head[s->ins_h] = (Pos)(str))
|
| +    prev_head = s->head[s->ins_h];
|
| +    /* Outside FASTEST builds the replaced head is also chained via prev[] */
|
| +    s->prev[str & s->w_mask] = prev_head;
|
| #endif
|
| +    s->head[s->ins_h] = str;
|
| +
|
| +    return prev_head;
|
| +}
|
| +
|
| +/* Insert string str in the dictionary, choosing the implementation at run
|
| + * time: insert_string_sse() when x86_cpu_enable_simd is non-zero, otherwise
|
| + * insert_string_c(). Returns the value returned by the chosen routine.
|
| + */
|
| +local INLINE Pos insert_string(deflate_state *const s, const Pos str)
|
| +{
|
| +    return x86_cpu_enable_simd ? insert_string_sse(s, str)
|
| +                               : insert_string_c(s, str);
|
| +}
|
| +
|
|
|
| /* ===========================================================================
|
| * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
|
| @@ -219,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
|
| const char *version;
|
| int stream_size;
|
| {
|
| + unsigned window_padding = 8;
|
| deflate_state *s;
|
| int wrap = 1;
|
| static const char my_version[] = ZLIB_VERSION;
|
| @@ -228,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
|
| * output size for (length,distance) codes is <= 24 bits.
|
| */
|
|
|
| + x86_check_features();
|
| +
|
| if (version == Z_NULL || version[0] != my_version[0] ||
|
| stream_size != sizeof(z_stream)) {
|
| return Z_VERSION_ERROR;
|
| @@ -274,12 +307,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
|
| s->w_size = 1 << s->w_bits;
|
| s->w_mask = s->w_size - 1;
|
|
|
| - s->hash_bits = memLevel + 7;
|
| + if (x86_cpu_enable_simd) {
|
| + s->hash_bits = 15;
|
| + } else {
|
| + s->hash_bits = memLevel + 7;
|
| + }
|
| +
|
| s->hash_size = 1 << s->hash_bits;
|
| s->hash_mask = s->hash_size - 1;
|
| s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
|
|
|
| - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
|
| + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte));
|
| s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
|
| s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
|
| s->class_bitmap = NULL;
|
| @@ -347,7 +385,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
|
| s->ins_h = s->window[0];
|
| UPDATE_HASH(s, s->ins_h, s->window[1]);
|
| for (n = 0; n <= length - MIN_MATCH; n++) {
|
| - INSERT_STRING(s, n, hash_head);
|
| + insert_string(s, n);
|
| }
|
| if (hash_head) hash_head = 0; /* to make compiler happy */
|
| return Z_OK;
|
| @@ -613,7 +651,7 @@ int ZEXPORT deflate (strm, flush)
|
| if (s->status == INIT_STATE) {
|
| #ifdef GZIP
|
| if (s->wrap == 2) {
|
| - strm->adler = crc32(0L, Z_NULL, 0);
|
| + crc_reset(s);
|
| put_byte(s, 31);
|
| put_byte(s, 139);
|
| put_byte(s, 8);
|
| @@ -891,6 +929,7 @@ int ZEXPORT deflate (strm, flush)
|
| /* Write the trailer */
|
| #ifdef GZIP
|
| if (s->wrap == 2) {
|
| + crc_finalize(s);
|
| put_byte(s, (Byte)(strm->adler & 0xff));
|
| put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
|
| put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
|
| @@ -1013,7 +1052,7 @@ int ZEXPORT deflateCopy (dest, source)
|
| * allocating a large strm->next_in buffer and copying from it.
|
| * (See also flush_pending()).
|
| */
|
| -local int read_buf(strm, buf, size)
|
| +ZLIB_INTERNAL int read_buf(strm, buf, size)
|
| z_streamp strm;
|
| Bytef *buf;
|
| unsigned size;
|
| @@ -1025,15 +1064,17 @@ local int read_buf(strm, buf, size)
|
|
|
| strm->avail_in -= len;
|
|
|
| - if (strm->state->wrap == 1) {
|
| - strm->adler = adler32(strm->adler, strm->next_in, len);
|
| - }
|
| #ifdef GZIP
|
| - else if (strm->state->wrap == 2) {
|
| - strm->adler = crc32(strm->adler, strm->next_in, len);
|
| + if (strm->state->wrap == 2) {
|
| + copy_with_crc(strm, buf, len);
|
| }
|
| + else
|
| #endif
|
| - zmemcpy(buf, strm->next_in, len);
|
| + {
|
| + zmemcpy(buf, strm->next_in, len);
|
| + if (strm->state->wrap == 1)
|
| + strm->adler = adler32(strm->adler, buf, len);
|
| + }
|
| strm->next_in += len;
|
| strm->total_in += len;
|
|
|
| @@ -1445,7 +1486,19 @@ local void check_match(s, start, match, length)
|
| * performed for at least two bytes (required for the zip translate_eol
|
| * option -- not supported here).
|
| */
|
| -local void fill_window(s)
|
| +local void fill_window_c(deflate_state *s);
|
| +
|
| +/* Run-time dispatcher: refill the window with fill_window_sse() when
|
| + * x86_cpu_enable_simd is non-zero, otherwise with fill_window_c().
|
| + */
|
| +local void fill_window(deflate_state *s)
|
| +{
|
| +    if (!x86_cpu_enable_simd) {
|
| +        fill_window_c(s);
|
| +    } else {
|
| +        fill_window_sse(s);
|
| +    }
|
| +}
|
| +
|
| +local void fill_window_c(s)
|
| deflate_state *s;
|
| {
|
| register unsigned n, m;
|
| @@ -1711,7 +1764,7 @@ local block_state deflate_fast(s, flush, clas)
|
| */
|
| hash_head = NIL;
|
| if (s->lookahead >= MIN_MATCH) {
|
| - INSERT_STRING(s, s->strstart, hash_head);
|
| + hash_head = insert_string(s, s->strstart);
|
| }
|
|
|
| /* Find the longest match, discarding those <= prev_length.
|
| @@ -1742,7 +1795,7 @@ local block_state deflate_fast(s, flush, clas)
|
| s->match_length--; /* string at strstart already in table */
|
| do {
|
| s->strstart++;
|
| - INSERT_STRING(s, s->strstart, hash_head);
|
| + hash_head = insert_string(s, s->strstart);
|
| /* strstart never exceeds WSIZE-MAX_MATCH, so there are
|
| * always MIN_MATCH bytes ahead.
|
| */
|
| @@ -1821,7 +1874,7 @@ local block_state deflate_slow(s, flush, clas)
|
| */
|
| hash_head = NIL;
|
| if (s->lookahead >= MIN_MATCH) {
|
| - INSERT_STRING(s, s->strstart, hash_head);
|
| + hash_head = insert_string(s, s->strstart);
|
| }
|
|
|
| /* Find the longest match, discarding those <= prev_length.
|
| @@ -1890,7 +1943,7 @@ local block_state deflate_slow(s, flush, clas)
|
| s->prev_length -= 2;
|
| do {
|
| if (++s->strstart <= max_insert) {
|
| - INSERT_STRING(s, s->strstart, hash_head);
|
| + hash_head = insert_string(s, s->strstart);
|
| }
|
| } while (--s->prev_length != 0);
|
| s->match_available = 0;
|
| @@ -2031,3 +2084,37 @@ local block_state deflate_huff(s, flush)
|
| FLUSH_BLOCK(s, flush == Z_FINISH);
|
| return flush == Z_FINISH ? finish_done : block_done;
|
| }
|
| +
|
| +/* ===========================================================================
|
| + * CRC32-instruction variant of insert_string_c(): hash the bytes at
|
| + * s->window[str] with the x86 crc32 instruction, make str the new head of
|
| + * that hash chain, record the replaced head in s->prev[] and return it.
|
| + *
|
| + * sizeof(unsigned) bytes are loaded starting at window[str]; for
|
| + * s->level >= 6 only the low 24 bits of the loaded word are hashed.
|
| + * NOTE(review): deflateInit2_ allocates window_padding extra window
|
| + * elements, presumably so this load stays inside the allocation -- confirm.
|
| + *
|
| + * Safe to inline this as GCC/clang will use inline asm and Visual Studio will
|
| + * use intrinsic without extra params
|
| + */
|
| +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
|
| +{
|
| +    Pos ret;
|
| +    unsigned val, h = 0;
|
| +
|
| +    /* Copy the bytes rather than dereferencing a casted pointer: the window
|
| +     * is a byte array and window + str need not be aligned for unsigned,
|
| +     * so *(unsigned *)&s->window[str] is undefined behaviour in C.
|
| +     */
|
| +    zmemcpy((Bytef *)&val, &s->window[str], (uInt)sizeof(val));
|
| +
|
| +    if (s->level >= 6)
|
| +        val &= 0xFFFFFF;
|
| +
|
| +/* Windows clang should use inline asm */
|
| +#if defined(_MSC_VER) && !defined(__clang__)
|
| +    h = _mm_crc32_u32(h, val);
|
| +#elif defined(__i386__) || defined(__amd64__)
|
| +    __asm__ __volatile__ (
|
| +        "crc32 %1,%0\n\t"
|
| +        : "+r" (h)
|
| +        : "r" (val)
|
| +    );
|
| +#else
|
| +    /* This should never happen */
|
| +    assert(0);
|
| +#endif
|
| +
|
| +    ret = s->head[h & s->hash_mask];
|
| +    s->head[h & s->hash_mask] = str;
|
| +    s->prev[str & s->w_mask] = ret;
|
| +    return ret;
|
| +}
|
|
|