| Index: third_party/zlib/deflate.c | 
| diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c | 
| index 8043e5bd30945f03967ede05b09d26375ecb533b..55ec21570a1a99b54bae2ecaefb9ec4a085080e3 100644 | 
| --- a/third_party/zlib/deflate.c | 
| +++ b/third_party/zlib/deflate.c | 
| @@ -49,7 +49,10 @@ | 
|  | 
| /* @(#) $Id$ */ | 
|  | 
| +#include <assert.h> | 
| + | 
| #include "deflate.h" | 
| +#include "x86.h" | 
|  | 
| const char deflate_copyright[] = | 
| " deflate 1.2.5 Copyright 1995-2010 Jean-loup Gailly and Mark Adler "; | 
| @@ -85,7 +88,7 @@ local block_state deflate_huff   OF((deflate_state *s, int flush)); | 
| local void lm_init        OF((deflate_state *s)); | 
| local void putShortMSB    OF((deflate_state *s, uInt b)); | 
| local void flush_pending  OF((z_streamp strm)); | 
| -local int read_buf        OF((z_streamp strm, Bytef *buf, unsigned size)); | 
| + | 
| #ifdef ASMV | 
| void match_init OF((void)); /* asm code initialization */ | 
| uInt longest_match  OF((deflate_state *s, IPos cur_match, int clas)); | 
| @@ -98,6 +101,23 @@ local  void check_match OF((deflate_state *s, IPos start, IPos match, | 
| int length)); | 
| #endif | 
|  | 
| +/* No longer file-local: exported (ZLIB_INTERNAL) for fill_window_sse.c to use */ | 
| +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 
| + | 
| +/* From crc32.c */ | 
| +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); | 
| +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); | 
| +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); | 
| + | 
| +#ifdef _MSC_VER | 
| +#define INLINE __inline /* spelling used when _MSC_VER is defined */ | 
| +#else | 
| +#define INLINE inline | 
| +#endif | 
| + | 
| +/* Forward declaration: defined later in this file, called by insert_string() */ | 
| +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); | 
| + | 
| /* =========================================================================== | 
| * Local data | 
| */ | 
| @@ -164,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ | 
| */ | 
| #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) | 
|  | 
| - | 
| /* =========================================================================== | 
| * Insert string str in the dictionary and set match_head to the previous head | 
| * of the hash chain (the most recent string with same hash key). Return | 
| @@ -175,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ | 
| *    input characters and the first MIN_MATCH bytes of str are valid | 
| *    (except for the last MIN_MATCH-1 bytes of the input file). | 
| */ | 
| +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) | 
| +{ | 
| +    Pos ret; /* previous head of the hash chain, handed back to the caller */ | 
| +    /* Mix window[str + MIN_MATCH-1] into the running hash s->ins_h. */ | 
| +    UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); | 
| #ifdef FASTEST | 
| -#define INSERT_STRING(s, str, match_head) \ | 
| -   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ | 
| -    match_head = s->head[s->ins_h], \ | 
| -    s->head[s->ins_h] = (Pos)(str)) | 
| +    ret = s->head[s->ins_h]; /* FASTEST: s->prev is left untouched */ | 
| #else | 
| -#define INSERT_STRING(s, str, match_head) \ | 
| -   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ | 
| -    match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ | 
| -    s->head[s->ins_h] = (Pos)(str)) | 
| +    ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; /* link str to old head */ | 
| #endif | 
| +    s->head[s->ins_h] = str; /* str becomes the new head for this hash value */ | 
| + | 
| +    return ret; | 
| +} | 
| + | 
| +local INLINE Pos insert_string(deflate_state *const s, const Pos str) | 
| +{ | 
| +    /* Use the CRC32-based insert when x86_cpu_enable_simd is set, else the C one. */ | 
| +    return x86_cpu_enable_simd ? insert_string_sse(s, str) | 
| +                               : insert_string_c(s, str); | 
| +} | 
| + | 
|  | 
| /* =========================================================================== | 
| * Initialize the hash table (avoiding 64K overflow for 16 bit systems). | 
| @@ -219,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, | 
| const char *version; | 
| int stream_size; | 
| { | 
| +    unsigned window_padding = 8; | 
| deflate_state *s; | 
| int wrap = 1; | 
| static const char my_version[] = ZLIB_VERSION; | 
| @@ -228,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, | 
| * output size for (length,distance) codes is <= 24 bits. | 
| */ | 
|  | 
| +    x86_check_features(); | 
| + | 
| if (version == Z_NULL || version[0] != my_version[0] || | 
| stream_size != sizeof(z_stream)) { | 
| return Z_VERSION_ERROR; | 
| @@ -274,12 +307,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, | 
| s->w_size = 1 << s->w_bits; | 
| s->w_mask = s->w_size - 1; | 
|  | 
| -    s->hash_bits = memLevel + 7; | 
| +    if (x86_cpu_enable_simd) { | 
| +        s->hash_bits = 15; | 
| +    } else { | 
| +        s->hash_bits = memLevel + 7; | 
| +    } | 
| + | 
| s->hash_size = 1 << s->hash_bits; | 
| s->hash_mask = s->hash_size - 1; | 
| s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); | 
|  | 
| -    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); | 
| +    s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte)); | 
| s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos)); | 
| s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos)); | 
| s->class_bitmap = NULL; | 
| @@ -347,7 +385,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) | 
| s->ins_h = s->window[0]; | 
| UPDATE_HASH(s, s->ins_h, s->window[1]); | 
| for (n = 0; n <= length - MIN_MATCH; n++) { | 
| -        INSERT_STRING(s, n, hash_head); | 
| +        insert_string(s, n); | 
| } | 
| if (hash_head) hash_head = 0;  /* to make compiler happy */ | 
| return Z_OK; | 
| @@ -613,7 +651,7 @@ int ZEXPORT deflate (strm, flush) | 
| if (s->status == INIT_STATE) { | 
| #ifdef GZIP | 
| if (s->wrap == 2) { | 
| -            strm->adler = crc32(0L, Z_NULL, 0); | 
| +            crc_reset(s); | 
| put_byte(s, 31); | 
| put_byte(s, 139); | 
| put_byte(s, 8); | 
| @@ -891,6 +929,7 @@ int ZEXPORT deflate (strm, flush) | 
| /* Write the trailer */ | 
| #ifdef GZIP | 
| if (s->wrap == 2) { | 
| +        crc_finalize(s); | 
| put_byte(s, (Byte)(strm->adler & 0xff)); | 
| put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); | 
| put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); | 
| @@ -1013,7 +1052,7 @@ int ZEXPORT deflateCopy (dest, source) | 
| * allocating a large strm->next_in buffer and copying from it. | 
| * (See also flush_pending()). | 
| */ | 
| -local int read_buf(strm, buf, size) | 
| +ZLIB_INTERNAL int read_buf(strm, buf, size) | 
| z_streamp strm; | 
| Bytef *buf; | 
| unsigned size; | 
| @@ -1025,15 +1064,17 @@ local int read_buf(strm, buf, size) | 
|  | 
| strm->avail_in  -= len; | 
|  | 
| -    if (strm->state->wrap == 1) { | 
| -        strm->adler = adler32(strm->adler, strm->next_in, len); | 
| -    } | 
| #ifdef GZIP | 
| -    else if (strm->state->wrap == 2) { | 
| -        strm->adler = crc32(strm->adler, strm->next_in, len); | 
| +    if (strm->state->wrap == 2) { | 
| +        copy_with_crc(strm, buf, len); | 
| } | 
| +    else | 
| #endif | 
| -    zmemcpy(buf, strm->next_in, len); | 
| +    { | 
| +        zmemcpy(buf, strm->next_in, len); | 
| +        if (strm->state->wrap == 1) | 
| +            strm->adler = adler32(strm->adler, buf, len); | 
| +    } | 
| strm->next_in  += len; | 
| strm->total_in += len; | 
|  | 
| @@ -1445,7 +1486,19 @@ local void check_match(s, start, match, length) | 
| *    performed for at least two bytes (required for the zip translate_eol | 
| *    option -- not supported here). | 
| */ | 
| -local void fill_window(s) | 
| +local void fill_window_c(deflate_state *s); | 
| + | 
| +local void fill_window(deflate_state *s) | 
| +{ | 
| +    /* Dispatch on x86_cpu_enable_simd: exactly one implementation runs. */ | 
| +    if (!x86_cpu_enable_simd) { | 
| +        fill_window_c(s); | 
| +    } else { | 
| +        fill_window_sse(s); | 
| +    } | 
| +} | 
| + | 
| +local void fill_window_c(s) | 
| deflate_state *s; | 
| { | 
| register unsigned n, m; | 
| @@ -1711,7 +1764,7 @@ local block_state deflate_fast(s, flush, clas) | 
| */ | 
| hash_head = NIL; | 
| if (s->lookahead >= MIN_MATCH) { | 
| -            INSERT_STRING(s, s->strstart, hash_head); | 
| +            hash_head = insert_string(s, s->strstart); | 
| } | 
|  | 
| /* Find the longest match, discarding those <= prev_length. | 
| @@ -1742,7 +1795,7 @@ local block_state deflate_fast(s, flush, clas) | 
| s->match_length--; /* string at strstart already in table */ | 
| do { | 
| s->strstart++; | 
| -                    INSERT_STRING(s, s->strstart, hash_head); | 
| +                    hash_head = insert_string(s, s->strstart); | 
| /* strstart never exceeds WSIZE-MAX_MATCH, so there are | 
| * always MIN_MATCH bytes ahead. | 
| */ | 
| @@ -1821,7 +1874,7 @@ local block_state deflate_slow(s, flush, clas) | 
| */ | 
| hash_head = NIL; | 
| if (s->lookahead >= MIN_MATCH) { | 
| -            INSERT_STRING(s, s->strstart, hash_head); | 
| +            hash_head = insert_string(s, s->strstart); | 
| } | 
|  | 
| /* Find the longest match, discarding those <= prev_length. | 
| @@ -1890,7 +1943,7 @@ local block_state deflate_slow(s, flush, clas) | 
| s->prev_length -= 2; | 
| do { | 
| if (++s->strstart <= max_insert) { | 
| -                    INSERT_STRING(s, s->strstart, hash_head); | 
| +                    hash_head = insert_string(s, s->strstart); | 
| } | 
| } while (--s->prev_length != 0); | 
| s->match_available = 0; | 
| @@ -2031,3 +2084,37 @@ local block_state deflate_huff(s, flush) | 
| FLUSH_BLOCK(s, flush == Z_FINISH); | 
| return flush == Z_FINISH ? finish_done : block_done; | 
| } | 
| + | 
| +/* CRC32-instruction variant of insert_string_c(): hashes the bytes loaded from | 
| + * window[str], links str into s->head/s->prev and returns the previous chain | 
| + * head. Inlinable: GCC/clang use inline asm, Visual Studio uses an intrinsic. */ | 
| +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) | 
| +{ | 
| +    Pos ret; | 
| +    unsigned val, h = 0; | 
| + | 
| +    /* Load through zmemcpy: &window[str] may be unaligned, and reading Bytef | 
| +     * storage through a casted unsigned* breaks the strict-aliasing rule. */ | 
| +    zmemcpy((Bytef *)&val, &s->window[str], sizeof(val)); | 
| +    if (s->level >= 6) | 
| +        val &= 0xFFFFFF; /* keep only the low 24 bits (3 bytes) of the load */ | 
| + | 
| +/* Windows clang should use inline asm */ | 
| +#if defined(_MSC_VER) && !defined(__clang__) | 
| +    h = _mm_crc32_u32(h, val); | 
| +#elif defined(__i386__) || defined(__amd64__) | 
| +    __asm__ __volatile__ ( | 
| +        "crc32 %1,%0\n\t" | 
| +    : "+r" (h) | 
| +    : "r" (val) | 
| +    ); | 
| +#else | 
| +    /* This should never happen */ | 
| +    assert(0); | 
| +#endif | 
| +    /* Make str the head of its hash bucket; the old head goes into s->prev. */ | 
| +    ret = s->head[h & s->hash_mask]; | 
| +    s->head[h & s->hash_mask] = str; | 
| +    s->prev[str & s->w_mask] = ret; | 
| +    return ret; | 
| +} | 
|  |