| OLD | NEW |
| 1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c | 1 diff --git a/crc32.c b/crc32.c |
| 2 index 979a7190a3ca..09228ed9c76f 100644 | 2 index 9580440c0e6b..9162429cc7b4 100644 |
| 3 --- a/third_party/zlib/crc32.c | 3 --- a/crc32.c |
| 4 +++ b/third_party/zlib/crc32.c | 4 +++ b/crc32.c |
| 5 @@ -28,6 +28,8 @@ | 5 @@ -28,6 +28,8 @@ |
| 6 # endif /* !DYNAMIC_CRC_TABLE */ | 6 # endif /* !DYNAMIC_CRC_TABLE */ |
| 7 #endif /* MAKECRCH */ | 7 #endif /* MAKECRCH */ |
| 8 | 8 |
| 9 +#include "deflate.h" | 9 +#include "deflate.h" |
| 10 +#include "x86.h" | 10 +#include "x86.h" |
| 11 #include "zutil.h" /* for STDC and FAR definitions */ | 11 #include "zutil.h" /* for STDC and FAR definitions */ |
| 12 | 12 |
| 13 #define local static | 13 /* Definitions for doing the crc four data bytes at a time. */ |
| 14 @@ -423,3 +425,28 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) | 14 @@ -440,3 +442,28 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) |
| 15 { | 15 { |
| 16 return crc32_combine_(crc1, crc2, len2); | 16 return crc32_combine_(crc1, crc2, len2); |
| 17 } | 17 } |
| 18 + | 18 + |
| 19 +ZLIB_INTERNAL void crc_reset(deflate_state *const s) | 19 +ZLIB_INTERNAL void crc_reset(deflate_state *const s) |
| 20 +{ | 20 +{ |
| 21 + if (x86_cpu_enable_simd) { | 21 + if (x86_cpu_enable_simd) { |
| 22 + crc_fold_init(s); | 22 + crc_fold_init(s); |
| 23 + return; | 23 + return; |
| 24 + } | 24 + } |
| 25 + s->strm->adler = crc32(0L, Z_NULL, 0); | 25 + s->strm->adler = crc32(0L, Z_NULL, 0); |
| 26 +} | 26 +} |
| 27 + | 27 + |
| 28 +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) | 28 +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) |
| 29 +{ | 29 +{ |
| 30 + if (x86_cpu_enable_simd) | 30 + if (x86_cpu_enable_simd) |
| 31 + s->strm->adler = crc_fold_512to32(s); | 31 + s->strm->adler = crc_fold_512to32(s); |
| 32 +} | 32 +} |
| 33 + | 33 + |
| 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) | 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) |
| 35 +{ | 35 +{ |
| 36 + if (x86_cpu_enable_simd) { | 36 + if (x86_cpu_enable_simd) { |
| 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); | 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); |
| 38 + return; | 38 + return; |
| 39 + } | 39 + } |
| 40 + zmemcpy(dst, strm->next_in, size); | 40 + zmemcpy(dst, strm->next_in, size); |
| 41 + strm->adler = crc32(strm->adler, dst, size); | 41 + strm->adler = crc32(strm->adler, dst, size); |
| 42 +} | 42 +} |
| 43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c | 43 diff --git a/crc_folding.c b/crc_folding.c |
| 44 index e69de29bb2d1..48d77744aaf4 100644 | 44 new file mode 100644 |
| 45 --- a/third_party/zlib/crc_folding.c | 45 index 000000000000..48d77744aaf4 |
| 46 +++ b/third_party/zlib/crc_folding.c | 46 --- /dev/null |
| 47 +++ b/crc_folding.c |
| 47 @@ -0,0 +1,493 @@ | 48 @@ -0,0 +1,493 @@ |
| 48 +/* | 49 +/* |
| 49 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 50 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
| 50 + * instruction. | 51 + * instruction. |
| 51 + * | 52 + * |
| 52 + * A white paper describing this algorithm can be found at: | 53 + * A white paper describing this algorithm can be found at: |
| 53 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 54 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
| 54 + * | 55 + * |
| 55 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 56 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
| 56 + * Authors: | 57 + * Authors: |
| (...skipping 474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 531 + | 532 + |
| 532 + xmm_crc2 = xmm_crc3; | 533 + xmm_crc2 = xmm_crc3; |
| 533 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 534 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
| 534 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
| 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 536 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
| 536 + | 537 + |
| 537 + crc = _mm_extract_epi32(xmm_crc3, 2); | 538 + crc = _mm_extract_epi32(xmm_crc3, 2); |
| 538 + return ~crc; | 539 + return ~crc; |
| 539 + CRC_SAVE(s) | 540 + CRC_SAVE(s) |
| 540 +} | 541 +} |
| 541 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c | 542 diff --git a/deflate.c b/deflate.c |
| 542 index 696957705b75..02fa2f64f83e 100644 | 543 index 1ec761448de9..aa0c9c67a6dc 100644 |
| 543 --- a/third_party/zlib/deflate.c | 544 --- a/deflate.c |
| 544 +++ b/third_party/zlib/deflate.c | 545 +++ b/deflate.c |
| 545 @@ -48,8 +48,9 @@ | 546 @@ -48,8 +48,9 @@ |
| 546 */ | 547 */ |
| 547 | 548 |
| 548 /* @(#) $Id$ */ | 549 /* @(#) $Id$ */ |
| 549 - | 550 - |
| 550 +#include <assert.h> | 551 +#include <assert.h> |
| 551 #include "deflate.h" | 552 #include "deflate.h" |
| 552 +#include "x86.h" | 553 +#include "x86.h" |
| 553 | 554 |
| 554 const char deflate_copyright[] = | 555 const char deflate_copyright[] = |
| 555 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; | 556 " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; |
| 556 @@ -84,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); | 557 @@ -86,7 +87,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); |
| 557 local void lm_init OF((deflate_state *s)); | 558 local void lm_init OF((deflate_state *s)); |
| 558 local void putShortMSB OF((deflate_state *s, uInt b)); | 559 local void putShortMSB OF((deflate_state *s, uInt b)); |
| 559 local void flush_pending OF((z_streamp strm)); | 560 local void flush_pending OF((z_streamp strm)); |
| 560 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 561 -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
| 561 + | 562 +unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size))
; |
| 562 #ifdef ASMV | 563 #ifdef ASMV |
| 564 # pragma message("Assembler code may have bugs -- use at your own risk") |
| 563 void match_init OF((void)); /* asm code initialization */ | 565 void match_init OF((void)); /* asm code initialization */ |
| 564 uInt longest_match OF((deflate_state *s, IPos cur_match)); | 566 @@ -100,6 +101,20 @@ local void check_match OF((deflate_state *s, IPos start, I
Pos match, |
| 565 @@ -97,6 +98,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, | |
| 566 int length)); | 567 int length)); |
| 567 #endif | 568 #endif |
| 568 | 569 |
| 569 +/* For fill_window_sse.c to use */ | |
| 570 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | |
| 571 + | |
| 572 +/* From crc32.c */ | 570 +/* From crc32.c */ |
| 573 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); | 571 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); |
| 574 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); | 572 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); |
| 575 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); | 573 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); |
| 576 + | 574 + |
| 577 +#ifdef _MSC_VER | 575 +#ifdef _MSC_VER |
| 578 +#define INLINE __inline | 576 +#define INLINE __inline |
| 579 +#else | 577 +#else |
| 580 +#define INLINE inline | 578 +#define INLINE inline |
| 581 +#endif | 579 +#endif |
| 582 + | 580 + |
| 583 +/* Inline optimisation */ | 581 +/* Inline optimisation */ |
| 584 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); | 582 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); |
| 585 + | 583 + |
| 586 /* =========================================================================== | 584 /* =========================================================================== |
| 587 * Local data | 585 * Local data |
| 588 */ | 586 */ |
| 589 @@ -166,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ | 587 @@ -162,7 +177,6 @@ local const config configuration_table[10] = { |
| 590 */ | 588 */ |
| 591 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) | 589 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) |
| 592 | 590 |
| 593 - | 591 - |
| 594 /* =========================================================================== | 592 /* =========================================================================== |
| 595 * Insert string str in the dictionary and set match_head to the previous head | 593 * Insert string str in the dictionary and set match_head to the previous head |
| 596 * of the hash chain (the most recent string with same hash key). Return | 594 * of the hash chain (the most recent string with same hash key). Return |
| 597 @@ -177,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ | 595 @@ -173,17 +187,28 @@ local const config configuration_table[10] = { |
| 598 * input characters and the first MIN_MATCH bytes of str are valid | 596 * characters and the first MIN_MATCH bytes of str are valid (except for |
| 599 * (except for the last MIN_MATCH-1 bytes of the input file). | 597 * the last MIN_MATCH-1 bytes of the input file). |
| 600 */ | 598 */ |
| 601 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) | 599 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) |
| 602 +{ | 600 +{ |
| 603 + Pos ret; | 601 + Pos ret; |
| 604 + | 602 + |
| 605 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); | 603 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); |
| 606 #ifdef FASTEST | 604 #ifdef FASTEST |
| 607 -#define INSERT_STRING(s, str, match_head) \ | 605 -#define INSERT_STRING(s, str, match_head) \ |
| 608 - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ | 606 - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ |
| 609 - match_head = s->head[s->ins_h], \ | 607 - match_head = s->head[s->ins_h], \ |
| (...skipping 14 matching lines...) Expand all Loading... |
| 624 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) | 622 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) |
| 625 +{ | 623 +{ |
| 626 + if (x86_cpu_enable_simd) | 624 + if (x86_cpu_enable_simd) |
| 627 + return insert_string_sse(s, str); | 625 + return insert_string_sse(s, str); |
| 628 + return insert_string_c(s, str); | 626 + return insert_string_c(s, str); |
| 629 +} | 627 +} |
| 630 + | 628 + |
| 631 | 629 |
| 632 /* =========================================================================== | 630 /* =========================================================================== |
| 633 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). | 631 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
| 634 @@ -221,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 632 @@ -248,6 +273,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
| 635 const char *version; | 633 const char *version; |
| 636 int stream_size; | 634 int stream_size; |
| 637 { | 635 { |
| 638 + unsigned window_padding = 8; | 636 + unsigned window_padding = 8; |
| 639 deflate_state *s; | 637 deflate_state *s; |
| 640 int wrap = 1; | 638 int wrap = 1; |
| 641 static const char my_version[] = ZLIB_VERSION; | 639 static const char my_version[] = ZLIB_VERSION; |
| 642 @@ -230,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 640 @@ -257,6 +283,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
| 643 * output size for (length,distance) codes is <= 24 bits. | 641 * output size for (length,distance) codes is <= 24 bits. |
| 644 */ | 642 */ |
| 645 | 643 |
| 646 + x86_check_features(); | 644 + x86_check_features(); |
| 647 + | 645 + |
| 648 if (version == Z_NULL || version[0] != my_version[0] || | 646 if (version == Z_NULL || version[0] != my_version[0] || |
| 649 stream_size != sizeof(z_stream)) { | 647 stream_size != sizeof(z_stream)) { |
| 650 return Z_VERSION_ERROR; | 648 return Z_VERSION_ERROR; |
| 651 @@ -285,12 +316,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, | 649 @@ -313,12 +341,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, |
| 652 s->w_size = 1 << s->w_bits; | 650 s->w_size = 1 << s->w_bits; |
| 653 s->w_mask = s->w_size - 1; | 651 s->w_mask = s->w_size - 1; |
| 654 | 652 |
| 655 - s->hash_bits = memLevel + 7; | 653 - s->hash_bits = (uInt)memLevel + 7; |
| 656 + if (x86_cpu_enable_simd) { | 654 + if (x86_cpu_enable_simd) { |
| 657 + s->hash_bits = 15; | 655 + s->hash_bits = 15; |
| 658 + } else { | 656 + } else { |
| 659 + s->hash_bits = memLevel + 7; | 657 + s->hash_bits = memLevel + 7; |
| 660 + } | 658 + } |
| 661 + | 659 + |
| 662 s->hash_size = 1 << s->hash_bits; | 660 s->hash_size = 1 << s->hash_bits; |
| 663 s->hash_mask = s->hash_size - 1; | 661 s->hash_mask = s->hash_size - 1; |
| 664 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); | 662 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); |
| 665 | 663 |
| 666 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); | 664 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); |
| 667 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); | 665 + s->window = (Bytef *) ZALLOC(strm, |
| 666 + s->w_size + window_padding, |
| 667 + 2*sizeof(Byte)); |
| 668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); | 668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); |
| 669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); | 669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); |
| 670 | 670 |
| 671 @@ -365,11 +401,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) | 671 @@ -418,11 +453,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) |
| 672 str = s->strstart; | 672 str = s->strstart; |
| 673 n = s->lookahead - (MIN_MATCH-1); | 673 n = s->lookahead - (MIN_MATCH-1); |
| 674 do { | 674 do { |
| 675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); | 675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); |
| 676 -#ifndef FASTEST | 676 -#ifndef FASTEST |
| 677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; | 677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; |
| 678 -#endif | 678 -#endif |
| 679 - s->head[s->ins_h] = (Pos)str; | 679 - s->head[s->ins_h] = (Pos)str; |
| 680 + insert_string(s, str); | 680 + insert_string(s, str); |
| 681 str++; | 681 str++; |
| 682 } while (--n); | 682 } while (--n); |
| 683 s->strstart = str; | 683 s->strstart = str; |
| 684 @@ -690,7 +722,7 @@ int ZEXPORT deflate (strm, flush) | 684 @@ -848,7 +879,7 @@ int ZEXPORT deflate (strm, flush) |
| 685 if (s->status == INIT_STATE) { | |
| 686 #ifdef GZIP | 685 #ifdef GZIP |
| 687 if (s->wrap == 2) { | 686 if (s->status == GZIP_STATE) { |
| 688 - strm->adler = crc32(0L, Z_NULL, 0); | 687 /* gzip header */ |
| 689 + crc_reset(s); | 688 - strm->adler = crc32(0L, Z_NULL, 0); |
| 690 put_byte(s, 31); | 689 + crc_reset(s); |
| 691 put_byte(s, 139); | 690 put_byte(s, 31); |
| 692 put_byte(s, 8); | 691 put_byte(s, 139); |
| 693 @@ -952,6 +984,7 @@ int ZEXPORT deflate (strm, flush) | 692 put_byte(s, 8); |
| 693 @@ -1049,6 +1080,7 @@ int ZEXPORT deflate (strm, flush) |
| 694 /* Write the trailer */ | 694 /* Write the trailer */ |
| 695 #ifdef GZIP | 695 #ifdef GZIP |
| 696 if (s->wrap == 2) { | 696 if (s->wrap == 2) { |
| 697 + crc_finalize(s); | 697 + crc_finalize(s); |
| 698 put_byte(s, (Byte)(strm->adler & 0xff)); | 698 put_byte(s, (Byte)(strm->adler & 0xff)); |
| 699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); | 699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); |
| 700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); | 700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); |
| 701 @@ -1073,7 +1106,7 @@ int ZEXPORT deflateCopy (dest, source) | 701 @@ -1161,7 +1193,7 @@ int ZEXPORT deflateCopy (dest, source) |
| 702 * allocating a large strm->next_in buffer and copying from it. | 702 * allocating a large strm->next_in buffer and copying from it. |
| 703 * (See also flush_pending()). | 703 * (See also flush_pending()). |
| 704 */ | 704 */ |
| 705 -local int read_buf(strm, buf, size) | 705 -local unsigned read_buf(strm, buf, size) |
| 706 +ZLIB_INTERNAL int read_buf(strm, buf, size) | 706 +ZLIB_INTERNAL unsigned read_buf(strm, buf, size) |
| 707 z_streamp strm; | 707 z_streamp strm; |
| 708 Bytef *buf; | 708 Bytef *buf; |
| 709 unsigned size; | 709 unsigned size; |
| 710 @@ -1085,15 +1118,16 @@ local int read_buf(strm, buf, size) | 710 @@ -1173,15 +1205,16 @@ local unsigned read_buf(strm, buf, size) |
| 711 | 711 |
| 712 strm->avail_in -= len; | 712 strm->avail_in -= len; |
| 713 | 713 |
| 714 - zmemcpy(buf, strm->next_in, len); | 714 - zmemcpy(buf, strm->next_in, len); |
| 715 - if (strm->state->wrap == 1) { | 715 - if (strm->state->wrap == 1) { |
| 716 - strm->adler = adler32(strm->adler, buf, len); | 716 - strm->adler = adler32(strm->adler, buf, len); |
| 717 - } | 717 - } |
| 718 #ifdef GZIP | 718 #ifdef GZIP |
| 719 - else if (strm->state->wrap == 2) { | 719 - else if (strm->state->wrap == 2) { |
| 720 - strm->adler = crc32(strm->adler, buf, len); | 720 - strm->adler = crc32(strm->adler, buf, len); |
| 721 - } | 721 - } |
| 722 + if (strm->state->wrap == 2) | 722 + if (strm->state->wrap == 2) |
| 723 + copy_with_crc(strm, buf, len); | 723 + copy_with_crc(strm, buf, len); |
| 724 + else | 724 + else |
| 725 #endif | 725 #endif |
| 726 + { | 726 + { |
| 727 + zmemcpy(buf, strm->next_in, len); | 727 + zmemcpy(buf, strm->next_in, len); |
| 728 + if (strm->state->wrap == 1) | 728 + if (strm->state->wrap == 1) |
| 729 + strm->adler = adler32(strm->adler, buf, len); | 729 + strm->adler = adler32(strm->adler, buf, len); |
| 730 + } | 730 + } |
| 731 strm->next_in += len; | 731 strm->next_in += len; |
| 732 strm->total_in += len; | 732 strm->total_in += len; |
| 733 | 733 |
| 734 @@ -1387,7 +1421,19 @@ local void check_match(s, start, match, length) | 734 @@ -1479,7 +1512,19 @@ local void check_match(s, start, match, length) |
| 735 * performed for at least two bytes (required for the zip translate_eol | 735 * performed for at least two bytes (required for the zip translate_eol |
| 736 * option -- not supported here). | 736 * option -- not supported here). |
| 737 */ | 737 */ |
| 738 -local void fill_window(s) | 738 -local void fill_window(s) |
| 739 +local void fill_window_c(deflate_state *s); | 739 +local void fill_window_c(deflate_state *s); |
| 740 + | 740 + |
| 741 +local void fill_window(deflate_state *s) | 741 +local void fill_window(deflate_state *s) |
| 742 +{ | 742 +{ |
| 743 + if (x86_cpu_enable_simd) { | 743 + if (x86_cpu_enable_simd) { |
| 744 + fill_window_sse(s); | 744 + fill_window_sse(s); |
| 745 + return; | 745 + return; |
| 746 + } | 746 + } |
| 747 + | 747 + |
| 748 + fill_window_c(s); | 748 + fill_window_c(s); |
| 749 +} | 749 +} |
| 750 + | 750 + |
| 751 +local void fill_window_c(s) | 751 +local void fill_window_c(s) |
| 752 deflate_state *s; | 752 deflate_state *s; |
| 753 { | 753 { |
| 754 register unsigned n, m; | 754 unsigned n; |
| 755 @@ -1651,7 +1697,7 @@ local block_state deflate_fast(s, flush) | 755 @@ -1847,7 +1892,7 @@ local block_state deflate_fast(s, flush) |
| 756 */ | 756 */ |
| 757 hash_head = NIL; | 757 hash_head = NIL; |
| 758 if (s->lookahead >= MIN_MATCH) { | 758 if (s->lookahead >= MIN_MATCH) { |
| 759 - INSERT_STRING(s, s->strstart, hash_head); | 759 - INSERT_STRING(s, s->strstart, hash_head); |
| 760 + hash_head = insert_string(s, s->strstart); | 760 + hash_head = insert_string(s, s->strstart); |
| 761 } | 761 } |
| 762 | 762 |
| 763 /* Find the longest match, discarding those <= prev_length. | 763 /* Find the longest match, discarding those <= prev_length. |
| 764 @@ -1682,7 +1728,7 @@ local block_state deflate_fast(s, flush) | 764 @@ -1878,7 +1923,7 @@ local block_state deflate_fast(s, flush) |
| 765 s->match_length--; /* string at strstart already in table */ | 765 s->match_length--; /* string at strstart already in table */ |
| 766 do { | 766 do { |
| 767 s->strstart++; | 767 s->strstart++; |
| 768 - INSERT_STRING(s, s->strstart, hash_head); | 768 - INSERT_STRING(s, s->strstart, hash_head); |
| 769 + hash_head = insert_string(s, s->strstart); | 769 + hash_head = insert_string(s, s->strstart); |
| 770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are | 770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are |
| 771 * always MIN_MATCH bytes ahead. | 771 * always MIN_MATCH bytes ahead. |
| 772 */ | 772 */ |
| 773 @@ -1754,7 +1800,7 @@ local block_state deflate_slow(s, flush) | 773 @@ -1950,7 +1995,7 @@ local block_state deflate_slow(s, flush) |
| 774 */ | 774 */ |
| 775 hash_head = NIL; | 775 hash_head = NIL; |
| 776 if (s->lookahead >= MIN_MATCH) { | 776 if (s->lookahead >= MIN_MATCH) { |
| 777 - INSERT_STRING(s, s->strstart, hash_head); | 777 - INSERT_STRING(s, s->strstart, hash_head); |
| 778 + hash_head = insert_string(s, s->strstart); | 778 + hash_head = insert_string(s, s->strstart); |
| 779 } | 779 } |
| 780 | 780 |
| 781 /* Find the longest match, discarding those <= prev_length. | 781 /* Find the longest match, discarding those <= prev_length. |
| 782 @@ -1805,7 +1851,7 @@ local block_state deflate_slow(s, flush) | 782 @@ -2001,7 +2046,7 @@ local block_state deflate_slow(s, flush) |
| 783 s->prev_length -= 2; | 783 s->prev_length -= 2; |
| 784 do { | 784 do { |
| 785 if (++s->strstart <= max_insert) { | 785 if (++s->strstart <= max_insert) { |
| 786 - INSERT_STRING(s, s->strstart, hash_head); | 786 - INSERT_STRING(s, s->strstart, hash_head); |
| 787 + hash_head = insert_string(s, s->strstart); | 787 + hash_head = insert_string(s, s->strstart); |
| 788 } | 788 } |
| 789 } while (--s->prev_length != 0); | 789 } while (--s->prev_length != 0); |
| 790 s->match_available = 0; | 790 s->match_available = 0; |
| 791 @@ -1965,3 +2011,37 @@ local block_state deflate_huff(s, flush) | 791 @@ -2161,3 +2206,37 @@ local block_state deflate_huff(s, flush) |
| 792 FLUSH_BLOCK(s, 0); | 792 FLUSH_BLOCK(s, 0); |
| 793 return block_done; | 793 return block_done; |
| 794 } | 794 } |
| 795 + | 795 + |
| 796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will | 796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will |
| 797 + * use intrinsic without extra params | 797 + * use intrinsic without extra params |
| 798 + */ | 798 + */ |
| 799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) | 799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) |
| 800 +{ | 800 +{ |
| 801 + Pos ret; | 801 + Pos ret; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 819 +#else | 819 +#else |
| 820 + /* This should never happen */ | 820 + /* This should never happen */ |
| 821 + assert(0); | 821 + assert(0); |
| 822 +#endif | 822 +#endif |
| 823 + | 823 + |
| 824 + ret = s->head[h & s->hash_mask]; | 824 + ret = s->head[h & s->hash_mask]; |
| 825 + s->head[h & s->hash_mask] = str; | 825 + s->head[h & s->hash_mask] = str; |
| 826 + s->prev[str & s->w_mask] = ret; | 826 + s->prev[str & s->w_mask] = ret; |
| 827 + return ret; | 827 + return ret; |
| 828 +} | 828 +} |
| 829 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h | 829 diff --git a/deflate.h b/deflate.h |
| 830 index ce0299edd191..022d9c326ecc 100644 | 830 index 23ecdd312bc0..ab56df7663b6 100644 |
| 831 --- a/third_party/zlib/deflate.h | 831 --- a/deflate.h |
| 832 +++ b/third_party/zlib/deflate.h | 832 +++ b/deflate.h |
| 833 @@ -106,7 +106,7 @@ typedef struct internal_state { | 833 @@ -109,7 +109,7 @@ typedef struct internal_state { |
| 834 uInt gzindex; /* where in extra, name, or comment */ | 834 ulg gzindex; /* where in extra, name, or comment */ |
| 835 Byte method; /* can only be DEFLATED */ | 835 Byte method; /* can only be DEFLATED */ |
| 836 int last_flush; /* value of flush param for previous deflate call */ | 836 int last_flush; /* value of flush param for previous deflate call */ |
| 837 - | 837 - |
| 838 + unsigned zalign(16) crc0[4 * 5]; | 838 + unsigned zalign(16) crc0[4 * 5]; |
| 839 /* used by deflate.c: */ | 839 /* used by deflate.c: */ |
| 840 | 840 |
| 841 uInt w_size; /* LZ77 window size (32K by default) */ | 841 uInt w_size; /* LZ77 window size (32K by default) */ |
| 842 @@ -343,4 +343,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, | 842 @@ -346,4 +346,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, |
| 843 flush = _tr_tally(s, distance, length) | 843 flush = _tr_tally(s, distance, length) |
| 844 #endif | 844 #endif |
| 845 | 845 |
| 846 +/* Functions that are SIMD optimised on x86 */ | 846 +/* Functions that are SIMD optimised on x86 */ |
| 847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); | 847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); |
| 848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, | 848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, |
| 849 + unsigned char* dst, | 849 + unsigned char* dst, |
| 850 + const unsigned char* src, | 850 + const unsigned char* src, |
| 851 + long len); | 851 + long len); |
| 852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); | 852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); |
| 853 + | 853 + |
| 854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); | 854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); |
| 855 + | 855 + |
| 856 #endif /* DEFLATE_H */ | 856 #endif /* DEFLATE_H */ |
| 857 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c | 857 diff --git a/fill_window_sse.c b/fill_window_sse.c |
| 858 index e69de29bb2d1..949ccce1ba9c 100644 | 858 new file mode 100644 |
| 859 --- a/third_party/zlib/fill_window_sse.c | 859 index 000000000000..949ccce1ba9c |
| 860 +++ b/third_party/zlib/fill_window_sse.c | 860 --- /dev/null |
| 861 +++ b/fill_window_sse.c |
| 861 @@ -0,0 +1,175 @@ | 862 @@ -0,0 +1,175 @@ |
| 862 +/* | 863 +/* |
| 863 + * Fill Window with SSE2-optimized hash shifting | 864 + * Fill Window with SSE2-optimized hash shifting |
| 864 + * | 865 + * |
| 865 + * Copyright (C) 2013 Intel Corporation | 866 + * Copyright (C) 2013 Intel Corporation |
| 866 + * Authors: | 867 + * Authors: |
| 867 + * Arjan van de Ven <arjan@linux.intel.com> | 868 + * Arjan van de Ven <arjan@linux.intel.com> |
| 868 + * Jim Kukunas <james.t.kukunas@linux.intel.com> | 869 + * Jim Kukunas <james.t.kukunas@linux.intel.com> |
| 869 + * | 870 + * |
| 870 + * For conditions of distribution and use, see copyright notice in zlib.h | 871 + * For conditions of distribution and use, see copyright notice in zlib.h |
| (...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1027 + if (init > s->window_size - s->high_water) | 1028 + if (init > s->window_size - s->high_water) |
| 1028 + init = s->window_size - s->high_water; | 1029 + init = s->window_size - s->high_water; |
| 1029 + zmemzero(s->window + s->high_water, (unsigned)init); | 1030 + zmemzero(s->window + s->high_water, (unsigned)init); |
| 1030 + s->high_water += init; | 1031 + s->high_water += init; |
| 1031 + } | 1032 + } |
| 1032 + } | 1033 + } |
| 1033 + | 1034 + |
| 1034 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, | 1035 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, |
| 1035 + "not enough room for search"); | 1036 + "not enough room for search"); |
| 1036 +} | 1037 +} |
| 1037 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h | 1038 diff --git a/names.h b/names.h |
| 1038 index d7e9f91c5a12..10aca44b29a4 100644 | 1039 index f18df5684dc5..3436baa4eb57 100644 |
| 1039 --- a/third_party/zlib/mozzconf.h | 1040 --- a/names.h |
| 1040 +++ b/third_party/zlib/mozzconf.h | 1041 +++ b/names.h |
| 1041 @@ -169,6 +169,14 @@ | 1042 @@ -152,4 +152,16 @@ |
| 1042 #define inflateResetKeep MOZ_Z_inflateResetKeep | 1043 /* An exported symbol that isn't handled by Z_PREFIX in zconf.h */ |
| 1043 #define gzopen_w MOZ_Z_gzopen_w | 1044 #define z_errmsg Cr_z_z_errmsg |
| 1044 | 1045 |
| 1045 +/* Chromium-specific modifications */ | 1046 +/* Symbols added in simd.patch */ |
| 1046 +#define copy_with_crc MOZ_Z__copy_with_crc | 1047 +#define copy_with_crc Cr_z_copy_with_crc |
| 1047 +#define crc_finalize MOZ_Z__crc_finalize | 1048 +#define crc_finalize Cr_z_crc_finalize |
| 1048 +#define crc_reset MOZ_Z__crc_reset | 1049 +#define crc_fold_512to32 Cr_z_crc_fold_512to32 |
| 1049 +// read_buf used to be local, but this was changed in simd.patch. | 1050 +#define crc_fold_copy Cr_z_crc_fold_copy |
| 1050 +#define read_buf MOZ_Z__read_buf | 1051 +#define crc_fold_init Cr_z_crc_fold_init |
| 1051 +#define x86_check_features MOZ_Z__x86_check_features | 1052 +#define crc_reset Cr_z_crc_reset |
| 1053 +#define fill_window_sse Cr_z_fill_window_sse |
| 1054 +#define read_buf Cr_z_read_buf |
| 1055 +#define x86_check_features Cr_z_x86_check_features |
| 1056 +#define x86_cpu_enable_simd Cr_z_x86_cpu_enable_simd |
| 1052 + | 1057 + |
| 1053 /* Mangle Byte types except on Mac. */ | 1058 #endif /* THIRD_PARTY_ZLIB_NAMES_H_ */ |
| 1054 #if !defined(__MACTYPES__) | 1059 diff --git a/simd_stub.c b/simd_stub.c |
| 1055 #define Byte MOZ_Z_Byte | 1060 new file mode 100644 |
| 1056 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c | 1061 index 000000000000..c6d46051498f |
| 1057 index e69de29bb2d1..796f1f63f690 100644 | 1062 --- /dev/null |
| 1058 --- a/third_party/zlib/simd_stub.c | 1063 +++ b/simd_stub.c |
| 1059 +++ b/third_party/zlib/simd_stub.c | |
| 1060 @@ -0,0 +1,35 @@ | 1064 @@ -0,0 +1,35 @@ |
| 1061 +/* simd_stub.c -- stub implementations | 1065 +/* simd_stub.c -- stub implementations |
| 1062 +* Copyright (C) 2014 Intel Corporation | 1066 +* Copyright (C) 2014 Intel Corporation |
| 1063 +* For conditions of distribution and use, see copyright notice in zlib.h | 1067 +* For conditions of distribution and use, see copyright notice in zlib.h |
| 1064 +*/ | 1068 +*/ |
| 1065 +#include <assert.h> | 1069 +#include <assert.h> |
| 1066 + | 1070 + |
| 1067 +#include "deflate.h" | 1071 +#include "deflate.h" |
| 1068 +#include "x86.h" | 1072 +#include "x86.h" |
| 1069 + | 1073 + |
| 1070 +int x86_cpu_enable_simd = 0; | 1074 +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; |
| 1071 + | 1075 + |
| 1072 +void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { | 1076 +void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { |
| 1073 + assert(0); | 1077 + assert(0); |
| 1074 +} | 1078 +} |
| 1075 + | 1079 + |
| 1076 +void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, | 1080 +void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, |
| 1077 + unsigned char *dst, | 1081 + unsigned char *dst, |
| 1078 + const unsigned char *src, | 1082 + const unsigned char *src, |
| 1079 + long len) { | 1083 + long len) { |
| 1080 + assert(0); | 1084 + assert(0); |
| 1081 +} | 1085 +} |
| 1082 + | 1086 + |
| 1083 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { | 1087 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { |
| 1084 + assert(0); | 1088 + assert(0); |
| 1085 + return 0; | 1089 + return 0; |
| 1086 +} | 1090 +} |
| 1087 + | 1091 + |
| 1088 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) | 1092 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) |
| 1089 +{ | 1093 +{ |
| 1090 + assert(0); | 1094 + assert(0); |
| 1091 +} | 1095 +} |
| 1092 + | 1096 + |
| 1093 +void x86_check_features(void) | 1097 +void x86_check_features(void) |
| 1094 +{ | 1098 +{ |
| 1095 +} | 1099 +} |
| 1096 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c | 1100 diff --git a/x86.c b/x86.c |
| 1097 index e69de29bb2d1..e6532fd10ddb 100644 | 1101 new file mode 100644 |
| 1098 --- a/third_party/zlib/x86.c | 1102 index 000000000000..e56fe8b85a39 |
| 1099 +++ b/third_party/zlib/x86.c | 1103 --- /dev/null |
| 1100 @@ -0,0 +1,91 @@ | 1104 +++ b/x86.c |
| 1105 @@ -0,0 +1,92 @@ |
| 1101 +/* | 1106 +/* |
| 1102 + * x86 feature check | 1107 + * x86 feature check |
| 1103 + * | 1108 + * |
| 1104 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 1109 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
| 1105 + * Author: | 1110 + * Author: |
| 1106 + * Jim Kukunas | 1111 + * Jim Kukunas |
| 1107 + * | 1112 + * |
| 1108 + * For conditions of distribution and use, see copyright notice in zlib.h | 1113 + * For conditions of distribution and use, see copyright notice in zlib.h |
| 1109 + */ | 1114 + */ |
| 1110 + | 1115 + |
| 1111 +#include "x86.h" | 1116 +#include "x86.h" |
| 1117 +#include "zutil.h" |
| 1112 + | 1118 + |
| 1113 +int x86_cpu_enable_simd = 0; | 1119 +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; |
| 1114 + | 1120 + |
| 1115 +#ifndef _MSC_VER | 1121 +#ifndef _MSC_VER |
| 1116 +#include <pthread.h> | 1122 +#include <pthread.h> |
| 1117 + | 1123 + |
| 1118 +pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; | 1124 +pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; |
| 1119 +static void _x86_check_features(void); | 1125 +static void _x86_check_features(void); |
| 1120 + | 1126 + |
| 1121 +void x86_check_features(void) | 1127 +void x86_check_features(void) |
| 1122 +{ | 1128 +{ |
| 1123 + pthread_once(&cpu_check_inited_once, _x86_check_features); | 1129 + pthread_once(&cpu_check_inited_once, _x86_check_features); |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1182 + x86_cpu_has_sse2 = regs[3] & 0x4000000; | 1188 + x86_cpu_has_sse2 = regs[3] & 0x4000000; |
| 1183 + x86_cpu_has_sse42= regs[2] & 0x100000; | 1189 + x86_cpu_has_sse42= regs[2] & 0x100000; |
| 1184 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; | 1190 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; |
| 1185 + | 1191 + |
| 1186 + x86_cpu_enable_simd = x86_cpu_has_sse2 && | 1192 + x86_cpu_enable_simd = x86_cpu_has_sse2 && |
| 1187 + x86_cpu_has_sse42 && | 1193 + x86_cpu_has_sse42 && |
| 1188 + x86_cpu_has_pclmulqdq; | 1194 + x86_cpu_has_pclmulqdq; |
| 1189 + return TRUE; | 1195 + return TRUE; |
| 1190 +} | 1196 +} |
| 1191 +#endif /* _MSC_VER */ | 1197 +#endif /* _MSC_VER */ |
| 1192 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h | 1198 diff --git a/x86.h b/x86.h |
| 1193 index e69de29bb2d1..ebcf10ab09d2 100644 | 1199 new file mode 100644 |
| 1194 --- a/third_party/zlib/x86.h | 1200 index 000000000000..ebcf10ab09d2 |
| 1195 +++ b/third_party/zlib/x86.h | 1201 --- /dev/null |
| 1202 +++ b/x86.h |
| 1196 @@ -0,0 +1,15 @@ | 1203 @@ -0,0 +1,15 @@ |
| 1197 +/* x86.h -- check for x86 CPU features | 1204 +/* x86.h -- check for x86 CPU features |
| 1198 +* Copyright (C) 2013 Intel Corporation Jim Kukunas | 1205 +* Copyright (C) 2013 Intel Corporation Jim Kukunas |
| 1199 +* For conditions of distribution and use, see copyright notice in zlib.h | 1206 +* For conditions of distribution and use, see copyright notice in zlib.h |
| 1200 +*/ | 1207 +*/ |
| 1201 + | 1208 + |
| 1202 +#ifndef X86_H | 1209 +#ifndef X86_H |
| 1203 +#define X86_H | 1210 +#define X86_H |
| 1204 + | 1211 + |
| 1205 +#include "zlib.h" | 1212 +#include "zlib.h" |
| 1206 + | 1213 + |
| 1207 +extern int x86_cpu_enable_simd; | 1214 +extern int x86_cpu_enable_simd; |
| 1208 + | 1215 + |
| 1209 +void x86_check_features(void); | 1216 +void x86_check_features(void); |
| 1210 + | 1217 + |
| 1211 +#endif /* X86_H */ | 1218 +#endif /* X86_H */ |
| 1219 diff --git a/zutil.h b/zutil.h |
| 1220 index 80375b8b6109..4425bcf75eb3 100644 |
| 1221 --- a/zutil.h |
| 1222 +++ b/zutil.h |
| 1223 @@ -283,4 +283,10 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ |
| 1224 #define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ |
| 1225 (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) |
| 1226 |
| 1227 +#ifdef _MSC_VER |
| 1228 +#define zalign(x) __declspec(align(x)) |
| 1229 +#else |
| 1230 +#define zalign(x) __attribute__((aligned((x)))) |
| 1231 +#endif |
| 1232 + |
| 1233 #endif /* ZUTIL_H */ |
| OLD | NEW |