| OLD | NEW |
| 1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c | 1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c |
| 2 index 979a719..09228ed 100644 | 2 index 979a7190a3ca..09228ed9c76f 100644 |
| 3 --- a/third_party/zlib/crc32.c | 3 --- a/third_party/zlib/crc32.c |
| 4 +++ b/third_party/zlib/crc32.c | 4 +++ b/third_party/zlib/crc32.c |
| 5 @@ -28,6 +28,8 @@ | 5 @@ -28,6 +28,8 @@ |
| 6 # endif /* !DYNAMIC_CRC_TABLE */ | 6 # endif /* !DYNAMIC_CRC_TABLE */ |
| 7 #endif /* MAKECRCH */ | 7 #endif /* MAKECRCH */ |
| 8 | 8 |
| 9 +#include "deflate.h" | 9 +#include "deflate.h" |
| 10 +#include "x86.h" | 10 +#include "x86.h" |
| 11 #include "zutil.h" /* for STDC and FAR definitions */ | 11 #include "zutil.h" /* for STDC and FAR definitions */ |
| 12 | 12 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) | 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) |
| 35 +{ | 35 +{ |
| 36 + if (x86_cpu_enable_simd) { | 36 + if (x86_cpu_enable_simd) { |
| 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); | 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); |
| 38 + return; | 38 + return; |
| 39 + } | 39 + } |
| 40 + zmemcpy(dst, strm->next_in, size); | 40 + zmemcpy(dst, strm->next_in, size); |
| 41 + strm->adler = crc32(strm->adler, dst, size); | 41 + strm->adler = crc32(strm->adler, dst, size); |
| 42 +} | 42 +} |
| 43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c | 43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c |
| 44 new file mode 100644 | 44 index e69de29bb2d1..48d77744aaf4 100644 |
| 45 index 0000000..48d7774 | 45 --- a/third_party/zlib/crc_folding.c |
| 46 --- /dev/null | |
| 47 +++ b/third_party/zlib/crc_folding.c | 46 +++ b/third_party/zlib/crc_folding.c |
| 48 @@ -0,0 +1,493 @@ | 47 @@ -0,0 +1,493 @@ |
| 49 +/* | 48 +/* |
| 50 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 49 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
| 51 + * instruction. | 50 + * instruction. |
| 52 + * | 51 + * |
| 53 + * A white paper describing this algorithm can be found at: | 52 + * A white paper describing this algorithm can be found at: |
| 54 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 53 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
| 55 + * | 54 + * |
| 56 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 55 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
| (...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 533 + xmm_crc2 = xmm_crc3; | 532 + xmm_crc2 = xmm_crc3; |
| 534 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 533 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
| 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 534 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
| 536 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
| 537 + | 536 + |
| 538 + crc = _mm_extract_epi32(xmm_crc3, 2); | 537 + crc = _mm_extract_epi32(xmm_crc3, 2); |
| 539 + return ~crc; | 538 + return ~crc; |
| 540 + CRC_SAVE(s) | 539 + CRC_SAVE(s) |
| 541 +} | 540 +} |
| 542 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c | 541 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c |
| 543 index 7c95b30..59645eb 100644 | 542 index 696957705b75..02fa2f64f83e 100644 |
| 544 --- a/third_party/zlib/deflate.c | 543 --- a/third_party/zlib/deflate.c |
| 545 +++ b/third_party/zlib/deflate.c | 544 +++ b/third_party/zlib/deflate.c |
| 546 @@ -48,8 +48,9 @@ | 545 @@ -48,8 +48,9 @@ |
| 547 */ | 546 */ |
| 548 | 547 |
| 549 /* @(#) $Id$ */ | 548 /* @(#) $Id$ */ |
| 550 - | 549 - |
| 551 +#include <assert.h> | 550 +#include <assert.h> |
| 552 #include "deflate.h" | 551 #include "deflate.h" |
| 553 +#include "x86.h" | 552 +#include "x86.h" |
| 554 | 553 |
| 555 const char deflate_copyright[] = | 554 const char deflate_copyright[] = |
| 556 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; | 555 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; |
| 557 @@ -85,7 +86,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); | 556 @@ -84,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); |
| 558 local void lm_init OF((deflate_state *s)); | 557 local void lm_init OF((deflate_state *s)); |
| 559 local void putShortMSB OF((deflate_state *s, uInt b)); | 558 local void putShortMSB OF((deflate_state *s, uInt b)); |
| 560 local void flush_pending OF((z_streamp strm)); | 559 local void flush_pending OF((z_streamp strm)); |
| 561 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 560 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
| 562 + | 561 + |
| 563 #ifdef ASMV | 562 #ifdef ASMV |
| 564 void match_init OF((void)); /* asm code initialization */ | 563 void match_init OF((void)); /* asm code initialization */ |
| 565 uInt longest_match OF((deflate_state *s, IPos cur_match, int clas)); | 564 uInt longest_match OF((deflate_state *s, IPos cur_match)); |
| 566 @@ -98,6 +99,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, | 565 @@ -97,6 +98,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, |
| 567 int length)); | 566 int length)); |
| 568 #endif | 567 #endif |
| 569 | 568 |
| 570 +/* For fill_window_sse.c to use */ | 569 +/* For fill_window_sse.c to use */ |
| 571 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 570 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
| 572 + | 571 + |
| 573 +/* From crc32.c */ | 572 +/* From crc32.c */ |
| 574 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); | 573 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); |
| 575 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); | 574 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); |
| 576 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); | 575 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); |
| 577 + | 576 + |
| 578 +#ifdef _MSC_VER | 577 +#ifdef _MSC_VER |
| 579 +#define INLINE __inline | 578 +#define INLINE __inline |
| 580 +#else | 579 +#else |
| 581 +#define INLINE inline | 580 +#define INLINE inline |
| 582 +#endif | 581 +#endif |
| 583 + | 582 + |
| 584 +/* Inline optimisation */ | 583 +/* Inline optimisation */ |
| 585 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); | 584 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); |
| 586 + | 585 + |
| 587 /* =========================================================================== | 586 /* =========================================================================== |
| 588 * Local data | 587 * Local data |
| 589 */ | 588 */ |
| 590 @@ -167,7 +185,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ | 589 @@ -166,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ |
| 591 */ | 590 */ |
| 592 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) | 591 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) |
| 593 | 592 |
| 594 - | 593 - |
| 595 /* =========================================================================== | 594 /* =========================================================================== |
| 596 * Insert string str in the dictionary and set match_head to the previous head | 595 * Insert string str in the dictionary and set match_head to the previous head |
| 597 * of the hash chain (the most recent string with same hash key). Return | 596 * of the hash chain (the most recent string with same hash key). Return |
| 598 @@ -178,17 +195,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ | 597 @@ -177,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ |
| 599 * input characters and the first MIN_MATCH bytes of str are valid | 598 * input characters and the first MIN_MATCH bytes of str are valid |
| 600 * (except for the last MIN_MATCH-1 bytes of the input file). | 599 * (except for the last MIN_MATCH-1 bytes of the input file). |
| 601 */ | 600 */ |
| 602 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) | 601 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) |
| 603 +{ | 602 +{ |
| 604 + Pos ret; | 603 + Pos ret; |
| 605 + | 604 + |
| 606 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); | 605 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); |
| 607 #ifdef FASTEST | 606 #ifdef FASTEST |
| 608 -#define INSERT_STRING(s, str, match_head) \ | 607 -#define INSERT_STRING(s, str, match_head) \ |
| (...skipping 16 matching lines...) Expand all Loading... |
| 625 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) | 624 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) |
| 626 +{ | 625 +{ |
| 627 + if (x86_cpu_enable_simd) | 626 + if (x86_cpu_enable_simd) |
| 628 + return insert_string_sse(s, str); | 627 + return insert_string_sse(s, str); |
| 629 + return insert_string_c(s, str); | 628 + return insert_string_c(s, str); |
| 630 +} | 629 +} |
| 631 + | 630 + |
| 632 | 631 |
| 633 /* =========================================================================== | 632 /* =========================================================================== |
| 634 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). | 633 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
| 635 @@ -222,6 +250,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 634 @@ -221,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
| 636 const char *version; | 635 const char *version; |
| 637 int stream_size; | 636 int stream_size; |
| 638 { | 637 { |
| 639 + unsigned window_padding = 8; | 638 + unsigned window_padding = 8; |
| 640 deflate_state *s; | 639 deflate_state *s; |
| 641 int wrap = 1; | 640 int wrap = 1; |
| 642 static const char my_version[] = ZLIB_VERSION; | 641 static const char my_version[] = ZLIB_VERSION; |
| 643 @@ -231,6 +260,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 642 @@ -230,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
| 644 * output size for (length,distance) codes is <= 24 bits. | 643 * output size for (length,distance) codes is <= 24 bits. |
| 645 */ | 644 */ |
| 646 | 645 |
| 647 + x86_check_features(); | 646 + x86_check_features(); |
| 648 + | 647 + |
| 649 if (version == Z_NULL || version[0] != my_version[0] || | 648 if (version == Z_NULL || version[0] != my_version[0] || |
| 650 stream_size != sizeof(z_stream)) { | 649 stream_size != sizeof(z_stream)) { |
| 651 return Z_VERSION_ERROR; | 650 return Z_VERSION_ERROR; |
| 652 @@ -286,12 +317,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, | 651 @@ -285,12 +316,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, |
| 653 s->w_size = 1 << s->w_bits; | 652 s->w_size = 1 << s->w_bits; |
| 654 s->w_mask = s->w_size - 1; | 653 s->w_mask = s->w_size - 1; |
| 655 | 654 |
| 656 - s->hash_bits = memLevel + 7; | 655 - s->hash_bits = memLevel + 7; |
| 657 + if (x86_cpu_enable_simd) { | 656 + if (x86_cpu_enable_simd) { |
| 658 + s->hash_bits = 15; | 657 + s->hash_bits = 15; |
| 659 + } else { | 658 + } else { |
| 660 + s->hash_bits = memLevel + 7; | 659 + s->hash_bits = memLevel + 7; |
| 661 + } | 660 + } |
| 662 + | 661 + |
| 663 s->hash_size = 1 << s->hash_bits; | 662 s->hash_size = 1 << s->hash_bits; |
| 664 s->hash_mask = s->hash_size - 1; | 663 s->hash_mask = s->hash_size - 1; |
| 665 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); | 664 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); |
| 666 | 665 |
| 667 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); | 666 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); |
| 668 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); | 667 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); |
| 669 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); | 668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); |
| 670 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); | 669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); |
| 671 s->class_bitmap = NULL; | 670 |
| 672 @@ -369,11 +405,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) | 671 @@ -365,11 +401,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) |
| 673 str = s->strstart; | 672 str = s->strstart; |
| 674 n = s->lookahead - (MIN_MATCH-1); | 673 n = s->lookahead - (MIN_MATCH-1); |
| 675 do { | 674 do { |
| 676 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); | 675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); |
| 677 -#ifndef FASTEST | 676 -#ifndef FASTEST |
| 678 - s->prev[str & s->w_mask] = s->head[s->ins_h]; | 677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; |
| 679 -#endif | 678 -#endif |
| 680 - s->head[s->ins_h] = (Pos)str; | 679 - s->head[s->ins_h] = (Pos)str; |
| 681 + insert_string(s, str); | 680 + insert_string(s, str); |
| 682 str++; | 681 str++; |
| 683 } while (--n); | 682 } while (--n); |
| 684 s->strstart = str; | 683 s->strstart = str; |
| 685 @@ -696,7 +728,7 @@ int ZEXPORT deflate (strm, flush) | 684 @@ -690,7 +722,7 @@ int ZEXPORT deflate (strm, flush) |
| 686 if (s->status == INIT_STATE) { | 685 if (s->status == INIT_STATE) { |
| 687 #ifdef GZIP | 686 #ifdef GZIP |
| 688 if (s->wrap == 2) { | 687 if (s->wrap == 2) { |
| 689 - strm->adler = crc32(0L, Z_NULL, 0); | 688 - strm->adler = crc32(0L, Z_NULL, 0); |
| 690 + crc_reset(s); | 689 + crc_reset(s); |
| 691 put_byte(s, 31); | 690 put_byte(s, 31); |
| 692 put_byte(s, 139); | 691 put_byte(s, 139); |
| 693 put_byte(s, 8); | 692 put_byte(s, 8); |
| 694 @@ -975,6 +1007,7 @@ int ZEXPORT deflate (strm, flush) | 693 @@ -952,6 +984,7 @@ int ZEXPORT deflate (strm, flush) |
| 695 /* Write the trailer */ | 694 /* Write the trailer */ |
| 696 #ifdef GZIP | 695 #ifdef GZIP |
| 697 if (s->wrap == 2) { | 696 if (s->wrap == 2) { |
| 698 + crc_finalize(s); | 697 + crc_finalize(s); |
| 699 put_byte(s, (Byte)(strm->adler & 0xff)); | 698 put_byte(s, (Byte)(strm->adler & 0xff)); |
| 700 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); | 699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); |
| 701 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); | 700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); |
| 702 @@ -1097,7 +1130,7 @@ int ZEXPORT deflateCopy (dest, source) | 701 @@ -1073,7 +1106,7 @@ int ZEXPORT deflateCopy (dest, source) |
| 703 * allocating a large strm->next_in buffer and copying from it. | 702 * allocating a large strm->next_in buffer and copying from it. |
| 704 * (See also flush_pending()). | 703 * (See also flush_pending()). |
| 705 */ | 704 */ |
| 706 -local int read_buf(strm, buf, size) | 705 -local int read_buf(strm, buf, size) |
| 707 +ZLIB_INTERNAL int read_buf(strm, buf, size) | 706 +ZLIB_INTERNAL int read_buf(strm, buf, size) |
| 708 z_streamp strm; | 707 z_streamp strm; |
| 709 Bytef *buf; | 708 Bytef *buf; |
| 710 unsigned size; | 709 unsigned size; |
| 711 @@ -1109,15 +1142,16 @@ local int read_buf(strm, buf, size) | 710 @@ -1085,15 +1118,16 @@ local int read_buf(strm, buf, size) |
| 712 | 711 |
| 713 strm->avail_in -= len; | 712 strm->avail_in -= len; |
| 714 | 713 |
| 715 - zmemcpy(buf, strm->next_in, len); | 714 - zmemcpy(buf, strm->next_in, len); |
| 716 - if (strm->state->wrap == 1) { | 715 - if (strm->state->wrap == 1) { |
| 717 - strm->adler = adler32(strm->adler, buf, len); | 716 - strm->adler = adler32(strm->adler, buf, len); |
| 718 - } | 717 - } |
| 719 #ifdef GZIP | 718 #ifdef GZIP |
| 720 - else if (strm->state->wrap == 2) { | 719 - else if (strm->state->wrap == 2) { |
| 721 - strm->adler = crc32(strm->adler, buf, len); | 720 - strm->adler = crc32(strm->adler, buf, len); |
| 722 - } | 721 - } |
| 723 + if (strm->state->wrap == 2) | 722 + if (strm->state->wrap == 2) |
| 724 + copy_with_crc(strm, buf, len); | 723 + copy_with_crc(strm, buf, len); |
| 725 + else | 724 + else |
| 726 #endif | 725 #endif |
| 727 + { | 726 + { |
| 728 + zmemcpy(buf, strm->next_in, len); | 727 + zmemcpy(buf, strm->next_in, len); |
| 729 + if (strm->state->wrap == 1) | 728 + if (strm->state->wrap == 1) |
| 730 + strm->adler = adler32(strm->adler, buf, len); | 729 + strm->adler = adler32(strm->adler, buf, len); |
| 731 + } | 730 + } |
| 732 strm->next_in += len; | 731 strm->next_in += len; |
| 733 strm->total_in += len; | 732 strm->total_in += len; |
| 734 | 733 |
| 735 @@ -1530,7 +1564,19 @@ local void check_match(s, start, match, length) | 734 @@ -1387,7 +1421,19 @@ local void check_match(s, start, match, length) |
| 736 * performed for at least two bytes (required for the zip translate_eol | 735 * performed for at least two bytes (required for the zip translate_eol |
| 737 * option -- not supported here). | 736 * option -- not supported here). |
| 738 */ | 737 */ |
| 739 -local void fill_window(s) | 738 -local void fill_window(s) |
| 740 +local void fill_window_c(deflate_state *s); | 739 +local void fill_window_c(deflate_state *s); |
| 741 + | 740 + |
| 742 +local void fill_window(deflate_state *s) | 741 +local void fill_window(deflate_state *s) |
| 743 +{ | 742 +{ |
| 744 + if (x86_cpu_enable_simd) { | 743 + if (x86_cpu_enable_simd) { |
| 745 + fill_window_sse(s); | 744 + fill_window_sse(s); |
| 746 + return; | 745 + return; |
| 747 + } | 746 + } |
| 748 + | 747 + |
| 749 + fill_window_c(s); | 748 + fill_window_c(s); |
| 750 +} | 749 +} |
| 751 + | 750 + |
| 752 +local void fill_window_c(s) | 751 +local void fill_window_c(s) |
| 753 deflate_state *s; | 752 deflate_state *s; |
| 754 { | 753 { |
| 755 register unsigned n, m; | 754 register unsigned n, m; |
| 756 @@ -1818,7 +1864,7 @@ local block_state deflate_fast(s, flush, clas) | 755 @@ -1651,7 +1697,7 @@ local block_state deflate_fast(s, flush) |
| 757 */ | 756 */ |
| 758 hash_head = NIL; | 757 hash_head = NIL; |
| 759 if (s->lookahead >= MIN_MATCH) { | 758 if (s->lookahead >= MIN_MATCH) { |
| 760 - INSERT_STRING(s, s->strstart, hash_head); | 759 - INSERT_STRING(s, s->strstart, hash_head); |
| 761 + hash_head = insert_string(s, s->strstart); | 760 + hash_head = insert_string(s, s->strstart); |
| 762 } | 761 } |
| 763 | 762 |
| 764 /* Find the longest match, discarding those <= prev_length. | 763 /* Find the longest match, discarding those <= prev_length. |
| 765 @@ -1849,7 +1895,7 @@ local block_state deflate_fast(s, flush, clas) | 764 @@ -1682,7 +1728,7 @@ local block_state deflate_fast(s, flush) |
| 766 s->match_length--; /* string at strstart already in table */ | 765 s->match_length--; /* string at strstart already in table */ |
| 767 do { | 766 do { |
| 768 s->strstart++; | 767 s->strstart++; |
| 769 - INSERT_STRING(s, s->strstart, hash_head); | 768 - INSERT_STRING(s, s->strstart, hash_head); |
| 770 + hash_head = insert_string(s, s->strstart); | 769 + hash_head = insert_string(s, s->strstart); |
| 771 /* strstart never exceeds WSIZE-MAX_MATCH, so there are | 770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are |
| 772 * always MIN_MATCH bytes ahead. | 771 * always MIN_MATCH bytes ahead. |
| 773 */ | 772 */ |
| 774 @@ -1934,7 +1980,7 @@ local block_state deflate_slow(s, flush, clas) | 773 @@ -1754,7 +1800,7 @@ local block_state deflate_slow(s, flush) |
| 775 */ | 774 */ |
| 776 hash_head = NIL; | 775 hash_head = NIL; |
| 777 if (s->lookahead >= MIN_MATCH) { | 776 if (s->lookahead >= MIN_MATCH) { |
| 778 - INSERT_STRING(s, s->strstart, hash_head); | 777 - INSERT_STRING(s, s->strstart, hash_head); |
| 779 + hash_head = insert_string(s, s->strstart); | 778 + hash_head = insert_string(s, s->strstart); |
| 780 } | 779 } |
| 781 | 780 |
| 782 /* Find the longest match, discarding those <= prev_length. | 781 /* Find the longest match, discarding those <= prev_length. |
| 783 @@ -2003,7 +2049,7 @@ local block_state deflate_slow(s, flush, clas) | 782 @@ -1805,7 +1851,7 @@ local block_state deflate_slow(s, flush) |
| 784 s->prev_length -= 2; | 783 s->prev_length -= 2; |
| 785 do { | 784 do { |
| 786 if (++s->strstart <= max_insert) { | 785 if (++s->strstart <= max_insert) { |
| 787 - INSERT_STRING(s, s->strstart, hash_head); | 786 - INSERT_STRING(s, s->strstart, hash_head); |
| 788 + hash_head = insert_string(s, s->strstart); | 787 + hash_head = insert_string(s, s->strstart); |
| 789 } | 788 } |
| 790 } while (--s->prev_length != 0); | 789 } while (--s->prev_length != 0); |
| 791 s->match_available = 0; | 790 s->match_available = 0; |
| 792 @@ -2163,3 +2209,37 @@ local block_state deflate_huff(s, flush) | 791 @@ -1965,3 +2011,37 @@ local block_state deflate_huff(s, flush) |
| 793 FLUSH_BLOCK(s, 0); | 792 FLUSH_BLOCK(s, 0); |
| 794 return block_done; | 793 return block_done; |
| 795 } | 794 } |
| 796 + | 795 + |
| 797 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will | 796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will |
| 798 + * use intrinsic without extra params | 797 + * use intrinsic without extra params |
| 799 + */ | 798 + */ |
| 800 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) | 799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) |
| 801 +{ | 800 +{ |
| 802 + Pos ret; | 801 + Pos ret; |
| (...skipping 18 matching lines...) Expand all Loading... |
| 821 + /* This should never happen */ | 820 + /* This should never happen */ |
| 822 + assert(0); | 821 + assert(0); |
| 823 +#endif | 822 +#endif |
| 824 + | 823 + |
| 825 + ret = s->head[h & s->hash_mask]; | 824 + ret = s->head[h & s->hash_mask]; |
| 826 + s->head[h & s->hash_mask] = str; | 825 + s->head[h & s->hash_mask] = str; |
| 827 + s->prev[str & s->w_mask] = ret; | 826 + s->prev[str & s->w_mask] = ret; |
| 828 + return ret; | 827 + return ret; |
| 829 +} | 828 +} |
| 830 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h | 829 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h |
| 831 index c795034..c61e4ab 100644 | 830 index ce0299edd191..022d9c326ecc 100644 |
| 832 --- a/third_party/zlib/deflate.h | 831 --- a/third_party/zlib/deflate.h |
| 833 +++ b/third_party/zlib/deflate.h | 832 +++ b/third_party/zlib/deflate.h |
| 834 @@ -109,7 +109,7 @@ typedef struct internal_state { | 833 @@ -106,7 +106,7 @@ typedef struct internal_state { |
| 835 uInt gzindex; /* where in extra, name, or comment */ | 834 uInt gzindex; /* where in extra, name, or comment */ |
| 836 Byte method; /* can only be DEFLATED */ | 835 Byte method; /* can only be DEFLATED */ |
| 837 int last_flush; /* value of flush param for previous deflate call */ | 836 int last_flush; /* value of flush param for previous deflate call */ |
| 838 - | 837 - |
| 839 + unsigned zalign(16) crc0[4 * 5]; | 838 + unsigned zalign(16) crc0[4 * 5]; |
| 840 /* used by deflate.c: */ | 839 /* used by deflate.c: */ |
| 841 | 840 |
| 842 uInt w_size; /* LZ77 window size (32K by default) */ | 841 uInt w_size; /* LZ77 window size (32K by default) */ |
| 843 @@ -348,4 +348,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, | 842 @@ -343,4 +343,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, |
| 844 flush = _tr_tally(s, distance, length) | 843 flush = _tr_tally(s, distance, length) |
| 845 #endif | 844 #endif |
| 846 | 845 |
| 847 +/* Functions that are SIMD optimised on x86 */ | 846 +/* Functions that are SIMD optimised on x86 */ |
| 848 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); | 847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); |
| 849 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, | 848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, |
| 850 + unsigned char* dst, | 849 + unsigned char* dst, |
| 851 + const unsigned char* src, | 850 + const unsigned char* src, |
| 852 + long len); | 851 + long len); |
| 853 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); | 852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); |
| 854 + | 853 + |
| 855 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); | 854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); |
| 856 + | 855 + |
| 857 #endif /* DEFLATE_H */ | 856 #endif /* DEFLATE_H */ |
| 858 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c | 857 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c |
| 859 new file mode 100644 | 858 index e69de29bb2d1..949ccce1ba9c 100644 |
| 860 index 0000000..949ccce | 859 --- a/third_party/zlib/fill_window_sse.c |
| 861 --- /dev/null | |
| 862 +++ b/third_party/zlib/fill_window_sse.c | 860 +++ b/third_party/zlib/fill_window_sse.c |
| 863 @@ -0,0 +1,175 @@ | 861 @@ -0,0 +1,175 @@ |
| 864 +/* | 862 +/* |
| 865 + * Fill Window with SSE2-optimized hash shifting | 863 + * Fill Window with SSE2-optimized hash shifting |
| 866 + * | 864 + * |
| 867 + * Copyright (C) 2013 Intel Corporation | 865 + * Copyright (C) 2013 Intel Corporation |
| 868 + * Authors: | 866 + * Authors: |
| 869 + * Arjan van de Ven <arjan@linux.intel.com> | 867 + * Arjan van de Ven <arjan@linux.intel.com> |
| 870 + * Jim Kukunas <james.t.kukunas@linux.intel.com> | 868 + * Jim Kukunas <james.t.kukunas@linux.intel.com> |
| 871 + * | 869 + * |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1029 + if (init > s->window_size - s->high_water) | 1027 + if (init > s->window_size - s->high_water) |
| 1030 + init = s->window_size - s->high_water; | 1028 + init = s->window_size - s->high_water; |
| 1031 + zmemzero(s->window + s->high_water, (unsigned)init); | 1029 + zmemzero(s->window + s->high_water, (unsigned)init); |
| 1032 + s->high_water += init; | 1030 + s->high_water += init; |
| 1033 + } | 1031 + } |
| 1034 + } | 1032 + } |
| 1035 + | 1033 + |
| 1036 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, | 1034 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, |
| 1037 + "not enough room for search"); | 1035 + "not enough room for search"); |
| 1038 +} | 1036 +} |
| 1037 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h |
| 1038 index d7e9f91c5a12..10aca44b29a4 100644 |
| 1039 --- a/third_party/zlib/mozzconf.h |
| 1040 +++ b/third_party/zlib/mozzconf.h |
| 1041 @@ -169,6 +169,14 @@ |
| 1042 #define inflateResetKeep MOZ_Z_inflateResetKeep |
| 1043 #define gzopen_w MOZ_Z_gzopen_w |
| 1044 |
| 1045 +/* Chromium-specific modifications */ |
| 1046 +#define copy_with_crc MOZ_Z__copy_with_crc |
| 1047 +#define crc_finalize MOZ_Z__crc_finalize |
| 1048 +#define crc_reset MOZ_Z__crc_reset |
| 1049 +// read_buf used to be local, but this was changed in simd.patch. |
| 1050 +#define read_buf MOZ_Z__read_buf |
| 1051 +#define x86_check_features MOZ_Z__x86_check_features |
| 1052 + |
| 1053 /* Mangle Byte types except on Mac. */ |
| 1054 #if !defined(__MACTYPES__) |
| 1055 #define Byte MOZ_Z_Byte |
| 1039 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c | 1056 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c |
| 1040 new file mode 100644 | 1057 index e69de29bb2d1..796f1f63f690 100644 |
| 1041 index 0000000..796f1f6 | 1058 --- a/third_party/zlib/simd_stub.c |
| 1042 --- /dev/null | |
| 1043 +++ b/third_party/zlib/simd_stub.c | 1059 +++ b/third_party/zlib/simd_stub.c |
| 1044 @@ -0,0 +1,35 @@ | 1060 @@ -0,0 +1,35 @@ |
| 1045 +/* simd_stub.c -- stub implementations | 1061 +/* simd_stub.c -- stub implementations |
| 1046 +* Copyright (C) 2014 Intel Corporation | 1062 +* Copyright (C) 2014 Intel Corporation |
| 1047 +* For conditions of distribution and use, see copyright notice in zlib.h | 1063 +* For conditions of distribution and use, see copyright notice in zlib.h |
| 1048 +*/ | 1064 +*/ |
| 1049 +#include <assert.h> | 1065 +#include <assert.h> |
| 1050 + | 1066 + |
| 1051 +#include "deflate.h" | 1067 +#include "deflate.h" |
| 1052 +#include "x86.h" | 1068 +#include "x86.h" |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1071 + | 1087 + |
| 1072 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) | 1088 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) |
| 1073 +{ | 1089 +{ |
| 1074 + assert(0); | 1090 + assert(0); |
| 1075 +} | 1091 +} |
| 1076 + | 1092 + |
| 1077 +void x86_check_features(void) | 1093 +void x86_check_features(void) |
| 1078 +{ | 1094 +{ |
| 1079 +} | 1095 +} |
| 1080 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c | 1096 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c |
| 1081 new file mode 100644 | 1097 index e69de29bb2d1..e6532fd10ddb 100644 |
| 1082 index 0000000..e6532fd | 1098 --- a/third_party/zlib/x86.c |
| 1083 --- /dev/null | |
| 1084 +++ b/third_party/zlib/x86.c | 1099 +++ b/third_party/zlib/x86.c |
| 1085 @@ -0,0 +1,91 @@ | 1100 @@ -0,0 +1,91 @@ |
| 1086 +/* | 1101 +/* |
| 1087 + * x86 feature check | 1102 + * x86 feature check |
| 1088 + * | 1103 + * |
| 1089 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 1104 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
| 1090 + * Author: | 1105 + * Author: |
| 1091 + * Jim Kukunas | 1106 + * Jim Kukunas |
| 1092 + * | 1107 + * |
| 1093 + * For conditions of distribution and use, see copyright notice in zlib.h | 1108 + * For conditions of distribution and use, see copyright notice in zlib.h |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1168 + x86_cpu_has_sse42= regs[2] & 0x100000; | 1183 + x86_cpu_has_sse42= regs[2] & 0x100000; |
| 1169 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; | 1184 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; |
| 1170 + | 1185 + |
| 1171 + x86_cpu_enable_simd = x86_cpu_has_sse2 && | 1186 + x86_cpu_enable_simd = x86_cpu_has_sse2 && |
| 1172 + x86_cpu_has_sse42 && | 1187 + x86_cpu_has_sse42 && |
| 1173 + x86_cpu_has_pclmulqdq; | 1188 + x86_cpu_has_pclmulqdq; |
| 1174 + return TRUE; | 1189 + return TRUE; |
| 1175 +} | 1190 +} |
| 1176 +#endif /* _MSC_VER */ | 1191 +#endif /* _MSC_VER */ |
| 1177 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h | 1192 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h |
| 1178 new file mode 100644 | 1193 index e69de29bb2d1..ebcf10ab09d2 100644 |
| 1179 index 0000000..ac3d180 | 1194 --- a/third_party/zlib/x86.h |
| 1180 --- /dev/null | |
| 1181 +++ b/third_party/zlib/x86.h | 1195 +++ b/third_party/zlib/x86.h |
| 1182 @@ -0,0 +1,15 @@ | 1196 @@ -0,0 +1,15 @@ |
| 1183 +/* x86.h -- check for x86 CPU features | 1197 +/* x86.h -- check for x86 CPU features |
| 1184 +* Copyright (C) 2013 Intel Corporation Jim Kukunas | 1198 +* Copyright (C) 2013 Intel Corporation Jim Kukunas |
| 1185 +* For conditions of distribution and use, see copyright notice in zlib.h | 1199 +* For conditions of distribution and use, see copyright notice in zlib.h |
| 1186 +*/ | 1200 +*/ |
| 1187 + | 1201 + |
| 1188 +#ifndef X86_H | 1202 +#ifndef X86_H |
| 1189 +#define X86_H | 1203 +#define X86_H |
| 1190 + | 1204 + |
| 1191 +#include "zlib.h" | 1205 +#include "zlib.h" |
| 1192 + | 1206 + |
| 1193 +extern int x86_cpu_enable_simd; | 1207 +extern int x86_cpu_enable_simd; |
| 1194 + | 1208 + |
| 1195 +void x86_check_features(void); | 1209 +void x86_check_features(void); |
| 1196 + | 1210 + |
| 1197 +#endif /* X86_H */ | 1211 +#endif /* X86_H */ |
| 1198 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h | |
| 1199 index 06918bc..10aca44 100644 | |
| 1200 --- a/third_party/zlib/mozzconf.h | |
| 1201 +++ b/third_party/zlib/mozzconf.h | |
| 1202 @@ -169,6 +169,14 @@ | |
| 1203 #define inflateResetKeep MOZ_Z_inflateResetKeep | |
| 1204 #define gzopen_w MOZ_Z_gzopen_w | |
| 1205 | |
| 1206 +/* Chromium-specific modifications */ | |
| 1207 +#define copy_with_crc MOZ_Z__copy_with_crc | |
| 1208 +#define crc_finalize MOZ_Z__crc_finalize | |
| 1209 +#define crc_reset MOZ_Z__crc_reset | |
| 1210 +// read_buf used to be local, but this was changed in simd.patch. | |
| 1211 +#define read_buf MOZ_Z__read_buf | |
| 1212 +#define x86_check_features MOZ_Z__x86_check_features | |
| 1213 + | |
| 1214 /* Mangle Byte types except on Mac. */ | |
| 1215 #if !defined(__MACTYPES__) | |
| 1216 #define Byte MOZ_Z_Byte | |
| 1217 -- | |
| 1218 2.7.4 | |
| 1219 | |
| OLD | NEW |