OLD | NEW |
1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c | 1 diff --git a/crc32.c b/crc32.c |
2 index 979a7190a3ca..09228ed9c76f 100644 | 2 index 9580440c0e6b..9162429cc7b4 100644 |
3 --- a/third_party/zlib/crc32.c | 3 --- a/crc32.c |
4 +++ b/third_party/zlib/crc32.c | 4 +++ b/crc32.c |
5 @@ -28,6 +28,8 @@ | 5 @@ -28,6 +28,8 @@ |
6 # endif /* !DYNAMIC_CRC_TABLE */ | 6 # endif /* !DYNAMIC_CRC_TABLE */ |
7 #endif /* MAKECRCH */ | 7 #endif /* MAKECRCH */ |
8 | 8 |
9 +#include "deflate.h" | 9 +#include "deflate.h" |
10 +#include "x86.h" | 10 +#include "x86.h" |
11 #include "zutil.h" /* for STDC and FAR definitions */ | 11 #include "zutil.h" /* for STDC and FAR definitions */ |
12 | 12 |
13 #define local static | 13 /* Definitions for doing the crc four data bytes at a time. */ |
14 @@ -423,3 +425,28 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) | 14 @@ -440,3 +442,28 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) |
15 { | 15 { |
16 return crc32_combine_(crc1, crc2, len2); | 16 return crc32_combine_(crc1, crc2, len2); |
17 } | 17 } |
18 + | 18 + |
19 +ZLIB_INTERNAL void crc_reset(deflate_state *const s) | 19 +ZLIB_INTERNAL void crc_reset(deflate_state *const s) |
20 +{ | 20 +{ |
21 + if (x86_cpu_enable_simd) { | 21 + if (x86_cpu_enable_simd) { |
22 + crc_fold_init(s); | 22 + crc_fold_init(s); |
23 + return; | 23 + return; |
24 + } | 24 + } |
25 + s->strm->adler = crc32(0L, Z_NULL, 0); | 25 + s->strm->adler = crc32(0L, Z_NULL, 0); |
26 +} | 26 +} |
27 + | 27 + |
28 +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) | 28 +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) |
29 +{ | 29 +{ |
30 + if (x86_cpu_enable_simd) | 30 + if (x86_cpu_enable_simd) |
31 + s->strm->adler = crc_fold_512to32(s); | 31 + s->strm->adler = crc_fold_512to32(s); |
32 +} | 32 +} |
33 + | 33 + |
34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) | 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) |
35 +{ | 35 +{ |
36 + if (x86_cpu_enable_simd) { | 36 + if (x86_cpu_enable_simd) { |
37 + crc_fold_copy(strm->state, dst, strm->next_in, size); | 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); |
38 + return; | 38 + return; |
39 + } | 39 + } |
40 + zmemcpy(dst, strm->next_in, size); | 40 + zmemcpy(dst, strm->next_in, size); |
41 + strm->adler = crc32(strm->adler, dst, size); | 41 + strm->adler = crc32(strm->adler, dst, size); |
42 +} | 42 +} |
43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c | 43 diff --git a/crc_folding.c b/crc_folding.c |
44 index e69de29bb2d1..48d77744aaf4 100644 | 44 new file mode 100644 |
45 --- a/third_party/zlib/crc_folding.c | 45 index 000000000000..48d77744aaf4 |
46 +++ b/third_party/zlib/crc_folding.c | 46 --- /dev/null |
| 47 +++ b/crc_folding.c |
47 @@ -0,0 +1,493 @@ | 48 @@ -0,0 +1,493 @@ |
48 +/* | 49 +/* |
49 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 50 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
50 + * instruction. | 51 + * instruction. |
51 + * | 52 + * |
52 + * A white paper describing this algorithm can be found at: | 53 + * A white paper describing this algorithm can be found at: |
53 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 54 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
54 + * | 55 + * |
55 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 56 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
56 + * Authors: | 57 + * Authors: |
(...skipping 474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
531 + | 532 + |
532 + xmm_crc2 = xmm_crc3; | 533 + xmm_crc2 = xmm_crc3; |
533 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 534 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
534 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 536 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
536 + | 537 + |
537 + crc = _mm_extract_epi32(xmm_crc3, 2); | 538 + crc = _mm_extract_epi32(xmm_crc3, 2); |
538 + return ~crc; | 539 + return ~crc; |
539 + CRC_SAVE(s) | 540 + CRC_SAVE(s) |
540 +} | 541 +} |
541 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c | 542 diff --git a/deflate.c b/deflate.c |
542 index 696957705b75..02fa2f64f83e 100644 | 543 index 1ec761448de9..aa0c9c67a6dc 100644 |
543 --- a/third_party/zlib/deflate.c | 544 --- a/deflate.c |
544 +++ b/third_party/zlib/deflate.c | 545 +++ b/deflate.c |
545 @@ -48,8 +48,9 @@ | 546 @@ -48,8 +48,9 @@ |
546 */ | 547 */ |
547 | 548 |
548 /* @(#) $Id$ */ | 549 /* @(#) $Id$ */ |
549 - | 550 - |
550 +#include <assert.h> | 551 +#include <assert.h> |
551 #include "deflate.h" | 552 #include "deflate.h" |
552 +#include "x86.h" | 553 +#include "x86.h" |
553 | 554 |
554 const char deflate_copyright[] = | 555 const char deflate_copyright[] = |
555 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; | 556 " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; |
556 @@ -84,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); | 557 @@ -86,7 +87,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); |
557 local void lm_init OF((deflate_state *s)); | 558 local void lm_init OF((deflate_state *s)); |
558 local void putShortMSB OF((deflate_state *s, uInt b)); | 559 local void putShortMSB OF((deflate_state *s, uInt b)); |
559 local void flush_pending OF((z_streamp strm)); | 560 local void flush_pending OF((z_streamp strm)); |
560 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 561 -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
561 + | 562 +unsigned ZLIB_INTERNAL read_buf OF((z_streamp strm, Bytef *buf, unsigned size))
; |
562 #ifdef ASMV | 563 #ifdef ASMV |
| 564 # pragma message("Assembler code may have bugs -- use at your own risk") |
563 void match_init OF((void)); /* asm code initialization */ | 565 void match_init OF((void)); /* asm code initialization */ |
564 uInt longest_match OF((deflate_state *s, IPos cur_match)); | 566 @@ -100,6 +101,20 @@ local void check_match OF((deflate_state *s, IPos start, I
Pos match, |
565 @@ -97,6 +98,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, | |
566 int length)); | 567 int length)); |
567 #endif | 568 #endif |
568 | 569 |
569 +/* For fill_window_sse.c to use */ | |
570 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | |
571 + | |
572 +/* From crc32.c */ | 570 +/* From crc32.c */ |
573 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); | 571 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); |
574 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); | 572 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); |
575 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); | 573 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); |
576 + | 574 + |
577 +#ifdef _MSC_VER | 575 +#ifdef _MSC_VER |
578 +#define INLINE __inline | 576 +#define INLINE __inline |
579 +#else | 577 +#else |
580 +#define INLINE inline | 578 +#define INLINE inline |
581 +#endif | 579 +#endif |
582 + | 580 + |
583 +/* Inline optimisation */ | 581 +/* Inline optimisation */ |
584 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); | 582 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); |
585 + | 583 + |
586 /* =========================================================================== | 584 /* =========================================================================== |
587 * Local data | 585 * Local data |
588 */ | 586 */ |
589 @@ -166,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ | 587 @@ -162,7 +177,6 @@ local const config configuration_table[10] = { |
590 */ | 588 */ |
591 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) | 589 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) |
592 | 590 |
593 - | 591 - |
594 /* =========================================================================== | 592 /* =========================================================================== |
595 * Insert string str in the dictionary and set match_head to the previous head | 593 * Insert string str in the dictionary and set match_head to the previous head |
596 * of the hash chain (the most recent string with same hash key). Return | 594 * of the hash chain (the most recent string with same hash key). Return |
597 @@ -177,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ | 595 @@ -173,17 +187,28 @@ local const config configuration_table[10] = { |
598 * input characters and the first MIN_MATCH bytes of str are valid | 596 * characters and the first MIN_MATCH bytes of str are valid (except for |
599 * (except for the last MIN_MATCH-1 bytes of the input file). | 597 * the last MIN_MATCH-1 bytes of the input file). |
600 */ | 598 */ |
601 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) | 599 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) |
602 +{ | 600 +{ |
603 + Pos ret; | 601 + Pos ret; |
604 + | 602 + |
605 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); | 603 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); |
606 #ifdef FASTEST | 604 #ifdef FASTEST |
607 -#define INSERT_STRING(s, str, match_head) \ | 605 -#define INSERT_STRING(s, str, match_head) \ |
608 - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ | 606 - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ |
609 - match_head = s->head[s->ins_h], \ | 607 - match_head = s->head[s->ins_h], \ |
(...skipping 14 matching lines...) Expand all Loading... |
624 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) | 622 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) |
625 +{ | 623 +{ |
626 + if (x86_cpu_enable_simd) | 624 + if (x86_cpu_enable_simd) |
627 + return insert_string_sse(s, str); | 625 + return insert_string_sse(s, str); |
628 + return insert_string_c(s, str); | 626 + return insert_string_c(s, str); |
629 +} | 627 +} |
630 + | 628 + |
631 | 629 |
632 /* =========================================================================== | 630 /* =========================================================================== |
633 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). | 631 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
634 @@ -221,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 632 @@ -248,6 +273,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
635 const char *version; | 633 const char *version; |
636 int stream_size; | 634 int stream_size; |
637 { | 635 { |
638 + unsigned window_padding = 8; | 636 + unsigned window_padding = 8; |
639 deflate_state *s; | 637 deflate_state *s; |
640 int wrap = 1; | 638 int wrap = 1; |
641 static const char my_version[] = ZLIB_VERSION; | 639 static const char my_version[] = ZLIB_VERSION; |
642 @@ -230,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 640 @@ -257,6 +283,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
643 * output size for (length,distance) codes is <= 24 bits. | 641 * output size for (length,distance) codes is <= 24 bits. |
644 */ | 642 */ |
645 | 643 |
646 + x86_check_features(); | 644 + x86_check_features(); |
647 + | 645 + |
648 if (version == Z_NULL || version[0] != my_version[0] || | 646 if (version == Z_NULL || version[0] != my_version[0] || |
649 stream_size != sizeof(z_stream)) { | 647 stream_size != sizeof(z_stream)) { |
650 return Z_VERSION_ERROR; | 648 return Z_VERSION_ERROR; |
651 @@ -285,12 +316,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, | 649 @@ -313,12 +341,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, |
652 s->w_size = 1 << s->w_bits; | 650 s->w_size = 1 << s->w_bits; |
653 s->w_mask = s->w_size - 1; | 651 s->w_mask = s->w_size - 1; |
654 | 652 |
655 - s->hash_bits = memLevel + 7; | 653 - s->hash_bits = (uInt)memLevel + 7; |
656 + if (x86_cpu_enable_simd) { | 654 + if (x86_cpu_enable_simd) { |
657 + s->hash_bits = 15; | 655 + s->hash_bits = 15; |
658 + } else { | 656 + } else { |
659 + s->hash_bits = memLevel + 7; | 657 + s->hash_bits = memLevel + 7; |
660 + } | 658 + } |
661 + | 659 + |
662 s->hash_size = 1 << s->hash_bits; | 660 s->hash_size = 1 << s->hash_bits; |
663 s->hash_mask = s->hash_size - 1; | 661 s->hash_mask = s->hash_size - 1; |
664 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); | 662 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); |
665 | 663 |
666 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); | 664 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); |
667 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); | 665 + s->window = (Bytef *) ZALLOC(strm, |
| 666 + s->w_size + window_padding, |
| 667 + 2*sizeof(Byte)); |
668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); | 668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); |
669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); | 669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); |
670 | 670 |
671 @@ -365,11 +401,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) | 671 @@ -418,11 +453,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) |
672 str = s->strstart; | 672 str = s->strstart; |
673 n = s->lookahead - (MIN_MATCH-1); | 673 n = s->lookahead - (MIN_MATCH-1); |
674 do { | 674 do { |
675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); | 675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); |
676 -#ifndef FASTEST | 676 -#ifndef FASTEST |
677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; | 677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; |
678 -#endif | 678 -#endif |
679 - s->head[s->ins_h] = (Pos)str; | 679 - s->head[s->ins_h] = (Pos)str; |
680 + insert_string(s, str); | 680 + insert_string(s, str); |
681 str++; | 681 str++; |
682 } while (--n); | 682 } while (--n); |
683 s->strstart = str; | 683 s->strstart = str; |
684 @@ -690,7 +722,7 @@ int ZEXPORT deflate (strm, flush) | 684 @@ -848,7 +879,7 @@ int ZEXPORT deflate (strm, flush) |
685 if (s->status == INIT_STATE) { | |
686 #ifdef GZIP | 685 #ifdef GZIP |
687 if (s->wrap == 2) { | 686 if (s->status == GZIP_STATE) { |
688 - strm->adler = crc32(0L, Z_NULL, 0); | 687 /* gzip header */ |
689 + crc_reset(s); | 688 - strm->adler = crc32(0L, Z_NULL, 0); |
690 put_byte(s, 31); | 689 + crc_reset(s); |
691 put_byte(s, 139); | 690 put_byte(s, 31); |
692 put_byte(s, 8); | 691 put_byte(s, 139); |
693 @@ -952,6 +984,7 @@ int ZEXPORT deflate (strm, flush) | 692 put_byte(s, 8); |
| 693 @@ -1049,6 +1080,7 @@ int ZEXPORT deflate (strm, flush) |
694 /* Write the trailer */ | 694 /* Write the trailer */ |
695 #ifdef GZIP | 695 #ifdef GZIP |
696 if (s->wrap == 2) { | 696 if (s->wrap == 2) { |
697 + crc_finalize(s); | 697 + crc_finalize(s); |
698 put_byte(s, (Byte)(strm->adler & 0xff)); | 698 put_byte(s, (Byte)(strm->adler & 0xff)); |
699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); | 699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); |
700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); | 700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); |
701 @@ -1073,7 +1106,7 @@ int ZEXPORT deflateCopy (dest, source) | 701 @@ -1161,7 +1193,7 @@ int ZEXPORT deflateCopy (dest, source) |
702 * allocating a large strm->next_in buffer and copying from it. | 702 * allocating a large strm->next_in buffer and copying from it. |
703 * (See also flush_pending()). | 703 * (See also flush_pending()). |
704 */ | 704 */ |
705 -local int read_buf(strm, buf, size) | 705 -local unsigned read_buf(strm, buf, size) |
706 +ZLIB_INTERNAL int read_buf(strm, buf, size) | 706 +ZLIB_INTERNAL unsigned read_buf(strm, buf, size) |
707 z_streamp strm; | 707 z_streamp strm; |
708 Bytef *buf; | 708 Bytef *buf; |
709 unsigned size; | 709 unsigned size; |
710 @@ -1085,15 +1118,16 @@ local int read_buf(strm, buf, size) | 710 @@ -1173,15 +1205,16 @@ local unsigned read_buf(strm, buf, size) |
711 | 711 |
712 strm->avail_in -= len; | 712 strm->avail_in -= len; |
713 | 713 |
714 - zmemcpy(buf, strm->next_in, len); | 714 - zmemcpy(buf, strm->next_in, len); |
715 - if (strm->state->wrap == 1) { | 715 - if (strm->state->wrap == 1) { |
716 - strm->adler = adler32(strm->adler, buf, len); | 716 - strm->adler = adler32(strm->adler, buf, len); |
717 - } | 717 - } |
718 #ifdef GZIP | 718 #ifdef GZIP |
719 - else if (strm->state->wrap == 2) { | 719 - else if (strm->state->wrap == 2) { |
720 - strm->adler = crc32(strm->adler, buf, len); | 720 - strm->adler = crc32(strm->adler, buf, len); |
721 - } | 721 - } |
722 + if (strm->state->wrap == 2) | 722 + if (strm->state->wrap == 2) |
723 + copy_with_crc(strm, buf, len); | 723 + copy_with_crc(strm, buf, len); |
724 + else | 724 + else |
725 #endif | 725 #endif |
726 + { | 726 + { |
727 + zmemcpy(buf, strm->next_in, len); | 727 + zmemcpy(buf, strm->next_in, len); |
728 + if (strm->state->wrap == 1) | 728 + if (strm->state->wrap == 1) |
729 + strm->adler = adler32(strm->adler, buf, len); | 729 + strm->adler = adler32(strm->adler, buf, len); |
730 + } | 730 + } |
731 strm->next_in += len; | 731 strm->next_in += len; |
732 strm->total_in += len; | 732 strm->total_in += len; |
733 | 733 |
734 @@ -1387,7 +1421,19 @@ local void check_match(s, start, match, length) | 734 @@ -1479,7 +1512,19 @@ local void check_match(s, start, match, length) |
735 * performed for at least two bytes (required for the zip translate_eol | 735 * performed for at least two bytes (required for the zip translate_eol |
736 * option -- not supported here). | 736 * option -- not supported here). |
737 */ | 737 */ |
738 -local void fill_window(s) | 738 -local void fill_window(s) |
739 +local void fill_window_c(deflate_state *s); | 739 +local void fill_window_c(deflate_state *s); |
740 + | 740 + |
741 +local void fill_window(deflate_state *s) | 741 +local void fill_window(deflate_state *s) |
742 +{ | 742 +{ |
743 + if (x86_cpu_enable_simd) { | 743 + if (x86_cpu_enable_simd) { |
744 + fill_window_sse(s); | 744 + fill_window_sse(s); |
745 + return; | 745 + return; |
746 + } | 746 + } |
747 + | 747 + |
748 + fill_window_c(s); | 748 + fill_window_c(s); |
749 +} | 749 +} |
750 + | 750 + |
751 +local void fill_window_c(s) | 751 +local void fill_window_c(s) |
752 deflate_state *s; | 752 deflate_state *s; |
753 { | 753 { |
754 register unsigned n, m; | 754 unsigned n; |
755 @@ -1651,7 +1697,7 @@ local block_state deflate_fast(s, flush) | 755 @@ -1847,7 +1892,7 @@ local block_state deflate_fast(s, flush) |
756 */ | 756 */ |
757 hash_head = NIL; | 757 hash_head = NIL; |
758 if (s->lookahead >= MIN_MATCH) { | 758 if (s->lookahead >= MIN_MATCH) { |
759 - INSERT_STRING(s, s->strstart, hash_head); | 759 - INSERT_STRING(s, s->strstart, hash_head); |
760 + hash_head = insert_string(s, s->strstart); | 760 + hash_head = insert_string(s, s->strstart); |
761 } | 761 } |
762 | 762 |
763 /* Find the longest match, discarding those <= prev_length. | 763 /* Find the longest match, discarding those <= prev_length. |
764 @@ -1682,7 +1728,7 @@ local block_state deflate_fast(s, flush) | 764 @@ -1878,7 +1923,7 @@ local block_state deflate_fast(s, flush) |
765 s->match_length--; /* string at strstart already in table */ | 765 s->match_length--; /* string at strstart already in table */ |
766 do { | 766 do { |
767 s->strstart++; | 767 s->strstart++; |
768 - INSERT_STRING(s, s->strstart, hash_head); | 768 - INSERT_STRING(s, s->strstart, hash_head); |
769 + hash_head = insert_string(s, s->strstart); | 769 + hash_head = insert_string(s, s->strstart); |
770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are | 770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are |
771 * always MIN_MATCH bytes ahead. | 771 * always MIN_MATCH bytes ahead. |
772 */ | 772 */ |
773 @@ -1754,7 +1800,7 @@ local block_state deflate_slow(s, flush) | 773 @@ -1950,7 +1995,7 @@ local block_state deflate_slow(s, flush) |
774 */ | 774 */ |
775 hash_head = NIL; | 775 hash_head = NIL; |
776 if (s->lookahead >= MIN_MATCH) { | 776 if (s->lookahead >= MIN_MATCH) { |
777 - INSERT_STRING(s, s->strstart, hash_head); | 777 - INSERT_STRING(s, s->strstart, hash_head); |
778 + hash_head = insert_string(s, s->strstart); | 778 + hash_head = insert_string(s, s->strstart); |
779 } | 779 } |
780 | 780 |
781 /* Find the longest match, discarding those <= prev_length. | 781 /* Find the longest match, discarding those <= prev_length. |
782 @@ -1805,7 +1851,7 @@ local block_state deflate_slow(s, flush) | 782 @@ -2001,7 +2046,7 @@ local block_state deflate_slow(s, flush) |
783 s->prev_length -= 2; | 783 s->prev_length -= 2; |
784 do { | 784 do { |
785 if (++s->strstart <= max_insert) { | 785 if (++s->strstart <= max_insert) { |
786 - INSERT_STRING(s, s->strstart, hash_head); | 786 - INSERT_STRING(s, s->strstart, hash_head); |
787 + hash_head = insert_string(s, s->strstart); | 787 + hash_head = insert_string(s, s->strstart); |
788 } | 788 } |
789 } while (--s->prev_length != 0); | 789 } while (--s->prev_length != 0); |
790 s->match_available = 0; | 790 s->match_available = 0; |
791 @@ -1965,3 +2011,37 @@ local block_state deflate_huff(s, flush) | 791 @@ -2161,3 +2206,37 @@ local block_state deflate_huff(s, flush) |
792 FLUSH_BLOCK(s, 0); | 792 FLUSH_BLOCK(s, 0); |
793 return block_done; | 793 return block_done; |
794 } | 794 } |
795 + | 795 + |
796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will | 796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will |
797 + * use intrinsic without extra params | 797 + * use intrinsic without extra params |
798 + */ | 798 + */ |
799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) | 799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) |
800 +{ | 800 +{ |
801 + Pos ret; | 801 + Pos ret; |
(...skipping 17 matching lines...) Expand all Loading... |
819 +#else | 819 +#else |
820 + /* This should never happen */ | 820 + /* This should never happen */ |
821 + assert(0); | 821 + assert(0); |
822 +#endif | 822 +#endif |
823 + | 823 + |
824 + ret = s->head[h & s->hash_mask]; | 824 + ret = s->head[h & s->hash_mask]; |
825 + s->head[h & s->hash_mask] = str; | 825 + s->head[h & s->hash_mask] = str; |
826 + s->prev[str & s->w_mask] = ret; | 826 + s->prev[str & s->w_mask] = ret; |
827 + return ret; | 827 + return ret; |
828 +} | 828 +} |
829 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h | 829 diff --git a/deflate.h b/deflate.h |
830 index ce0299edd191..022d9c326ecc 100644 | 830 index 23ecdd312bc0..ab56df7663b6 100644 |
831 --- a/third_party/zlib/deflate.h | 831 --- a/deflate.h |
832 +++ b/third_party/zlib/deflate.h | 832 +++ b/deflate.h |
833 @@ -106,7 +106,7 @@ typedef struct internal_state { | 833 @@ -109,7 +109,7 @@ typedef struct internal_state { |
834 uInt gzindex; /* where in extra, name, or comment */ | 834 ulg gzindex; /* where in extra, name, or comment */ |
835 Byte method; /* can only be DEFLATED */ | 835 Byte method; /* can only be DEFLATED */ |
836 int last_flush; /* value of flush param for previous deflate call */ | 836 int last_flush; /* value of flush param for previous deflate call */ |
837 - | 837 - |
838 + unsigned zalign(16) crc0[4 * 5]; | 838 + unsigned zalign(16) crc0[4 * 5]; |
839 /* used by deflate.c: */ | 839 /* used by deflate.c: */ |
840 | 840 |
841 uInt w_size; /* LZ77 window size (32K by default) */ | 841 uInt w_size; /* LZ77 window size (32K by default) */ |
842 @@ -343,4 +343,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, | 842 @@ -346,4 +346,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, |
843 flush = _tr_tally(s, distance, length) | 843 flush = _tr_tally(s, distance, length) |
844 #endif | 844 #endif |
845 | 845 |
846 +/* Functions that are SIMD optimised on x86 */ | 846 +/* Functions that are SIMD optimised on x86 */ |
847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); | 847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); |
848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, | 848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, |
849 + unsigned char* dst, | 849 + unsigned char* dst, |
850 + const unsigned char* src, | 850 + const unsigned char* src, |
851 + long len); | 851 + long len); |
852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); | 852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); |
853 + | 853 + |
854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); | 854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); |
855 + | 855 + |
856 #endif /* DEFLATE_H */ | 856 #endif /* DEFLATE_H */ |
857 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c | 857 diff --git a/fill_window_sse.c b/fill_window_sse.c |
858 index e69de29bb2d1..949ccce1ba9c 100644 | 858 new file mode 100644 |
859 --- a/third_party/zlib/fill_window_sse.c | 859 index 000000000000..949ccce1ba9c |
860 +++ b/third_party/zlib/fill_window_sse.c | 860 --- /dev/null |
| 861 +++ b/fill_window_sse.c |
861 @@ -0,0 +1,175 @@ | 862 @@ -0,0 +1,175 @@ |
862 +/* | 863 +/* |
863 + * Fill Window with SSE2-optimized hash shifting | 864 + * Fill Window with SSE2-optimized hash shifting |
864 + * | 865 + * |
865 + * Copyright (C) 2013 Intel Corporation | 866 + * Copyright (C) 2013 Intel Corporation |
866 + * Authors: | 867 + * Authors: |
867 + * Arjan van de Ven <arjan@linux.intel.com> | 868 + * Arjan van de Ven <arjan@linux.intel.com> |
868 + * Jim Kukunas <james.t.kukunas@linux.intel.com> | 869 + * Jim Kukunas <james.t.kukunas@linux.intel.com> |
869 + * | 870 + * |
870 + * For conditions of distribution and use, see copyright notice in zlib.h | 871 + * For conditions of distribution and use, see copyright notice in zlib.h |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1027 + if (init > s->window_size - s->high_water) | 1028 + if (init > s->window_size - s->high_water) |
1028 + init = s->window_size - s->high_water; | 1029 + init = s->window_size - s->high_water; |
1029 + zmemzero(s->window + s->high_water, (unsigned)init); | 1030 + zmemzero(s->window + s->high_water, (unsigned)init); |
1030 + s->high_water += init; | 1031 + s->high_water += init; |
1031 + } | 1032 + } |
1032 + } | 1033 + } |
1033 + | 1034 + |
1034 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, | 1035 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, |
1035 + "not enough room for search"); | 1036 + "not enough room for search"); |
1036 +} | 1037 +} |
1037 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h | 1038 diff --git a/names.h b/names.h |
1038 index d7e9f91c5a12..10aca44b29a4 100644 | 1039 index f18df5684dc5..3436baa4eb57 100644 |
1039 --- a/third_party/zlib/mozzconf.h | 1040 --- a/names.h |
1040 +++ b/third_party/zlib/mozzconf.h | 1041 +++ b/names.h |
1041 @@ -169,6 +169,14 @@ | 1042 @@ -152,4 +152,16 @@ |
1042 #define inflateResetKeep MOZ_Z_inflateResetKeep | 1043 /* An exported symbol that isn't handled by Z_PREFIX in zconf.h */ |
1043 #define gzopen_w MOZ_Z_gzopen_w | 1044 #define z_errmsg Cr_z_z_errmsg |
1044 | 1045 |
1045 +/* Chromium-specific modifications */ | 1046 +/* Symbols added in simd.patch */ |
1046 +#define copy_with_crc MOZ_Z__copy_with_crc | 1047 +#define copy_with_crc Cr_z_copy_with_crc |
1047 +#define crc_finalize MOZ_Z__crc_finalize | 1048 +#define crc_finalize Cr_z_crc_finalize |
1048 +#define crc_reset MOZ_Z__crc_reset | 1049 +#define crc_fold_512to32 Cr_z_crc_fold_512to32 |
1049 +// read_buf used to be local, but this was changed in simd.patch. | 1050 +#define crc_fold_copy Cr_z_crc_fold_copy |
1050 +#define read_buf MOZ_Z__read_buf | 1051 +#define crc_fold_init Cr_z_crc_fold_init |
1051 +#define x86_check_features MOZ_Z__x86_check_features | 1052 +#define crc_reset Cr_z_crc_reset |
| 1053 +#define fill_window_sse Cr_z_fill_window_sse |
| 1054 +#define read_buf Cr_z_read_buf |
| 1055 +#define x86_check_features Cr_z_x86_check_features |
| 1056 +#define x86_cpu_enable_simd Cr_z_x86_cpu_enable_simd |
1052 + | 1057 + |
1053 /* Mangle Byte types except on Mac. */ | 1058 #endif /* THIRD_PARTY_ZLIB_NAMES_H_ */ |
1054 #if !defined(__MACTYPES__) | 1059 diff --git a/simd_stub.c b/simd_stub.c |
1055 #define Byte MOZ_Z_Byte | 1060 new file mode 100644 |
1056 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c | 1061 index 000000000000..c6d46051498f |
1057 index e69de29bb2d1..796f1f63f690 100644 | 1062 --- /dev/null |
1058 --- a/third_party/zlib/simd_stub.c | 1063 +++ b/simd_stub.c |
1059 +++ b/third_party/zlib/simd_stub.c | |
1060 @@ -0,0 +1,35 @@ | 1064 @@ -0,0 +1,35 @@ |
1061 +/* simd_stub.c -- stub implementations | 1065 +/* simd_stub.c -- stub implementations |
1062 +* Copyright (C) 2014 Intel Corporation | 1066 +* Copyright (C) 2014 Intel Corporation |
1063 +* For conditions of distribution and use, see copyright notice in zlib.h | 1067 +* For conditions of distribution and use, see copyright notice in zlib.h |
1064 +*/ | 1068 +*/ |
1065 +#include <assert.h> | 1069 +#include <assert.h> |
1066 + | 1070 + |
1067 +#include "deflate.h" | 1071 +#include "deflate.h" |
1068 +#include "x86.h" | 1072 +#include "x86.h" |
1069 + | 1073 + |
1070 +int x86_cpu_enable_simd = 0; | 1074 +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; |
1071 + | 1075 + |
1072 +void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { | 1076 +void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { |
1073 + assert(0); | 1077 + assert(0); |
1074 +} | 1078 +} |
1075 + | 1079 + |
1076 +void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, | 1080 +void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, |
1077 + unsigned char *dst, | 1081 + unsigned char *dst, |
1078 + const unsigned char *src, | 1082 + const unsigned char *src, |
1079 + long len) { | 1083 + long len) { |
1080 + assert(0); | 1084 + assert(0); |
1081 +} | 1085 +} |
1082 + | 1086 + |
1083 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { | 1087 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { |
1084 + assert(0); | 1088 + assert(0); |
1085 + return 0; | 1089 + return 0; |
1086 +} | 1090 +} |
1087 + | 1091 + |
1088 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) | 1092 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) |
1089 +{ | 1093 +{ |
1090 + assert(0); | 1094 + assert(0); |
1091 +} | 1095 +} |
1092 + | 1096 + |
1093 +void x86_check_features(void) | 1097 +void x86_check_features(void) |
1094 +{ | 1098 +{ |
1095 +} | 1099 +} |
1096 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c | 1100 diff --git a/x86.c b/x86.c |
1097 index e69de29bb2d1..e6532fd10ddb 100644 | 1101 new file mode 100644 |
1098 --- a/third_party/zlib/x86.c | 1102 index 000000000000..e56fe8b85a39 |
1099 +++ b/third_party/zlib/x86.c | 1103 --- /dev/null |
1100 @@ -0,0 +1,91 @@ | 1104 +++ b/x86.c |
| 1105 @@ -0,0 +1,92 @@ |
1101 +/* | 1106 +/* |
1102 + * x86 feature check | 1107 + * x86 feature check |
1103 + * | 1108 + * |
1104 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 1109 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
1105 + * Author: | 1110 + * Author: |
1106 + * Jim Kukunas | 1111 + * Jim Kukunas |
1107 + * | 1112 + * |
1108 + * For conditions of distribution and use, see copyright notice in zlib.h | 1113 + * For conditions of distribution and use, see copyright notice in zlib.h |
1109 + */ | 1114 + */ |
1110 + | 1115 + |
1111 +#include "x86.h" | 1116 +#include "x86.h" |
| 1117 +#include "zutil.h" |
1112 + | 1118 + |
1113 +int x86_cpu_enable_simd = 0; | 1119 +int ZLIB_INTERNAL x86_cpu_enable_simd = 0; |
1114 + | 1120 + |
1115 +#ifndef _MSC_VER | 1121 +#ifndef _MSC_VER |
1116 +#include <pthread.h> | 1122 +#include <pthread.h> |
1117 + | 1123 + |
1118 +pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; | 1124 +pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; |
1119 +static void _x86_check_features(void); | 1125 +static void _x86_check_features(void); |
1120 + | 1126 + |
1121 +void x86_check_features(void) | 1127 +void x86_check_features(void) |
1122 +{ | 1128 +{ |
1123 + pthread_once(&cpu_check_inited_once, _x86_check_features); | 1129 + pthread_once(&cpu_check_inited_once, _x86_check_features); |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1182 + x86_cpu_has_sse2 = regs[3] & 0x4000000; | 1188 + x86_cpu_has_sse2 = regs[3] & 0x4000000; |
1183 + x86_cpu_has_sse42= regs[2] & 0x100000; | 1189 + x86_cpu_has_sse42= regs[2] & 0x100000; |
1184 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; | 1190 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; |
1185 + | 1191 + |
1186 + x86_cpu_enable_simd = x86_cpu_has_sse2 && | 1192 + x86_cpu_enable_simd = x86_cpu_has_sse2 && |
1187 + x86_cpu_has_sse42 && | 1193 + x86_cpu_has_sse42 && |
1188 + x86_cpu_has_pclmulqdq; | 1194 + x86_cpu_has_pclmulqdq; |
1189 + return TRUE; | 1195 + return TRUE; |
1190 +} | 1196 +} |
1191 +#endif /* _MSC_VER */ | 1197 +#endif /* _MSC_VER */ |
1192 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h | 1198 diff --git a/x86.h b/x86.h |
1193 index e69de29bb2d1..ebcf10ab09d2 100644 | 1199 new file mode 100644 |
1194 --- a/third_party/zlib/x86.h | 1200 index 000000000000..ebcf10ab09d2 |
1195 +++ b/third_party/zlib/x86.h | 1201 --- /dev/null |
| 1202 +++ b/x86.h |
1196 @@ -0,0 +1,15 @@ | 1203 @@ -0,0 +1,15 @@ |
1197 +/* x86.h -- check for x86 CPU features | 1204 +/* x86.h -- check for x86 CPU features |
1198 +* Copyright (C) 2013 Intel Corporation Jim Kukunas | 1205 +* Copyright (C) 2013 Intel Corporation Jim Kukunas |
1199 +* For conditions of distribution and use, see copyright notice in zlib.h | 1206 +* For conditions of distribution and use, see copyright notice in zlib.h |
1200 +*/ | 1207 +*/ |
1201 + | 1208 + |
1202 +#ifndef X86_H | 1209 +#ifndef X86_H |
1203 +#define X86_H | 1210 +#define X86_H |
1204 + | 1211 + |
1205 +#include "zlib.h" | 1212 +#include "zlib.h" |
1206 + | 1213 + |
1207 +extern int x86_cpu_enable_simd; | 1214 +extern int x86_cpu_enable_simd; |
1208 + | 1215 + |
1209 +void x86_check_features(void); | 1216 +void x86_check_features(void); |
1210 + | 1217 + |
1211 +#endif /* X86_H */ | 1218 +#endif /* X86_H */ |
| 1219 diff --git a/zutil.h b/zutil.h |
| 1220 index 80375b8b6109..4425bcf75eb3 100644 |
| 1221 --- a/zutil.h |
| 1222 +++ b/zutil.h |
| 1223 @@ -283,4 +283,10 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-z
lib_error */ |
| 1224 #define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ |
| 1225 (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) |
| 1226 |
| 1227 +#ifdef _MSC_VER |
| 1228 +#define zalign(x) __declspec(align(x)) |
| 1229 +#else |
| 1230 +#define zalign(x) __attribute__((aligned((x)))) |
| 1231 +#endif |
| 1232 + |
| 1233 #endif /* ZUTIL_H */ |
OLD | NEW |