OLD | NEW |
1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c | 1 diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c |
2 index 979a719..09228ed 100644 | 2 index 979a7190a3ca..09228ed9c76f 100644 |
3 --- a/third_party/zlib/crc32.c | 3 --- a/third_party/zlib/crc32.c |
4 +++ b/third_party/zlib/crc32.c | 4 +++ b/third_party/zlib/crc32.c |
5 @@ -28,6 +28,8 @@ | 5 @@ -28,6 +28,8 @@ |
6 # endif /* !DYNAMIC_CRC_TABLE */ | 6 # endif /* !DYNAMIC_CRC_TABLE */ |
7 #endif /* MAKECRCH */ | 7 #endif /* MAKECRCH */ |
8 | 8 |
9 +#include "deflate.h" | 9 +#include "deflate.h" |
10 +#include "x86.h" | 10 +#include "x86.h" |
11 #include "zutil.h" /* for STDC and FAR definitions */ | 11 #include "zutil.h" /* for STDC and FAR definitions */ |
12 | 12 |
(...skipping 21 matching lines...) Expand all Loading... |
34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) | 34 +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) |
35 +{ | 35 +{ |
36 + if (x86_cpu_enable_simd) { | 36 + if (x86_cpu_enable_simd) { |
37 + crc_fold_copy(strm->state, dst, strm->next_in, size); | 37 + crc_fold_copy(strm->state, dst, strm->next_in, size); |
38 + return; | 38 + return; |
39 + } | 39 + } |
40 + zmemcpy(dst, strm->next_in, size); | 40 + zmemcpy(dst, strm->next_in, size); |
41 + strm->adler = crc32(strm->adler, dst, size); | 41 + strm->adler = crc32(strm->adler, dst, size); |
42 +} | 42 +} |
43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c | 43 diff --git a/third_party/zlib/crc_folding.c b/third_party/zlib/crc_folding.c |
44 new file mode 100644 | 44 index e69de29bb2d1..48d77744aaf4 100644 |
45 index 0000000..48d7774 | 45 --- a/third_party/zlib/crc_folding.c |
46 --- /dev/null | |
47 +++ b/third_party/zlib/crc_folding.c | 46 +++ b/third_party/zlib/crc_folding.c |
48 @@ -0,0 +1,493 @@ | 47 @@ -0,0 +1,493 @@ |
49 +/* | 48 +/* |
50 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ | 49 + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ |
51 + * instruction. | 50 + * instruction. |
52 + * | 51 + * |
53 + * A white paper describing this algorithm can be found at: | 52 + * A white paper describing this algorithm can be found at: |
54 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | 53 + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fas
t-crc-computation-generic-polynomials-pclmulqdq-paper.pdf |
55 + * | 54 + * |
56 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 55 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
533 + xmm_crc2 = xmm_crc3; | 532 + xmm_crc2 = xmm_crc3; |
534 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); | 533 + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); |
535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); | 534 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); |
536 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); | 535 + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); |
537 + | 536 + |
538 + crc = _mm_extract_epi32(xmm_crc3, 2); | 537 + crc = _mm_extract_epi32(xmm_crc3, 2); |
539 + return ~crc; | 538 + return ~crc; |
540 + CRC_SAVE(s) | 539 + CRC_SAVE(s) |
541 +} | 540 +} |
542 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c | 541 diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c |
543 index 7c95b30..59645eb 100644 | 542 index 696957705b75..02fa2f64f83e 100644 |
544 --- a/third_party/zlib/deflate.c | 543 --- a/third_party/zlib/deflate.c |
545 +++ b/third_party/zlib/deflate.c | 544 +++ b/third_party/zlib/deflate.c |
546 @@ -48,8 +48,9 @@ | 545 @@ -48,8 +48,9 @@ |
547 */ | 546 */ |
548 | 547 |
549 /* @(#) $Id$ */ | 548 /* @(#) $Id$ */ |
550 - | 549 - |
551 +#include <assert.h> | 550 +#include <assert.h> |
552 #include "deflate.h" | 551 #include "deflate.h" |
553 +#include "x86.h" | 552 +#include "x86.h" |
554 | 553 |
555 const char deflate_copyright[] = | 554 const char deflate_copyright[] = |
556 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; | 555 " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; |
557 @@ -85,7 +86,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); | 556 @@ -84,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flu
sh)); |
558 local void lm_init OF((deflate_state *s)); | 557 local void lm_init OF((deflate_state *s)); |
559 local void putShortMSB OF((deflate_state *s, uInt b)); | 558 local void putShortMSB OF((deflate_state *s, uInt b)); |
560 local void flush_pending OF((z_streamp strm)); | 559 local void flush_pending OF((z_streamp strm)); |
561 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 560 -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
562 + | 561 + |
563 #ifdef ASMV | 562 #ifdef ASMV |
564 void match_init OF((void)); /* asm code initialization */ | 563 void match_init OF((void)); /* asm code initialization */ |
565 uInt longest_match OF((deflate_state *s, IPos cur_match, int clas)); | 564 uInt longest_match OF((deflate_state *s, IPos cur_match)); |
566 @@ -98,6 +99,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, | 565 @@ -97,6 +98,23 @@ local void check_match OF((deflate_state *s, IPos start, IPo
s match, |
567 int length)); | 566 int length)); |
568 #endif | 567 #endif |
569 | 568 |
570 +/* For fill_window_sse.c to use */ | 569 +/* For fill_window_sse.c to use */ |
571 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); | 570 +ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); |
572 + | 571 + |
573 +/* From crc32.c */ | 572 +/* From crc32.c */ |
574 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); | 573 +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); |
575 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); | 574 +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); |
576 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); | 575 +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); |
577 + | 576 + |
578 +#ifdef _MSC_VER | 577 +#ifdef _MSC_VER |
579 +#define INLINE __inline | 578 +#define INLINE __inline |
580 +#else | 579 +#else |
581 +#define INLINE inline | 580 +#define INLINE inline |
582 +#endif | 581 +#endif |
583 + | 582 + |
584 +/* Inline optimisation */ | 583 +/* Inline optimisation */ |
585 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); | 584 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); |
586 + | 585 + |
587 /* =========================================================================== | 586 /* =========================================================================== |
588 * Local data | 587 * Local data |
589 */ | 588 */ |
590 @@ -167,7 +185,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ | 589 @@ -166,7 +184,6 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compile
rs */ |
591 */ | 590 */ |
592 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) | 591 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) |
593 | 592 |
594 - | 593 - |
595 /* =========================================================================== | 594 /* =========================================================================== |
596 * Insert string str in the dictionary and set match_head to the previous head | 595 * Insert string str in the dictionary and set match_head to the previous head |
597 * of the hash chain (the most recent string with same hash key). Return | 596 * of the hash chain (the most recent string with same hash key). Return |
598 @@ -178,17 +195,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ | 597 @@ -177,17 +194,28 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compi
lers */ |
599 * input characters and the first MIN_MATCH bytes of str are valid | 598 * input characters and the first MIN_MATCH bytes of str are valid |
600 * (except for the last MIN_MATCH-1 bytes of the input file). | 599 * (except for the last MIN_MATCH-1 bytes of the input file). |
601 */ | 600 */ |
602 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) | 601 +local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) |
603 +{ | 602 +{ |
604 + Pos ret; | 603 + Pos ret; |
605 + | 604 + |
606 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); | 605 + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); |
607 #ifdef FASTEST | 606 #ifdef FASTEST |
608 -#define INSERT_STRING(s, str, match_head) \ | 607 -#define INSERT_STRING(s, str, match_head) \ |
(...skipping 16 matching lines...) Expand all Loading... |
625 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) | 624 +local INLINE Pos insert_string(deflate_state *const s, const Pos str) |
626 +{ | 625 +{ |
627 + if (x86_cpu_enable_simd) | 626 + if (x86_cpu_enable_simd) |
628 + return insert_string_sse(s, str); | 627 + return insert_string_sse(s, str); |
629 + return insert_string_c(s, str); | 628 + return insert_string_c(s, str); |
630 +} | 629 +} |
631 + | 630 + |
632 | 631 |
633 /* =========================================================================== | 632 /* =========================================================================== |
634 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). | 633 * Initialize the hash table (avoiding 64K overflow for 16 bit systems). |
635 @@ -222,6 +250,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 634 @@ -221,6 +249,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
636 const char *version; | 635 const char *version; |
637 int stream_size; | 636 int stream_size; |
638 { | 637 { |
639 + unsigned window_padding = 8; | 638 + unsigned window_padding = 8; |
640 deflate_state *s; | 639 deflate_state *s; |
641 int wrap = 1; | 640 int wrap = 1; |
642 static const char my_version[] = ZLIB_VERSION; | 641 static const char my_version[] = ZLIB_VERSION; |
643 @@ -231,6 +260,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, | 642 @@ -230,6 +259,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, m
emLevel, strategy, |
644 * output size for (length,distance) codes is <= 24 bits. | 643 * output size for (length,distance) codes is <= 24 bits. |
645 */ | 644 */ |
646 | 645 |
647 + x86_check_features(); | 646 + x86_check_features(); |
648 + | 647 + |
649 if (version == Z_NULL || version[0] != my_version[0] || | 648 if (version == Z_NULL || version[0] != my_version[0] || |
650 stream_size != sizeof(z_stream)) { | 649 stream_size != sizeof(z_stream)) { |
651 return Z_VERSION_ERROR; | 650 return Z_VERSION_ERROR; |
652 @@ -286,12 +317,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, | 651 @@ -285,12 +316,17 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits,
memLevel, strategy, |
653 s->w_size = 1 << s->w_bits; | 652 s->w_size = 1 << s->w_bits; |
654 s->w_mask = s->w_size - 1; | 653 s->w_mask = s->w_size - 1; |
655 | 654 |
656 - s->hash_bits = memLevel + 7; | 655 - s->hash_bits = memLevel + 7; |
657 + if (x86_cpu_enable_simd) { | 656 + if (x86_cpu_enable_simd) { |
658 + s->hash_bits = 15; | 657 + s->hash_bits = 15; |
659 + } else { | 658 + } else { |
660 + s->hash_bits = memLevel + 7; | 659 + s->hash_bits = memLevel + 7; |
661 + } | 660 + } |
662 + | 661 + |
663 s->hash_size = 1 << s->hash_bits; | 662 s->hash_size = 1 << s->hash_bits; |
664 s->hash_mask = s->hash_size - 1; | 663 s->hash_mask = s->hash_size - 1; |
665 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); | 664 s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); |
666 | 665 |
667 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); | 666 - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); |
668 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); | 667 + s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byt
e)); |
669 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); | 668 s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); |
670 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); | 669 s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); |
671 s->class_bitmap = NULL; | 670 |
672 @@ -369,11 +405,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) | 671 @@ -365,11 +401,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLen
gth) |
673 str = s->strstart; | 672 str = s->strstart; |
674 n = s->lookahead - (MIN_MATCH-1); | 673 n = s->lookahead - (MIN_MATCH-1); |
675 do { | 674 do { |
676 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); | 675 - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); |
677 -#ifndef FASTEST | 676 -#ifndef FASTEST |
678 - s->prev[str & s->w_mask] = s->head[s->ins_h]; | 677 - s->prev[str & s->w_mask] = s->head[s->ins_h]; |
679 -#endif | 678 -#endif |
680 - s->head[s->ins_h] = (Pos)str; | 679 - s->head[s->ins_h] = (Pos)str; |
681 + insert_string(s, str); | 680 + insert_string(s, str); |
682 str++; | 681 str++; |
683 } while (--n); | 682 } while (--n); |
684 s->strstart = str; | 683 s->strstart = str; |
685 @@ -696,7 +728,7 @@ int ZEXPORT deflate (strm, flush) | 684 @@ -690,7 +722,7 @@ int ZEXPORT deflate (strm, flush) |
686 if (s->status == INIT_STATE) { | 685 if (s->status == INIT_STATE) { |
687 #ifdef GZIP | 686 #ifdef GZIP |
688 if (s->wrap == 2) { | 687 if (s->wrap == 2) { |
689 - strm->adler = crc32(0L, Z_NULL, 0); | 688 - strm->adler = crc32(0L, Z_NULL, 0); |
690 + crc_reset(s); | 689 + crc_reset(s); |
691 put_byte(s, 31); | 690 put_byte(s, 31); |
692 put_byte(s, 139); | 691 put_byte(s, 139); |
693 put_byte(s, 8); | 692 put_byte(s, 8); |
694 @@ -975,6 +1007,7 @@ int ZEXPORT deflate (strm, flush) | 693 @@ -952,6 +984,7 @@ int ZEXPORT deflate (strm, flush) |
695 /* Write the trailer */ | 694 /* Write the trailer */ |
696 #ifdef GZIP | 695 #ifdef GZIP |
697 if (s->wrap == 2) { | 696 if (s->wrap == 2) { |
698 + crc_finalize(s); | 697 + crc_finalize(s); |
699 put_byte(s, (Byte)(strm->adler & 0xff)); | 698 put_byte(s, (Byte)(strm->adler & 0xff)); |
700 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); | 699 put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); |
701 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); | 700 put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); |
702 @@ -1097,7 +1130,7 @@ int ZEXPORT deflateCopy (dest, source) | 701 @@ -1073,7 +1106,7 @@ int ZEXPORT deflateCopy (dest, source) |
703 * allocating a large strm->next_in buffer and copying from it. | 702 * allocating a large strm->next_in buffer and copying from it. |
704 * (See also flush_pending()). | 703 * (See also flush_pending()). |
705 */ | 704 */ |
706 -local int read_buf(strm, buf, size) | 705 -local int read_buf(strm, buf, size) |
707 +ZLIB_INTERNAL int read_buf(strm, buf, size) | 706 +ZLIB_INTERNAL int read_buf(strm, buf, size) |
708 z_streamp strm; | 707 z_streamp strm; |
709 Bytef *buf; | 708 Bytef *buf; |
710 unsigned size; | 709 unsigned size; |
711 @@ -1109,15 +1142,16 @@ local int read_buf(strm, buf, size) | 710 @@ -1085,15 +1118,16 @@ local int read_buf(strm, buf, size) |
712 | 711 |
713 strm->avail_in -= len; | 712 strm->avail_in -= len; |
714 | 713 |
715 - zmemcpy(buf, strm->next_in, len); | 714 - zmemcpy(buf, strm->next_in, len); |
716 - if (strm->state->wrap == 1) { | 715 - if (strm->state->wrap == 1) { |
717 - strm->adler = adler32(strm->adler, buf, len); | 716 - strm->adler = adler32(strm->adler, buf, len); |
718 - } | 717 - } |
719 #ifdef GZIP | 718 #ifdef GZIP |
720 - else if (strm->state->wrap == 2) { | 719 - else if (strm->state->wrap == 2) { |
721 - strm->adler = crc32(strm->adler, buf, len); | 720 - strm->adler = crc32(strm->adler, buf, len); |
722 - } | 721 - } |
723 + if (strm->state->wrap == 2) | 722 + if (strm->state->wrap == 2) |
724 + copy_with_crc(strm, buf, len); | 723 + copy_with_crc(strm, buf, len); |
725 + else | 724 + else |
726 #endif | 725 #endif |
727 + { | 726 + { |
728 + zmemcpy(buf, strm->next_in, len); | 727 + zmemcpy(buf, strm->next_in, len); |
729 + if (strm->state->wrap == 1) | 728 + if (strm->state->wrap == 1) |
730 + strm->adler = adler32(strm->adler, buf, len); | 729 + strm->adler = adler32(strm->adler, buf, len); |
731 + } | 730 + } |
732 strm->next_in += len; | 731 strm->next_in += len; |
733 strm->total_in += len; | 732 strm->total_in += len; |
734 | 733 |
735 @@ -1530,7 +1564,19 @@ local void check_match(s, start, match, length) | 734 @@ -1387,7 +1421,19 @@ local void check_match(s, start, match, length) |
736 * performed for at least two bytes (required for the zip translate_eol | 735 * performed for at least two bytes (required for the zip translate_eol |
737 * option -- not supported here). | 736 * option -- not supported here). |
738 */ | 737 */ |
739 -local void fill_window(s) | 738 -local void fill_window(s) |
740 +local void fill_window_c(deflate_state *s); | 739 +local void fill_window_c(deflate_state *s); |
741 + | 740 + |
742 +local void fill_window(deflate_state *s) | 741 +local void fill_window(deflate_state *s) |
743 +{ | 742 +{ |
744 + if (x86_cpu_enable_simd) { | 743 + if (x86_cpu_enable_simd) { |
745 + fill_window_sse(s); | 744 + fill_window_sse(s); |
746 + return; | 745 + return; |
747 + } | 746 + } |
748 + | 747 + |
749 + fill_window_c(s); | 748 + fill_window_c(s); |
750 +} | 749 +} |
751 + | 750 + |
752 +local void fill_window_c(s) | 751 +local void fill_window_c(s) |
753 deflate_state *s; | 752 deflate_state *s; |
754 { | 753 { |
755 register unsigned n, m; | 754 register unsigned n, m; |
756 @@ -1818,7 +1864,7 @@ local block_state deflate_fast(s, flush, clas) | 755 @@ -1651,7 +1697,7 @@ local block_state deflate_fast(s, flush) |
757 */ | 756 */ |
758 hash_head = NIL; | 757 hash_head = NIL; |
759 if (s->lookahead >= MIN_MATCH) { | 758 if (s->lookahead >= MIN_MATCH) { |
760 - INSERT_STRING(s, s->strstart, hash_head); | 759 - INSERT_STRING(s, s->strstart, hash_head); |
761 + hash_head = insert_string(s, s->strstart); | 760 + hash_head = insert_string(s, s->strstart); |
762 } | 761 } |
763 | 762 |
764 /* Find the longest match, discarding those <= prev_length. | 763 /* Find the longest match, discarding those <= prev_length. |
765 @@ -1849,7 +1895,7 @@ local block_state deflate_fast(s, flush, clas) | 764 @@ -1682,7 +1728,7 @@ local block_state deflate_fast(s, flush) |
766 s->match_length--; /* string at strstart already in table */ | 765 s->match_length--; /* string at strstart already in table */ |
767 do { | 766 do { |
768 s->strstart++; | 767 s->strstart++; |
769 - INSERT_STRING(s, s->strstart, hash_head); | 768 - INSERT_STRING(s, s->strstart, hash_head); |
770 + hash_head = insert_string(s, s->strstart); | 769 + hash_head = insert_string(s, s->strstart); |
771 /* strstart never exceeds WSIZE-MAX_MATCH, so there are | 770 /* strstart never exceeds WSIZE-MAX_MATCH, so there are |
772 * always MIN_MATCH bytes ahead. | 771 * always MIN_MATCH bytes ahead. |
773 */ | 772 */ |
774 @@ -1934,7 +1980,7 @@ local block_state deflate_slow(s, flush, clas) | 773 @@ -1754,7 +1800,7 @@ local block_state deflate_slow(s, flush) |
775 */ | 774 */ |
776 hash_head = NIL; | 775 hash_head = NIL; |
777 if (s->lookahead >= MIN_MATCH) { | 776 if (s->lookahead >= MIN_MATCH) { |
778 - INSERT_STRING(s, s->strstart, hash_head); | 777 - INSERT_STRING(s, s->strstart, hash_head); |
779 + hash_head = insert_string(s, s->strstart); | 778 + hash_head = insert_string(s, s->strstart); |
780 } | 779 } |
781 | 780 |
782 /* Find the longest match, discarding those <= prev_length. | 781 /* Find the longest match, discarding those <= prev_length. |
783 @@ -2003,7 +2049,7 @@ local block_state deflate_slow(s, flush, clas) | 782 @@ -1805,7 +1851,7 @@ local block_state deflate_slow(s, flush) |
784 s->prev_length -= 2; | 783 s->prev_length -= 2; |
785 do { | 784 do { |
786 if (++s->strstart <= max_insert) { | 785 if (++s->strstart <= max_insert) { |
787 - INSERT_STRING(s, s->strstart, hash_head); | 786 - INSERT_STRING(s, s->strstart, hash_head); |
788 + hash_head = insert_string(s, s->strstart); | 787 + hash_head = insert_string(s, s->strstart); |
789 } | 788 } |
790 } while (--s->prev_length != 0); | 789 } while (--s->prev_length != 0); |
791 s->match_available = 0; | 790 s->match_available = 0; |
792 @@ -2163,3 +2209,37 @@ local block_state deflate_huff(s, flush) | 791 @@ -1965,3 +2011,37 @@ local block_state deflate_huff(s, flush) |
793 FLUSH_BLOCK(s, 0); | 792 FLUSH_BLOCK(s, 0); |
794 return block_done; | 793 return block_done; |
795 } | 794 } |
796 + | 795 + |
797 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will | 796 +/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will |
798 + * use intrinsic without extra params | 797 + * use intrinsic without extra params |
799 + */ | 798 + */ |
800 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) | 799 +local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) |
801 +{ | 800 +{ |
802 + Pos ret; | 801 + Pos ret; |
(...skipping 18 matching lines...) Expand all Loading... |
821 + /* This should never happen */ | 820 + /* This should never happen */ |
822 + assert(0); | 821 + assert(0); |
823 +#endif | 822 +#endif |
824 + | 823 + |
825 + ret = s->head[h & s->hash_mask]; | 824 + ret = s->head[h & s->hash_mask]; |
826 + s->head[h & s->hash_mask] = str; | 825 + s->head[h & s->hash_mask] = str; |
827 + s->prev[str & s->w_mask] = ret; | 826 + s->prev[str & s->w_mask] = ret; |
828 + return ret; | 827 + return ret; |
829 +} | 828 +} |
830 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h | 829 diff --git a/third_party/zlib/deflate.h b/third_party/zlib/deflate.h |
831 index c795034..c61e4ab 100644 | 830 index ce0299edd191..022d9c326ecc 100644 |
832 --- a/third_party/zlib/deflate.h | 831 --- a/third_party/zlib/deflate.h |
833 +++ b/third_party/zlib/deflate.h | 832 +++ b/third_party/zlib/deflate.h |
834 @@ -109,7 +109,7 @@ typedef struct internal_state { | 833 @@ -106,7 +106,7 @@ typedef struct internal_state { |
835 uInt gzindex; /* where in extra, name, or comment */ | 834 uInt gzindex; /* where in extra, name, or comment */ |
836 Byte method; /* can only be DEFLATED */ | 835 Byte method; /* can only be DEFLATED */ |
837 int last_flush; /* value of flush param for previous deflate call */ | 836 int last_flush; /* value of flush param for previous deflate call */ |
838 - | 837 - |
839 + unsigned zalign(16) crc0[4 * 5]; | 838 + unsigned zalign(16) crc0[4 * 5]; |
840 /* used by deflate.c: */ | 839 /* used by deflate.c: */ |
841 | 840 |
842 uInt w_size; /* LZ77 window size (32K by default) */ | 841 uInt w_size; /* LZ77 window size (32K by default) */ |
843 @@ -348,4 +348,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, | 842 @@ -343,4 +343,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, c
harf *buf, |
844 flush = _tr_tally(s, distance, length) | 843 flush = _tr_tally(s, distance, length) |
845 #endif | 844 #endif |
846 | 845 |
847 +/* Functions that are SIMD optimised on x86 */ | 846 +/* Functions that are SIMD optimised on x86 */ |
848 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); | 847 +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); |
849 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, | 848 +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, |
850 + unsigned char* dst, | 849 + unsigned char* dst, |
851 + const unsigned char* src, | 850 + const unsigned char* src, |
852 + long len); | 851 + long len); |
853 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); | 852 +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); |
854 + | 853 + |
855 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); | 854 +void ZLIB_INTERNAL fill_window_sse(deflate_state* s); |
856 + | 855 + |
857 #endif /* DEFLATE_H */ | 856 #endif /* DEFLATE_H */ |
858 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c | 857 diff --git a/third_party/zlib/fill_window_sse.c b/third_party/zlib/fill_window_s
se.c |
859 new file mode 100644 | 858 index e69de29bb2d1..949ccce1ba9c 100644 |
860 index 0000000..949ccce | 859 --- a/third_party/zlib/fill_window_sse.c |
861 --- /dev/null | |
862 +++ b/third_party/zlib/fill_window_sse.c | 860 +++ b/third_party/zlib/fill_window_sse.c |
863 @@ -0,0 +1,175 @@ | 861 @@ -0,0 +1,175 @@ |
864 +/* | 862 +/* |
865 + * Fill Window with SSE2-optimized hash shifting | 863 + * Fill Window with SSE2-optimized hash shifting |
866 + * | 864 + * |
867 + * Copyright (C) 2013 Intel Corporation | 865 + * Copyright (C) 2013 Intel Corporation |
868 + * Authors: | 866 + * Authors: |
869 + * Arjan van de Ven <arjan@linux.intel.com> | 867 + * Arjan van de Ven <arjan@linux.intel.com> |
870 + * Jim Kukunas <james.t.kukunas@linux.intel.com> | 868 + * Jim Kukunas <james.t.kukunas@linux.intel.com> |
871 + * | 869 + * |
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1029 + if (init > s->window_size - s->high_water) | 1027 + if (init > s->window_size - s->high_water) |
1030 + init = s->window_size - s->high_water; | 1028 + init = s->window_size - s->high_water; |
1031 + zmemzero(s->window + s->high_water, (unsigned)init); | 1029 + zmemzero(s->window + s->high_water, (unsigned)init); |
1032 + s->high_water += init; | 1030 + s->high_water += init; |
1033 + } | 1031 + } |
1034 + } | 1032 + } |
1035 + | 1033 + |
1036 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, | 1034 + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, |
1037 + "not enough room for search"); | 1035 + "not enough room for search"); |
1038 +} | 1036 +} |
| 1037 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h |
| 1038 index d7e9f91c5a12..10aca44b29a4 100644 |
| 1039 --- a/third_party/zlib/mozzconf.h |
| 1040 +++ b/third_party/zlib/mozzconf.h |
| 1041 @@ -169,6 +169,14 @@ |
| 1042 #define inflateResetKeep MOZ_Z_inflateResetKeep |
| 1043 #define gzopen_w MOZ_Z_gzopen_w |
| 1044 |
| 1045 +/* Chromium-specific modifications */ |
| 1046 +#define copy_with_crc MOZ_Z__copy_with_crc |
| 1047 +#define crc_finalize MOZ_Z__crc_finalize |
| 1048 +#define crc_reset MOZ_Z__crc_reset |
| 1049 +// read_buf used to be local, but this was changed in simd.patch. |
| 1050 +#define read_buf MOZ_Z__read_buf |
| 1051 +#define x86_check_features MOZ_Z__x86_check_features |
| 1052 + |
| 1053 /* Mangle Byte types except on Mac. */ |
| 1054 #if !defined(__MACTYPES__) |
| 1055 #define Byte MOZ_Z_Byte |
1039 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c | 1056 diff --git a/third_party/zlib/simd_stub.c b/third_party/zlib/simd_stub.c |
1040 new file mode 100644 | 1057 index e69de29bb2d1..796f1f63f690 100644 |
1041 index 0000000..796f1f6 | 1058 --- a/third_party/zlib/simd_stub.c |
1042 --- /dev/null | |
1043 +++ b/third_party/zlib/simd_stub.c | 1059 +++ b/third_party/zlib/simd_stub.c |
1044 @@ -0,0 +1,35 @@ | 1060 @@ -0,0 +1,35 @@ |
1045 +/* simd_stub.c -- stub implementations | 1061 +/* simd_stub.c -- stub implementations |
1046 +* Copyright (C) 2014 Intel Corporation | 1062 +* Copyright (C) 2014 Intel Corporation |
1047 +* For conditions of distribution and use, see copyright notice in zlib.h | 1063 +* For conditions of distribution and use, see copyright notice in zlib.h |
1048 +*/ | 1064 +*/ |
1049 +#include <assert.h> | 1065 +#include <assert.h> |
1050 + | 1066 + |
1051 +#include "deflate.h" | 1067 +#include "deflate.h" |
1052 +#include "x86.h" | 1068 +#include "x86.h" |
(...skipping 18 matching lines...) Expand all Loading... |
1071 + | 1087 + |
1072 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) | 1088 +void ZLIB_INTERNAL fill_window_sse(deflate_state *s) |
1073 +{ | 1089 +{ |
1074 + assert(0); | 1090 + assert(0); |
1075 +} | 1091 +} |
1076 + | 1092 + |
1077 +void x86_check_features(void) | 1093 +void x86_check_features(void) |
1078 +{ | 1094 +{ |
1079 +} | 1095 +} |
1080 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c | 1096 diff --git a/third_party/zlib/x86.c b/third_party/zlib/x86.c |
1081 new file mode 100644 | 1097 index e69de29bb2d1..e6532fd10ddb 100644 |
1082 index 0000000..e6532fd | 1098 --- a/third_party/zlib/x86.c |
1083 --- /dev/null | |
1084 +++ b/third_party/zlib/x86.c | 1099 +++ b/third_party/zlib/x86.c |
1085 @@ -0,0 +1,91 @@ | 1100 @@ -0,0 +1,91 @@ |
1086 +/* | 1101 +/* |
1087 + * x86 feature check | 1102 + * x86 feature check |
1088 + * | 1103 + * |
1089 + * Copyright (C) 2013 Intel Corporation. All rights reserved. | 1104 + * Copyright (C) 2013 Intel Corporation. All rights reserved. |
1090 + * Author: | 1105 + * Author: |
1091 + * Jim Kukunas | 1106 + * Jim Kukunas |
1092 + * | 1107 + * |
1093 + * For conditions of distribution and use, see copyright notice in zlib.h | 1108 + * For conditions of distribution and use, see copyright notice in zlib.h |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1168 + x86_cpu_has_sse42= regs[2] & 0x100000; | 1183 + x86_cpu_has_sse42= regs[2] & 0x100000; |
1169 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; | 1184 + x86_cpu_has_pclmulqdq = regs[2] & 0x2; |
1170 + | 1185 + |
1171 + x86_cpu_enable_simd = x86_cpu_has_sse2 && | 1186 + x86_cpu_enable_simd = x86_cpu_has_sse2 && |
1172 + x86_cpu_has_sse42 && | 1187 + x86_cpu_has_sse42 && |
1173 + x86_cpu_has_pclmulqdq; | 1188 + x86_cpu_has_pclmulqdq; |
1174 + return TRUE; | 1189 + return TRUE; |
1175 +} | 1190 +} |
1176 +#endif /* _MSC_VER */ | 1191 +#endif /* _MSC_VER */ |
1177 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h | 1192 diff --git a/third_party/zlib/x86.h b/third_party/zlib/x86.h |
1178 new file mode 100644 | 1193 index e69de29bb2d1..ebcf10ab09d2 100644 |
1179 index 0000000..ac3d180 | 1194 --- a/third_party/zlib/x86.h |
1180 --- /dev/null | |
1181 +++ b/third_party/zlib/x86.h | 1195 +++ b/third_party/zlib/x86.h |
1182 @@ -0,0 +1,15 @@ | 1196 @@ -0,0 +1,15 @@ |
1183 +/* x86.h -- check for x86 CPU features | 1197 +/* x86.h -- check for x86 CPU features |
1184 +* Copyright (C) 2013 Intel Corporation Jim Kukunas | 1198 +* Copyright (C) 2013 Intel Corporation Jim Kukunas |
1185 +* For conditions of distribution and use, see copyright notice in zlib.h | 1199 +* For conditions of distribution and use, see copyright notice in zlib.h |
1186 +*/ | 1200 +*/ |
1187 + | 1201 + |
1188 +#ifndef X86_H | 1202 +#ifndef X86_H |
1189 +#define X86_H | 1203 +#define X86_H |
1190 + | 1204 + |
1191 +#include "zlib.h" | 1205 +#include "zlib.h" |
1192 + | 1206 + |
1193 +extern int x86_cpu_enable_simd; | 1207 +extern int x86_cpu_enable_simd; |
1194 + | 1208 + |
1195 +void x86_check_features(void); | 1209 +void x86_check_features(void); |
1196 + | 1210 + |
1197 +#endif /* X86_H */ | 1211 +#endif /* X86_H */ |
1198 diff --git a/third_party/zlib/mozzconf.h b/third_party/zlib/mozzconf.h | |
1199 index 06918bc..10aca44 100644 | |
1200 --- a/third_party/zlib/mozzconf.h | |
1201 +++ b/third_party/zlib/mozzconf.h | |
1202 @@ -169,6 +169,14 @@ | |
1203 #define inflateResetKeep MOZ_Z_inflateResetKeep | |
1204 #define gzopen_w MOZ_Z_gzopen_w | |
1205 | |
1206 +/* Chromium-specific modifications */ | |
1207 +#define copy_with_crc MOZ_Z__copy_with_crc | |
1208 +#define crc_finalize MOZ_Z__crc_finalize | |
1209 +#define crc_reset MOZ_Z__crc_reset | |
1210 +// read_buf used to be local, but this was changed in simd.patch. | |
1211 +#define read_buf MOZ_Z__read_buf | |
1212 +#define x86_check_features MOZ_Z__x86_check_features | |
1213 + | |
1214 /* Mangle Byte types except on Mac. */ | |
1215 #if !defined(__MACTYPES__) | |
1216 #define Byte MOZ_Z_Byte | |
1217 -- | |
1218 2.7.4 | |
1219 | |
OLD | NEW |