Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Unified Diff: third_party/zlib/deflate.c

Issue 552123005: Integrate SIMD optimisations for zlib (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix issues in fallback (non-SIMD) code Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/zlib/deflate.c
diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c
index 8043e5bd30945f03967ede05b09d26375ecb533b..bd198a3ef09ab76ee196873acac3a85bfceffe2b 100644
--- a/third_party/zlib/deflate.c
+++ b/third_party/zlib/deflate.c
@@ -85,7 +85,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush));
local void lm_init OF((deflate_state *s));
local void putShortMSB OF((deflate_state *s, uInt b));
local void flush_pending OF((z_streamp strm));
-local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
+ZLIB_INTERNAL int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
agl 2014/09/23 21:41:40: you need to adjust the spacing here so that the li… [review comment truncated in this view]
#ifdef ASMV
void match_init OF((void)); /* asm code initialization */
uInt longest_match OF((deflate_state *s, IPos cur_match, int clas));
@@ -98,6 +98,10 @@ local void check_match OF((deflate_state *s, IPos start, IPos match,
int length));
#endif
+extern void crc_reset(deflate_state *const s);
+extern void crc_finalize(deflate_state *const s);
+extern void copy_with_crc(z_streamp strm, Bytef *dst, long size);
+
/* ===========================================================================
* Local data
*/
@@ -156,14 +160,12 @@ local const config configuration_table[10] = {
struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
#endif
-/* ===========================================================================
- * Update a hash value with the given input byte
- * IN assertion: all calls to UPDATE_HASH are made with consecutive
- * input characters, so that a running hash key can be computed from the
- * previous key instead of complete recalculation each time.
- */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+#ifdef _MSC_VER
+#define INLINE __inline
+#else
+#define INLINE inline
+#endif
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
@@ -175,17 +177,56 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
+#ifdef USE_SSE4_2_CRC_HASH
+#include "x86.h"
+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
+{
+ Pos ret;
+ unsigned *ip, val, h = 0;
+
+ ip = (unsigned *)&s->window[str];
+ val = *ip;
+
+ if (s->level >= 6)
+ val &= 0xFFFFFF;
+
+ __asm__ __volatile__ (
+ "crc32 %1,%0\n\t"
+ : "+r" (h)
+ : "r" (val)
+ );
+
+ ret = s->head[h & s->hash_mask];
+ s->head[h & s->hash_mask] = str;
+ s->prev[str & s->w_mask] = ret;
+ return ret;
+}
+#endif
+
+local INLINE Pos insert_string_c(deflate_state *const s, const Pos str)
+{
+ Pos ret;
+
+ UPDATE_HASH(s, s->ins_h, str);
#ifdef FASTEST
-#define INSERT_STRING(s, str, match_head) \
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
- match_head = s->head[s->ins_h], \
- s->head[s->ins_h] = (Pos)(str))
+ ret = s->head[s->ins_h];
#else
-#define INSERT_STRING(s, str, match_head) \
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
- match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
- s->head[s->ins_h] = (Pos)(str))
+ ret = s->prev[str & s->w_mask] = s->head[s->ins_h];
#endif
+ s->head[s->ins_h] = str;
+
+ return ret;
+}
+
+local INLINE Pos insert_string(deflate_state *const s, const Pos str)
+{
+#ifdef USE_SSE4_2_CRC_HASH
+ if (x86_cpu_has_sse42)
+ return insert_string_sse(s, str);
+#endif
+ return insert_string_c(s, str);
+}
+
/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
@@ -195,6 +236,10 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
s->head[s->hash_size-1] = NIL; \
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+#ifdef CHECK_SSE2
+#include "x86.h"
+#endif
+
/* ========================================================================= */
int ZEXPORT deflateInit_(strm, level, version, stream_size)
z_streamp strm;
@@ -219,6 +264,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
const char *version;
int stream_size;
{
+ unsigned window_padding = 0;
deflate_state *s;
int wrap = 1;
static const char my_version[] = ZLIB_VERSION;
@@ -228,6 +274,10 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
* output size for (length,distance) codes is <= 24 bits.
*/
+#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH)
+ x86_check_features();
+#endif
+
if (version == Z_NULL || version[0] != my_version[0] ||
stream_size != sizeof(z_stream)) {
return Z_VERSION_ERROR;
@@ -274,12 +324,22 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
s->w_size = 1 << s->w_bits;
s->w_mask = s->w_size - 1;
+#ifdef USE_SSE4_2_CRC_HASH
+ if (x86_cpu_has_sse42)
+ s->hash_bits = 15;
+ else
+#endif
s->hash_bits = memLevel + 7;
+
s->hash_size = 1 << s->hash_bits;
s->hash_mask = s->hash_size - 1;
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+#ifdef HAVE_PCLMULQDQ
+ window_padding = 8;
+#endif
+
+ s->window = (Bytef *) ZALLOC(strm, s->w_size + window_padding, 2*sizeof(Byte));
s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
s->class_bitmap = NULL;
@@ -320,7 +380,6 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
deflate_state *s;
uInt length = dictLength;
uInt n;
- IPos hash_head = 0;
if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
strm->state->wrap == 2 ||
@@ -345,11 +404,10 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
* call of fill_window.
*/
s->ins_h = s->window[0];
- UPDATE_HASH(s, s->ins_h, s->window[1]);
+ UPDATE_HASH(s, s->ins_h, 2 - MIN_MATCH);
for (n = 0; n <= length - MIN_MATCH; n++) {
- INSERT_STRING(s, n, hash_head);
+ insert_string(s, n);
}
- if (hash_head) hash_head = 0; /* to make compiler happy */
return Z_OK;
}
@@ -613,7 +671,7 @@ int ZEXPORT deflate (strm, flush)
if (s->status == INIT_STATE) {
#ifdef GZIP
if (s->wrap == 2) {
- strm->adler = crc32(0L, Z_NULL, 0);
+ crc_reset(s);
put_byte(s, 31);
put_byte(s, 139);
put_byte(s, 8);
@@ -891,6 +949,7 @@ int ZEXPORT deflate (strm, flush)
/* Write the trailer */
#ifdef GZIP
if (s->wrap == 2) {
+ crc_finalize(s);
put_byte(s, (Byte)(strm->adler & 0xff));
put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
@@ -1013,7 +1072,7 @@ int ZEXPORT deflateCopy (dest, source)
* allocating a large strm->next_in buffer and copying from it.
* (See also flush_pending()).
*/
-local int read_buf(strm, buf, size)
+ZLIB_INTERNAL int read_buf(strm, buf, size)
z_streamp strm;
Bytef *buf;
unsigned size;
@@ -1025,15 +1084,17 @@ local int read_buf(strm, buf, size)
strm->avail_in -= len;
- if (strm->state->wrap == 1) {
- strm->adler = adler32(strm->adler, strm->next_in, len);
- }
#ifdef GZIP
- else if (strm->state->wrap == 2) {
- strm->adler = crc32(strm->adler, strm->next_in, len);
+ if (strm->state->wrap == 2) {
+ copy_with_crc(strm, buf, len);
}
+ else
#endif
- zmemcpy(buf, strm->next_in, len);
+ {
+ zmemcpy(buf, strm->next_in, len);
+ if (strm->state->wrap == 1)
+ strm->adler = adler32(strm->adler, buf, len);
+ }
strm->next_in += len;
strm->total_in += len;
@@ -1445,10 +1506,31 @@ local void check_match(s, start, match, length)
* performed for at least two bytes (required for the zip translate_eol
* option -- not supported here).
*/
-local void fill_window(s)
+#ifdef HAVE_SSE2
+extern void fill_window_sse(deflate_state *s);
+#endif
+local void fill_window_c(deflate_state *s);
+
+local void fill_window(deflate_state *s)
+{
+#ifdef HAVE_SSE2
+#ifdef CHECK_SSE2
+ if (x86_cpu_has_sse2) {
+#endif
+ fill_window_sse(s);
+ return;
+#ifdef CHECK_SSE2
+ }
+#endif
+#endif
+
+ fill_window_c(s);
+}
+
+local void fill_window_c(s)
deflate_state *s;
{
- register unsigned n, m;
+ register unsigned n;
register Posf *p;
unsigned more; /* Amount of free space at the end of the window. */
uInt wsize = s->w_size;
@@ -1488,6 +1570,7 @@ local void fill_window(s)
n = s->hash_size;
p = &s->head[n];
do {
+ unsigned m;
m = *--p;
*p = (Pos)(m >= wsize ? m-wsize : NIL);
} while (--n);
@@ -1496,6 +1579,7 @@ local void fill_window(s)
#ifndef FASTEST
p = &s->prev[n];
do {
+ unsigned m;
m = *--p;
*p = (Pos)(m >= wsize ? m-wsize : NIL);
/* If n is not on any hash chain, prev[n] is garbage but
@@ -1544,7 +1628,7 @@ local void fill_window(s)
/* Initialize the hash value now that we have some input: */
if (s->lookahead >= MIN_MATCH) {
s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+ UPDATE_HASH(s, s->ins_h, s->strstart+1 - (MIN_MATCH - 1));
#if MIN_MATCH != 3
Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
@@ -1711,7 +1795,7 @@ local block_state deflate_fast(s, flush, clas)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
+ hash_head = insert_string(s, s->strstart);
}
/* Find the longest match, discarding those <= prev_length.
@@ -1742,7 +1826,7 @@ local block_state deflate_fast(s, flush, clas)
s->match_length--; /* string at strstart already in table */
do {
s->strstart++;
- INSERT_STRING(s, s->strstart, hash_head);
+ hash_head = insert_string(s, s->strstart);
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
@@ -1754,7 +1838,7 @@ local block_state deflate_fast(s, flush, clas)
s->strstart += s->match_length;
s->match_length = 0;
s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+ UPDATE_HASH(s, s->ins_h, s->strstart+2 - (MIN_MATCH));
#if MIN_MATCH != 3
Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
@@ -1821,7 +1905,7 @@ local block_state deflate_slow(s, flush, clas)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
+ hash_head = insert_string(s, s->strstart);
}
/* Find the longest match, discarding those <= prev_length.
@@ -1890,7 +1974,7 @@ local block_state deflate_slow(s, flush, clas)
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
- INSERT_STRING(s, s->strstart, hash_head);
+ hash_head = insert_string(s, s->strstart);
}
} while (--s->prev_length != 0);
s->match_available = 0;

Powered by Google App Engine
This is Rietveld 408576698