Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(678)

Side by Side Diff: src/opts/SkChecksum_opts.h

Issue 2251773002: 32-bit fast hash, tidy up murmur3 a bit (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkChecksum_opts_DEFINED 8 #ifndef SkChecksum_opts_DEFINED
9 #define SkChecksum_opts_DEFINED 9 #define SkChecksum_opts_DEFINED
10 10
11 #include "SkChecksum.h" 11 #include "SkChecksum.h"
12 #include "SkTypes.h" 12 #include "SkTypes.h"
13 13
14 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 14 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
15 #include <immintrin.h> 15 #include <immintrin.h>
16 #endif 16 #endif
17 17
18 // TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 18 // TODO: ARMv8 has optional CRC instructions similar to SSE 4.2
19 // TODO: 32-bit x86 version: same sort of idea using only _mm_crc32_u32() and sm aller
20 19
21 namespace SK_OPTS_NS { 20 namespace SK_OPTS_NS {
22 21
22 template <typename T>
23 static inline T unaligned_load(const uint8_t* src) {
24 T val;
25 memcpy(&val, src, sizeof(val));
26 return val;
27 }
28
23 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || define d(_M_X64)) 29 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || define d(_M_X64))
24 template <typename T> 30 // This is not a CRC32. It's Just A Hash that uses those instructions becau se they're fast.
25 static inline T unaligned_load(const uint8_t* src) {
26 T val;
27 memcpy(&val, src, sizeof(val));
28 return val;
29 }
30
31 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) { 31 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
32 auto data = (const uint8_t*)vdata; 32 auto data = (const uint8_t*)vdata;
33 33
34 // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while. 34 // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
35 uint64_t hash = seed; 35 uint64_t hash = seed;
36 if (bytes >= 24) { 36 if (bytes >= 24) {
37 // We'll create 3 independent hashes, each using _mm_crc32_u64() 37 // We'll create 3 independent hashes, each using _mm_crc32_u64()
38 // to hash 8 bytes per step. Both 3 and independent are important: 38 // to hash 8 bytes per step. Both 3 and independent are important:
39 // we can execute 3 of these instructions in parallel on a single co re. 39 // we can execute 3 of these instructions in parallel on a single co re.
40 uint64_t a = hash, 40 uint64_t a = hash,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 if (bytes & 2) { 75 if (bytes & 2) {
76 hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data)); 76 hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data));
77 data += 2; 77 data += 2;
78 } 78 }
79 if (bytes & 1) { 79 if (bytes & 1) {
80 hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data)); 80 hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data));
81 } 81 }
82 return hash32; 82 return hash32;
83 } 83 }
84 84
85 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
86 // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
87 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
88 auto data = (const uint8_t*)vdata;
89
90 if (bytes >= 12) {
91 // We'll create 3 independent hashes, each using _mm_crc32_u32()
92 // to hash 4 bytes per step. Both 3 and independent are important:
93 // we can execute 3 of these instructions in parallel on a single co re.
94 uint32_t a = hash,
95 b = hash,
96 c = hash;
97 size_t steps = bytes/12;
98 while (steps --> 0) {
99 a = _mm_crc32_u32(a, unaligned_load<uint32_t>(data+0));
100 b = _mm_crc32_u32(b, unaligned_load<uint32_t>(data+4));
101 c = _mm_crc32_u32(c, unaligned_load<uint32_t>(data+8));
102 data += 12;
103 }
104 bytes %= 12;
105 hash = a^b^c;
106 }
107
108 SkASSERT(bytes < 12);
109 if (bytes >= 8) {
110 hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
111 bytes -= 4;
112 data += 4;
113 }
114
115 SkASSERT(bytes < 8);
116 if (bytes & 4) {
117 hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
118 data += 4;
119 }
120 if (bytes & 2) {
121 hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data));
122 data += 2;
123 }
124 if (bytes & 1) {
125 hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data));
126 }
127 return hash;
128 }
129
85 #else 130 #else
86 static uint32_t hash_fn(const void* data, size_t bytes, uint32_t seed) { 131 // This is Murmur3.
87 // This is Murmur3. 132 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
133 auto data = (const uint8_t*)vdata;
88 134
89 // Use may_alias to remind the compiler we're intentionally violating st rict aliasing, 135 size_t original_bytes = bytes;
90 // and so not to apply strict-aliasing-based optimizations.
91 typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t;
92 typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t;
93 136
94 // Handle 4 bytes at a time while possible. 137 // Handle 4 bytes at a time while possible.
95 const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data; 138 while (bytes >= 4) {
96 const size_t words = bytes/4; 139 uint32_t k = unaligned_load<uint32_t>(data);
97 uint32_t hash = seed;
98 for (size_t i = 0; i < words; i++) {
99 uint32_t k = safe_data[i];
100 k *= 0xcc9e2d51; 140 k *= 0xcc9e2d51;
101 k = (k << 15) | (k >> 17); 141 k = (k << 15) | (k >> 17);
102 k *= 0x1b873593; 142 k *= 0x1b873593;
103 143
104 hash ^= k; 144 hash ^= k;
105 hash = (hash << 13) | (hash >> 19); 145 hash = (hash << 13) | (hash >> 19);
106 hash *= 5; 146 hash *= 5;
107 hash += 0xe6546b64; 147 hash += 0xe6546b64;
148
149 bytes -= 4;
150 data += 4;
108 } 151 }
109 152
110 // Handle last 0-3 bytes. 153 // Handle last 0-3 bytes.
111 const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words);
112 uint32_t k = 0; 154 uint32_t k = 0;
113 switch (bytes & 3) { 155 switch (bytes & 3) {
114 case 3: k ^= safe_tail[2] << 16; 156 case 3: k ^= data[2] << 16;
115 case 2: k ^= safe_tail[1] << 8; 157 case 2: k ^= data[1] << 8;
116 case 1: k ^= safe_tail[0] << 0; 158 case 1: k ^= data[0] << 0;
117 k *= 0xcc9e2d51; 159 k *= 0xcc9e2d51;
118 k = (k << 15) | (k >> 17); 160 k = (k << 15) | (k >> 17);
119 k *= 0x1b873593; 161 k *= 0x1b873593;
120 hash ^= k; 162 hash ^= k;
121 } 163 }
122 164
123 hash ^= bytes; 165 hash ^= original_bytes;
124 return SkChecksum::Mix(hash); 166 return SkChecksum::Mix(hash);
125 } 167 }
126 #endif 168 #endif
127 169
128 } // namespace SK_OPTS_NS 170 } // namespace SK_OPTS_NS
129 171
130 #endif//SkChecksum_opts_DEFINED 172 #endif//SkChecksum_opts_DEFINED
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698