OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright 2016 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #ifndef SkChecksum_opts_DEFINED | |
9 #define SkChecksum_opts_DEFINED | |
10 | |
11 #include "SkChecksum.h" | |
12 #include "SkTypes.h" | |
13 | |
14 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 | |
15 #include <immintrin.h> | |
16 #endif | |
17 | |
18 // TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 | |
19 // TODO: 32-bit x86 version: same sort of idea using only _mm_crc32_u32() and smaller | |
20 | |
21 namespace SK_OPTS_NS { | |
22 | |
23 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64)) | |
// Reads a T out of the sizeof(T) bytes starting at src.
// The bytes are copied with memcpy() rather than read through a T*, so src
// does not have to be aligned for T.
template <typename T>
static inline T unaligned_load(const uint8_t* src) {
    T loaded;
    memcpy(&loaded, src, sizeof(T));
    return loaded;
}
30 | |
31 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) { | |
32 auto data = (const uint8_t*)vdata; | |
33 | |
34 // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while. | |
35 uint64_t hash = seed; | |
36 if (bytes >= 24) { | |
37 // We'll create 3 independent hashes, each using _mm_crc32_u64() | |
38 // to hash 8 bytes per step. Both 3 and independent are important: | |
39 // we can execute 3 of these instructions in parallel on a single co re. | |
40 uint64_t a = hash, | |
41 b = hash, | |
42 c = hash; | |
43 size_t steps = bytes/24; | |
44 while (steps --> 0) { | |
egdaniel
2016/08/05 19:45:00
how kosher are "goes to" operators in skia?
mtklein
2016/08/05 20:16:37
I personally find it the clearest way to express "
| |
45 a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0)); | |
46 b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8)); | |
47 c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16)); | |
48 data += 24; | |
49 } | |
50 bytes %= 24; | |
51 hash = a^b^c; | |
52 } | |
53 | |
54 SkASSERT(bytes < 24); | |
55 if (bytes >= 16) { | |
56 hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); | |
57 bytes -= 8; | |
58 data += 8; | |
59 } | |
60 | |
61 SkASSERT(bytes < 16); | |
62 if (bytes & 8) { | |
63 hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); | |
64 data += 8; | |
65 } | |
66 | |
67 // The remainder of these _mm_crc32_u*() operate on a 32-bit register. | |
68 // We don't lose anything here: only the bottom 32-bits were populated. | |
69 auto hash32 = (uint32_t)hash; | |
70 | |
71 if (bytes & 4) { | |
72 hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data)); | |
73 data += 4; | |
74 } | |
75 if (bytes & 2) { | |
76 hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data)); | |
77 data += 2; | |
78 } | |
79 if (bytes & 1) { | |
80 hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data)); | |
81 } | |
82 return hash32; | |
83 } | |
84 | |
85 #else | |
86 static uint32_t hash_fn(const void* data, size_t bytes, uint32_t seed) { | |
87 // This is Murmur3. | |
88 | |
89 // Use may_alias to remind the compiler we're intentionally violating st rict aliasing, | |
90 // and so not to apply strict-aliasing-based optimizations. | |
91 typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t; | |
92 typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t; | |
93 | |
94 // Handle 4 bytes at a time while possible. | |
95 const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data; | |
96 const size_t words = bytes/4; | |
97 uint32_t hash = seed; | |
98 for (size_t i = 0; i < words; i++) { | |
99 uint32_t k = safe_data[i]; | |
100 k *= 0xcc9e2d51; | |
101 k = (k << 15) | (k >> 17); | |
102 k *= 0x1b873593; | |
103 | |
104 hash ^= k; | |
105 hash = (hash << 13) | (hash >> 19); | |
106 hash *= 5; | |
107 hash += 0xe6546b64; | |
108 } | |
109 | |
110 // Handle last 0-3 bytes. | |
111 const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words); | |
112 uint32_t k = 0; | |
113 switch (bytes & 3) { | |
114 case 3: k ^= safe_tail[2] << 16; | |
115 case 2: k ^= safe_tail[1] << 8; | |
116 case 1: k ^= safe_tail[0] << 0; | |
117 k *= 0xcc9e2d51; | |
118 k = (k << 15) | (k >> 17); | |
119 k *= 0x1b873593; | |
120 hash ^= k; | |
121 } | |
122 | |
123 hash ^= bytes; | |
124 return SkChecksum::Mix(hash); | |
125 } | |
126 #endif | |
127 | |
128 } // namespace SK_OPTS_NS | |
129 | |
130 #endif//SkChecksum_opts_DEFINED | |
OLD | NEW |