OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkChecksum_opts_DEFINED | 8 #ifndef SkChecksum_opts_DEFINED |
9 #define SkChecksum_opts_DEFINED | 9 #define SkChecksum_opts_DEFINED |
10 | 10 |
11 #include "SkChecksum.h" | 11 #include "SkChecksum.h" |
12 #include "SkTypes.h" | 12 #include "SkTypes.h" |
13 | 13 |
14 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 | 14 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
15 #include <immintrin.h> | 15 #include <immintrin.h> |
| 16 #elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32) |
| 17 #include <arm_acle.h> |
16 #endif | 18 #endif |
17 | 19 |
18 // TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 | |
19 | |
20 namespace SK_OPTS_NS { | 20 namespace SK_OPTS_NS { |
21 | 21 |
// Reads a value of type T out of the bytes starting at src.
// The bytes are copied into a local T with memcpy instead of being read
// through a casted pointer, so src does not need to be aligned for T.
// The caller must make sizeof(T) bytes readable at src.
template <typename T>
static inline T unaligned_load(const uint8_t* src) {
    T result;
    memcpy(&result, src, sizeof(T));
    return result;
}
28 | 28 |
29 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || define
d(_M_X64)) | 29 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || define
d(_M_X64)) |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
120 if (bytes & 2) { | 120 if (bytes & 2) { |
121 hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data)); | 121 hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data)); |
122 data += 2; | 122 data += 2; |
123 } | 123 } |
124 if (bytes & 1) { | 124 if (bytes & 1) { |
125 hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data)); | 125 hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data)); |
126 } | 126 } |
127 return hash; | 127 return hash; |
128 } | 128 } |
129 | 129 |
| 130 #elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32) |
| 131 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |
| 132 auto data = (const uint8_t*)vdata; |
| 133 if (bytes >= 24) { |
| 134 uint32_t a = hash, |
| 135 b = hash, |
| 136 c = hash; |
| 137 size_t steps = bytes/24; |
| 138 while (steps --> 0) { |
| 139 a = __crc32d(a, unaligned_load<uint64_t>(data+ 0)); |
| 140 b = __crc32d(b, unaligned_load<uint64_t>(data+ 8)); |
| 141 c = __crc32d(c, unaligned_load<uint64_t>(data+16)); |
| 142 data += 24; |
| 143 } |
| 144 bytes %= 24; |
| 145 hash = a^b^c; |
| 146 } |
| 147 |
| 148 SkASSERT(bytes < 24); |
| 149 if (bytes >= 16) { |
| 150 hash = __crc32d(hash, unaligned_load<uint64_t>(data)); |
| 151 bytes -= 8; |
| 152 data += 8; |
| 153 } |
| 154 |
| 155 SkASSERT(bytes < 16); |
| 156 if (bytes & 8) { |
| 157 hash = __crc32d(hash, unaligned_load<uint64_t>(data)); |
| 158 data += 8; |
| 159 } |
| 160 if (bytes & 4) { |
| 161 hash = __crc32w(hash, unaligned_load<uint32_t>(data)); |
| 162 data += 4; |
| 163 } |
| 164 if (bytes & 2) { |
| 165 hash = __crc32h(hash, unaligned_load<uint16_t>(data)); |
| 166 data += 2; |
| 167 } |
| 168 if (bytes & 1) { |
| 169 hash = __crc32b(hash, unaligned_load<uint8_t>(data)); |
| 170 } |
| 171 return hash; |
| 172 } |
| 173 |
130 #else | 174 #else |
131 // This is Murmur3. | 175 // This is Murmur3. |
132 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { | 176 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |
133 auto data = (const uint8_t*)vdata; | 177 auto data = (const uint8_t*)vdata; |
134 | 178 |
135 size_t original_bytes = bytes; | 179 size_t original_bytes = bytes; |
136 | 180 |
137 // Handle 4 bytes at a time while possible. | 181 // Handle 4 bytes at a time while possible. |
138 while (bytes >= 4) { | 182 while (bytes >= 4) { |
139 uint32_t k = unaligned_load<uint32_t>(data); | 183 uint32_t k = unaligned_load<uint32_t>(data); |
(...skipping 23 matching lines...) Expand all Loading... |
163 } | 207 } |
164 | 208 |
165 hash ^= original_bytes; | 209 hash ^= original_bytes; |
166 return SkChecksum::Mix(hash); | 210 return SkChecksum::Mix(hash); |
167 } | 211 } |
168 #endif | 212 #endif |
169 | 213 |
170 } // namespace SK_OPTS_NS | 214 } // namespace SK_OPTS_NS |
171 | 215 |
172 #endif//SkChecksum_opts_DEFINED | 216 #endif//SkChecksum_opts_DEFINED |
OLD | NEW |