OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2016 Google Inc. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. |
| 6 */ |
| 7 |
| 8 #ifndef SkChecksum_opts_DEFINED |
| 9 #define SkChecksum_opts_DEFINED |
| 10 |
#include "SkChecksum.h"
#include "SkTypes.h"

#include <string.h>

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    #include <immintrin.h>
#endif
| 17 |
| 18 // TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 |
| 19 // TODO: 32-bit x86 version: same sort of idea using only _mm_crc32_u32() and smaller operand sizes |
| 20 |
| 21 namespace SK_OPTS_NS { |
| 22 |
| 23 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || define
d(_M_X64)) |
// Reads a T out of the sizeof(T) bytes starting at `from`.
// The bytes are copied with memcpy() instead of dereferencing a casted pointer,
// so `from` does not need to be aligned for T.
template <typename T>
static inline T unaligned_load(const uint8_t* from) {
    T result;
    memcpy(&result, from, sizeof(T));
    return result;
}
| 30 |
| 31 static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) { |
| 32 auto data = (const uint8_t*)vdata; |
| 33 |
| 34 // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for
a while. |
| 35 uint64_t hash = seed; |
| 36 if (bytes >= 24) { |
| 37 // We'll create 3 independent hashes, each using _mm_crc32_u64() |
| 38 // to hash 8 bytes per step. Both 3 and independent are important: |
| 39 // we can execute 3 of these instructions in parallel on a single co
re. |
| 40 uint64_t a = hash, |
| 41 b = hash, |
| 42 c = hash; |
| 43 size_t steps = bytes/24; |
| 44 while (steps --> 0) { |
| 45 a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0)); |
| 46 b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8)); |
| 47 c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16)); |
| 48 data += 24; |
| 49 } |
| 50 bytes %= 24; |
| 51 hash = a^b^c; |
| 52 } |
| 53 |
| 54 SkASSERT(bytes < 24); |
| 55 if (bytes >= 16) { |
| 56 hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); |
| 57 bytes -= 8; |
| 58 data += 8; |
| 59 } |
| 60 |
| 61 SkASSERT(bytes < 16); |
| 62 if (bytes & 8) { |
| 63 hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); |
| 64 data += 8; |
| 65 } |
| 66 |
| 67 // The remainder of these _mm_crc32_u*() operate on a 32-bit register. |
| 68 // We don't lose anything here: only the bottom 32-bits were populated. |
| 69 auto hash32 = (uint32_t)hash; |
| 70 |
| 71 if (bytes & 4) { |
| 72 hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data)); |
| 73 data += 4; |
| 74 } |
| 75 if (bytes & 2) { |
| 76 hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data)); |
| 77 data += 2; |
| 78 } |
| 79 if (bytes & 1) { |
| 80 hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data)); |
| 81 } |
| 82 return hash32; |
| 83 } |
| 84 |
| 85 #else |
| 86 static uint32_t hash_fn(const void* data, size_t bytes, uint32_t seed) { |
| 87 // This is Murmur3. |
| 88 |
| 89 // Use may_alias to remind the compiler we're intentionally violating st
rict aliasing, |
| 90 // and so not to apply strict-aliasing-based optimizations. |
| 91 typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t; |
| 92 typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t; |
| 93 |
| 94 // Handle 4 bytes at a time while possible. |
| 95 const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data; |
| 96 const size_t words = bytes/4; |
| 97 uint32_t hash = seed; |
| 98 for (size_t i = 0; i < words; i++) { |
| 99 uint32_t k = safe_data[i]; |
| 100 k *= 0xcc9e2d51; |
| 101 k = (k << 15) | (k >> 17); |
| 102 k *= 0x1b873593; |
| 103 |
| 104 hash ^= k; |
| 105 hash = (hash << 13) | (hash >> 19); |
| 106 hash *= 5; |
| 107 hash += 0xe6546b64; |
| 108 } |
| 109 |
| 110 // Handle last 0-3 bytes. |
| 111 const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words); |
| 112 uint32_t k = 0; |
| 113 switch (bytes & 3) { |
| 114 case 3: k ^= safe_tail[2] << 16; |
| 115 case 2: k ^= safe_tail[1] << 8; |
| 116 case 1: k ^= safe_tail[0] << 0; |
| 117 k *= 0xcc9e2d51; |
| 118 k = (k << 15) | (k >> 17); |
| 119 k *= 0x1b873593; |
| 120 hash ^= k; |
| 121 } |
| 122 |
| 123 hash ^= bytes; |
| 124 return SkChecksum::Mix(hash); |
| 125 } |
| 126 #endif |
| 127 |
| 128 } // namespace SK_OPTS_NS |
| 129 |
| 130 #endif//SkChecksum_opts_DEFINED |
OLD | NEW |