Index: src/opts/SkChecksum_opts.h |
diff --git a/src/opts/SkChecksum_opts.h b/src/opts/SkChecksum_opts.h |
index 07fdfaab65165198f152e6ad24ce594dfe6d094e..4bcd9b1c358f0dda5275f2a8cd679708ab143e79 100644 |
--- a/src/opts/SkChecksum_opts.h |
+++ b/src/opts/SkChecksum_opts.h |
@@ -13,10 +13,10 @@ |
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
#include <immintrin.h> |
+#elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32) |
+ #include <arm_acle.h> |
#endif |
-// TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 |
- |
namespace SK_OPTS_NS { |
template <typename T> |
@@ -127,6 +127,50 @@ static inline T unaligned_load(const uint8_t* src) { |
return hash; |
} |
+#elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32) |
+ static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { /* Folds `bytes` bytes starting at vdata into the seed `hash` using the __crc32{b,h,w,d} intrinsics and returns the result. */ |
+ auto data = (const uint8_t*)vdata; /* byte cursor over the input */ |
+ if (bytes >= 24) { /* bulk path: 24 bytes per iteration, spread over three accumulators */ |
+ uint32_t a = hash, /* a, b and c all start from the incoming seed */ |
+ b = hash, |
+ c = hash; |
+ size_t steps = bytes/24; /* number of whole 24-byte groups */ |
+ while (steps --> 0) { /* parses as (steps--) > 0, so the body runs exactly bytes/24 times */ |
+ a = __crc32d(a, unaligned_load<uint64_t>(data+ 0)); /* each accumulator consumes its own 8-byte lane of the group; NOTE(review): these are the __crc32* intrinsics, not the __crc32c* ones - confirm the polynomial choice is intended */ |
+ b = __crc32d(b, unaligned_load<uint64_t>(data+ 8)); |
+ c = __crc32d(c, unaligned_load<uint64_t>(data+16)); |
+ data += 24; /* step the cursor past the group just consumed */ |
+ } |
+ bytes %= 24; /* bytes left over after the whole groups */ |
+ hash = a^b^c; /* merge the three accumulators with XOR */ |
+ } |
+ |
+ SkASSERT(bytes < 24); |
+ if (bytes >= 16) { /* 16..23 bytes left: consume 8 so that fewer than 16 remain */ |
+ hash = __crc32d(hash, unaligned_load<uint64_t>(data)); |
+ bytes -= 8; |
+ data += 8; |
+ } |
+ |
+ SkASSERT(bytes < 16); |
+ if (bytes & 8) { /* bytes < 16 here, so each of its bits 8/4/2/1 selects at most one load of that width */ |
+ hash = __crc32d(hash, unaligned_load<uint64_t>(data)); |
+ data += 8; |
+ } |
+ if (bytes & 4) { |
+ hash = __crc32w(hash, unaligned_load<uint32_t>(data)); |
+ data += 4; |
+ } |
+ if (bytes & 2) { |
+ hash = __crc32h(hash, unaligned_load<uint16_t>(data)); |
+ data += 2; |
+ } |
+ if (bytes & 1) { /* final byte; the cursor is not advanced because no further load follows */ |
+ hash = __crc32b(hash, unaligned_load<uint8_t>(data)); |
+ } |
+ return hash; |
+ } |
+ |
#else |
// This is Murmur3. |
static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |