| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| 11 #include <arm_neon.h> | |
| 12 | |
| 13 #define SKNX_IS_FAST | 11 #define SKNX_IS_FAST |
| 14 | 12 |
| 15 // ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it: | 13 // ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it: |
| 16 // - roundtrip through integers via truncation | 14 // - roundtrip through integers via truncation |
| 17 // - subtract 1 if that's too big (possible for negative values). | 15 // - subtract 1 if that's too big (possible for negative values). |
| 18 // This restricts the domain of our inputs to a maximum somewhere around 2^31.
Seems plenty big. | 16 // This restricts the domain of our inputs to a maximum somewhere around 2^31.
Seems plenty big. |
| 19 static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) { | 17 static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) { |
| 20 auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v)); | 18 auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v)); |
| 21 auto too_big = vcgtq_f32(roundtrip, v); | 19 auto too_big = vcgtq_f32(roundtrip, v); |
| 22 return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdup
q_n_f32(1))); | 20 return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdup
q_n_f32(1))); |
| (...skipping 419 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 442 | 440 |
| 443 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 441 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
| 444 return vget_low_u16(vmovl_u8(src.fVec)); | 442 return vget_low_u16(vmovl_u8(src.fVec)); |
| 445 } | 443 } |
| 446 | 444 |
| 447 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 445 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
| 448 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | 446 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
| 449 } | 447 } |
| 450 | 448 |
| 451 #endif//SkNx_neon_DEFINED | 449 #endif//SkNx_neon_DEFINED |
| OLD | NEW |