| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| 11 #include <arm_neon.h> |
| 12 |
| 11 #define SKNX_IS_FAST | 13 #define SKNX_IS_FAST |
| 12 | 14 |
| 13 // ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it: | 15 // ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it: |
| 14 // - roundtrip through integers via truncation | 16 // - roundtrip through integers via truncation |
| 15 // - subtract 1 if that's too big (possible for negative values). | 17 // - subtract 1 if that's too big (possible for negative values). |
| 16 // This restricts the domain of our inputs to a maximum somewhere around 2^31. Seems plenty big. | 18 // This restricts the domain of our inputs to a maximum somewhere around 2^31. Seems plenty big. |
| 17 static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) { | 19 static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) { |
| 18 auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v)); | 20 auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v)); |
| 19 auto too_big = vcgtq_f32(roundtrip, v); | 21 auto too_big = vcgtq_f32(roundtrip, v); |
| 20 return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1))); | 22 return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1))); |
| (...skipping 419 matching lines...) |
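
[Editor's note: for reference, here is a minimal standalone sketch of the floor emulation in the hunk above, pulled out of the SkNx wrapper. The main harness and sample values are illustrative additions, not part of the patch; it assumes an ARMv7 NEON target. It spells the bit reinterpretation with vreinterpretq_* instead of the C-style casts used in the patch, which is the same operation.]

    #include <arm_neon.h>
    #include <cstdio>

    // Truncate toward zero, then subtract 1 wherever truncation landed
    // above the input (which happens for negative non-integers).
    static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) {
        float32x4_t roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));
        uint32x4_t  too_big   = vcgtq_f32(roundtrip, v);
        return vsubq_f32(roundtrip,
                         vreinterpretq_f32_u32(vandq_u32(too_big,
                             vreinterpretq_u32_f32(vdupq_n_f32(1)))));
    }

    int main() {
        float in[4] = { 1.5f, -1.5f, -2.0f, 0.25f };
        float out[4];
        vst1q_f32(out, armv7_vrndmq_f32(vld1q_f32(in)));
        // Expect 1, -2, -2, 0: negative non-integers floor downward.
        for (int i = 0; i < 4; i++) {
            printf("floor(%+.2f) = %+.2f\n", in[i], out[i]);
        }
        return 0;
    }
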
| 440 | 442 |
| 441 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 443 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
| 442 return vget_low_u16(vmovl_u8(src.fVec)); | 444 return vget_low_u16(vmovl_u8(src.fVec)); |
| 443 } | 445 } |
| 444 | 446 |
| 445 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 447 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
| 446 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | 448 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
| 447 } | 449 } |
| 448 | 450 |
| 449 #endif//SkNx_neon_DEFINED | 451 #endif//SkNx_neon_DEFINED |
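
[Editor's note: the two SkNx_cast specializations above are thin wrappers over NEON's widening and narrowing moves. A minimal sketch (harness and sample values are hypothetical) showing the same round trip with the raw intrinsics:]

    #include <arm_neon.h>
    #include <cstdio>

    int main() {
        // Widen four bytes to 16-bit lanes, as the Sk4b -> Sk4h cast does.
        uint8_t bytes[8] = { 1, 2, 3, 250, 0, 0, 0, 0 };  // only the low 4 are used
        uint16x4_t wide = vget_low_u16(vmovl_u8(vld1_u8(bytes)));

        // Narrow back, as the Sk4h -> Sk4b cast does: duplicating the 4-lane
        // vector into both halves gives vmovn_u16 a full 8-lane input, and the
        // low 4 bytes of the result are the ones we care about.
        uint8x8_t narrow = vmovn_u16(vcombine_u16(wide, wide));

        uint8_t out[8];
        vst1_u8(out, narrow);
        for (int i = 0; i < 4; i++) {
            printf("%u ", out[i]);  // expect: 1 2 3 250
        }
        printf("\n");
        return 0;
    }

Note that vmovn_u16 truncates each 16-bit lane to its low byte rather than saturating, so values above 255 would wrap; in the cast pair above the Sk4h values originate from Sk4b, so they always fit.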