OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
| 11 #include <arm_neon.h> |
| 12 |
11 #define SKNX_IS_FAST | 13 #define SKNX_IS_FAST |
12 | 14 |
// ARMv8 has vrndmq_f32 to floor 4 floats.  Here we emulate it:
//   - roundtrip through integers via truncation
//   - subtract 1 if that's too big (possible for negative values).
// This restricts the domain of our inputs to a maximum somewhere around 2^31.  Seems plenty big.
static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) {
    // Truncate each lane toward zero, then convert back to float.
    float32x4_t roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));
    // Truncation rounds negative non-integers *up*, so flag lanes where the
    // roundtrip overshot the input (all-ones mask per lane).
    uint32x4_t too_big = vcgtq_f32(roundtrip, v);
    // Select the bit pattern of 1.0f in exactly those lanes, 0.0f elsewhere.
    // vreinterpretq_* is the portable NEON bit-cast; the original C-style
    // vector casts are a GCC/Clang extension.
    uint32x4_t one_bits = vandq_u32(too_big, vreinterpretq_u32_f32(vdupq_n_f32(1)));
    return vsubq_f32(roundtrip, vreinterpretq_f32_u32(one_bits));
}
(...skipping 419 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
440 | 442 |
// Widen four uint8s to four uint16s: zero-extend all eight bytes with vmovl,
// then keep only the low four lanes we care about.
template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
    uint16x8_t widened = vmovl_u8(src.fVec);
    return vget_low_u16(widened);
}
444 | 446 |
// Narrow four uint16s to four uint8s.  vmovn_u16 requires a full 128-bit
// input, so mirror the 4-lane vector into both halves before narrowing; the
// duplicated upper-half results are simply ignored by the 4-lane Sk4b.
template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
    uint16x8_t mirrored = vcombine_u16(src.fVec, src.fVec);
    return vmovn_u16(mirrored);
}
448 | 450 |
449 #endif//SkNx_neon_DEFINED | 451 #endif//SkNx_neon_DEFINED |
OLD | NEW |