| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 216 }; | 216 }; |
| 217 | 217 |
| 218 template <> | 218 template <> |
| 219 class SkNf<4, float> { | 219 class SkNf<4, float> { |
| 220 public: | 220 public: |
| 221 SkNf(float32x4_t vec) : fVec(vec) {} | 221 SkNf(float32x4_t vec) : fVec(vec) {} |
| 222 | 222 |
| 223 SkNf() {} | 223 SkNf() {} |
| 224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
| 225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
| 226 static SkNf FromBytes(const uint8_t vals[4]) { |
| 227 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); |
| 228 uint16x8_t fix8_16 = vmovl_u8(fix8); |
| 229 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); |
| 230 return SkNf(vcvtq_f32_u32(fix8_32)); |
| 231 } |
| 232 |
| 226 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 233 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
| 227 | 234 |
| 228 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 235 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
| 236 void toBytes(uint8_t bytes[4]) const { |
| 237 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); |
| 238 uint16x4_t fix8_16 = vqmovn_u32(fix8_32); |
| 239 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
| 240 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); |
| 241 } |
| 229 | 242 |
| 230 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } | 243 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } |
| 231 | 244 |
| 232 SkNf approxInvert() const { | 245 SkNf approxInvert() const { |
| 233 float32x4_t est0 = vrecpeq_f32(fVec), | 246 float32x4_t est0 = vrecpeq_f32(fVec), |
| 234 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); | 247 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); |
| 235 return est1; | 248 return est1; |
| 236 } | 249 } |
| 237 SkNf invert() const { | 250 SkNf invert() const { |
| 238 float32x4_t est1 = this->approxInvert().fVec, | 251 float32x4_t est1 = this->approxInvert().fVec, |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 379 uint8x16_t fVec; | 392 uint8x16_t fVec; |
| 380 }; | 393 }; |
| 381 | 394 |
| 382 #undef SHIFT32 | 395 #undef SHIFT32 |
| 383 #undef SHIFT16 | 396 #undef SHIFT16 |
| 384 #undef SHIFT8 | 397 #undef SHIFT8 |
| 385 | 398 |
| 386 } // namespace | 399 } // namespace |
| 387 | 400 |
| 388 #endif//SkNx_neon_DEFINED | 401 #endif//SkNx_neon_DEFINED |
| OLD | NEW |