OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
216 }; | 216 }; |
217 | 217 |
218 template <> | 218 template <> |
219 class SkNf<4, float> { | 219 class SkNf<4, float> { |
220 public: | 220 public: |
221 SkNf(float32x4_t vec) : fVec(vec) {} | 221 SkNf(float32x4_t vec) : fVec(vec) {} |
222 | 222 |
223 SkNf() {} | 223 SkNf() {} |
224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
// Load four bytes and widen each into one lane of the float vector.
// The bytes are read as a single 32-bit word, then zero-extended
// u8 -> u16 -> u32 before the final unsigned->float conversion.
static SkNf FromBytes(const uint8_t vals[4]) {
    // NOTE(review): assumes vld1_dup_u32 tolerates the (possibly
    // unaligned) uint8_t* source -- NEON loads do, but confirm this
    // matches the project's alignment expectations.
    uint32x2_t word    = vld1_dup_u32((const uint32_t*)vals);
    uint8x8_t  bytes   = vreinterpret_u8_u32(word);
    uint16x8_t wide16  = vmovl_u8(bytes);
    uint32x4_t wide32  = vmovl_u16(vget_low_u16(wide16));
    return SkNf(vcvtq_f32_u32(wide32));
}
| 232 |
226 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 233 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
227 | 234 |
228 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 235 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
// Narrow each float lane to one byte and store the four bytes.
// vcvtq_u32_f32 converts with round-toward-zero (truncation); the two
// vqmovn steps are saturating narrows (32->16, then 16->8 bits), so
// out-of-range lanes clamp rather than wrap.
void toBytes(uint8_t bytes[4]) const {
    uint32x4_t lanes32 = vcvtq_u32_f32(fVec);
    uint16x4_t lanes16 = vqmovn_u32(lanes32);
    // Pad the high half with zeros; only the low 4 bytes are stored.
    uint8x8_t  lanes8  = vqmovn_u16(vcombine_u16(lanes16, vdup_n_u16(0)));
    vst1_lane_u32((uint32_t*)bytes, vreinterpret_u32_u8(lanes8), 0);
}
229 | 242 |
230 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } | 243 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } |
231 | 244 |
// Per-lane approximate reciprocal: start from the hardware estimate
// (vrecpeq) and apply one Newton-Raphson refinement step, where
// vrecpsq computes the (2 - x*estimate) correction factor.
SkNf approxInvert() const {
    float32x4_t estimate = vrecpeq_f32(fVec);
    float32x4_t refined  = vmulq_f32(vrecpsq_f32(estimate, fVec), estimate);
    return refined;
}
237 SkNf invert() const { | 250 SkNf invert() const { |
238 float32x4_t est1 = this->approxInvert().fVec, | 251 float32x4_t est1 = this->approxInvert().fVec, |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
379 uint8x16_t fVec; | 392 uint8x16_t fVec; |
380 }; | 393 }; |
381 | 394 |
382 #undef SHIFT32 | 395 #undef SHIFT32 |
383 #undef SHIFT16 | 396 #undef SHIFT16 |
384 #undef SHIFT8 | 397 #undef SHIFT8 |
385 | 398 |
386 } // namespace | 399 } // namespace |
387 | 400 |
388 #endif//SkNx_neon_DEFINED | 401 #endif//SkNx_neon_DEFINED |
OLD | NEW |