OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
158 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 158 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
159 | 159 |
160 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 160 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
161 void toBytes(uint8_t bytes[4]) const { | 161 void toBytes(uint8_t bytes[4]) const { |
162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); | 162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); |
163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32); | 163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32); |
164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); | 164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); | 165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); |
166 } | 166 } |
167 | 167 |
| 168 static void ToBytes(uint8_t bytes[16], |
| 169 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx&
d) { |
| 170 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), |
| 171 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val
[0], |
| 172 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), |
| 173 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val
[0]).val[0]); |
| 174 } |
| 175 |
168 SkNx approxInvert() const { | 176 SkNx approxInvert() const { |
169 float32x4_t est0 = vrecpeq_f32(fVec), | 177 float32x4_t est0 = vrecpeq_f32(fVec), |
170 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); | 178 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); |
171 return est1; | 179 return est1; |
172 } | 180 } |
173 SkNx invert() const { | 181 SkNx invert() const { |
174 float32x4_t est1 = this->approxInvert().fVec, | 182 float32x4_t est1 = this->approxInvert().fVec, |
175 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); | 183 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); |
176 return est2; | 184 return est2; |
177 } | 185 } |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
320 #undef SHIFT8 | 328 #undef SHIFT8 |
321 | 329 |
322 template<> | 330 template<> |
323 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) { | 331 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) { |
324 return vcvtq_s32_f32(src.fVec); | 332 return vcvtq_s32_f32(src.fVec); |
325 } | 333 } |
326 | 334 |
327 } // namespace | 335 } // namespace |
328 | 336 |
329 #endif//SkNx_neon_DEFINED | 337 #endif//SkNx_neon_DEFINED |
OLD | NEW |