Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1526523003: Unify some SkNx code (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typo Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_avx.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 }; 143 };
144 144
145 template <> 145 template <>
146 class SkNx<4, float> { 146 class SkNx<4, float> {
147 public: 147 public:
148 SkNx(float32x4_t vec) : fVec(vec) {} 148 SkNx(float32x4_t vec) : fVec(vec) {}
149 149
150 SkNx() {} 150 SkNx() {}
151 SkNx(float val) : fVec(vdupq_n_f32(val)) {} 151 SkNx(float val) : fVec(vdupq_n_f32(val)) {}
152 static SkNx Load(const float vals[4]) { return vld1q_f32(vals); } 152 static SkNx Load(const float vals[4]) { return vld1q_f32(vals); }
153 static SkNx FromBytes(const uint8_t vals[4]) {
154 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
155 uint16x8_t fix8_16 = vmovl_u8(fix8);
156 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
157 return SkNx(vcvtq_f32_u32(fix8_32));
158 }
159
160 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 153 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
161 154
162 void store(float vals[4]) const { vst1q_f32(vals, fVec); } 155 void store(float vals[4]) const { vst1q_f32(vals, fVec); }
163 void toBytes(uint8_t bytes[4]) const {
164 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec);
165 uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
166 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
167 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);
168 }
169
170 static void ToBytes(uint8_t bytes[16],
171 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
172 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec),
173 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0],
174 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec),
175 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]);
176 }
177
178 SkNx approxInvert() const { 156 SkNx approxInvert() const {
179 float32x4_t est0 = vrecpeq_f32(fVec), 157 float32x4_t est0 = vrecpeq_f32(fVec),
180 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 158 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
181 return est1; 159 return est1;
182 } 160 }
183 SkNx invert() const { 161 SkNx invert() const {
184 float32x4_t est1 = this->approxInvert().fVec, 162 float32x4_t est1 = this->approxInvert().fVec,
185 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); 163 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
186 return est2; 164 return est2;
187 } 165 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
281 } 259 }
282 260
283 SkNx thenElse(const SkNx& t, const SkNx& e) const { 261 SkNx thenElse(const SkNx& t, const SkNx& e) const {
284 return vbslq_u16(fVec, t.fVec, e.fVec); 262 return vbslq_u16(fVec, t.fVec, e.fVec);
285 } 263 }
286 264
287 uint16x8_t fVec; 265 uint16x8_t fVec;
288 }; 266 };
289 267
290 template <> 268 template <>
269 class SkNx<4, uint8_t> {
270 public:
271 SkNx(const uint8x8_t& vec) : fVec(vec) {}
272
273 SkNx() {}
274 static SkNx Load(const uint8_t vals[4]) {
275 return (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
276 }
277 void store(uint8_t vals[4]) const {
278 return vst1_lane_u32((uint32_t*)vals, (uint32x2_t)fVec, 0);
279 }
280
281 // TODO as needed
282
283 uint8x8_t fVec;
284 };
285
286 template <>
291 class SkNx<16, uint8_t> { 287 class SkNx<16, uint8_t> {
292 public: 288 public:
293 SkNx(const uint8x16_t& vec) : fVec(vec) {} 289 SkNx(const uint8x16_t& vec) : fVec(vec) {}
294 290
295 SkNx() {} 291 SkNx() {}
296 SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {} 292 SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {}
297 static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); } 293 static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); }
298 294
299 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, 295 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
300 uint8_t e, uint8_t f, uint8_t g, uint8_t h, 296 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
(...skipping 21 matching lines...) Expand all
322 return vbslq_u8(fVec, t.fVec, e.fVec); 318 return vbslq_u8(fVec, t.fVec, e.fVec);
323 } 319 }
324 320
325 uint8x16_t fVec; 321 uint8x16_t fVec;
326 }; 322 };
327 323
328 #undef SHIFT32 324 #undef SHIFT32
329 #undef SHIFT16 325 #undef SHIFT16
330 #undef SHIFT8 326 #undef SHIFT8
331 327
332 template<> 328 template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) {
333 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) {
334 return vcvtq_s32_f32(src.fVec); 329 return vcvtq_s32_f32(src.fVec);
335 } 330 }
336 331
332 template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) {
333 uint32x4_t _32 = vcvtq_u32_f32(src.fVec);
334 uint16x4_t _16 = vqmovn_u32(_32);
335 return vqmovn_u16(vcombine_u16(_16, _16));
336 }
337
338 template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) {
339 uint16x8_t _16 = vmovl_u8 (src.fVec) ;
340 uint32x4_t _32 = vmovl_u16(vget_low_u16(_16));
341 return vcvtq_f32_u32(_32);
342 }
343
344 static inline void Sk4f_ToBytes(uint8_t bytes[16],
345 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
346 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec),
347 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0],
348 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec),
349 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]);
350 }
351
337 } // namespace 352 } // namespace
338 353
339 #endif//SkNx_neon_DEFINED 354 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_avx.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698