Chromium Code Reviews

Unified Diff: src/opts/SkNx_neon.h

Issue 1683543002: sknx refactoring (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typos, created 4 years, 10 months ago

 /*
  * Copyright 2015 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #ifndef SkNx_neon_DEFINED
 #define SkNx_neon_DEFINED

 #define SKNX_IS_FAST

-namespace {  // See SkNx.h
-
 // Well, this is absurd. The shifts require compile-time constant arguments.

 #define SHIFT8(op, v, bits) switch(bits) { \
     case 1: return op(v, 1);  case 2: return op(v, 2);  case 3: return op(v, 3); \
     case 4: return op(v, 4);  case 5: return op(v, 5);  case 6: return op(v, 6); \
     case 7: return op(v, 7); \
     } return fVec

 #define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \
                              case  8: return op(v,  8);  case  9: return op(v,  9); \
(... 66 unchanged lines skipped ...)
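Context for the SHIFT macros above: NEON's immediate-shift intrinsics (vshl_n_*, vshr_n_*) only accept a shift count that is a compile-time constant, so a run-time bits value has to be dispatched through a switch whose cases each pass a literal. A minimal standalone sketch of the same idea, not part of this CL (the helper name shl_u16 is invented for illustration):

    #include <arm_neon.h>

    // Illustrative only: dispatch a run-time shift count to the
    // constant-argument intrinsic, mirroring what the SHIFT8/SHIFT16/SHIFT32
    // macros expand to inside operator<< and operator>>.
    static inline uint16x4_t shl_u16(uint16x4_t v, int bits) {
        switch (bits) {
            case  1: return vshl_n_u16(v,  1);  case  2: return vshl_n_u16(v,  2);
            case  3: return vshl_n_u16(v,  3);  case  4: return vshl_n_u16(v,  4);
            case  5: return vshl_n_u16(v,  5);  case  6: return vshl_n_u16(v,  6);
            case  7: return vshl_n_u16(v,  7);  case  8: return vshl_n_u16(v,  8);
            case  9: return vshl_n_u16(v,  9);  case 10: return vshl_n_u16(v, 10);
            case 11: return vshl_n_u16(v, 11);  case 12: return vshl_n_u16(v, 12);
            case 13: return vshl_n_u16(v, 13);  case 14: return vshl_n_u16(v, 14);
            case 15: return vshl_n_u16(v, 15);
        }
        return v;  // a shift by 0 (or out of range) leaves the vector unchanged
    }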
     }

     SkNx sqrt() const {
 #if defined(SK_CPU_ARM64)
         return vsqrt_f32(fVec);
 #else
         return *this * this->rsqrt2();
 #endif
     }

-    template <int k> float kth() const {
+    float operator[](int k) const {
         SkASSERT(0 <= k && k < 2);
-        return vget_lane_f32(fVec, k&1);
+        union { float32x2_t v; float fs[2]; } pun = {fVec};
+        return pun.fs[k&1];
     }
+    template <int k> float kth() const { return (*this)[k]; }

     bool allTrue() const {
         auto v = vreinterpret_u32_f32(fVec);
         return vget_lane_u32(v,0) && vget_lane_u32(v,1);
     }
     bool anyTrue() const {
         auto v = vreinterpret_u32_f32(fVec);
         return vget_lane_u32(v,0) || vget_lane_u32(v,1);
     }

     float32x2_t fVec;
 };
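The hunk above (repeated below for the wider vector types) replaces the compile-time kth<k>() accessor with a run-time operator[]: instead of vget_lane_f32, which requires a constant lane index, the vector is type-punned through a union and read as an ordinary array, while kth<k>() is kept as a thin forwarding shim. A hedged usage sketch, not part of this CL (Sk2f is the SkNx<2, float> alias from SkNx.h; the helper and include path are assumptions for illustration):

    #include "SkNx.h"

    // Illustrative only: with this patch, lanes can be read with a run-time
    // index, and the old template form still compiles by forwarding to
    // operator[].
    static float sum_lanes(const Sk2f& v) {
        float total = 0;
        for (int i = 0; i < 2; i++) {
            total += v[i];          // new: run-time lane index
        }
        return total + v.kth<0>();  // old: compile-time lane index, still supported
    }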

 template <>
-class SkNx<4, int> {
-public:
-    SkNx(const int32x4_t& vec) : fVec(vec) {}
-
-    SkNx() {}
-    SkNx(int val) : fVec(vdupq_n_s32(val)) {}
-    static SkNx Load(const void* ptr) { return vld1q_s32((const int*)ptr); }
-    SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
-
-    void store(void* ptr) const { vst1q_s32((int*)ptr, fVec); }
-
-    SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
-    SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
-    SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }
-
-    SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
-
-    template <int k> int kth() const {
-        SkASSERT(0 <= k && k < 4);
-        return vgetq_lane_s32(fVec, k&3);
-    }
-
-    int32x4_t fVec;
-};
-
-template <>
 class SkNx<4, float> {
 public:
     SkNx(float32x4_t vec) : fVec(vec) {}

     SkNx() {}
     SkNx(float val) : fVec(vdupq_n_f32(val)) {}
     static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
     SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }

     void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
(... 44 unchanged lines skipped ...)
     }

     SkNx sqrt() const {
 #if defined(SK_CPU_ARM64)
         return vsqrtq_f32(fVec);
 #else
         return *this * this->rsqrt2();
 #endif
     }

-    template <int k> float kth() const {
+    float operator[](int k) const {
         SkASSERT(0 <= k && k < 4);
-        return vgetq_lane_f32(fVec, k&3);
+        union { float32x4_t v; float fs[4]; } pun = {fVec};
+        return pun.fs[k&3];
     }
+    template <int k> float kth() const { return (*this)[k]; }

     bool allTrue() const {
         auto v = vreinterpretq_u32_f32(fVec);
         return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1)
             && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3);
     }
     bool anyTrue() const {
         auto v = vreinterpretq_u32_f32(fVec);
         return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1)
             || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3);
(... 26 unchanged lines skipped ...)

     SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); }
     SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); }
     SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); }

     SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); }
     SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); }

-    template <int k> uint16_t kth() const {
+    uint16_t operator[](int k) const {
         SkASSERT(0 <= k && k < 4);
-        return vget_lane_u16(fVec, k&3);
+        union { uint16x4_t v; uint16_t us[4]; } pun = {fVec};
+        return pun.us[k&3];
     }
+    template <int k> uint16_t kth() const { return (*this)[k]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbsl_u16(fVec, t.fVec, e.fVec);
     }

     uint16x4_t fVec;
 };

 template <>
 class SkNx<8, uint16_t> {
(... 13 unchanged lines skipped ...)

     SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); }
     SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); }
     SkNx operator * (const SkNx& o) const { return vmulq_u16(fVec, o.fVec); }

     SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
     SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }

-    template <int k> uint16_t kth() const {
+    uint16_t operator[](int k) const {
         SkASSERT(0 <= k && k < 8);
-        return vgetq_lane_u16(fVec, k&7);
+        union { uint16x8_t v; uint16_t us[8]; } pun = {fVec};
+        return pun.us[k&7];
     }
+    template <int k> uint16_t kth() const { return (*this)[k]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u16(fVec, t.fVec, e.fVec);
     }

     uint16x8_t fVec;
 };

 template <>
 class SkNx<4, uint8_t> {
(... 32 unchanged lines skipped ...)
     void store(void* ptr) const { vst1q_u8((uint8_t*)ptr, fVec); }

     SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); }

     SkNx operator + (const SkNx& o) const { return vaddq_u8(fVec, o.fVec); }
     SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fVec); }
     SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); }

-    template <int k> uint8_t kth() const {
-        SkASSERT(0 <= k && k < 15);
-        return vgetq_lane_u8(fVec, k&16);
+    uint8_t operator[](int k) const {
+        SkASSERT(0 <= k && k < 16);
+        union { uint8x16_t v; uint8_t us[16]; } pun = {fVec};
+        return pun.us[k&15];
     }
+    template <int k> uint8_t kth() const { return (*this)[k]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u8(fVec, t.fVec, e.fVec);
     }

     uint8x16_t fVec;
 };

 #undef SHIFT32
 #undef SHIFT16
 #undef SHIFT8

-template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) {
-    return vcvtq_s32_f32(src.fVec);
-}
-
-template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) {
+template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) {
     uint32x4_t _32 = vcvtq_u32_f32(src.fVec);
     uint16x4_t _16 = vqmovn_u32(_32);
     return vqmovn_u16(vcombine_u16(_16, _16));
 }

-template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) {
+template<> inline Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) {
     uint16x8_t _16 = vmovl_u8(src.fVec);
     uint32x4_t _32 = vmovl_u16(vget_low_u16(_16));
     return vcvtq_f32_u32(_32);
 }

 static inline void Sk4f_ToBytes(uint8_t bytes[16],
                                 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
     vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec),
                                       (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0],
                              vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec),
                                       (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]);
 }

-template<> inline Sk4h SkNx_cast<uint16_t, uint8_t, 4>(const Sk4b& src) {
+template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
     return vget_low_u16(vmovl_u8(src.fVec));
 }

-template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) {
+template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
     return vmovn_u16(vcombine_u16(src.fVec, src.fVec));
 }

-}  // namespace
-
 #endif//SkNx_neon_DEFINED
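In the hunks above, the SkNx_cast specializations drop the trailing explicit element count (the ", 4") to match the refactored declaration in SkNx.h, and the unused float-to-int cast is removed. A hedged sketch of calling these specializations after the change, not part of this CL (the helper name is invented; the include path is an assumption):

    #include "SkNx.h"

    // Illustrative only: round-trip four floats through bytes via the NEON
    // specializations above. Values outside [0, 255] saturate in the vqmovn
    // narrowing steps, and fractional parts are truncated by the
    // float-to-unsigned conversion.
    static Sk4f quantize_to_bytes(const Sk4f& v) {
        Sk4b b = SkNx_cast<uint8_t, float>(v);
        return SkNx_cast<float, uint8_t>(b);
    }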