Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(452)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1132273004: Sk4px (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: a smidge faster Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/Sk4px_none.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 12
13 // Well, this is absurd. The shifts require compile-time constant arguments.
14
15 #define SHIFT8(op, v, bits) switch(bits) { \
16 case 1: return op(v, 1); case 2: return op(v, 2); case 3: return op(v , 3); \
17 case 4: return op(v, 4); case 5: return op(v, 5); case 6: return op(v , 6); \
18 case 7: return op(v, 7); \
19 } return fVec
20
21 #define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \
22 case 8: return op(v, 8); case 9: return op(v , 9); \
23 case 10: return op(v, 10); case 11: return op(v, 11); case 12: return op(v , 12); \
24 case 13: return op(v, 13); case 14: return op(v, 14); case 15: return op(v , 15); \
25 } return fVec
26
27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bit s) { \
28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v , 18); \
29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v , 21); \
30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v , 24); \
31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v , 27); \
32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \
33 case 31: return op(v, 31); } return fVec
34
13 template <> 35 template <>
14 class SkNb<2, 4> { 36 class SkNb<2, 4> {
15 public: 37 public:
16 SkNb(uint32x2_t vec) : fVec(vec) {} 38 SkNb(uint32x2_t vec) : fVec(vec) {}
17 39
18 SkNb() {} 40 SkNb() {}
19 bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); } 41 bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); }
20 bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); } 42 bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); }
21 private: 43
22 uint32x2_t fVec; 44 uint32x2_t fVec;
23 }; 45 };
24 46
25 template <> 47 template <>
26 class SkNb<4, 4> { 48 class SkNb<4, 4> {
27 public: 49 public:
28 SkNb(uint32x4_t vec) : fVec(vec) {} 50 SkNb(uint32x4_t vec) : fVec(vec) {}
29 51
30 SkNb() {} 52 SkNb() {}
31 bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec , 1) 53 bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec , 1)
32 && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec , 3); } 54 && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec , 3); }
33 bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec , 1) 55 bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec , 1)
34 || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec , 3); } 56 || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec , 3); }
35 private: 57
36 uint32x4_t fVec; 58 uint32x4_t fVec;
37 }; 59 };
38 60
39 template <> 61 template <>
40 class SkNf<2, float> { 62 class SkNf<2, float> {
41 typedef SkNb<2, 4> Nb; 63 typedef SkNb<2, 4> Nb;
42 public: 64 public:
43 SkNf(float32x2_t vec) : fVec(vec) {} 65 SkNf(float32x2_t vec) : fVec(vec) {}
44 66
45 SkNf() {} 67 SkNf() {}
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 #else 119 #else
98 return *this * this->rsqrt2(); 120 return *this * this->rsqrt2();
99 #endif 121 #endif
100 } 122 }
101 123
102 template <int k> float kth() const { 124 template <int k> float kth() const {
103 SkASSERT(0 <= k && k < 2); 125 SkASSERT(0 <= k && k < 2);
104 return vget_lane_f32(fVec, k&1); 126 return vget_lane_f32(fVec, k&1);
105 } 127 }
106 128
107 private:
108 float32x2_t fVec; 129 float32x2_t fVec;
109 }; 130 };
110 131
111 #if defined(SK_CPU_ARM64) 132 #if defined(SK_CPU_ARM64)
112 template <> 133 template <>
113 class SkNb<2, 8> { 134 class SkNb<2, 8> {
114 public: 135 public:
115 SkNb(uint64x2_t vec) : fVec(vec) {} 136 SkNb(uint64x2_t vec) : fVec(vec) {}
116 137
117 SkNb() {} 138 SkNb() {}
118 bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec , 1); } 139 bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec , 1); }
119 bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec , 1); } 140 bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec , 1); }
120 private: 141
121 uint64x2_t fVec; 142 uint64x2_t fVec;
122 }; 143 };
123 144
124 template <> 145 template <>
125 class SkNf<2, double> { 146 class SkNf<2, double> {
126 typedef SkNb<2, 8> Nb; 147 typedef SkNb<2, 8> Nb;
127 public: 148 public:
128 SkNf(float64x2_t vec) : fVec(vec) {} 149 SkNf(float64x2_t vec) : fVec(vec) {}
129 150
130 SkNf() {} 151 SkNf() {}
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
174 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), 195 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),
175 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); 196 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);
176 return est3; 197 return est3;
177 } 198 }
178 199
179 template <int k> double kth() const { 200 template <int k> double kth() const {
180 SkASSERT(0 <= k && k < 2); 201 SkASSERT(0 <= k && k < 2);
181 return vgetq_lane_f64(fVec, k&1); 202 return vgetq_lane_f64(fVec, k&1);
182 } 203 }
183 204
184 private:
185 float64x2_t fVec; 205 float64x2_t fVec;
186 }; 206 };
187 #endif//defined(SK_CPU_ARM64) 207 #endif//defined(SK_CPU_ARM64)
188 208
189 template <> 209 template <>
190 class SkNi<4, int> { 210 class SkNi<4, int> {
191 public: 211 public:
192 SkNi(const int32x4_t& vec) : fVec(vec) {} 212 SkNi(const int32x4_t& vec) : fVec(vec) {}
193 213
194 SkNi() {} 214 SkNi() {}
195 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} 215 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}
196 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } 216 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }
197 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } 217 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
198 218
199 void store(int vals[4]) const { vst1q_s32(vals, fVec); } 219 void store(int vals[4]) const { vst1q_s32(vals, fVec); }
200 220
201 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } 221 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }
202 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } 222 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }
203 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } 223 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }
204 224
205 // Well, this is absurd. The shifts require compile-time constant arguments . 225 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
206 #define SHIFT(op, v, bits) switch(bits) { \ 226 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
207 case 1: return op(v, 1); case 2: return op(v, 2); case 3: return op(v , 3); \
208 case 4: return op(v, 4); case 5: return op(v, 5); case 6: return op(v , 6); \
209 case 7: return op(v, 7); case 8: return op(v, 8); case 9: return op(v , 9); \
210 case 10: return op(v, 10); case 11: return op(v, 11); case 12: return op(v , 12); \
211 case 13: return op(v, 13); case 14: return op(v, 14); case 15: return op(v , 15); \
212 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v , 18); \
213 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v , 21); \
214 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v , 24); \
215 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v , 27); \
216 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \
217 case 31: return op(v, 31); } return fVec
218
219 SkNi operator << (int bits) const { SHIFT(vshlq_n_s32, fVec, bits); }
220 SkNi operator >> (int bits) const { SHIFT(vshrq_n_s32, fVec, bits); }
221 #undef SHIFT
222 227
223 template <int k> int kth() const { 228 template <int k> int kth() const {
224 SkASSERT(0 <= k && k < 4); 229 SkASSERT(0 <= k && k < 4);
225 return vgetq_lane_s32(fVec, k&3); 230 return vgetq_lane_s32(fVec, k&3);
226 } 231 }
227 protected: 232
228 int32x4_t fVec; 233 int32x4_t fVec;
229 }; 234 };
230 235
231 template <> 236 template <>
232 class SkNf<4, float> { 237 class SkNf<4, float> {
233 typedef SkNb<4, 4> Nb; 238 typedef SkNb<4, 4> Nb;
234 public: 239 public:
235 SkNf(float32x4_t vec) : fVec(vec) {} 240 SkNf(float32x4_t vec) : fVec(vec) {}
236 241
237 SkNf() {} 242 SkNf() {}
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 #else 296 #else
292 return *this * this->rsqrt2(); 297 return *this * this->rsqrt2();
293 #endif 298 #endif
294 } 299 }
295 300
296 template <int k> float kth() const { 301 template <int k> float kth() const {
297 SkASSERT(0 <= k && k < 4); 302 SkASSERT(0 <= k && k < 4);
298 return vgetq_lane_f32(fVec, k&3); 303 return vgetq_lane_f32(fVec, k&3);
299 } 304 }
300 305
301 protected:
302 float32x4_t fVec; 306 float32x4_t fVec;
303 }; 307 };
304 308
309 template <>
310 class SkNi<8, uint16_t> {
311 public:
312 SkNi(const uint16x8_t& vec) : fVec(vec) {}
313
314 SkNi() {}
315 explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {}
316 static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); }
317
318 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
319 uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
320 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h };
321 }
322
323 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); }
324
325 SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); }
326 SkNi operator - (const SkNi& o) const { return vsubq_u16(fVec, o.fVec); }
327 SkNi operator * (const SkNi& o) const { return vmulq_u16(fVec, o.fVec); }
328
329 SkNi operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
330 SkNi operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }
331
332 template <int k> uint16_t kth() const {
333 SkASSERT(0 <= k && k < 8);
334 return vgetq_lane_u16(fVec, k&7);
335 }
336
337 uint16x8_t fVec;
338 };
339
340 template <>
341 class SkNi<16, uint8_t> {
342 public:
343 SkNi(const uint8x16_t& vec) : fVec(vec) {}
344
345 SkNi() {}
346 explicit SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {}
347 static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); }
348
349 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
350 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
351 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
352 uint8_t m, uint8_t n, uint8_t o, uint8_t p) {
353 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p };
354 }
355
356 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); }
357
358 SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); }
359 SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); }
360 SkNi operator * (const SkNi& o) const { return vmulq_u8(fVec, o.fVec); }
361
362 SkNi operator << (int bits) const { SHIFT8(vshlq_n_u8, fVec, bits); }
363 SkNi operator >> (int bits) const { SHIFT8(vshrq_n_u8, fVec, bits); }
364
365 template <int k> uint8_t kth() const {
366 SkASSERT(0 <= k && k < 15);
367 return vgetq_lane_u8(fVec, k&16);
368 }
369
370 uint8x16_t fVec;
371 };
372
373 #undef SHIFT32
374 #undef SHIFT16
375 #undef SHIFT8
376
305 #endif//SkNx_neon_DEFINED 377 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/opts/Sk4px_none.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698