Chromium Code Reviews

Diff: src/opts/SkNx_neon.h

Issue 2194953002: simplify neon shifts (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: merge right (created 4 years, 4 months ago)
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkNx_neon_DEFINED
#define SkNx_neon_DEFINED

#include <arm_neon.h>

#define SKNX_IS_FAST

// ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it:
//   - roundtrip through integers via truncation
//   - subtract 1 if that's too big (possible for negative values).
// This restricts the domain of our inputs to a maximum somewhere around 2^31. Seems plenty big.
static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) {
    float32x4_t roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));
    uint32x4_t too_big = roundtrip > v;
    return roundtrip - (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1));
}

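The emulation above can be exercised with a small standalone check; the input values are only illustrative, and the function is repeated so the snippet compiles on its own (an ARMv7/ARM NEON toolchain is assumed):

    #include <arm_neon.h>
    #include <stdio.h>

    // Same floor emulation as above, copied here so this snippet stands alone.
    static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) {
        float32x4_t roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));   // truncate toward zero
        uint32x4_t too_big = roundtrip > v;                        // lanes where truncation overshot
        return roundtrip - (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1));
    }

    int main(void) {
        float in[4] = { 1.7f, -1.7f, 2.0f, -0.3f };
        float out[4];
        vst1q_f32(out, armv7_vrndmq_f32(vld1q_f32(in)));
        printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);   // expect: 1 -2 2 -1
        return 0;
    }

Only the negative, non-integral inputs take the subtract-1 fixup; everything else is just the truncating roundtrip.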
-// Well, this is absurd. The shifts require compile-time constant arguments.
-
-#define SHIFT8(op, v, bits) switch(bits) { \
-    case 1: return op(v, 1);  case 2: return op(v, 2);  case 3: return op(v, 3); \
-    case 4: return op(v, 4);  case 5: return op(v, 5);  case 6: return op(v, 6); \
-    case 7: return op(v, 7); \
-    } return fVec
-
-#define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \
-    case  8: return op(v,  8);  case  9: return op(v,  9); \
-    case 10: return op(v, 10);  case 11: return op(v, 11);  case 12: return op(v, 12); \
-    case 13: return op(v, 13);  case 14: return op(v, 14);  case 15: return op(v, 15); \
-    } return fVec
-
-#define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \
-    case 16: return op(v, 16);  case 17: return op(v, 17);  case 18: return op(v, 18); \
-    case 19: return op(v, 19);  case 20: return op(v, 20);  case 21: return op(v, 21); \
-    case 22: return op(v, 22);  case 23: return op(v, 23);  case 24: return op(v, 24); \
-    case 25: return op(v, 25);  case 26: return op(v, 26);  case 27: return op(v, 27); \
-    case 28: return op(v, 28);  case 29: return op(v, 29);  case 30: return op(v, 30); \
-    case 31: return op(v, 31);  } return fVec
-
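The immediate-form shift intrinsics those macros wrap (vshl_n_*, vshr_n_*, and their q variants) only accept a compile-time constant count, which is why every possible count had to be enumerated. The replacement operators below shift by another vector instead; the compiler can lower that to the register form of VSHL, where each lane carries a signed count and a negative count shifts right. A rough standalone sketch of that idea written with explicit intrinsics (the helper names and test value here are illustrative, not part of this CL):

    #include <arm_neon.h>
    #include <stdio.h>

    // Left shift by a runtime count: VSHL's register form takes a per-lane signed count.
    static inline uint32x4_t shl_u32(uint32x4_t v, int bits) {
        return vshlq_u32(v, vdupq_n_s32(bits));
    }

    // Logical right shift by a runtime count: a negative VSHL count shifts right.
    static inline uint32x4_t shr_u32(uint32x4_t v, int bits) {
        return vshlq_u32(v, vdupq_n_s32(-bits));
    }

    int main(void) {
        uint32x4_t v = vdupq_n_u32(0x80);
        printf("%#x %#x\n",
               vgetq_lane_u32(shl_u32(v, 3), 0),    // expect 0x400
               vgetq_lane_u32(shr_u32(v, 3), 0));   // expect 0x10
        return 0;
    }

With a runtime shift count expressible directly, the switch-per-count macros above have nothing left to do, which is the whole of this change.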
template <>
class SkNx<2, float> {
public:
    SkNx(float32x2_t vec) : fVec(vec) {}

    SkNx() {}
    SkNx(float a, float b) : fVec{a,b} {}
    SkNx(float v) : fVec{v,v} {}

    static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
(...skipping 140 matching lines...)
    SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec{a,b,c,d} {}
    SkNx(uint16_t v) : fVec{v,v,v,v} {}

    static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); }
    void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); }

    SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
    SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
    SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

-    SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); }
+    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
+    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); }

    uint16_t operator[](int k) const { return fVec[k&3]; }

    SkNx thenElse(const SkNx& t, const SkNx& e) const {
        return vbsl_u16(fVec, t.fVec, e.fVec);
    }

    uint16x4_t fVec;
};

template <>
class SkNx<8, uint16_t> {
public:
    SkNx(const uint16x8_t& vec) : fVec(vec) {}

    SkNx() {}
    SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
         uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec{a,b,c,d,e,f,g,h} {}
    SkNx(uint16_t v) : fVec{v,v,v,v,v,v,v,v} {}

    static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); }
    void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); }

    SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
    SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
    SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

-    SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }
+    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
+    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }

    uint16_t operator[](int k) const { return fVec[k&7]; }

    SkNx thenElse(const SkNx& t, const SkNx& e) const {
        return vbslq_u16(fVec, t.fVec, e.fVec);
    }

    uint16x8_t fVec;
(...skipping 67 matching lines...)
    void store(void* ptr) const { return vst1q_s32((int32_t*)ptr, fVec); }

    SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
    SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
    SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

    SkNx operator & (const SkNx& o) const { return fVec & o.fVec; }
    SkNx operator | (const SkNx& o) const { return fVec | o.fVec; }
    SkNx operator ^ (const SkNx& o) const { return fVec ^ o.fVec; }

-    SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
+    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
+    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }

    SkNx operator == (const SkNx& o) const { return fVec == o.fVec; }
    SkNx operator < (const SkNx& o) const { return fVec < o.fVec; }
    SkNx operator > (const SkNx& o) const { return fVec > o.fVec; }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }

    int32_t operator[](int k) const { return fVec[k&3]; }

    SkNx thenElse(const SkNx& t, const SkNx& e) const {
(...skipping 16 matching lines...)
    void store(void* ptr) const { return vst1q_u32((uint32_t*)ptr, fVec); }

    SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
    SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
    SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

    SkNx operator & (const SkNx& o) const { return fVec & o.fVec; }
    SkNx operator | (const SkNx& o) const { return fVec | o.fVec; }
    SkNx operator ^ (const SkNx& o) const { return fVec ^ o.fVec; }

-    SkNx operator << (int bits) const { SHIFT32(vshlq_n_u32, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_u32, fVec, bits); }
+    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
+    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }

    SkNx operator == (const SkNx& o) const { return fVec == o.fVec; }
    SkNx operator < (const SkNx& o) const { return fVec < o.fVec; }
    SkNx operator > (const SkNx& o) const { return fVec > o.fVec; }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.fVec); }

    uint32_t operator[](int k) const { return fVec[k&3]; }

    SkNx thenElse(const SkNx& t, const SkNx& e) const {
        return vbslq_u32(fVec, t.fVec, e.fVec);
    }

    uint32x4_t fVec;
};

-#undef SHIFT32
-#undef SHIFT16
-#undef SHIFT8
-
template<> inline Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) {
    return vcvtq_s32_f32(src.fVec);

}
template<> inline Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) {
    return vcvtq_f32_s32(src.fVec);
}
template<> inline Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) {
    return SkNx_cast<float>(Sk4i::Load(&src));
}
(...skipping 69 matching lines...)
    uint16x4x4_t rgba = {{
        r.fVec,
        g.fVec,
        b.fVec,
        a.fVec,
    }};
    vst4_u16((uint16_t*) dst, rgba);
}

#endif//SkNx_neon_DEFINED
