Chromium Code Reviews

Unified diff: src/opts/SkNx_neon.h

Issue 2193983003: Revert of simplify neon shifts (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 4 months ago
 /*
  * Copyright 2015 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #ifndef SkNx_neon_DEFINED
 #define SkNx_neon_DEFINED

 #include <arm_neon.h>

 #define SKNX_IS_FAST

 // ARMv8 has vrndmq_f32 to floor 4 floats.  Here we emulate it:
 //   - roundtrip through integers via truncation
 //   - subtract 1 if that's too big (possible for negative values).
 // This restricts the domain of our inputs to a maximum somewhere around 2^31.  Seems plenty big.
 static inline float32x4_t armv7_vrndmq_f32(float32x4_t v) {
     float32x4_t roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));
     uint32x4_t too_big = roundtrip > v;
     return roundtrip - (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1));
 }

+// Well, this is absurd.  The shifts require compile-time constant arguments.
+
+#define SHIFT8(op, v, bits) switch(bits) { \
+    case 1: return op(v, 1);  case 2: return op(v, 2);  case 3: return op(v, 3); \
+    case 4: return op(v, 4);  case 5: return op(v, 5);  case 6: return op(v, 6); \
+    case 7: return op(v, 7); \
+    } return fVec
+
+#define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \
+    case  8: return op(v,  8);  case  9: return op(v,  9); \
+    case 10: return op(v, 10);  case 11: return op(v, 11);  case 12: return op(v, 12); \
+    case 13: return op(v, 13);  case 14: return op(v, 14);  case 15: return op(v, 15); \
+    } return fVec
+
+#define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \
+    case 16: return op(v, 16);  case 17: return op(v, 17);  case 18: return op(v, 18); \
+    case 19: return op(v, 19);  case 20: return op(v, 20);  case 21: return op(v, 21); \
+    case 22: return op(v, 22);  case 23: return op(v, 23);  case 24: return op(v, 24); \
+    case 25: return op(v, 25);  case 26: return op(v, 26);  case 27: return op(v, 27); \
+    case 28: return op(v, 28);  case 29: return op(v, 29);  case 30: return op(v, 30); \
+    case 31: return op(v, 31); } return fVec
+
 template <>
 class SkNx<2, float> {
 public:
     SkNx(float32x2_t vec) : fVec(vec) {}

     SkNx() {}
     SkNx(float a, float b) : fVec{a,b} {}
     SkNx(float v)          : fVec{v,v} {}

     static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
(...skipping 140 matching lines...)
     SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec{a,b,c,d} {}
     SkNx(uint16_t v) : fVec{v,v,v,v} {}

     static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); }
     void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); }

     SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
     SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
     SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

-    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
-    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }
+    SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); }
+    SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); }

     uint16_t operator[](int k) const { return fVec[k&3]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbsl_u16(fVec, t.fVec, e.fVec);
     }

     uint16x4_t fVec;
 };

 template <>
 class SkNx<8, uint16_t> {
 public:
     SkNx(const uint16x8_t& vec) : fVec(vec) {}

     SkNx() {}
     SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
          uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec{a,b,c,d,e,f,g,h} {}
     SkNx(uint16_t v) : fVec{v,v,v,v,v,v,v,v} {}

     static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); }
     void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); }

     SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
     SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
     SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

-    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
-    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }
+    SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
+    SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }

     uint16_t operator[](int k) const { return fVec[k&7]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u16(fVec, t.fVec, e.fVec);
     }

     uint16x8_t fVec;
(...skipping 67 matching lines...)
     void store(void* ptr) const { return vst1q_s32((int32_t*)ptr, fVec); }

     SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
     SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
     SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

     SkNx operator & (const SkNx& o) const { return fVec & o.fVec; }
     SkNx operator | (const SkNx& o) const { return fVec | o.fVec; }
     SkNx operator ^ (const SkNx& o) const { return fVec ^ o.fVec; }

-    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
-    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }
+    SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
+    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }

     SkNx operator == (const SkNx& o) const { return fVec == o.fVec; }
     SkNx operator < (const SkNx& o) const { return fVec < o.fVec; }
     SkNx operator > (const SkNx& o) const { return fVec > o.fVec; }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }

     int32_t operator[](int k) const { return fVec[k&3]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
(...skipping 16 matching lines...)
     void store(void* ptr) const { return vst1q_u32((uint32_t*)ptr, fVec); }

     SkNx operator + (const SkNx& o) const { return fVec + o.fVec; }
     SkNx operator - (const SkNx& o) const { return fVec - o.fVec; }
     SkNx operator * (const SkNx& o) const { return fVec * o.fVec; }

     SkNx operator & (const SkNx& o) const { return fVec & o.fVec; }
     SkNx operator | (const SkNx& o) const { return fVec | o.fVec; }
     SkNx operator ^ (const SkNx& o) const { return fVec ^ o.fVec; }

-    SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; }
-    SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; }
+    SkNx operator << (int bits) const { SHIFT32(vshlq_n_u32, fVec, bits); }
+    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_u32, fVec, bits); }

     SkNx operator == (const SkNx& o) const { return fVec == o.fVec; }
     SkNx operator < (const SkNx& o) const { return fVec < o.fVec; }
     SkNx operator > (const SkNx& o) const { return fVec > o.fVec; }

     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.fVec); }

     uint32_t operator[](int k) const { return fVec[k&3]; }

     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u32(fVec, t.fVec, e.fVec);
     }

     uint32x4_t fVec;
 };

+#undef SHIFT32
+#undef SHIFT16
+#undef SHIFT8
+
 template<> inline Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) {
     return vcvtq_s32_f32(src.fVec);

 }
 template<> inline Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) {
     return vcvtq_f32_s32(src.fVec);
 }
 template<> inline Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) {
     return SkNx_cast<float>(Sk4i::Load(&src));
 }
(...skipping 69 matching lines...)
     uint16x4x4_t rgba = {{
         r.fVec,
         g.fVec,
         b.fVec,
         a.fVec,
     }};
     vst4_u16((uint16_t*) dst, rgba);
 }

 #endif//SkNx_neon_DEFINED
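
Note on the unchanged armv7_vrndmq_f32 helper near the top of the file: it floors four floats by truncating through integers and then subtracting 1 wherever truncation overshot a negative input, which is why the comment restricts inputs to roughly ±2^31. The following scalar sketch is an illustration of that same idea, not part of the patch; the function name is hypothetical.

```cpp
#include <cstdint>
#include <cstdio>

// Scalar model of the vectorized floor emulation: truncate toward zero,
// then correct downward when truncation rounded a negative value up.
static float floor_via_roundtrip(float v) {
    float roundtrip = (float)(int32_t)v;   // truncation toward zero
    return roundtrip > v ? roundtrip - 1.0f : roundtrip;
}

int main() {
    // -2.5 truncates to -2.0, which is > -2.5, so subtract 1 -> -3.0.
    std::printf("%g %g %g\n",
                floor_via_roundtrip(-2.5f),   // -3
                floor_via_roundtrip( 2.5f),   //  2
                floor_via_roundtrip(-2.0f));  // -2
    return 0;
}
```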
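The SHIFT8/SHIFT16/SHIFT32 macros this patch restores exist because the NEON immediate-shift intrinsics (vshlq_n_u16, vshrq_n_u32, and friends) require the shift count to be a compile-time constant, so a runtime `bits` argument has to be dispatched through a switch with one case per count; the reverted "simplify" version instead shifted by a vector built from the runtime count (`fVec << SkNx(bits).fVec`). Below is a minimal standalone sketch of the dispatch pattern for one width, assuming an ARM target with <arm_neon.h>; `shl_u16` is a hypothetical name, not part of the patch.

```cpp
#include <arm_neon.h>

// Hypothetical helper mirroring SHIFT16's dispatch for uint16x8_t left shifts:
// vshlq_n_u16 demands an immediate count, so each possible count gets a case.
static inline uint16x8_t shl_u16(uint16x8_t v, int bits) {
    switch (bits) {
        case  1: return vshlq_n_u16(v,  1);  case  2: return vshlq_n_u16(v,  2);
        case  3: return vshlq_n_u16(v,  3);  case  4: return vshlq_n_u16(v,  4);
        case  5: return vshlq_n_u16(v,  5);  case  6: return vshlq_n_u16(v,  6);
        case  7: return vshlq_n_u16(v,  7);  case  8: return vshlq_n_u16(v,  8);
        case  9: return vshlq_n_u16(v,  9);  case 10: return vshlq_n_u16(v, 10);
        case 11: return vshlq_n_u16(v, 11);  case 12: return vshlq_n_u16(v, 12);
        case 13: return vshlq_n_u16(v, 13);  case 14: return vshlq_n_u16(v, 14);
        case 15: return vshlq_n_u16(v, 15);
    }
    return v;  // bits == 0 (or out of range): unshifted, matching the macros' fallthrough
}
```

When the call site passes a literal shift count and the helper is inlined, the optimizer typically folds the switch down to the single matching instruction, so the dispatch is usually free in practice.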
