Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1650653002: SkNx Load/store: take any pointer. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: simplify call sites Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_avx.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 23 matching lines...) Expand all
34 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \ 34 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \
35 case 31: return op(v, 31); } return fVec 35 case 31: return op(v, 31); } return fVec
36 36
37 template <> 37 template <>
38 class SkNx<2, float> { 38 class SkNx<2, float> {
39 public: 39 public:
40 SkNx(float32x2_t vec) : fVec(vec) {} 40 SkNx(float32x2_t vec) : fVec(vec) {}
41 41
42 SkNx() {} 42 SkNx() {}
43 SkNx(float val) : fVec(vdup_n_f32(val)) {} 43 SkNx(float val) : fVec(vdup_n_f32(val)) {}
44 static SkNx Load(const float vals[2]) { return vld1_f32(vals); } 44 static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
45 SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; } 45 SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; }
46 46
47 void store(float vals[2]) const { vst1_f32(vals, fVec); } 47 void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
48 48
49 SkNx approxInvert() const { 49 SkNx approxInvert() const {
50 float32x2_t est0 = vrecpe_f32(fVec), 50 float32x2_t est0 = vrecpe_f32(fVec),
51 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); 51 est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
52 return est1; 52 return est1;
53 } 53 }
54 SkNx invert() const { 54 SkNx invert() const {
55 float32x2_t est1 = this->approxInvert().fVec, 55 float32x2_t est1 = this->approxInvert().fVec,
56 est2 = vmul_f32(vrecps_f32(est1, fVec), est1); 56 est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
57 return est2; 57 return est2;
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 float32x2_t fVec; 115 float32x2_t fVec;
116 }; 116 };
117 117
// 4-lane vector of int, backed by a 128-bit NEON register.
template <>
class SkNx<4, int> {
public:
    SkNx(const int32x4_t& vec) : fVec(vec) {}

    SkNx() {}
    // Broadcast a single value into all four lanes.
    SkNx(int val) : fVec(vdupq_n_s32(val)) {}
    // Load/store accept any pointer; the memory must hold 4 contiguous ints.
    static SkNx Load(const void* ptr) { return vld1q_s32((const int*)ptr); }
    SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }

    void store(void* ptr) const { vst1q_s32((int*)ptr, fVec); }

    // Lane-wise arithmetic.
    SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
    SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
    SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }

    // NEON shift intrinsics need an immediate count, so SHIFT32 expands to a
    // switch over all possible runtime shift amounts.
    SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }

    // Extract lane k (k must be 0..3; masked with &3 to keep the intrinsic's
    // immediate in range even if the assert is compiled out).
    template <int k> int kth() const {
        SkASSERT(0 <= k && k < 4);
        return vgetq_lane_s32(fVec, k&3);
    }

    int32x4_t fVec;
};
144 144
145 template <> 145 template <>
146 class SkNx<4, float> { 146 class SkNx<4, float> {
147 public: 147 public:
148 SkNx(float32x4_t vec) : fVec(vec) {} 148 SkNx(float32x4_t vec) : fVec(vec) {}
149 149
150 SkNx() {} 150 SkNx() {}
151 SkNx(float val) : fVec(vdupq_n_f32(val)) {} 151 SkNx(float val) : fVec(vdupq_n_f32(val)) {}
152 static SkNx Load(const float vals[4]) { return vld1q_f32(vals); } 152 static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
153 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 153 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
154 154
155 void store(float vals[4]) const { vst1q_f32(vals, fVec); } 155 void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
156 SkNx approxInvert() const { 156 SkNx approxInvert() const {
157 float32x4_t est0 = vrecpeq_f32(fVec), 157 float32x4_t est0 = vrecpeq_f32(fVec),
158 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 158 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
159 return est1; 159 return est1;
160 } 160 }
161 SkNx invert() const { 161 SkNx invert() const {
162 float32x4_t est1 = this->approxInvert().fVec, 162 float32x4_t est1 = this->approxInvert().fVec,
163 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); 163 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
164 return est2; 164 return est2;
165 } 165 }
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 // It's possible that for our current use cases, representing this as 233 // It's possible that for our current use cases, representing this as
234 // half a uint16x8_t might be better than representing it as a uint16x4_t. 234 // half a uint16x8_t might be better than representing it as a uint16x4_t.
235 // It'd make conversion to Sk4b one step simpler. 235 // It'd make conversion to Sk4b one step simpler.
// 4-lane vector of uint16_t, backed by a 64-bit NEON register.
template <>
class SkNx<4, uint16_t> {
public:
    SkNx(const uint16x4_t& vec) : fVec(vec) {}

    SkNx() {}
    // Broadcast a single value into all four lanes.
    SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {}
    // Load accepts any pointer; the memory must hold 4 contiguous uint16_ts.
    static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); }

    SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
        fVec = (uint16x4_t) { a,b,c,d };
    }

    void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); }

    // Lane-wise arithmetic (wraps modulo 2^16).
    SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); }
    SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); }
    SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); }

    // NEON shifts take an immediate count; SHIFT16 switches over runtime bits.
    SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); }
    SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); }

    // Extract lane k (0..3); &3 keeps the intrinsic's immediate in range.
    template <int k> uint16_t kth() const {
        SkASSERT(0 <= k && k < 4);
        return vget_lane_u16(fVec, k&3);
    }

    // Bitwise select: each bit of *this chooses between t (bit set) and e
    // (bit clear), so *this is expected to be an all-ones/all-zeros lane mask.
    SkNx thenElse(const SkNx& t, const SkNx& e) const {
        return vbsl_u16(fVec, t.fVec, e.fVec);
    }

    uint16x4_t fVec;
};
271 271
// 8-lane vector of uint16_t, backed by a 128-bit NEON register.
template <>
class SkNx<8, uint16_t> {
public:
    SkNx(const uint16x8_t& vec) : fVec(vec) {}

    SkNx() {}
    // Broadcast a single value into all eight lanes.
    SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {}
    // Load accepts any pointer; the memory must hold 8 contiguous uint16_ts.
    static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); }

    SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
         uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
        fVec = (uint16x8_t) { a,b,c,d, e,f,g,h };
    }

    void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); }

    // Lane-wise arithmetic (wraps modulo 2^16).
    SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); }
    SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); }
    SkNx operator * (const SkNx& o) const { return vmulq_u16(fVec, o.fVec); }

    // NEON shifts take an immediate count; SHIFT16 switches over runtime bits.
    SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
    SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }

    static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }

    // Extract lane k (0..7); &7 keeps the intrinsic's immediate in range.
    template <int k> uint16_t kth() const {
        SkASSERT(0 <= k && k < 8);
        return vgetq_lane_u16(fVec, k&7);
    }

    // Bitwise select: each bit of *this chooses between t (bit set) and e
    // (bit clear), so *this is expected to be an all-ones/all-zeros lane mask.
    SkNx thenElse(const SkNx& t, const SkNx& e) const {
        return vbslq_u16(fVec, t.fVec, e.fVec);
    }

    uint16x8_t fVec;
};
308 308
// 4-lane vector of uint8_t. Only the low 32 bits of the 64-bit register are
// meaningful; load/store move the 4 bytes as a single 32-bit unit.
template <>
class SkNx<4, uint8_t> {
public:
    SkNx(const uint8x8_t& vec) : fVec(vec) {}

    SkNx() {}
    // Loads the 4 bytes as one u32, duplicated into both 32-bit lanes.
    // NOTE(review): the cast to uint32_t* presumes ptr is suitably aligned
    // for a 32-bit access — confirm against callers.
    static SkNx Load(const void* ptr) {
        return (uint8x8_t)vld1_dup_u32((const uint32_t*)ptr);
    }
    // Stores only lane 0 (the meaningful 4 bytes).
    void store(void* ptr) const {
        return vst1_lane_u32((uint32_t*)ptr, (uint32x2_t)fVec, 0);
    }

    // TODO as needed

    uint8x8_t fVec;
};
326 326
327 template <> 327 template <>
328 class SkNx<16, uint8_t> { 328 class SkNx<16, uint8_t> {
329 public: 329 public:
330 SkNx(const uint8x16_t& vec) : fVec(vec) {} 330 SkNx(const uint8x16_t& vec) : fVec(vec) {}
331 331
332 SkNx() {} 332 SkNx() {}
333 SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {} 333 SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {}
334 static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); } 334 static SkNx Load(const void* ptr) { return vld1q_u8((const uint8_t*)ptr); }
335 335
336 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, 336 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
337 uint8_t e, uint8_t f, uint8_t g, uint8_t h, 337 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
338 uint8_t i, uint8_t j, uint8_t k, uint8_t l, 338 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
339 uint8_t m, uint8_t n, uint8_t o, uint8_t p) { 339 uint8_t m, uint8_t n, uint8_t o, uint8_t p) {
340 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; 340 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p };
341 } 341 }
342 342
343 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } 343 void store(void* ptr) const { vst1q_u8((uint8_t*)ptr, fVec); }
344 344
345 SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); } 345 SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); }
346 346
347 SkNx operator + (const SkNx& o) const { return vaddq_u8(fVec, o.fVec); } 347 SkNx operator + (const SkNx& o) const { return vaddq_u8(fVec, o.fVec); }
348 SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); } 348 SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); }
349 349
350 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fV ec); } 350 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fV ec); }
351 SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); } 351 SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); }
352 352
353 template <int k> uint8_t kth() const { 353 template <int k> uint8_t kth() const {
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 return vget_low_u16(vmovl_u8(src.fVec)); 394 return vget_low_u16(vmovl_u8(src.fVec));
395 } 395 }
396 396
// Sk4h -> Sk4b: narrow each 16-bit lane to 8 bits (truncating the high byte).
// vmovn_u16 needs a full 128-bit input, so src is duplicated via vcombine;
// the 4 meaningful results land in the low half of the returned uint8x8_t.
template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) {
    return vmovn_u16(vcombine_u16(src.fVec, src.fVec));
}
400 400
401 } // namespace 401 } // namespace
402 402
403 #endif//SkNx_neon_DEFINED 403 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_avx.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698