Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1812233003: SkNx refresh (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typo Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 public: 47 public:
48 SkNx(float32x2_t vec) : fVec(vec) {} 48 SkNx(float32x2_t vec) : fVec(vec) {}
49 49
50 SkNx() {} 50 SkNx() {}
51 SkNx(float val) : fVec(vdup_n_f32(val)) {} 51 SkNx(float val) : fVec(vdup_n_f32(val)) {}
52 static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); } 52 static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
53 SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; } 53 SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; }
54 54
55 void store(void* ptr) const { vst1_f32((float*)ptr, fVec); } 55 void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
56 56
57 SkNx approxInvert() const { 57 SkNx invert() const {
58 float32x2_t est0 = vrecpe_f32(fVec), 58 float32x2_t est0 = vrecpe_f32(fVec),
59 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); 59 est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
60 return est1; 60 return est1;
61 } 61 }
62 SkNx invert() const {
63 float32x2_t est1 = this->approxInvert().fVec,
64 est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
65 return est2;
66 }
67 62
68 SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); } 63 SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); }
69 SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); } 64 SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); }
70 SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); } 65 SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); }
71 SkNx operator / (const SkNx& o) const { 66 SkNx operator / (const SkNx& o) const {
72 #if defined(SK_CPU_ARM64) 67 #if defined(SK_CPU_ARM64)
73 return vdiv_f32(fVec, o.fVec); 68 return vdiv_f32(fVec, o.fVec);
74 #else 69 #else
75 return vmul_f32(fVec, o.invert().fVec); 70 float32x2_t est0 = vrecpe_f32(o.fVec),
71 est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0),
72 est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1);
73 return vmul_f32(fVec, est2);
76 #endif 74 #endif
77 } 75 }
78 76
79 SkNx operator == (const SkNx& o) const { return vreinterpret_f32_u32(vceq_f3 2(fVec, o.fVec)); } 77 SkNx operator == (const SkNx& o) const { return vreinterpret_f32_u32(vceq_f3 2(fVec, o.fVec)); }
80 SkNx operator < (const SkNx& o) const { return vreinterpret_f32_u32(vclt_f3 2(fVec, o.fVec)); } 78 SkNx operator < (const SkNx& o) const { return vreinterpret_f32_u32(vclt_f3 2(fVec, o.fVec)); }
81 SkNx operator > (const SkNx& o) const { return vreinterpret_f32_u32(vcgt_f3 2(fVec, o.fVec)); } 79 SkNx operator > (const SkNx& o) const { return vreinterpret_f32_u32(vcgt_f3 2(fVec, o.fVec)); }
82 SkNx operator <= (const SkNx& o) const { return vreinterpret_f32_u32(vcle_f3 2(fVec, o.fVec)); } 80 SkNx operator <= (const SkNx& o) const { return vreinterpret_f32_u32(vcle_f3 2(fVec, o.fVec)); }
83 SkNx operator >= (const SkNx& o) const { return vreinterpret_f32_u32(vcge_f3 2(fVec, o.fVec)); } 81 SkNx operator >= (const SkNx& o) const { return vreinterpret_f32_u32(vcge_f3 2(fVec, o.fVec)); }
84 SkNx operator != (const SkNx& o) const { 82 SkNx operator != (const SkNx& o) const {
85 return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); 83 return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
86 } 84 }
87 85
88 static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fV ec); } 86 static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fV ec); }
89 static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fV ec); } 87 static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fV ec); }
90 88
91 SkNx rsqrt0() const { return vrsqrte_f32(fVec); } 89 SkNx rsqrt() const {
92 SkNx rsqrt1() const { 90 float32x2_t est0 = vrsqrte_f32(fVec);
93 float32x2_t est0 = this->rsqrt0().fVec;
94 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); 91 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
95 } 92 }
96 SkNx rsqrt2() const {
97 float32x2_t est1 = this->rsqrt1().fVec;
98 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
99 }
100 93
101 SkNx sqrt() const { 94 SkNx sqrt() const {
102 #if defined(SK_CPU_ARM64) 95 #if defined(SK_CPU_ARM64)
103 return vsqrt_f32(fVec); 96 return vsqrt_f32(fVec);
104 #else 97 #else
105 return *this * this->rsqrt2(); 98 float32x2_t est0 = vrsqrte_f32(fVec),
99 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est 0),
100 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est 1);
101 return vmul_f32(fVec, est2);
106 #endif 102 #endif
107 } 103 }
108 104
109 float operator[](int k) const { 105 float operator[](int k) const {
110 SkASSERT(0 <= k && k < 2); 106 SkASSERT(0 <= k && k < 2);
111 union { float32x2_t v; float fs[2]; } pun = {fVec}; 107 union { float32x2_t v; float fs[2]; } pun = {fVec};
112 return pun.fs[k&1]; 108 return pun.fs[k&1];
113 } 109 }
114 110
115 bool allTrue() const { 111 bool allTrue() const {
(...skipping 12 matching lines...) Expand all
128 class SkNx<4, float> { 124 class SkNx<4, float> {
129 public: 125 public:
130 SkNx(float32x4_t vec) : fVec(vec) {} 126 SkNx(float32x4_t vec) : fVec(vec) {}
131 127
132 SkNx() {} 128 SkNx() {}
133 SkNx(float val) : fVec(vdupq_n_f32(val)) {} 129 SkNx(float val) : fVec(vdupq_n_f32(val)) {}
134 static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); } 130 static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
135 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 131 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
136 132
137 void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); } 133 void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
138 SkNx approxInvert() const { 134 SkNx invert() const {
139 float32x4_t est0 = vrecpeq_f32(fVec), 135 float32x4_t est0 = vrecpeq_f32(fVec),
140 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 136 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
141 return est1; 137 return est1;
142 } 138 }
143 SkNx invert() const {
144 float32x4_t est1 = this->approxInvert().fVec,
145 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
146 return est2;
147 }
148 139
149 SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); } 140 SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); }
150 SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); } 141 SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); }
151 SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); } 142 SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); }
152 SkNx operator / (const SkNx& o) const { 143 SkNx operator / (const SkNx& o) const {
153 #if defined(SK_CPU_ARM64) 144 #if defined(SK_CPU_ARM64)
154 return vdivq_f32(fVec, o.fVec); 145 return vdivq_f32(fVec, o.fVec);
155 #else 146 #else
156 return vmulq_f32(fVec, o.invert().fVec); 147 float32x4_t est0 = vrecpeq_f32(o.fVec),
148 est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0),
149 est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1);
150 return vmulq_f32(fVec, est2);
157 #endif 151 #endif
158 } 152 }
159 153
160 SkNx operator==(const SkNx& o) const { return vreinterpretq_f32_u32(vceqq_f3 2(fVec, o.fVec)); } 154 SkNx operator==(const SkNx& o) const { return vreinterpretq_f32_u32(vceqq_f3 2(fVec, o.fVec)); }
161 SkNx operator <(const SkNx& o) const { return vreinterpretq_f32_u32(vcltq_f3 2(fVec, o.fVec)); } 155 SkNx operator <(const SkNx& o) const { return vreinterpretq_f32_u32(vcltq_f3 2(fVec, o.fVec)); }
162 SkNx operator >(const SkNx& o) const { return vreinterpretq_f32_u32(vcgtq_f3 2(fVec, o.fVec)); } 156 SkNx operator >(const SkNx& o) const { return vreinterpretq_f32_u32(vcgtq_f3 2(fVec, o.fVec)); }
163 SkNx operator<=(const SkNx& o) const { return vreinterpretq_f32_u32(vcleq_f3 2(fVec, o.fVec)); } 157 SkNx operator<=(const SkNx& o) const { return vreinterpretq_f32_u32(vcleq_f3 2(fVec, o.fVec)); }
164 SkNx operator>=(const SkNx& o) const { return vreinterpretq_f32_u32(vcgeq_f3 2(fVec, o.fVec)); } 158 SkNx operator>=(const SkNx& o) const { return vreinterpretq_f32_u32(vcgeq_f3 2(fVec, o.fVec)); }
165 SkNx operator!=(const SkNx& o) const { 159 SkNx operator!=(const SkNx& o) const {
166 return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); 160 return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
167 } 161 }
168 162
169 static SkNx Min(const SkNx& l, const SkNx& r) { return vminq_f32(l.fVec, r.f Vec); } 163 static SkNx Min(const SkNx& l, const SkNx& r) { return vminq_f32(l.fVec, r.f Vec); }
170 static SkNx Max(const SkNx& l, const SkNx& r) { return vmaxq_f32(l.fVec, r.f Vec); } 164 static SkNx Max(const SkNx& l, const SkNx& r) { return vmaxq_f32(l.fVec, r.f Vec); }
171 165
172 SkNx abs() const { return vabsq_f32(fVec); } 166 SkNx abs() const { return vabsq_f32(fVec); }
173 SkNx floor() const { 167 SkNx floor() const {
174 #if defined(SK_CPU_ARM64) 168 #if defined(SK_CPU_ARM64)
175 return vrndmq_f32(fVec); 169 return vrndmq_f32(fVec);
176 #else 170 #else
177 return armv7_vrndmq_f32(fVec); 171 return armv7_vrndmq_f32(fVec);
178 #endif 172 #endif
179 } 173 }
180 174
181 175
182 SkNx rsqrt0() const { return vrsqrteq_f32(fVec); } 176 SkNx rsqrt() const {
183 SkNx rsqrt1() const { 177 float32x4_t est0 = vrsqrteq_f32(fVec);
184 float32x4_t est0 = this->rsqrt0().fVec;
185 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 178 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
186 } 179 }
187 SkNx rsqrt2() const {
188 float32x4_t est1 = this->rsqrt1().fVec;
189 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
190 }
191 180
192 SkNx sqrt() const { 181 SkNx sqrt() const {
193 #if defined(SK_CPU_ARM64) 182 #if defined(SK_CPU_ARM64)
194 return vsqrtq_f32(fVec); 183 return vsqrtq_f32(fVec);
195 #else 184 #else
196 return *this * this->rsqrt2(); 185 float32x4_t est0 = vrsqrteq_f32(fVec),
186 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0),
187 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
188 return vmulq_f32(fVec, est2);
197 #endif 189 #endif
198 } 190 }
199 191
200 float operator[](int k) const { 192 float operator[](int k) const {
201 SkASSERT(0 <= k && k < 4); 193 SkASSERT(0 <= k && k < 4);
202 union { float32x4_t v; float fs[4]; } pun = {fVec}; 194 union { float32x4_t v; float fs[4]; } pun = {fVec};
203 return pun.fs[k&3]; 195 return pun.fs[k&3];
204 } 196 }
205 197
206 bool allTrue() const { 198 bool allTrue() const {
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 return pun.us[k&15]; 349 return pun.us[k&15];
358 } 350 }
359 351
360 SkNx thenElse(const SkNx& t, const SkNx& e) const { 352 SkNx thenElse(const SkNx& t, const SkNx& e) const {
361 return vbslq_u8(fVec, t.fVec, e.fVec); 353 return vbslq_u8(fVec, t.fVec, e.fVec);
362 } 354 }
363 355
364 uint8x16_t fVec; 356 uint8x16_t fVec;
365 }; 357 };
366 358
359 template <>
360 class SkNx<4, int> {
361 public:
362 SkNx(const int32x4_t& vec) : fVec(vec) {}
363
364 SkNx() {}
365 SkNx(int v) {
366 fVec = vdupq_n_s32(v);
367 }
368 SkNx(int a, int b, int c, int d) {
369 fVec = (int32x4_t){a,b,c,d};
370 }
371 static SkNx Load(const void* ptr) {
372 return vld1q_s32((const int32_t*)ptr);
373 }
374 void store(void* ptr) const {
375 return vst1q_s32((int32_t*)ptr, fVec);
376 }
377 int operator[](int k) const {
378 SkASSERT(0 <= k && k < 4);
379 union { int32x4_t v; int is[4]; } pun = {fVec};
380 return pun.is[k&3];
381 }
382
383 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
384 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
385 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }
386
387 SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
388 SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
389
390 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.f Vec); }
391 // TODO as needed
392
393 int32x4_t fVec;
394 };
395
367 #undef SHIFT32 396 #undef SHIFT32
368 #undef SHIFT16 397 #undef SHIFT16
369 #undef SHIFT8 398 #undef SHIFT8
370 399
400 template<> inline Sk4i SkNx_cast<int, float>(const Sk4f& src) {
401 return vcvtq_s32_f32(src.fVec);
402
403 }
404 template<> inline Sk4f SkNx_cast<float, int>(const Sk4i& src) {
405 return vcvtq_f32_s32(src.fVec);
406 }
407
371 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { 408 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) {
372 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); 409 return vqmovn_u32(vcvtq_u32_f32(src.fVec));
373 } 410 }
374 411
375 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { 412 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) {
376 return vcvtq_f32_u32(vmovl_u16(src.fVec)); 413 return vcvtq_f32_u32(vmovl_u16(src.fVec));
377 } 414 }
378 415
379 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { 416 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) {
380 uint32x4_t _32 = vcvtq_u32_f32(src.fVec); 417 uint32x4_t _32 = vcvtq_u32_f32(src.fVec);
381 uint16x4_t _16 = vqmovn_u32(_32); 418 uint16x4_t _16 = vqmovn_u32(_32);
382 return vqmovn_u16(vcombine_u16(_16, _16)); 419 return vqmovn_u16(vcombine_u16(_16, _16));
383 } 420 }
384 421
385 template<> inline Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) { 422 template<> inline Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) {
386 uint16x8_t _16 = vmovl_u8 (src.fVec) ; 423 uint16x8_t _16 = vmovl_u8 (src.fVec) ;
387 uint32x4_t _32 = vmovl_u16(vget_low_u16(_16)); 424 uint32x4_t _32 = vmovl_u16(vget_low_u16(_16));
388 return vcvtq_f32_u32(_32); 425 return vcvtq_f32_u32(_32);
389 } 426 }
390 427
391 static inline void Sk4f_ToBytes(uint8_t bytes[16], 428 template<> inline Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) {
392 const Sk4f& a, const Sk4f& b, const Sk4f& c, con st Sk4f& d) { 429 Sk8f ab, cd;
393 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), 430 SkNx_split(src, &ab, &cd);
394 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], 431
395 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), 432 Sk4f a,b,c,d;
396 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]) .val[0]); 433 SkNx_split(ab, &a, &b);
434 SkNx_split(cd, &c, &d);
435 return vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec),
436 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0],
437 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec),
438 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0];
397 } 439 }
398 440
399 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { 441 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
400 return vget_low_u16(vmovl_u8(src.fVec)); 442 return vget_low_u16(vmovl_u8(src.fVec));
401 } 443 }
402 444
403 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { 445 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
404 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); 446 return vmovn_u16(vcombine_u16(src.fVec, src.fVec));
405 } 447 }
406 448
407 #endif//SkNx_neon_DEFINED 449 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698