Chromium Code Reviews

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1048593002: Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T> (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: This is actually faster | Created 5 years, 8 months ago
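
To ground the interface this patch introduces, here is a minimal usage sketch exercising only the operations visible in this diff (the variable names are illustrative, and it assumes the portable SkNx front end dispatches to these NEON specializations):

    float xs[4] = { 1, 2, 3, 4 };
    float ys[4] = { 4, 3, 2, 1 };

    SkNf<4, float> x = SkNf<4, float>::Load(xs),
                   y = SkNf<4, float>::Load(ys);

    SkNf<4, float> sum = x + y;                        // lane-wise add
    SkNf<4, float> lo  = SkNf<4, float>::Min(x, y);    // lane-wise min
    SkNi<4, int32_t> lt = x < y;                       // comparisons yield an SkNi mask

    float out[4];
    (sum * lo).store(out);                             // write four floats back to memory

    if (lt.anyTrue()) { /* at least one lane of x was less than the matching lane of y */ }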
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkNx_neon_DEFINED
#define SkNx_neon_DEFINED

#include <arm_neon.h>

template <>
class SkNi<2, int32_t> {
public:
    SkNi(int32x2_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1]; }
    bool anyTrue() const { return fVec[0] || fVec[1]; }
private:
    int32x2_t fVec;
};

template <>
class SkNi<4, int32_t> {
public:
    SkNi(int32x4_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1] && fVec[2] && fVec[3]; }
    bool anyTrue() const { return fVec[0] || fVec[1] || fVec[2] || fVec[3]; }
private:
    int32x4_t fVec;
};

template <>
class SkNf<2, float> {
    typedef SkNi<2, int32_t> Ni;
public:
    SkNf(float32x2_t vec) : fVec(vec) {}

    SkNf() {}
    explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}
    static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
    SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }

    void store(float vals[2]) const { vst1_f32(vals, fVec); }

    SkNf approxInvert() const {
        float32x2_t est0 = vrecpe_f32(fVec),
                    est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
        return est1;
    }
    SkNf invert() const {
        float32x2_t est1 = this->approxInvert().fVec,
                    est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vadd_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const {
#if defined(SK_CPU_ARM64)
        return vdiv_f32(fVec, o.fVec);
#else
        return vmul_f32(fVec, o.invert().fVec);
#endif
    }

    Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }

    SkNf rsqrt() const {
        float32x2_t est0 = vrsqrte_f32(fVec),
                    est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
        return est1;
    }

    SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
        return vsqrt_f32(fVec);
#else
        float32x2_t est1 = this->rsqrt().fVec,
            // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
                    est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
        return vmul_f32(fVec, est2);
#endif
    }

    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float32x2_t fVec;
};
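
// A note on the reciprocal and reciprocal-sqrt code above: vrecpe_f32(x) returns a rough
// estimate e of 1/x, and vrecps_f32(e, x) computes 2 - e*x, so e * (2 - e*x) is one
// Newton-Raphson step that roughly doubles the number of correct bits; approxInvert()
// takes one step and invert() takes two. Likewise vrsqrte_f32 estimates 1/sqrt(x) and
// vrsqrts_f32(x, e*e) computes (3 - x*e*e) / 2, so rsqrt() performs the analogous step
// e * (3 - x*e*e) / 2, and the non-ARM64 sqrt() then forms sqrt(x) as x * (1/sqrt(x)).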

#if defined(SK_CPU_ARM64)
template <>
class SkNi<2, int64_t> {
public:
    SkNi(int64x2_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1]; }
    bool anyTrue() const { return fVec[0] || fVec[1]; }
private:
    int64x2_t fVec;
};

template <>
class SkNf<2, double> {
    typedef SkNi<2, int64_t> Ni;
public:
    SkNf(float64x2_t vec) : fVec(vec) {}

    SkNf() {}
    explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}
    static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }
    SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }

    void store(double vals[2]) const { vst1q_f64(vals, fVec); }

    SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); }

    Ni operator == (const SkNf& o) const { return vreinterpretq_s64_u64(vceqq_f64(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpretq_s64_u64(vcltq_f64(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpretq_s64_u64(vcgtq_f64(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpretq_s64_u64(vcleq_f64(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpretq_s64_u64(vcgeq_f64(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpretq_s64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }

    SkNf sqrt() const { return vsqrtq_f64(fVec); }
    SkNf rsqrt() const {
        float64x2_t est0 = vrsqrteq_f64(fVec),
                    est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
        return est1;
    }

    SkNf approxInvert() const {
        float64x2_t est0 = vrecpeq_f64(fVec),
                    est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);
        return est1;
    }

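    // Presumably invert() chains two more vrecps steps onto approxInvert()'s one (three
    // Newton-Raphson steps in total, versus two in the float specializations) because each
    // step roughly doubles the correct bits and double's 53-bit mantissa needs the extra step.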
    SkNf invert() const {
        float64x2_t est1 = this->approxInvert().fVec,
                    est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),
                    est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);
        return est3;
    }

    double operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float64x2_t fVec;
};
#endif  // defined(SK_CPU_ARM64)

template <>
class SkNf<4, float> {
    typedef SkNi<4, int32_t> Ni;
public:
    SkNf(float32x4_t vec) : fVec(vec) {}
    float32x4_t vec() const { return fVec; }

    SkNf() {}
    explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
    static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
    SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }

    void store(float vals[4]) const { vst1q_f32(vals, fVec); }

    SkNf approxInvert() const {
        float32x4_t est0 = vrecpeq_f32(fVec),
                    est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
        return est1;
    }
    SkNf invert() const {
        float32x4_t est1 = this->approxInvert().fVec,
                    est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vaddq_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const {
#if defined(SK_CPU_ARM64)
        return vdivq_f32(fVec, o.fVec);
#else
        return vmulq_f32(fVec, o.invert().fVec);
#endif
    }

    Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }

    SkNf rsqrt() const {
        float32x4_t est0 = vrsqrteq_f32(fVec),
                    est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
        return est1;
    }

    SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
        return vsqrtq_f32(fVec);
#else
        float32x4_t est1 = this->rsqrt().fVec,
            // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
                    est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
        return vmulq_f32(fVec, est2);
#endif
    }

    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 4);
        return fVec[k];
    }

private:
    float32x4_t fVec;
};

#endif  // SkNx_neon_DEFINED
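
As a closing illustration of the 32-bit ARM fallback paths (division by a refined reciprocal, and sqrt via a refined 1/sqrt), here is a small scalar C++ sketch of the same Newton-Raphson updates; recip_step, rsqrt_step, and the starting estimates are invented for this note and are not part of the patch:

    #include <cmath>
    #include <cstdio>

    // Scalar model of the vrecps_f32 pattern: one Newton-Raphson step toward 1/x.
    static float recip_step(float est, float x) { return est * (2.0f - est * x); }

    // Scalar model of the vrsqrts_f32 pattern: one step toward 1/sqrt(x).
    static float rsqrt_step(float est, float x) { return est * (3.0f - x * est * est) * 0.5f; }

    int main() {
        float x = 7.0f;

        // Division fallback: a / b ~= a * refined(1/b); two reciprocal steps, like invert().
        float r = 0.1f;                 // stand-in for the coarse vrecpe_f32 estimate
        r = recip_step(r, x);
        r = recip_step(r, x);
        std::printf("1/7 ~= %.7f (libm: %.7f)\n", r, 1.0f / x);

        // sqrt fallback: refine 1/sqrt(x), then multiply by x, as sqrt() does on 32-bit ARM.
        float s = 0.4f;                 // stand-in for the coarse vrsqrte_f32 estimate
        s = rsqrt_step(s, x);
        s = rsqrt_step(s, x);
        std::printf("sqrt(7) ~= %.7f (libm: %.7f)\n", x * s, std::sqrt(x));
        return 0;
    }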