Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(298)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1059743002: Use switch operator[](int) to kth<int>() so we can use vget_lane. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkPMFloat.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 12
13 template <> 13 template <>
14 class SkNi<2, int32_t> { 14 class SkNi<2, int32_t> {
15 public: 15 public:
16 SkNi(int32x2_t vec) : fVec(vec) {} 16 SkNi(int32x2_t vec) : fVec(vec) {}
17 17
18 SkNi() {} 18 SkNi() {}
19 bool allTrue() const { return fVec[0] && fVec[1]; } 19 bool allTrue() const { return vget_lane_s32(fVec, 0) && vget_lane_s32(fVec, 1); }
20 bool anyTrue() const { return fVec[0] || fVec[1]; } 20 bool anyTrue() const { return vget_lane_s32(fVec, 0) || vget_lane_s32(fVec, 1); }
21 private: 21 private:
22 int32x2_t fVec; 22 int32x2_t fVec;
23 }; 23 };
24 24
25 template <> 25 template <>
26 class SkNi<4, int32_t> { 26 class SkNi<4, int32_t> {
27 public: 27 public:
28 SkNi(int32x4_t vec) : fVec(vec) {} 28 SkNi(int32x4_t vec) : fVec(vec) {}
29 29
30 SkNi() {} 30 SkNi() {}
31 bool allTrue() const { return fVec[0] && fVec[1] && fVec[2] && fVec[3]; } 31 bool allTrue() const { return vgetq_lane_s32(fVec, 0) && vgetq_lane_s32(fVec, 1)
32 bool anyTrue() const { return fVec[0] || fVec[1] || fVec[2] || fVec[3]; } 32 && vgetq_lane_s32(fVec, 2) && vgetq_lane_s32(fVec, 3); }
33 bool anyTrue() const { return vgetq_lane_s32(fVec, 0) || vgetq_lane_s32(fVec, 1)
34 || vgetq_lane_s32(fVec, 2) || vgetq_lane_s32(fVec, 3); }
33 private: 35 private:
34 int32x4_t fVec; 36 int32x4_t fVec;
35 }; 37 };
36 38
37 template <> 39 template <>
38 class SkNf<2, float> { 40 class SkNf<2, float> {
39 typedef SkNi<2, int32_t> Ni; 41 typedef SkNi<2, int32_t> Ni;
40 public: 42 public:
41 SkNf(float32x2_t vec) : fVec(vec) {} 43 SkNf(float32x2_t vec) : fVec(vec) {}
42 44
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 #if defined(SK_CPU_ARM64) 93 #if defined(SK_CPU_ARM64)
92 return vsqrt_f32(fVec); 94 return vsqrt_f32(fVec);
93 #else 95 #else
94 float32x2_t est1 = this->rsqrt().fVec, 96 float32x2_t est1 = this->rsqrt().fVec,
95 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 97 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
96 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); 98 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
97 return vmul_f32(fVec, est2); 99 return vmul_f32(fVec, est2);
98 #endif 100 #endif
99 } 101 }
100 102
101 float operator[] (int k) const { 103 template <int k> float kth() const {
102 SkASSERT(0 <= k && k < 2); 104 SkASSERT(0 <= k && k < 2);
103 return fVec[k]; 105 return vget_lane_f32(fVec, k&1);
104 } 106 }
105 107
106 private: 108 private:
107 float32x2_t fVec; 109 float32x2_t fVec;
108 }; 110 };
109 111
110 #if defined(SK_CPU_ARM64) 112 #if defined(SK_CPU_ARM64)
111 template <> 113 template <>
112 class SkNi<2, int64_t> { 114 class SkNi<2, int64_t> {
113 public: 115 public:
114 SkNi(int64x2_t vec) : fVec(vec) {} 116 SkNi(int64x2_t vec) : fVec(vec) {}
115 117
116 SkNi() {} 118 SkNi() {}
117 bool allTrue() const { return fVec[0] && fVec[1]; } 119 bool allTrue() const { return vgetq_lane_s64(fVec, 0) && vgetq_lane_s64(fVec, 1); }
118 bool anyTrue() const { return fVec[0] || fVec[1]; } 120 bool anyTrue() const { return vgetq_lane_s64(fVec, 0) || vgetq_lane_s64(fVec, 1); }
119 private: 121 private:
120 int64x2_t fVec; 122 int64x2_t fVec;
121 }; 123 };
122 124
123 template <> 125 template <>
124 class SkNf<2, double> { 126 class SkNf<2, double> {
125 typedef SkNi<2, int64_t> Ni; 127 typedef SkNi<2, int64_t> Ni;
126 public: 128 public:
127 SkNf(float64x2_t vec) : fVec(vec) {} 129 SkNf(float64x2_t vec) : fVec(vec) {}
128 130
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 return est1; 165 return est1;
164 } 166 }
165 167
166 SkNf invert() const { 168 SkNf invert() const {
167 float64x2_t est1 = this->approxInvert().fVec, 169 float64x2_t est1 = this->approxInvert().fVec,
168 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), 170 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),
169 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); 171 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);
170 return est3; 172 return est3;
171 } 173 }
172 174
173 double operator[] (int k) const { 175 template <int k> double kth() const {
174 SkASSERT(0 <= k && k < 2); 176 SkASSERT(0 <= k && k < 2);
175 return fVec[k]; 177 return vgetq_lane_f64(fVec, k&1);
176 } 178 }
177 179
178 private: 180 private:
179 float64x2_t fVec; 181 float64x2_t fVec;
180 }; 182 };
181 #endif//defined(SK_CPU_ARM64) 183 #endif//defined(SK_CPU_ARM64)
182 184
183 template <> 185 template <>
184 class SkNf<4, float> { 186 class SkNf<4, float> {
185 typedef SkNi<4, int32_t> Ni; 187 typedef SkNi<4, int32_t> Ni;
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 #if defined(SK_CPU_ARM64) 240 #if defined(SK_CPU_ARM64)
239 return vsqrtq_f32(fVec); 241 return vsqrtq_f32(fVec);
240 #else 242 #else
241 float32x4_t est1 = this->rsqrt().fVec, 243 float32x4_t est1 = this->rsqrt().fVec,
242 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 244 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
243 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); 245 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
244 return vmulq_f32(fVec, est2); 246 return vmulq_f32(fVec, est2);
245 #endif 247 #endif
246 } 248 }
247 249
248 float operator[] (int k) const { 250 template <int k> float kth() const {
249 SkASSERT(0 <= k && k < 4); 251 SkASSERT(0 <= k && k < 4);
250 return fVec[k]; 252 return vgetq_lane_f32(fVec, k&3);
251 } 253 }
252 254
253 private: 255 private:
254 float32x4_t fVec; 256 float32x4_t fVec;
255 }; 257 };
256 258
257 #endif//SkNx_neon_DEFINED 259 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkPMFloat.h ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698