Unified Diff: src/opts/SkNx_neon.h

Issue 1083123002: Rename SkNi to SkNb. (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: nb Created 5 years, 8 months ago
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
 /*
  * Copyright 2015 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
 #ifndef SkNx_neon_DEFINED
 #define SkNx_neon_DEFINED
 
 #include <arm_neon.h>
 
 template <>
-class SkNi<2, int32_t> {
+class SkNb<2, 4> {
 public:
-    SkNi(int32x2_t vec) : fVec(vec) {}
+    SkNb(uint32x2_t vec) : fVec(vec) {}
 
-    SkNi() {}
-    bool allTrue() const { return vget_lane_s32(fVec, 0) && vget_lane_s32(fVec, 1); }
-    bool anyTrue() const { return vget_lane_s32(fVec, 0) || vget_lane_s32(fVec, 1); }
+    SkNb() {}
+    bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); }
+    bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); }
 private:
-    int32x2_t fVec;
+    uint32x2_t fVec;
 };
 
 template <>
-class SkNi<4, int32_t> {
+class SkNb<4, 4> {
 public:
-    SkNi(int32x4_t vec) : fVec(vec) {}
+    SkNb(uint32x4_t vec) : fVec(vec) {}
 
-    SkNi() {}
-    bool allTrue() const { return vgetq_lane_s32(fVec, 0) && vgetq_lane_s32(fVec, 1)
-                               && vgetq_lane_s32(fVec, 2) && vgetq_lane_s32(fVec, 3); }
-    bool anyTrue() const { return vgetq_lane_s32(fVec, 0) || vgetq_lane_s32(fVec, 1)
-                               || vgetq_lane_s32(fVec, 2) || vgetq_lane_s32(fVec, 3); }
+    SkNb() {}
+    bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec, 1)
+                               && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec, 3); }
+    bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec, 1)
+                               || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec, 3); }
 private:
-    int32x4_t fVec;
+    uint32x4_t fVec;
 };
 
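Note: this patch set renames the comparison-mask wrapper from SkNi<N, int32_t> to SkNb<N, bytes-per-lane> and switches its storage to unsigned lanes, which is what the NEON compare intrinsics naturally produce. A minimal usage sketch, assuming the generic SkNf front end from SkNx.h and a NEON build that selects this header (the helper name is hypothetical, not part of the CL):

    // Returns true only if every lane of a is strictly less than the matching lane of b.
    // SkNf<2, float>::operator< below returns the SkNb<2, 4> mask defined above.
    static bool all_less(const SkNf<2, float>& a, const SkNf<2, float>& b) {
        return (a < b).allTrue();   // vclt_f32 yields an all-ones/all-zeros uint32x2_t per lane
    }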
 template <>
 class SkNf<2, float> {
-    typedef SkNi<2, int32_t> Ni;
+    typedef SkNb<2, 4> Nb;
 public:
     SkNf(float32x2_t vec) : fVec(vec) {}
 
     SkNf() {}
     explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}
     static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
     SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }
 
     void store(float vals[2]) const { vst1_f32(vals, fVec); }
 
(...skipping 12 matching lines...)
     SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); }
     SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); }
     SkNf operator / (const SkNf& o) const {
 #if defined(SK_CPU_ARM64)
         return vdiv_f32(fVec, o.fVec);
 #else
         return vmul_f32(fVec, o.invert().fVec);
 #endif
     }
 
-    Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(fVec, o.fVec)); }
-    Ni operator <  (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(fVec, o.fVec)); }
-    Ni operator >  (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(fVec, o.fVec)); }
-    Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(fVec, o.fVec)); }
-    Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(fVec, o.fVec)); }
-    Ni operator != (const SkNf& o) const {
-        return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
-    }
+    Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); }
+    Nb operator <  (const SkNf& o) const { return vclt_f32(fVec, o.fVec); }
+    Nb operator >  (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); }
+    Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); }
+    Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); }
+    Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); }
 
     static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
     static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }
 
     SkNf rsqrt() const {
         float32x2_t est0 = vrsqrte_f32(fVec),
                     est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
         return est1;
     }
 
(...skipping 12 matching lines...)
         SkASSERT(0 <= k && k < 2);
         return vget_lane_f32(fVec, k&1);
     }
 
 private:
     float32x2_t fVec;
 };
 
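The rsqrt() above is the usual two-step NEON estimate: vrsqrte_f32 gives a rough 1/sqrt(x), and vrsqrts_f32(x, est0*est0) returns (3 - x*est0*est0)/2, so multiplying by est0 performs one Newton-Raphson refinement. A scalar sketch of the same arithmetic (illustration only, not part of the CL):

    // One Newton-Raphson step for 1/sqrt(x), given any rough estimate est0.
    float rsqrt_step(float x, float est0) {
        return est0 * (3.0f - x * est0 * est0) * 0.5f;  // vrsqrts_f32 supplies the (3 - a*b)/2 factor
    }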
 #if defined(SK_CPU_ARM64)
 template <>
-class SkNi<2, int64_t> {
+class SkNb<2, 8> {
 public:
-    SkNi(int64x2_t vec) : fVec(vec) {}
+    SkNb(uint64x2_t vec) : fVec(vec) {}
 
-    SkNi() {}
-    bool allTrue() const { return vgetq_lane_s64(fVec, 0) && vgetq_lane_s64(fVec, 1); }
-    bool anyTrue() const { return vgetq_lane_s64(fVec, 0) || vgetq_lane_s64(fVec, 1); }
+    SkNb() {}
+    bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec, 1); }
+    bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec, 1); }
 private:
-    int64x2_t fVec;
+    uint64x2_t fVec;
 };
 
 template <>
 class SkNf<2, double> {
-    typedef SkNi<2, int64_t> Ni;
+    typedef SkNb<2, 8> Nb;
 public:
     SkNf(float64x2_t vec) : fVec(vec) {}
 
     SkNf() {}
     explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}
     static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }
     SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }
 
     void store(double vals[2]) const { vst1q_f64(vals, fVec); }
 
     SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); }
     SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); }
     SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); }
     SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); }
 
-    Ni operator == (const SkNf& o) const { return vreinterpretq_s64_u64(vceqq_f64(fVec, o.fVec)); }
-    Ni operator <  (const SkNf& o) const { return vreinterpretq_s64_u64(vcltq_f64(fVec, o.fVec)); }
-    Ni operator >  (const SkNf& o) const { return vreinterpretq_s64_u64(vcgtq_f64(fVec, o.fVec)); }
-    Ni operator <= (const SkNf& o) const { return vreinterpretq_s64_u64(vcleq_f64(fVec, o.fVec)); }
-    Ni operator >= (const SkNf& o) const { return vreinterpretq_s64_u64(vcgeq_f64(fVec, o.fVec)); }
-    Ni operator != (const SkNf& o) const {
-        return vreinterpretq_s64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
-    }
+    Nb operator == (const SkNf& o) const { return vceqq_f64(fVec, o.fVec); }
+    Nb operator <  (const SkNf& o) const { return vcltq_f64(fVec, o.fVec); }
+    Nb operator >  (const SkNf& o) const { return vcgtq_f64(fVec, o.fVec); }
+    Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); }
+    Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); }
+    Nb operator != (const SkNf& o) const {
+        return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
+    }
 
     static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); }
     static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }
 
     SkNf sqrt() const { return vsqrtq_f64(fVec); }
     SkNf rsqrt() const {
         float64x2_t est0 = vrsqrteq_f64(fVec),
                     est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
         return est1;
(...skipping 17 matching lines...)
         return vgetq_lane_f64(fVec, k&1);
     }
 
 private:
     float64x2_t fVec;
 };
 #endif//defined(SK_CPU_ARM64)
 
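On the float64 side, operator!= still needs the reinterpret round-trip because NEON has no 64-bit vmvn: the 64-bit equality mask is viewed as uint32x4_t, bit-inverted, and viewed back. Since each lane is all-ones or all-zeros, flipping every bit through the 32-bit view produces the correct 64-bit "not equal" mask. A standalone sketch of the same idea (hypothetical helper, AArch64 only, not part of the CL):

    static uint64x2_t not_equal_f64(float64x2_t a, float64x2_t b) {
        uint64x2_t eq = vceqq_f64(a, b);                            // all-ones or all-zeros per lane
        uint32x4_t flipped = vmvnq_u32(vreinterpretq_u32_u64(eq));  // no vmvnq_u64, so invert as u32
        return vreinterpretq_u64_u32(flipped);                      // still all-ones/all-zeros per lane
    }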
 template <>
 class SkNf<4, float> {
-    typedef SkNi<4, int32_t> Ni;
+    typedef SkNb<4, 4> Nb;
 public:
     SkNf(float32x4_t vec) : fVec(vec) {}
 
     SkNf() {}
     explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
     static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
     SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
 
     void store(float vals[4]) const { vst1q_f32(vals, fVec); }
 
(...skipping 12 matching lines...)
     SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); }
     SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); }
     SkNf operator / (const SkNf& o) const {
 #if defined(SK_CPU_ARM64)
         return vdivq_f32(fVec, o.fVec);
 #else
         return vmulq_f32(fVec, o.invert().fVec);
 #endif
     }
 
-    Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
-    Ni operator <  (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
-    Ni operator >  (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
-    Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
-    Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
-    Ni operator != (const SkNf& o) const {
-        return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
-    }
+    Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); }
+    Nb operator <  (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); }
+    Nb operator >  (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); }
+    Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); }
+    Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); }
+    Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); }
 
     static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
     static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }
 
     SkNf rsqrt() const {
         float32x4_t est0 = vrsqrteq_f32(fVec),
                     est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
         return est1;
     }
 
(...skipping 11 matching lines...)
     template <int k> float kth() const {
         SkASSERT(0 <= k && k < 4);
         return vgetq_lane_f32(fVec, k&3);
     }
 
 protected:
     float32x4_t fVec;
 };
 
 #endif//SkNx_neon_DEFINED
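A usage sketch for the 4-lane path, showing anyTrue() on the new SkNb<4, 4> mask (the helper is hypothetical, not part of the CL; assumes a NEON build that selects this header):

    // Rejects a 4-wide batch if any value lies outside [0, 1].
    static bool any_out_of_range(const float vals[4]) {
        SkNf<4, float> v = SkNf<4, float>::Load(vals);
        return (v < SkNf<4, float>(0.0f)).anyTrue()
            || (v > SkNf<4, float>(1.0f)).anyTrue();
    }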