Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1411563008: prune unused SkNx features (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: 0.001f Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 15 matching lines...) Expand all
26 26
// Dispatch a run-time shift amount to NEON's immediate-only shift intrinsics.
// 'op' must be a v{shl,shr}q_n_* intrinsic, which requires a compile-time
// constant shift count; this switch enumerates every legal 32-bit shift
// (cases 16-31 here, 0-15 handled by SHIFT16) so callers can shift by a
// run-time 'bits'.  If 'bits' is out of range the vector is returned
// unshifted ('return fVec' at the end).
#define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \
    case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v, 18); \
    case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v, 21); \
    case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v, 24); \
    case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v, 27); \
    case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v, 30); \
    case 31: return op(v, 31); } return fVec
34 34
35 template <> 35 template <>
36 class SkNf<2, float> { 36 class SkNf<2> {
37 public: 37 public:
38 SkNf(float32x2_t vec) : fVec(vec) {} 38 SkNf(float32x2_t vec) : fVec(vec) {}
39 39
40 SkNf() {} 40 SkNf() {}
41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} 41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}
42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } 42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } 43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }
44 44
45 void store(float vals[2]) const { vst1_f32(vals, fVec); } 45 void store(float vals[2]) const { vst1_f32(vals, fVec); }
46 46
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 return vget_lane_u32(v,0) && vget_lane_u32(v,1); 106 return vget_lane_u32(v,0) && vget_lane_u32(v,1);
107 } 107 }
108 bool anyTrue() const { 108 bool anyTrue() const {
109 auto v = vreinterpret_u32_f32(fVec); 109 auto v = vreinterpret_u32_f32(fVec);
110 return vget_lane_u32(v,0) || vget_lane_u32(v,1); 110 return vget_lane_u32(v,0) || vget_lane_u32(v,1);
111 } 111 }
112 112
113 float32x2_t fVec; 113 float32x2_t fVec;
114 }; 114 };
115 115
#if defined(SK_CPU_ARM64)
// 2-wide double lanes.  Guarded to ARM64: float64x2_t and the *_f64
// intrinsics used below are AArch64-only.
template <>
class SkNf<2, double> {
public:
    SkNf(float64x2_t vec) : fVec(vec) {}

    SkNf() {}
    explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}
    static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }
    SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }

    void store(double vals[2]) const { vst1q_f64(vals, fVec); }

    // Lane-wise arithmetic.
    SkNf operator + (const SkNf& rhs) const { return vaddq_f64(fVec, rhs.fVec); }
    SkNf operator - (const SkNf& rhs) const { return vsubq_f64(fVec, rhs.fVec); }
    SkNf operator * (const SkNf& rhs) const { return vmulq_f64(fVec, rhs.fVec); }
    SkNf operator / (const SkNf& rhs) const { return vdivq_f64(fVec, rhs.fVec); }

    // Comparisons yield all-ones / all-zeros lanes.  We C-cast back to
    // float64x2_t because vreinterpretq_f64_u64 and vreinterpretq_f64_u32
    // were not available here.
    SkNf operator==(const SkNf& rhs) const { return (float64x2_t)(vceqq_f64(fVec, rhs.fVec)); }
    SkNf operator <(const SkNf& rhs) const { return (float64x2_t)(vcltq_f64(fVec, rhs.fVec)); }
    SkNf operator >(const SkNf& rhs) const { return (float64x2_t)(vcgtq_f64(fVec, rhs.fVec)); }
    SkNf operator<=(const SkNf& rhs) const { return (float64x2_t)(vcleq_f64(fVec, rhs.fVec)); }
    SkNf operator>=(const SkNf& rhs) const { return (float64x2_t)(vcgeq_f64(fVec, rhs.fVec)); }
    SkNf operator != (const SkNf& rhs) const {
        // No vmvnq for 64-bit lanes, so flip the equality mask as 32-bit lanes.
        return (float64x2_t)(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, rhs.fVec))));
    }

    static SkNf Min(const SkNf& a, const SkNf& b) { return vminq_f64(a.fVec, b.fVec); }
    static SkNf Max(const SkNf& a, const SkNf& b) { return vmaxq_f64(a.fVec, b.fVec); }

    SkNf sqrt() const { return vsqrtq_f64(fVec); }

    // Reciprocal square root: hardware estimate, then 0/1/2 Newton-Raphson
    // refinement steps via vrsqrtsq_f64.
    SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }
    SkNf rsqrt1() const {
        float64x2_t e = this->rsqrt0().fVec;
        return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(e, e)), e);
    }
    SkNf rsqrt2() const {
        float64x2_t e = this->rsqrt1().fVec;
        return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(e, e)), e);
    }

    // Reciprocal: hardware estimate plus one refinement step...
    SkNf approxInvert() const {
        float64x2_t e = vrecpeq_f64(fVec);
        return vmulq_f64(vrecpsq_f64(e, fVec), e);
    }

    // ...and a more precise version with two further refinement steps.
    SkNf invert() const {
        float64x2_t e = this->approxInvert().fVec;
        e = vmulq_f64(vrecpsq_f64(e, fVec), e);
        e = vmulq_f64(vrecpsq_f64(e, fVec), e);
        return e;
    }

    template <int k> double kth() const {
        SkASSERT(0 <= k && k < 2);
        return vgetq_lane_f64(fVec, k&1);  // k&1 keeps the lane index in range.
    }

    // These inspect comparison masks.  C-cast because vreinterpretq_u64_f64
    // was not available here.
    bool allTrue() const {
        auto mask = (uint64x2_t)(fVec);
        return vgetq_lane_u64(mask,0) && vgetq_lane_u64(mask,1);
    }
    bool anyTrue() const {
        auto mask = (uint64x2_t)(fVec);
        return vgetq_lane_u64(mask,0) || vgetq_lane_u64(mask,1);
    }

    float64x2_t fVec;
};
#endif//defined(SK_CPU_ARM64)
190
191 template <> 116 template <>
192 class SkNi<4, int> { 117 class SkNi<4, int> {
193 public: 118 public:
194 SkNi(const int32x4_t& vec) : fVec(vec) {} 119 SkNi(const int32x4_t& vec) : fVec(vec) {}
195 120
196 SkNi() {} 121 SkNi() {}
197 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} 122 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}
198 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } 123 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }
199 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } 124 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
200 125
201 void store(int vals[4]) const { vst1q_s32(vals, fVec); } 126 void store(int vals[4]) const { vst1q_s32(vals, fVec); }
202 127
203 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } 128 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }
204 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } 129 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }
205 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } 130 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }
206 131
207 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); } 132 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
208 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); } 133 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
209 134
210 template <int k> int kth() const { 135 template <int k> int kth() const {
211 SkASSERT(0 <= k && k < 4); 136 SkASSERT(0 <= k && k < 4);
212 return vgetq_lane_s32(fVec, k&3); 137 return vgetq_lane_s32(fVec, k&3);
213 } 138 }
214 139
215 int32x4_t fVec; 140 int32x4_t fVec;
216 }; 141 };
217 142
218 template <> 143 template <>
219 class SkNf<4, float> { 144 class SkNf<4> {
220 public: 145 public:
221 SkNf(float32x4_t vec) : fVec(vec) {} 146 SkNf(float32x4_t vec) : fVec(vec) {}
222 147
223 SkNf() {} 148 SkNf() {}
224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} 149 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } 150 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
226 static SkNf FromBytes(const uint8_t vals[4]) { 151 static SkNf FromBytes(const uint8_t vals[4]) {
227 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); 152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
228 uint16x8_t fix8_16 = vmovl_u8(fix8); 153 uint16x8_t fix8_16 = vmovl_u8(fix8);
229 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); 154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
230 return SkNf(vcvtq_f32_u32(fix8_32)); 155 return SkNf(vcvtq_f32_u32(fix8_32));
231 } 156 }
232 157
233 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 158 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
234 159
235 void store(float vals[4]) const { vst1q_f32(vals, fVec); } 160 void store(float vals[4]) const { vst1q_f32(vals, fVec); }
236 void toBytes(uint8_t bytes[4]) const { 161 void toBytes(uint8_t bytes[4]) const {
237 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); 162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec);
238 uint16x4_t fix8_16 = vqmovn_u32(fix8_32); 163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
239 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); 164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
240 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); 165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);
241 } 166 }
242 167
243 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); }
244
245 SkNf approxInvert() const { 168 SkNf approxInvert() const {
246 float32x4_t est0 = vrecpeq_f32(fVec), 169 float32x4_t est0 = vrecpeq_f32(fVec),
247 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 170 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
248 return est1; 171 return est1;
249 } 172 }
250 SkNf invert() const { 173 SkNf invert() const {
251 float32x4_t est1 = this->approxInvert().fVec, 174 float32x4_t est1 = this->approxInvert().fVec,
252 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); 175 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
253 return est2; 176 return est2;
254 } 177 }
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 uint8x16_t fVec; 315 uint8x16_t fVec;
393 }; 316 };
394 317
395 #undef SHIFT32 318 #undef SHIFT32
396 #undef SHIFT16 319 #undef SHIFT16
397 #undef SHIFT8 320 #undef SHIFT8
398 321
399 } // namespace 322 } // namespace
400 323
401 #endif//SkNx_neon_DEFINED 324 #endif//SkNx_neon_DEFINED
OLDNEW
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698