OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 15 matching lines...) Expand all Loading... |
26 | 26 |
27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bit
s) { \ | 27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bit
s) { \ |
28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v
, 18); \ | 28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v
, 18); \ |
29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v
, 21); \ | 29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v
, 21); \ |
30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v
, 24); \ | 30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v
, 24); \ |
31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v
, 27); \ | 31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v
, 27); \ |
32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v
, 30); \ | 32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v
, 30); \ |
33 case 31: return op(v, 31); } return fVec | 33 case 31: return op(v, 31); } return fVec |
34 | 34 |
35 template <> | 35 template <> |
| 36 class SkNb<2, 4> { |
| 37 public: |
| 38 SkNb(uint32x2_t vec) : fVec(vec) {} |
| 39 |
| 40 SkNb() {} |
| 41 bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec,
1); } |
| 42 bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec,
1); } |
| 43 |
| 44 uint32x2_t fVec; |
| 45 }; |
| 46 |
| 47 template <> |
| 48 class SkNb<4, 4> { |
| 49 public: |
| 50 SkNb(uint32x4_t vec) : fVec(vec) {} |
| 51 |
| 52 SkNb() {} |
| 53 bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec
, 1) |
| 54 && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec
, 3); } |
| 55 bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec
, 1) |
| 56 || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec
, 3); } |
| 57 |
| 58 uint32x4_t fVec; |
| 59 }; |
| 60 |
| 61 template <> |
36 class SkNf<2, float> { | 62 class SkNf<2, float> { |
| 63 typedef SkNb<2, 4> Nb; |
37 public: | 64 public: |
38 SkNf(float32x2_t vec) : fVec(vec) {} | 65 SkNf(float32x2_t vec) : fVec(vec) {} |
39 | 66 |
40 SkNf() {} | 67 SkNf() {} |
41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} | 68 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} |
42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } | 69 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } |
43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } | 70 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } |
44 | 71 |
45 void store(float vals[2]) const { vst1_f32(vals, fVec); } | 72 void store(float vals[2]) const { vst1_f32(vals, fVec); } |
46 | 73 |
(...skipping 12 matching lines...) Expand all Loading... |
59 SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } | 86 SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } |
60 SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } | 87 SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } |
61 SkNf operator / (const SkNf& o) const { | 88 SkNf operator / (const SkNf& o) const { |
62 #if defined(SK_CPU_ARM64) | 89 #if defined(SK_CPU_ARM64) |
63 return vdiv_f32(fVec, o.fVec); | 90 return vdiv_f32(fVec, o.fVec); |
64 #else | 91 #else |
65 return vmul_f32(fVec, o.invert().fVec); | 92 return vmul_f32(fVec, o.invert().fVec); |
66 #endif | 93 #endif |
67 } | 94 } |
68 | 95 |
69 SkNf operator == (const SkNf& o) const { return vreinterpret_f32_u32(vceq_f3
2(fVec, o.fVec)); } | 96 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } |
70 SkNf operator < (const SkNf& o) const { return vreinterpret_f32_u32(vclt_f3
2(fVec, o.fVec)); } | 97 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } |
71 SkNf operator > (const SkNf& o) const { return vreinterpret_f32_u32(vcgt_f3
2(fVec, o.fVec)); } | 98 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } |
72 SkNf operator <= (const SkNf& o) const { return vreinterpret_f32_u32(vcle_f3
2(fVec, o.fVec)); } | 99 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } |
73 SkNf operator >= (const SkNf& o) const { return vreinterpret_f32_u32(vcge_f3
2(fVec, o.fVec)); } | 100 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } |
74 SkNf operator != (const SkNf& o) const { | 101 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec
)); } |
75 return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); | |
76 } | |
77 | 102 |
78 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } | 103 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } |
79 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } | 104 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } |
80 | 105 |
81 SkNf rsqrt0() const { return vrsqrte_f32(fVec); } | 106 SkNf rsqrt0() const { return vrsqrte_f32(fVec); } |
82 SkNf rsqrt1() const { | 107 SkNf rsqrt1() const { |
83 float32x2_t est0 = this->rsqrt0().fVec; | 108 float32x2_t est0 = this->rsqrt0().fVec; |
84 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | 109 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); |
85 } | 110 } |
86 SkNf rsqrt2() const { | 111 SkNf rsqrt2() const { |
87 float32x2_t est1 = this->rsqrt1().fVec; | 112 float32x2_t est1 = this->rsqrt1().fVec; |
88 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); | 113 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); |
89 } | 114 } |
90 | 115 |
91 SkNf sqrt() const { | 116 SkNf sqrt() const { |
92 #if defined(SK_CPU_ARM64) | 117 #if defined(SK_CPU_ARM64) |
93 return vsqrt_f32(fVec); | 118 return vsqrt_f32(fVec); |
94 #else | 119 #else |
95 return *this * this->rsqrt2(); | 120 return *this * this->rsqrt2(); |
96 #endif | 121 #endif |
97 } | 122 } |
98 | 123 |
99 template <int k> float kth() const { | 124 template <int k> float kth() const { |
100 SkASSERT(0 <= k && k < 2); | 125 SkASSERT(0 <= k && k < 2); |
101 return vget_lane_f32(fVec, k&1); | 126 return vget_lane_f32(fVec, k&1); |
102 } | 127 } |
103 | 128 |
104 bool allTrue() const { | |
105 auto v = vreinterpret_u32_f32(fVec); | |
106 return vget_lane_u32(v,0) && vget_lane_u32(v,1); | |
107 } | |
108 bool anyTrue() const { | |
109 auto v = vreinterpret_u32_f32(fVec); | |
110 return vget_lane_u32(v,0) || vget_lane_u32(v,1); | |
111 } | |
112 | |
113 float32x2_t fVec; | 129 float32x2_t fVec; |
114 }; | 130 }; |
115 | 131 |
116 #if defined(SK_CPU_ARM64) | 132 #if defined(SK_CPU_ARM64) |
117 template <> | 133 template <> |
| 134 class SkNb<2, 8> { |
| 135 public: |
| 136 SkNb(uint64x2_t vec) : fVec(vec) {} |
| 137 |
| 138 SkNb() {} |
| 139 bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec
, 1); } |
| 140 bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec
, 1); } |
| 141 |
| 142 uint64x2_t fVec; |
| 143 }; |
| 144 |
| 145 template <> |
118 class SkNf<2, double> { | 146 class SkNf<2, double> { |
| 147 typedef SkNb<2, 8> Nb; |
119 public: | 148 public: |
120 SkNf(float64x2_t vec) : fVec(vec) {} | 149 SkNf(float64x2_t vec) : fVec(vec) {} |
121 | 150 |
122 SkNf() {} | 151 SkNf() {} |
123 explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} | 152 explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} |
124 static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } | 153 static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } |
125 SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } | 154 SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } |
126 | 155 |
127 void store(double vals[2]) const { vst1q_f64(vals, fVec); } | 156 void store(double vals[2]) const { vst1q_f64(vals, fVec); } |
128 | 157 |
129 SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } | 158 SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } |
130 SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } | 159 SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } |
131 SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } | 160 SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } |
132 SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } | 161 SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } |
133 | 162 |
134 SkNf operator==(const SkNf& o) const { return vreinterpretq_f64_u64(vceqq_f6
4(fVec, o.fVec)); } | 163 Nb operator == (const SkNf& o) const { return vceqq_f64(fVec, o.fVec); } |
135 SkNf operator <(const SkNf& o) const { return vreinterpretq_f64_u64(vcltq_f6
4(fVec, o.fVec)); } | 164 Nb operator < (const SkNf& o) const { return vcltq_f64(fVec, o.fVec); } |
136 SkNf operator >(const SkNf& o) const { return vreinterpretq_f64_u64(vcgtq_f6
4(fVec, o.fVec)); } | 165 Nb operator > (const SkNf& o) const { return vcgtq_f64(fVec, o.fVec); } |
137 SkNf operator<=(const SkNf& o) const { return vreinterpretq_f64_u64(vcleq_f6
4(fVec, o.fVec)); } | 166 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } |
138 SkNf operator>=(const SkNf& o) const { return vreinterpretq_f64_u64(vcgeq_f6
4(fVec, o.fVec)); } | 167 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } |
139 SkNf operator != (const SkNf& o) const { | 168 Nb operator != (const SkNf& o) const { |
140 return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); | 169 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); |
141 } | 170 } |
142 | 171 |
143 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } | 172 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } |
144 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } | 173 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } |
145 | 174 |
146 SkNf sqrt() const { return vsqrtq_f64(fVec); } | 175 SkNf sqrt() const { return vsqrtq_f64(fVec); } |
147 | 176 |
148 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } | 177 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } |
149 SkNf rsqrt1() const { | 178 SkNf rsqrt1() const { |
150 float64x2_t est0 = this->rsqrt0().fVec; | 179 float64x2_t est0 = this->rsqrt0().fVec; |
(...skipping 15 matching lines...) Expand all Loading... |
166 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), | 195 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
167 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); | 196 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
168 return est3; | 197 return est3; |
169 } | 198 } |
170 | 199 |
171 template <int k> double kth() const { | 200 template <int k> double kth() const { |
172 SkASSERT(0 <= k && k < 2); | 201 SkASSERT(0 <= k && k < 2); |
173 return vgetq_lane_f64(fVec, k&1); | 202 return vgetq_lane_f64(fVec, k&1); |
174 } | 203 } |
175 | 204 |
176 bool allTrue() const { | |
177 auto v = vreinterpretq_u64_f64(fVec); | |
178 return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1); | |
179 } | |
180 bool anyTrue() const { | |
181 auto v = vreinterpretq_u64_f64(fVec); | |
182 return vgetq_lane_u64(v,0) || vgetq_lane_u64(v,1); | |
183 } | |
184 | |
185 float64x2_t fVec; | 205 float64x2_t fVec; |
186 }; | 206 }; |
187 #endif//defined(SK_CPU_ARM64) | 207 #endif//defined(SK_CPU_ARM64) |
188 | 208 |
189 template <> | 209 template <> |
190 class SkNi<4, int> { | 210 class SkNi<4, int> { |
191 public: | 211 public: |
192 SkNi(const int32x4_t& vec) : fVec(vec) {} | 212 SkNi(const int32x4_t& vec) : fVec(vec) {} |
193 | 213 |
194 SkNi() {} | 214 SkNi() {} |
(...skipping 13 matching lines...) Expand all Loading... |
208 template <int k> int kth() const { | 228 template <int k> int kth() const { |
209 SkASSERT(0 <= k && k < 4); | 229 SkASSERT(0 <= k && k < 4); |
210 return vgetq_lane_s32(fVec, k&3); | 230 return vgetq_lane_s32(fVec, k&3); |
211 } | 231 } |
212 | 232 |
213 int32x4_t fVec; | 233 int32x4_t fVec; |
214 }; | 234 }; |
215 | 235 |
216 template <> | 236 template <> |
217 class SkNf<4, float> { | 237 class SkNf<4, float> { |
| 238 typedef SkNb<4, 4> Nb; |
218 public: | 239 public: |
219 SkNf(float32x4_t vec) : fVec(vec) {} | 240 SkNf(float32x4_t vec) : fVec(vec) {} |
220 | 241 |
221 SkNf() {} | 242 SkNf() {} |
222 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 243 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
223 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 244 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
224 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 245 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
225 | 246 |
226 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 247 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
227 | 248 |
(...skipping 14 matching lines...) Expand all Loading... |
242 SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } | 263 SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } |
243 SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } | 264 SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } |
244 SkNf operator / (const SkNf& o) const { | 265 SkNf operator / (const SkNf& o) const { |
245 #if defined(SK_CPU_ARM64) | 266 #if defined(SK_CPU_ARM64) |
246 return vdivq_f32(fVec, o.fVec); | 267 return vdivq_f32(fVec, o.fVec); |
247 #else | 268 #else |
248 return vmulq_f32(fVec, o.invert().fVec); | 269 return vmulq_f32(fVec, o.invert().fVec); |
249 #endif | 270 #endif |
250 } | 271 } |
251 | 272 |
252 SkNf operator==(const SkNf& o) const { return vreinterpretq_f32_u32(vceqq_f3
2(fVec, o.fVec)); } | 273 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } |
253 SkNf operator <(const SkNf& o) const { return vreinterpretq_f32_u32(vcltq_f3
2(fVec, o.fVec)); } | 274 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } |
254 SkNf operator >(const SkNf& o) const { return vreinterpretq_f32_u32(vcgtq_f3
2(fVec, o.fVec)); } | 275 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } |
255 SkNf operator<=(const SkNf& o) const { return vreinterpretq_f32_u32(vcleq_f3
2(fVec, o.fVec)); } | 276 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } |
256 SkNf operator>=(const SkNf& o) const { return vreinterpretq_f32_u32(vcgeq_f3
2(fVec, o.fVec)); } | 277 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } |
257 SkNf operator!=(const SkNf& o) const { | 278 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fV
ec)); } |
258 return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); | |
259 } | |
260 | 279 |
261 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } | 280 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } |
262 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } | 281 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } |
263 | 282 |
264 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } | 283 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } |
265 SkNf rsqrt1() const { | 284 SkNf rsqrt1() const { |
266 float32x4_t est0 = this->rsqrt0().fVec; | 285 float32x4_t est0 = this->rsqrt0().fVec; |
267 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); | 286 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); |
268 } | 287 } |
269 SkNf rsqrt2() const { | 288 SkNf rsqrt2() const { |
270 float32x4_t est1 = this->rsqrt1().fVec; | 289 float32x4_t est1 = this->rsqrt1().fVec; |
271 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); | 290 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); |
272 } | 291 } |
273 | 292 |
274 SkNf sqrt() const { | 293 SkNf sqrt() const { |
275 #if defined(SK_CPU_ARM64) | 294 #if defined(SK_CPU_ARM64) |
276 return vsqrtq_f32(fVec); | 295 return vsqrtq_f32(fVec); |
277 #else | 296 #else |
278 return *this * this->rsqrt2(); | 297 return *this * this->rsqrt2(); |
279 #endif | 298 #endif |
280 } | 299 } |
281 | 300 |
282 template <int k> float kth() const { | 301 template <int k> float kth() const { |
283 SkASSERT(0 <= k && k < 4); | 302 SkASSERT(0 <= k && k < 4); |
284 return vgetq_lane_f32(fVec, k&3); | 303 return vgetq_lane_f32(fVec, k&3); |
285 } | 304 } |
286 | 305 |
287 bool allTrue() const { | |
288 auto v = vreinterpretq_u32_f32(fVec); | |
289 return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) | |
290 && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); | |
291 } | |
292 bool anyTrue() const { | |
293 auto v = vreinterpretq_u32_f32(fVec); | |
294 return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) | |
295 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); | |
296 } | |
297 | |
298 float32x4_t fVec; | 306 float32x4_t fVec; |
299 }; | 307 }; |
300 | 308 |
301 template <> | 309 template <> |
302 class SkNi<8, uint16_t> { | 310 class SkNi<8, uint16_t> { |
303 public: | 311 public: |
304 SkNi(const uint16x8_t& vec) : fVec(vec) {} | 312 SkNi(const uint16x8_t& vec) : fVec(vec) {} |
305 | 313 |
306 SkNi() {} | 314 SkNi() {} |
307 explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} | 315 explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
348 } | 356 } |
349 | 357 |
350 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } | 358 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } |
351 | 359 |
352 SkNi saturatedAdd(const SkNi& o) const { return vqaddq_u8(fVec, o.fVec); } | 360 SkNi saturatedAdd(const SkNi& o) const { return vqaddq_u8(fVec, o.fVec); } |
353 | 361 |
354 SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); } | 362 SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); } |
355 SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } | 363 SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } |
356 | 364 |
357 static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u8(a.fVec, b.fV
ec); } | 365 static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u8(a.fVec, b.fV
ec); } |
358 SkNi operator < (const SkNi& o) const { return vcltq_u8(fVec, o.fVec); } | |
359 | 366 |
360 template <int k> uint8_t kth() const { | 367 template <int k> uint8_t kth() const { |
361 SkASSERT(0 <= k && k < 15); | 368 SkASSERT(0 <= k && k < 15); |
362 return vgetq_lane_u8(fVec, k&16); | 369 return vgetq_lane_u8(fVec, k&16); |
363 } | 370 } |
364 | 371 |
365 SkNi thenElse(const SkNi& t, const SkNi& e) const { | |
366 return vorrq_u8(vandq_u8(t.fVec, fVec), | |
367 vbicq_u8(e.fVec, fVec)); | |
368 } | |
369 | |
370 uint8x16_t fVec; | 372 uint8x16_t fVec; |
371 }; | 373 }; |
372 | 374 |
373 #undef SHIFT32 | 375 #undef SHIFT32 |
374 #undef SHIFT16 | 376 #undef SHIFT16 |
375 #undef SHIFT8 | 377 #undef SHIFT8 |
376 | 378 |
377 } // namespace | 379 } // namespace |
378 | 380 |
379 #endif//SkNx_neon_DEFINED | 381 #endif//SkNx_neon_DEFINED |
OLD | NEW |