| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 163 matching lines...) |
| 174 SkASSERT(0 <= k && k < 2); | 174 SkASSERT(0 <= k && k < 2); |
| 175 return vgetq_lane_f64(fVec, k&1); | 175 return vgetq_lane_f64(fVec, k&1); |
| 176 } | 176 } |
| 177 | 177 |
| 178 private: | 178 private: |
| 179 float64x2_t fVec; | 179 float64x2_t fVec; |
| 180 }; | 180 }; |
| 181 #endif//defined(SK_CPU_ARM64) | 181 #endif//defined(SK_CPU_ARM64) |
| 182 | 182 |
| 183 template <> | 183 template <> |
| 184 class SkNi<4, int> { | |
| 185 public: | |
| 186 SkNi(const int32x4_t& vec) : fVec(vec) {} | |
| 187 | |
| 188 SkNi() {} | |
| 189 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} | |
| 190 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } | |
| 191 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } | |
| 192 | |
| 193 void store(int vals[4]) const { vst1q_s32(vals, fVec); } | |
| 194 | |
| 195 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } | |
| 196 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } | |
| 197 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } | |
| 198 | |
| 199 // Well, this is absurd. The shifts require compile-time constant arguments. | |
| 200 #define SHIFT(op, v, bits) switch(bits) { \ | |
| 201 case 1: return op(v, 1); case 2: return op(v, 2); case 3: return op(v, 3); \ | |
| 202 case 4: return op(v, 4); case 5: return op(v, 5); case 6: return op(v, 6); \ | |
| 203 case 7: return op(v, 7); case 8: return op(v, 8); case 9: return op(v, 9); \ | |
| 204 case 10: return op(v, 10); case 11: return op(v, 11); case 12: return op(v, 12); \ | |
| 205 case 13: return op(v, 13); case 14: return op(v, 14); case 15: return op(v, 15); \ | |
| 206 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v, 18); \ | |
| 207 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v, 21); \ | |
| 208 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v, 24); \ | |
| 209 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v, 27); \ | |
| 210 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v, 30); \ | |
| 211 case 31: return op(v, 31); } return fVec | |
| 212 | |
| 213 SkNi operator << (int bits) const { SHIFT(vshlq_n_s32, fVec, bits); } | |
| 214 SkNi operator >> (int bits) const { SHIFT(vshrq_n_s32, fVec, bits); } | |
| 215 #undef SHIFT | |
| 216 | |
| 217 template <int k> int kth() const { | |
| 218 SkASSERT(0 <= k && k < 4); | |
| 219 return vgetq_lane_s32(fVec, k); | |
| 220 } | |
| 221 protected: | |
| 222 int32x4_t fVec; | |
| 223 }; | |
| 224 | |
| 225 template <> | |
| 226 class SkNf<4, float> { | 184 class SkNf<4, float> { |
| 227 typedef SkNb<4, 4> Nb; | 185 typedef SkNb<4, 4> Nb; |
| 228 public: | 186 public: |
| 229 SkNf(float32x4_t vec) : fVec(vec) {} | 187 SkNf(float32x4_t vec) : fVec(vec) {} |
| 230 | 188 |
| 231 SkNf() {} | 189 SkNf() {} |
| 232 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 190 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
| 233 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 191 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
| 234 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } | 192 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } |
| 235 | 193 |
| 236 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 194 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
| 237 | 195 |
| 238 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } | |
| 239 | |
| 240 SkNf approxInvert() const { | 196 SkNf approxInvert() const { |
| 241 float32x4_t est0 = vrecpeq_f32(fVec), | 197 float32x4_t est0 = vrecpeq_f32(fVec), |
| 242 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); | 198 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); |
| 243 return est1; | 199 return est1; |
| 244 } | 200 } |
| 245 SkNf invert() const { | 201 SkNf invert() const { |
| 246 float32x4_t est1 = this->approxInvert().fVec, | 202 float32x4_t est1 = this->approxInvert().fVec, |
| 247 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); | 203 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); |
| 248 return est2; | 204 return est2; |
| 249 } | 205 } |
| (...skipping 39 matching lines...) |
| 289 template <int k> float kth() const { | 245 template <int k> float kth() const { |
| 290 SkASSERT(0 <= k && k < 4); | 246 SkASSERT(0 <= k && k < 4); |
| 291 return vgetq_lane_f32(fVec, k&3); | 247 return vgetq_lane_f32(fVec, k&3); |
| 292 } | 248 } |
| 293 | 249 |
| 294 protected: | 250 protected: |
| 295 float32x4_t fVec; | 251 float32x4_t fVec; |
| 296 }; | 252 }; |
| 297 | 253 |
| 298 #endif//SkNx_neon_DEFINED | 254 #endif//SkNx_neon_DEFINED |
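
Note on the SHIFT macro in the removed SkNi<4, int> code: NEON's immediate-shift intrinsics (vshlq_n_s32, vshrq_n_s32) require the shift count to be a compile-time constant, which is why the deleted code dispatches a run-time bits argument through a 31-case switch. A minimal sketch of the same constraint handled with a non-type template parameter instead (hypothetical helpers shl/shr, not part of this CL):

    #include <arm_neon.h>

    // kBits is a constant expression inside the template body, so it
    // satisfies the intrinsics' immediate-operand requirement.
    template <int kBits>
    int32x4_t shl(int32x4_t v) {
        static_assert(0 < kBits && kBits < 32, "NEON immediate shifts take 1..31");
        return vshlq_n_s32(v, kBits);
    }

    template <int kBits>
    int32x4_t shr(int32x4_t v) {
        static_assert(0 < kBits && kBits < 32, "NEON immediate shifts take 1..31");
        return vshrq_n_s32(v, kBits);
    }

This trades the run-time shift amount for a compile-time one, so it only helps callers that know the count statically; the switch-based macro keeps the run-time interface at the cost of the verbosity above.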
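Note on approxInvert()/invert() in SkNf<4, float>: vrecpeq_f32 produces a low-precision reciprocal estimate, and vrecpsq_f32(e, x) computes 2 - e*x, so multiplying its result by e performs one Newton-Raphson step toward 1/x; each step roughly doubles the number of correct bits. A scalar model of the per-lane arithmetic (illustrative sketch; recip_step is a hypothetical name):

    // One Newton-Raphson refinement step for 1/x, mirroring
    // vmulq_f32(vrecpsq_f32(e, x), e) on a single lane.
    static inline float recip_step(float e, float x) {
        return e * (2.0f - e * x);
    }

    // approxInvert(): one step after the vrecpeq_f32 estimate (est1).
    // invert():       a second step (est2) for roughly double the precision.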