| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 | 12 |
// Two-lane, 32-bit-per-lane boolean vector for NEON.
// In this side-by-side diff the OLD column spells the type SkNi<2, int32_t>
// over a signed int32x2_t; the NEW column spells it SkNb<2, 4> over an
// unsigned uint32x2_t, and the lane reads change from vget_lane_s32 to
// vget_lane_u32 to match. SkNf<2, float> typedefs this type as the result of
// its comparison operators.
| 13 template <> | 13 template <> |
| 14 class SkNi<2, int32_t> { | 14 class SkNb<2, 4> { |
| 15 public: | 15 public: |
// Non-explicit on purpose: lets a raw NEON vector convert implicitly, so an
// intrinsic's result can be returned directly as this type.
| 16 SkNi(int32x2_t vec) : fVec(vec) {} | 16 SkNb(uint32x2_t vec) : fVec(vec) {} |
| 17 | 17 |
// Default constructor does not initialize fVec.
| 18 SkNi() {} | 18 SkNb() {} |
// allTrue(): true only when both lanes are nonzero.
| 19 bool allTrue() const { return vget_lane_s32(fVec, 0) && vget_lane_s32(fVec,
1); } | 19 bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec,
1); } |
// anyTrue(): true when at least one lane is nonzero.
| 20 bool anyTrue() const { return vget_lane_s32(fVec, 0) || vget_lane_s32(fVec,
1); } | 20 bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec,
1); } |
| 21 private: | 21 private: |
// Underlying two-lane NEON register (signed in OLD, unsigned in NEW).
| 22 int32x2_t fVec; | 22 uint32x2_t fVec; |
| 23 }; | 23 }; |
| 24 | 24 |
// Four-lane, 32-bit-per-lane boolean vector for NEON.
// OLD column: SkNi<4, int32_t> over a signed int32x4_t.
// NEW column: SkNb<4, 4> over an unsigned uint32x4_t, with the lane reads
// changed from vgetq_lane_s32 to vgetq_lane_u32. SkNf<4, float> typedefs this
// type as the result of its comparison operators.
| 25 template <> | 25 template <> |
| 26 class SkNi<4, int32_t> { | 26 class SkNb<4, 4> { |
| 27 public: | 27 public: |
// Non-explicit on purpose: lets a raw NEON vector convert implicitly, so an
// intrinsic's result can be returned directly as this type.
| 28 SkNi(int32x4_t vec) : fVec(vec) {} | 28 SkNb(uint32x4_t vec) : fVec(vec) {} |
| 29 | 29 |
// Default constructor does not initialize fVec.
| 30 SkNi() {} | 30 SkNb() {} |
// allTrue(): true only when all four lanes are nonzero (the table row wraps
// across the next few physical lines).
| 31 bool allTrue() const { return vgetq_lane_s32(fVec, 0) && vgetq_lane_s32(fVec
, 1) | 31 bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec
, 1) |
| 32 && vgetq_lane_s32(fVec, 2) && vgetq_lane_s32(fVec
, 3); } | 32 && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec
, 3); } |
// anyTrue(): true when at least one of the four lanes is nonzero.
| 33 bool anyTrue() const { return vgetq_lane_s32(fVec, 0) || vgetq_lane_s32(fVec
, 1) | 33 bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec
, 1) |
| 34 || vgetq_lane_s32(fVec, 2) || vgetq_lane_s32(fVec
, 3); } | 34 || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec
, 3); } |
| 35 private: | 35 private: |
// Underlying four-lane NEON register (signed in OLD, unsigned in NEW).
| 36 int32x4_t fVec; | 36 uint32x4_t fVec; |
| 37 }; | 37 }; |
| 38 | 38 |
// NEON specialization of SkNf for two packed floats (float32x2_t).
// The diff viewer elides two ranges inside this class ("skipping 12 matching
// lines"); only the members visible here are described.
// What this diff changes in the class: the comparison result typedef goes from
// Ni (SkNi<2, int32_t>) to Nb (SkNb<2, 4>), and the comparison operators stop
// reinterpreting the unsigned compare result as signed.
| 39 template <> | 39 template <> |
| 40 class SkNf<2, float> { | 40 class SkNf<2, float> { |
| 41 typedef SkNi<2, int32_t> Ni; | 41 typedef SkNb<2, 4> Nb; |
| 42 public: | 42 public: |
// Implicit wrap of a raw NEON vector; this is what lets the operators below
// return intrinsic results directly.
| 43 SkNf(float32x2_t vec) : fVec(vec) {} | 43 SkNf(float32x2_t vec) : fVec(vec) {} |
| 44 | 44 |
// Default constructor does not initialize fVec.
| 45 SkNf() {} | 45 SkNf() {} |
// Broadcasts val into both lanes.
| 46 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} | 46 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} |
// Loads two consecutive floats starting at vals.
| 47 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } | 47 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } |
// Builds the vector from two scalars; a is lane 0, b is lane 1.
| 48 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } | 48 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } |
| 49 | 49 |
// Writes both lanes to vals[0..1].
| 50 void store(float vals[2]) const { vst1_f32(vals, fVec); } | 50 void store(float vals[2]) const { vst1_f32(vals, fVec); } |
| 51 | 51 |
| (...skipping 12 matching lines...) Expand all Loading... |
// Lane-wise subtraction and multiplication.
| 64 SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } | 64 SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } |
| 65 SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } | 65 SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } |
// Lane-wise division. With SK_CPU_ARM64 defined it uses vdiv_f32; otherwise it
// multiplies by o.invert().
// NOTE(review): invert() is defined in the elided lines; presumably a
// reciprocal whose precision may differ from a true divide -- confirm.
| 66 SkNf operator / (const SkNf& o) const { | 66 SkNf operator / (const SkNf& o) const { |
| 67 #if defined(SK_CPU_ARM64) | 67 #if defined(SK_CPU_ARM64) |
| 68 return vdiv_f32(fVec, o.fVec); | 68 return vdiv_f32(fVec, o.fVec); |
| 69 #else | 69 #else |
| 70 return vmul_f32(fVec, o.invert().fVec); | 70 return vmul_f32(fVec, o.invert().fVec); |
| 71 #endif | 71 #endif |
| 72 } | 72 } |
| 73 | 73 |
// Lane-wise comparisons. OLD wraps each unsigned compare result in
// vreinterpret_s32_u32 to fit the signed Ni type; NEW returns the uint32x2_t
// result as Nb unchanged.
| 74 Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(
fVec, o.fVec)); } | 74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } |
| 75 Ni operator < (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(
fVec, o.fVec)); } | 75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } |
| 76 Ni operator > (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(
fVec, o.fVec)); } | 76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } |
| 77 Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(
fVec, o.fVec)); } | 77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } |
| 78 Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(
fVec, o.fVec)); } | 78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } |
// Inequality is the bitwise NOT (vmvn_u32) of the equality mask. The OLD
// three-line body collapses to one line in NEW, so the NEW cells of the next
// two rows are empty.
| 79 Ni operator != (const SkNf& o) const { | 79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec
)); } |
| 80 return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); | |
| 81 } | |
| 82 | 80 |
// Lane-wise minimum and maximum of two vectors.
| 83 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } | 81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } |
| 84 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } | 82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } |
| 85 | 83 |
// Approximate reciprocal square root. est0 is the vrsqrte_f32 estimate; est1
// multiplies est0 by vrsqrts_f32(fVec, est0*est0), which Arm documents as the
// Newton-Raphson step factor. Only this single refinement is applied.
| 86 SkNf rsqrt() const { | 84 SkNf rsqrt() const { |
| 87 float32x2_t est0 = vrsqrte_f32(fVec), | 85 float32x2_t est0 = vrsqrte_f32(fVec), |
| 88 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est
0); | 86 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est
0); |
| 89 return est1; | 87 return est1; |
| 90 } | 88 } |
| 91 | 89 |
| (...skipping 12 matching lines...) Expand all Loading... |
// Tail of a lane accessor whose signature is in the elided lines. It asserts
// k is 0 or 1, and also masks k with 1 so the lane index handed to
// vget_lane_f32 is in range even when the assert is compiled out.
| 104 SkASSERT(0 <= k && k < 2); | 102 SkASSERT(0 <= k && k < 2); |
| 105 return vget_lane_f32(fVec, k&1); | 103 return vget_lane_f32(fVec, k&1); |
| 106 } | 104 } |
| 107 | 105 |
| 108 private: | 106 private: |
// Underlying two-lane float register.
| 109 float32x2_t fVec; | 107 float32x2_t fVec; |
| 110 }; | 108 }; |
| 111 | 109 |
| 112 #if defined(SK_CPU_ARM64) | 110 #if defined(SK_CPU_ARM64) |
// Two-lane, 64-bit-per-lane boolean vector for NEON. It sits inside the
// SK_CPU_ARM64 guard opened on the row just above.
// OLD column: SkNi<2, int64_t> over a signed int64x2_t.
// NEW column: SkNb<2, 8> over an unsigned uint64x2_t, with the lane reads
// changed from vgetq_lane_s64 to vgetq_lane_u64. SkNf<2, double> typedefs this
// type as the result of its comparison operators.
| 113 template <> | 111 template <> |
| 114 class SkNi<2, int64_t> { | 112 class SkNb<2, 8> { |
| 115 public: | 113 public: |
// Non-explicit on purpose: lets a raw NEON vector convert implicitly, so an
// intrinsic's result can be returned directly as this type.
| 116 SkNi(int64x2_t vec) : fVec(vec) {} | 114 SkNb(uint64x2_t vec) : fVec(vec) {} |
| 117 | 115 |
// Default constructor does not initialize fVec.
| 118 SkNi() {} | 116 SkNb() {} |
// allTrue(): true only when both 64-bit lanes are nonzero.
| 119 bool allTrue() const { return vgetq_lane_s64(fVec, 0) && vgetq_lane_s64(fVec
, 1); } | 117 bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec
, 1); } |
// anyTrue(): true when at least one 64-bit lane is nonzero.
| 120 bool anyTrue() const { return vgetq_lane_s64(fVec, 0) || vgetq_lane_s64(fVec
, 1); } | 118 bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec
, 1); } |
| 121 private: | 119 private: |
// Underlying two-lane 64-bit NEON register (signed in OLD, unsigned in NEW).
| 122 int64x2_t fVec; | 120 uint64x2_t fVec; |
| 123 }; | 121 }; |
| 124 | 122 |
// NEON specialization of SkNf for two packed doubles (float64x2_t). It sits
// inside the SK_CPU_ARM64 guard.
// The diff viewer elides 17 lines near the end of the class; only the members
// visible here are described.
// What this diff changes in the class: the comparison result typedef goes from
// Ni (SkNi<2, int64_t>) to Nb (SkNb<2, 8>), and the comparison operators stop
// reinterpreting the unsigned compare result as signed.
| 125 template <> | 123 template <> |
| 126 class SkNf<2, double> { | 124 class SkNf<2, double> { |
| 127 typedef SkNi<2, int64_t> Ni; | 125 typedef SkNb<2, 8> Nb; |
| 128 public: | 126 public: |
// Implicit wrap of a raw NEON vector; this is what lets the operators below
// return intrinsic results directly.
| 129 SkNf(float64x2_t vec) : fVec(vec) {} | 127 SkNf(float64x2_t vec) : fVec(vec) {} |
| 130 | 128 |
// Default constructor does not initialize fVec.
| 131 SkNf() {} | 129 SkNf() {} |
// Broadcasts val into both lanes.
| 132 explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} | 130 explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} |
// Loads two consecutive doubles starting at vals.
| 133 static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } | 131 static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } |
// Builds the vector from two scalars; a is lane 0, b is lane 1.
| 134 SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } | 132 SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } |
| 135 | 133 |
// Writes both lanes to vals[0..1].
| 136 void store(double vals[2]) const { vst1q_f64(vals, fVec); } | 134 void store(double vals[2]) const { vst1q_f64(vals, fVec); } |
| 137 | 135 |
// Lane-wise arithmetic; division always uses the hardware vdivq_f64 here.
| 138 SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } | 136 SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } |
| 139 SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } | 137 SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } |
| 140 SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } | 138 SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } |
| 141 SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } | 139 SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } |
| 142 | 140 |
// Lane-wise comparisons. OLD wraps each unsigned compare result in
// vreinterpretq_s64_u64 to fit the signed Ni type; NEW returns the uint64x2_t
// result as Nb unchanged.
| 143 Ni operator == (const SkNf& o) const { return vreinterpretq_s64_u64(vceqq_f6
4(fVec, o.fVec)); } | 141 Nb operator == (const SkNf& o) const { return vceqq_f64(fVec, o.fVec); } |
| 144 Ni operator < (const SkNf& o) const { return vreinterpretq_s64_u64(vcltq_f6
4(fVec, o.fVec)); } | 142 Nb operator < (const SkNf& o) const { return vcltq_f64(fVec, o.fVec); } |
| 145 Ni operator > (const SkNf& o) const { return vreinterpretq_s64_u64(vcgtq_f6
4(fVec, o.fVec)); } | 143 Nb operator > (const SkNf& o) const { return vcgtq_f64(fVec, o.fVec); } |
| 146 Ni operator <= (const SkNf& o) const { return vreinterpretq_s64_u64(vcleq_f6
4(fVec, o.fVec)); } | 144 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } |
| 147 Ni operator >= (const SkNf& o) const { return vreinterpretq_s64_u64(vcgeq_f6
4(fVec, o.fVec)); } | 145 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } |
// Inequality is the bitwise NOT of the equality mask. The 64-bit mask is
// viewed as 32-bit lanes for vmvnq_u32 and then viewed back; a bitwise NOT
// gives the same bits at any lane width. NEW converts back to u64 where OLD
// converted to s64.
// NOTE(review): presumably done this way because no 64-bit vmvn variant
// exists -- confirm against the intrinsics reference.
| 148 Ni operator != (const SkNf& o) const { | 146 Nb operator != (const SkNf& o) const { |
| 149 return vreinterpretq_s64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); | 147 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); |
| 150 } | 148 } |
| 151 | 149 |
// Lane-wise minimum and maximum of two vectors.
| 152 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } | 150 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } |
| 153 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } | 151 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } |
| 154 | 152 |
// Lane-wise square root via vsqrtq_f64.
| 155 SkNf sqrt() const { return vsqrtq_f64(fVec); } | 153 SkNf sqrt() const { return vsqrtq_f64(fVec); } |
// Approximate reciprocal square root. est0 is the vrsqrteq_f64 estimate; est1
// multiplies est0 by vrsqrtsq_f64(fVec, est0*est0), which Arm documents as the
// Newton-Raphson step factor. The function's closing brace falls in the elided
// range below.
| 156 SkNf rsqrt() const { | 154 SkNf rsqrt() const { |
| 157 float64x2_t est0 = vrsqrteq_f64(fVec), | 155 float64x2_t est0 = vrsqrteq_f64(fVec), |
| 158 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); | 156 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); |
| 159 return est1; | 157 return est1; |
| (...skipping 17 matching lines...) Expand all Loading... |
// Tail of a lane accessor whose signature is in the elided lines. It masks k
// with 1 so the lane index handed to vgetq_lane_f64 is always 0 or 1.
| 177 return vgetq_lane_f64(fVec, k&1); | 175 return vgetq_lane_f64(fVec, k&1); |
| 178 } | 176 } |
| 179 | 177 |
| 180 private: | 178 private: |
// Underlying two-lane double register.
| 181 float64x2_t fVec; | 179 float64x2_t fVec; |
| 182 }; | 180 }; |
| 183 #endif//defined(SK_CPU_ARM64) | 181 #endif//defined(SK_CPU_ARM64) |
| 184 | 182 |
// NEON specialization of SkNf for four packed floats (float32x4_t).
// The diff viewer elides two ranges inside this class (12 and 11 matching
// lines); only the members visible here are described.
// What this diff changes in the class: the comparison result typedef goes from
// Ni (SkNi<4, int32_t>) to Nb (SkNb<4, 4>), and the comparison operators stop
// reinterpreting the unsigned compare result as signed.
| 185 template <> | 183 template <> |
| 186 class SkNf<4, float> { | 184 class SkNf<4, float> { |
| 187 typedef SkNi<4, int32_t> Ni; | 185 typedef SkNb<4, 4> Nb; |
| 188 public: | 186 public: |
// Implicit wrap of a raw NEON vector; this is what lets the operators below
// return intrinsic results directly.
| 189 SkNf(float32x4_t vec) : fVec(vec) {} | 187 SkNf(float32x4_t vec) : fVec(vec) {} |
| 190 | 188 |
// Default constructor does not initialize fVec.
| 191 SkNf() {} | 189 SkNf() {} |
// Broadcasts val into all four lanes.
| 192 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 190 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
// Loads four consecutive floats starting at vals.
| 193 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 191 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
// Builds the vector from four scalars; a..d fill lanes 0..3 in order.
| 194 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 192 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
| 195 | 193 |
// Writes all four lanes to vals[0..3].
| 196 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 194 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
| 197 | 195 |
| (...skipping 12 matching lines...) Expand all Loading... |
// Lane-wise subtraction and multiplication.
| 210 SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } | 208 SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } |
| 211 SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } | 209 SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } |
// Lane-wise division. With SK_CPU_ARM64 defined it uses vdivq_f32; otherwise
// it multiplies by o.invert().
// NOTE(review): invert() is defined in the elided lines; presumably a
// reciprocal whose precision may differ from a true divide -- confirm.
| 212 SkNf operator / (const SkNf& o) const { | 210 SkNf operator / (const SkNf& o) const { |
| 213 #if defined(SK_CPU_ARM64) | 211 #if defined(SK_CPU_ARM64) |
| 214 return vdivq_f32(fVec, o.fVec); | 212 return vdivq_f32(fVec, o.fVec); |
| 215 #else | 213 #else |
| 216 return vmulq_f32(fVec, o.invert().fVec); | 214 return vmulq_f32(fVec, o.invert().fVec); |
| 217 #endif | 215 #endif |
| 218 } | 216 } |
| 219 | 217 |
// Lane-wise comparisons. OLD wraps each unsigned compare result in
// vreinterpretq_s32_u32 to fit the signed Ni type; NEW returns the uint32x4_t
// result as Nb unchanged.
| 220 Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f3
2(fVec, o.fVec)); } | 218 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } |
| 221 Ni operator < (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f3
2(fVec, o.fVec)); } | 219 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } |
| 222 Ni operator > (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f3
2(fVec, o.fVec)); } | 220 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } |
| 223 Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f3
2(fVec, o.fVec)); } | 221 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } |
| 224 Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f3
2(fVec, o.fVec)); } | 222 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } |
// Inequality is the bitwise NOT (vmvnq_u32) of the equality mask. The OLD
// three-line body collapses to one line in NEW, so the NEW cells of the next
// two rows are empty.
| 225 Ni operator != (const SkNf& o) const { | 223 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fV
ec)); } |
| 226 return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); | |
| 227 } | |
| 228 | 224 |
// Lane-wise minimum and maximum of two vectors.
| 229 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } | 225 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } |
| 230 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } | 226 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } |
| 231 | 227 |
// Approximate reciprocal square root. est0 is the vrsqrteq_f32 estimate; est1
// multiplies est0 by vrsqrtsq_f32(fVec, est0*est0), which Arm documents as the
// Newton-Raphson step factor. Only this single refinement is applied.
| 232 SkNf rsqrt() const { | 228 SkNf rsqrt() const { |
| 233 float32x4_t est0 = vrsqrteq_f32(fVec), | 229 float32x4_t est0 = vrsqrteq_f32(fVec), |
| 234 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)),
est0); | 230 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)),
est0); |
| 235 return est1; | 231 return est1; |
| 236 } | 232 } |
| 237 | 233 |
| (...skipping 11 matching lines...) Expand all Loading... |
// Returns lane k, a compile-time index. It asserts k is in [0, 4), and also
// masks k with 3 so the lane index handed to vgetq_lane_f32 is in range even
// when the assert is compiled out.
| 249 template <int k> float kth() const { | 245 template <int k> float kth() const { |
| 250 SkASSERT(0 <= k && k < 4); | 246 SkASSERT(0 <= k && k < 4); |
| 251 return vgetq_lane_f32(fVec, k&3); | 247 return vgetq_lane_f32(fVec, k&3); |
| 252 } | 248 } |
| 253 | 249 |
// Unlike the two-lane specializations, fVec is protected here rather than
// private.
| 254 protected: | 250 protected: |
| 255 float32x4_t fVec; | 251 float32x4_t fVec; |
| 256 }; | 252 }; |
| 257 | 253 |
| 258 #endif//SkNx_neon_DEFINED | 254 #endif//SkNx_neon_DEFINED |
| OLD | NEW |