/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkNx_neon_DEFINED
#define SkNx_neon_DEFINED

#include <arm_neon.h>

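// NEON specializations of the SkNi/SkNf vector templates. The SkNi<N, int32_t>
// specializations below hold the lane masks produced by the SkNf comparison
// operators, exposing only the allTrue()/anyTrue() reductions.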
template <>
class SkNi<2, int32_t> {
public:
    SkNi(int32x2_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1]; }
    bool anyTrue() const { return fVec[0] || fVec[1]; }
private:
    int32x2_t fVec;
};

template <>
class SkNi<4, int32_t> {
public:
    SkNi(int32x4_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1] && fVec[2] && fVec[3]; }
    bool anyTrue() const { return fVec[0] || fVec[1] || fVec[2] || fVec[3]; }
private:
    int32x4_t fVec;
};

template <>
class SkNf<2, float> {
    typedef SkNi<2, int32_t> Ni;
public:
    SkNf(float32x2_t vec) : fVec(vec) {}

    SkNf() {}
    explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}
    static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
    SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }

    void store(float vals[2]) const { vst1_f32(vals, fVec); }

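    // vrecpe_f32 gives only a rough (~8 bit) reciprocal estimate; each
    // vrecps_f32 step below is one Newton-Raphson iteration, roughly doubling
    // the number of correct bits.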
    SkNf approxInvert() const {
        float32x2_t est0 = vrecpe_f32(fVec),
                    est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
        return est1;
    }
    SkNf invert() const {
        float32x2_t est1 = this->approxInvert().fVec,
                    est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vadd_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); }
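    // 32-bit ARM NEON has no floating-point divide instruction, so off
    // AArch64 we divide by multiplying with the refined reciprocal estimate.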
    SkNf operator / (const SkNf& o) const {
#if defined(SK_CPU_ARM64)
        return vdiv_f32(fVec, o.fVec);
#else
        return vmul_f32(fVec, o.invert().fVec);
#endif
    }

    Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }

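    // vrsqrte_f32 is a low-precision estimate of 1/sqrt(x); one vrsqrts_f32
    // Newton-Raphson step refines it.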
    SkNf rsqrt() const {
        float32x2_t est0 = vrsqrte_f32(fVec),
                    est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
        return est1;
    }

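    // 32-bit ARM NEON also lacks a sqrt instruction, so sqrt(x) is computed
    // as x * (1/sqrt(x)) from a twice-refined reciprocal square root.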
    SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
        return vsqrt_f32(fVec);
#else
        float32x2_t est1 = this->rsqrt().fVec,
        // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
                    est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
        return vmul_f32(fVec, est2);
#endif
    }

    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float32x2_t fVec;
};

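// float64x2_t and the f64 intrinsics used below are AArch64-only, hence the
// SK_CPU_ARM64 guard around these double-precision specializations.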
#if defined(SK_CPU_ARM64)
template <>
class SkNi<2, int64_t> {
public:
    SkNi(int64x2_t vec) : fVec(vec) {}

    SkNi() {}
    bool allTrue() const { return fVec[0] && fVec[1]; }
    bool anyTrue() const { return fVec[0] || fVec[1]; }
private:
    int64x2_t fVec;
};

template <>
class SkNf<2, double> {
    typedef SkNi<2, int64_t> Ni;
public:
    SkNf(float64x2_t vec) : fVec(vec) {}

    SkNf() {}
    explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}
    static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }
    SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }

    void store(double vals[2]) const { vst1q_f64(vals, fVec); }

    SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); }

    Ni operator == (const SkNf& o) const { return vreinterpretq_s64_u64(vceqq_f64(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpretq_s64_u64(vcltq_f64(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpretq_s64_u64(vcgtq_f64(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpretq_s64_u64(vcleq_f64(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpretq_s64_u64(vcgeq_f64(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpretq_s64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }

    SkNf sqrt() const { return vsqrtq_f64(fVec); }
    SkNf rsqrt() const {
        float64x2_t est0 = vrsqrteq_f64(fVec),
                    est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
        return est1;
    }

    SkNf approxInvert() const {
        float64x2_t est0 = vrecpeq_f64(fVec),
                    est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);
        return est1;
    }

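    // Each vrecps step roughly doubles the number of correct bits, so doubles
    // take one more Newton-Raphson iteration than the float version to get
    // near the 53-bit mantissa: invert() refines twice after approxInvert().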
    SkNf invert() const {
        float64x2_t est1 = this->approxInvert().fVec,
                    est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),
                    est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);
        return est3;
    }

    double operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float64x2_t fVec;
};
#endif//defined(SK_CPU_ARM64)

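// The four-lane float specialization mirrors SkNf<2, float> above, using the
// quad-register (q-suffixed) forms of the same intrinsics.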
template <>
class SkNf<4, float> {
    typedef SkNi<4, int32_t> Ni;
public:
    SkNf(float32x4_t vec) : fVec(vec) {}
    float32x4_t vec() const { return fVec; }

    SkNf() {}
    explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
    static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
    SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }

    void store(float vals[4]) const { vst1q_f32(vals, fVec); }

    SkNf approxInvert() const {
        float32x4_t est0 = vrecpeq_f32(fVec),
                    est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
        return est1;
    }
    SkNf invert() const {
        float32x4_t est1 = this->approxInvert().fVec,
                    est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vaddq_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const {
#if defined(SK_CPU_ARM64)
        return vdivq_f32(fVec, o.fVec);
#else
        return vmulq_f32(fVec, o.invert().fVec);
#endif
    }

    Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
    Ni operator != (const SkNf& o) const {
        return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }

    SkNf rsqrt() const {
        float32x4_t est0 = vrsqrteq_f32(fVec),
                    est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
        return est1;
    }

    SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
        return vsqrtq_f32(fVec);
#else
        float32x4_t est1 = this->rsqrt().fVec,
        // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
                    est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
        return vmulq_f32(fVec, est2);
#endif
    }

    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 4);
        return fVec[k];
    }

private:
    float32x4_t fVec;
};

#endif//SkNx_neon_DEFINED