| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 // It is important _not_ to put header guards here. | 8 // It is important _not_ to put header guards here. |
| 9 // This file will be intentionally included three times. | 9 // This file will be intentionally included three times. |
| 10 | 10 |
| (...skipping 20 matching lines...) |
| 31 #define M(...) template <> inline __VA_ARGS__ Sk2x<float>:: | 31 #define M(...) template <> inline __VA_ARGS__ Sk2x<float>:: |
| 32 | 32 |
| 33 M() Sk2x() {} | 33 M() Sk2x() {} |
| 34 M() Sk2x(float val) { fVec = vdup_n_f32(val); } | 34 M() Sk2x(float val) { fVec = vdup_n_f32(val); } |
| 35 M() Sk2x(float a, float b) { fVec = (float32x2_t) { a, b }; } | 35 M() Sk2x(float a, float b) { fVec = (float32x2_t) { a, b }; } |
| 36 M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } | 36 M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } |
| 37 | 37 |
| 38 M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } | 38 M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } |
| 39 M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } | 39 M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } |
| 40 | 40 |
| 41 M(Sk2f) approxInvert() const { |
| 42 float32x2_t est0 = vrecpe_f32(fVec), |
| 43 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); |
| 44 return est1; |
| 45 } |
| 46 |
| 47 M(Sk2f) invert() const { |
| 48 float32x2_t est1 = this->approxInvert().fVec, |
| 49 est2 = vmul_f32(vrecps_f32(est1, fVec), est1); |
| 50 return est2; |
| 51 } |
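For reference, a minimal scalar sketch (not part of this CL) of the iteration these intrinsics perform: vrecpe_f32 returns a coarse estimate x0 of 1/d (roughly 8 bits of precision), and vrecps_f32(x, d) computes (2 - d*x), so each vmul_f32(vrecps_f32(x, d), x) is one Newton-Raphson step x' = x*(2 - d*x), roughly doubling the number of correct bits. The helper name recip_step below is illustrative, not from the CL.

    #include <cstdio>

    // One Newton-Raphson refinement of a reciprocal estimate, mirroring
    // vmul_f32(vrecps_f32(x, d), x) in the NEON code above.
    static float recip_step(float x, float d) { return x * (2.0f - d * x); }

    int main() {
        const float d = 3.0f;
        float x = 0.33f;       // stand-in for the coarse vrecpe_f32 estimate
        x = recip_step(x, d);  // approxInvert(): one refinement
        x = recip_step(x, d);  // invert(): a second refinement
        std::printf("1/%g ~= %.9f\n", d, x);  // prints roughly 0.333333343
        return 0;
    }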
| 52 |
| 41 M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } | 53 M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } |
| 42 M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } | 54 M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } |
| 43 M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } | 55 M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } |
| 44 M(Sk2f) divide(const Sk2f& o) const { | 56 M(Sk2f) divide(const Sk2f& o) const { |
| 45 #if defined(SK_CPU_ARM64) | 57 #if defined(SK_CPU_ARM64) |
| 46 return vdiv_f32(fVec, o.fVec); | 58 return vdiv_f32(fVec, o.fVec); |
| 47 #else | 59 #else |
| 48 float32x2_t est0 = vrecpe_f32(o.fVec), | 60 return vmul_f32(fVec, o.invert().fVec); |
| 49 est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0), | |
| 50 est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1); | |
| 51 return vmul_f32(est2, fVec); | |
| 52 #endif | 61 #endif |
| 53 } | 62 } |
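Note on the 32-bit branch: the rewritten divide() computes fVec * o.invert().fVec, which unrolls to the same vrecpe/vrecps/vmul chain the deleted lines performed inline (one estimate plus two refinement steps), so results should be unchanged; only the operand order of the final multiply differs, and float multiplication is commutative for non-NaN inputs.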
| 54 | 63 |
| 55 M(Sk2f) Min(const Sk2f& a, const Sk2f& b) { return vmin_f32(a.fVec, b.fVec); } | 64 M(Sk2f) Min(const Sk2f& a, const Sk2f& b) { return vmin_f32(a.fVec, b.fVec); } |
| 56 M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return vmax_f32(a.fVec, b.fVec); } | 65 M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return vmax_f32(a.fVec, b.fVec); } |
| 57 | 66 |
| 58 M(Sk2f) rsqrt() const { | 67 M(Sk2f) rsqrt() const { |
| 59 float32x2_t est0 = vrsqrte_f32(fVec), | 68 float32x2_t est0 = vrsqrte_f32(fVec), |
| 60 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | 69 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); |
| 61 return est1; | 70 return est1; |
| (...skipping 30 matching lines...) |
| 92 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { return vminq_f64(a.fVec, b.fVec); } | 101 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { return vminq_f64(a.fVec, b.fVec); } |
| 93 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return vmaxq_f64(a.fVec, b.fVec); } | 102 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return vmaxq_f64(a.fVec, b.fVec); } |
| 94 | 103 |
| 95 M(Sk2d) rsqrt() const { | 104 M(Sk2d) rsqrt() const { |
| 96 float64x2_t est0 = vrsqrteq_f64(fVec), | 105 float64x2_t est0 = vrsqrteq_f64(fVec), |
| 97 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); | 106 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); |
| 98 return est1; | 107 return est1; |
| 99 } | 108 } |
| 100 M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } | 109 M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } |
| 101 | 110 |
| 111 M(Sk2d) approxInvert() const { |
| 112 float64x2_t est0 = vrecpeq_f64(fVec), |
| 113 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); |
| 114 return est1; |
| 115 } |
| 116 |
| 117 M(Sk2d) invert() const { |
| 118 float64x2_t est1 = this->approxInvert().fVec, |
| 119 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
| 120 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
| 121 return est3; |
| 122 } |
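The f64 path takes one more refinement step than the f32 path: each Newton step roughly doubles the number of correct bits, so a coarse estimate covers float's 24-bit significand after two steps but needs three to reach double's 53 bits. A hypothetical standalone demo (not part of this CL; the starting values are made up) of that quadratic convergence:

    #include <cmath>
    #include <cstdio>

    int main() {
        const double d = 7.0;
        double x = 0.14;            // coarse stand-in for vrecpeq_f64's estimate
        for (int step = 1; step <= 3; ++step) {
            x = x * (2.0 - d * x);  // one Newton-Raphson step
            // Error shrinks quadratically: ~6e-5, ~2e-8, ~4e-15.
            std::printf("step %d: error = %.3g\n", step, std::fabs(x - 1.0/d));
        }
        return 0;
    }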
| 123 |
| 102 #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. | 124 #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. |
| 103 M() Sk2x() {} | 125 M() Sk2x() {} |
| 104 M() Sk2x(double val) { fVec[0] = fVec[1] = val; } | 126 M() Sk2x(double val) { fVec[0] = fVec[1] = val; } |
| 105 M() Sk2x(double a, double b) { fVec[0] = a; fVec[1] = b; } | 127 M() Sk2x(double a, double b) { fVec[0] = a; fVec[1] = b; } |
| 106 M(Sk2d&) operator=(const Sk2d& o) { | 128 M(Sk2d&) operator=(const Sk2d& o) { |
| 107 fVec[0] = o.fVec[0]; | 129 fVec[0] = o.fVec[0]; |
| 108 fVec[1] = o.fVec[1]; | 130 fVec[1] = o.fVec[1]; |
| 109 return *this; | 131 return *this; |
| 110 } | 132 } |
| 111 | 133 |
| 112 M(Sk2d) Load(const double vals[2]) { return Sk2d(vals[0], vals[1]); } | 134 M(Sk2d) Load(const double vals[2]) { return Sk2d(vals[0], vals[1]); } |
| 113 M(void) store(double vals[2]) const { vals[0] = fVec[0]; vals[1] = fVec[1]; } | 135 M(void) store(double vals[2]) const { vals[0] = fVec[0]; vals[1] = fVec[1]; } |
| 114 | 136 |
| 115 M(Sk2d) add(const Sk2d& o) const { return Sk2d(fVec[0] + o.fVec[0], fVec[1] + o.fVec[1]); } | 137 M(Sk2d) add(const Sk2d& o) const { return Sk2d(fVec[0] + o.fVec[0], fVec[1] + o.fVec[1]); } |
| 116 M(Sk2d) subtract(const Sk2d& o) const { return Sk2d(fVec[0] - o.fVec[0], fVec[1] - o.fVec[1]); } | 138 M(Sk2d) subtract(const Sk2d& o) const { return Sk2d(fVec[0] - o.fVec[0], fVec[1] - o.fVec[1]); } |
| 117 M(Sk2d) multiply(const Sk2d& o) const { return Sk2d(fVec[0] * o.fVec[0], fVec[1] * o.fVec[1]); } | 139 M(Sk2d) multiply(const Sk2d& o) const { return Sk2d(fVec[0] * o.fVec[0], fVec[1] * o.fVec[1]); } |
| 118 M(Sk2d) divide(const Sk2d& o) const { return Sk2d(fVec[0] / o.fVec[0], fVec[1] / o.fVec[1]); } | 140 M(Sk2d) divide(const Sk2d& o) const { return Sk2d(fVec[0] / o.fVec[0], fVec[1] / o.fVec[1]); } |
| 119 | 141 |
| 120 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { | 142 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { |
| 121 return Sk2d(SkTMin(a.fVec[0], b.fVec[0]), SkTMin(a.fVec[1], b.fVec[1])); | 143 return Sk2d(SkTMin(a.fVec[0], b.fVec[0]), SkTMin(a.fVec[1], b.fVec[1])); |
| 122 } | 144 } |
| 123 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { | 145 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { |
| 124 return Sk2d(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); | 146 return Sk2d(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); |
| 125 } | 147 } |
| 126 | 148 |
| 127 M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])); } | 149 M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])); } |
| 128 M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])); } | 150 M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])); } |
| 151 |
| 152 M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); } |
| 153 M(Sk2d) approxInvert() const { return this->invert(); } |
| 129 #endif | 154 #endif |
| 130 | 155 |
| 131 #undef M | 156 #undef M |
| 132 | 157 |
| 133 #endif | 158 #endif |