OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 // It is important _not_ to put header guards here. | 8 // It is important _not_ to put header guards here. |
9 // This file will be intentionally included three times. | 9 // This file will be intentionally included three times. |
10 | 10 |
(...skipping 20 matching lines...) Expand all Loading... |
// Helper macro for the float section: M(ret) expands to
// `template <> inline ret Sk2x<float>::`, so each M(...) row that follows
// defines an explicit specialization of one Sk2x<float> member.
// M() with no argument is used for constructors, which have no return type.
31 #define M(...) template <> inline __VA_ARGS__ Sk2x<float>:: | 31 #define M(...) template <> inline __VA_ARGS__ Sk2x<float>:: |
32 | 32 |
// Sk2x<float> constructors and copy-assignment (unchanged by this diff).
//  - Sk2x():               empty body; fVec is not assigned here.
//  - Sk2x(float val):      fVec = vdup_n_f32(val), i.e. val in both lanes.
//  - Sk2x(float a, b):     fVec = { a, b }.
//  - operator=(const Sk2f&): copies o.fVec and returns *this.
// NOTE(review): Sk2f is presumably an alias for Sk2x<float>; the alias itself
// is not visible in this view.
33 M() Sk2x() {} | 33 M() Sk2x() {} |
34 M() Sk2x(float val) { fVec = vdup_n_f32(val); } | 34 M() Sk2x(float val) { fVec = vdup_n_f32(val); } |
35 M() Sk2x(float a, float b) { fVec = (float32x2_t) { a, b }; } | 35 M() Sk2x(float a, float b) { fVec = (float32x2_t) { a, b }; } |
36 M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } | 36 M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } |
37 | 37 |
// Memory transfer for Sk2x<float> (unchanged by this diff).
//  - Load(vals):  reads two floats from vals via vld1_f32 and returns them as an Sk2f.
//  - store(vals): writes the two lanes of fVec to vals via vst1_f32.
// NOTE(review): Load returns a float32x2_t where an Sk2f is expected, so it
// relies on a conversion from float32x2_t that is declared outside this view.
38 M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } | 38 M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } |
39 M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } | 39 M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } |
40 | 40 |
// NEW in this diff: Sk2x<float>::approxInvert() - per-lane approximate reciprocal.
//   est0 = vrecpe_f32(fVec)                      hardware reciprocal estimate
//   est1 = est0 * vrecps_f32(est0, fVec)         one refinement step
// Per ARM's intrinsics reference vrecps_f32(a, b) yields 2 - a*b, so est1 is one
// Newton-Raphson iteration for 1/x. invert() applies one more such step, so this
// is the cheaper, less refined of the two.
| 41 M(Sk2f) approxInvert() const { |
| 42 float32x2_t est0 = vrecpe_f32(fVec), |
| 43 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); |
| 44 return est1; |
| 45 } |
| 46 |
// NEW in this diff: Sk2x<float>::invert() - per-lane reciprocal.
// Starts from approxInvert() (estimate + one refinement) and applies one more
// vrecps_f32 refinement step, giving estimate + two refinements in total.
// This is the same est0 -> est1 -> est2 sequence that the OLD divide() computed
// inline on its non-ARM64 path.
| 47 M(Sk2f) invert() const { |
| 48 float32x2_t est1 = this->approxInvert().fVec, |
| 49 est2 = vmul_f32(vrecps_f32(est1, fVec), est1); |
| 50 return est2; |
| 51 } |
| 52 |
// Lane-wise arithmetic for Sk2x<float> (code unchanged; only line numbers shift).
// Each method applies the matching NEON intrinsic to fVec and o.fVec:
// add -> vadd_f32, subtract -> vsub_f32, multiply -> vmul_f32.
41 M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } | 53 M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } |
42 M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } | 54 M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } |
43 M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } | 55 M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } |
// Sk2x<float>::divide(o) - lane-wise fVec / o.fVec.
//  - When SK_CPU_ARM64 is defined: uses the vdiv_f32 intrinsic directly.
//  - Otherwise: multiplies fVec by a reciprocal of o.
// What this diff changes: the OLD column computed the reciprocal inline
// (vrecpe_f32 estimate followed by two vrecps_f32 refinement steps) and returned
// est2 * fVec; the NEW column delegates to o.invert() and returns
// fVec * o.invert(). The refinement sequence is the same; only the operand order
// of the final vmul_f32 differs.
44 M(Sk2f) divide(const Sk2f& o) const { | 56 M(Sk2f) divide(const Sk2f& o) const { |
45 #if defined(SK_CPU_ARM64) | 57 #if defined(SK_CPU_ARM64) |
46 return vdiv_f32(fVec, o.fVec); | 58 return vdiv_f32(fVec, o.fVec); |
47 #else | 59 #else |
48 float32x2_t est0 = vrecpe_f32(o.fVec), | 60 return vmul_f32(fVec, o.invert().fVec); |
49 est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0), | |
50 est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1); | |
51 return vmul_f32(est2, fVec); | |
52 #endif | 61 #endif |
53 } | 62 } |
54 | 63 |
// Lane-wise minimum / maximum of two Sk2f values (code unchanged by this diff).
// Min -> vmin_f32(a.fVec, b.fVec); Max -> vmax_f32(a.fVec, b.fVec).
55 M(Sk2f) Min(const Sk2f& a, const Sk2f& b) { return vmin_f32(a.fVec, b.fVec); } | 64 M(Sk2f) Min(const Sk2f& a, const Sk2f& b) { return vmin_f32(a.fVec, b.fVec); } |
56 M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return vmax_f32(a.fVec, b.fVec); } | 65 M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return vmax_f32(a.fVec, b.fVec); } |
57 | 66 |
58 M(Sk2f) rsqrt() const { | 67 M(Sk2f) rsqrt() const { |
59 float32x2_t est0 = vrsqrte_f32(fVec), | 68 float32x2_t est0 = vrsqrte_f32(fVec), |
60 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | 69 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); |
61 return est1; | 70 return est1; |
(...skipping 30 matching lines...) Expand all Loading... |
// Sk2d lane-wise minimum / maximum, float64x2_t path (code unchanged by this diff).
// Min -> vminq_f64(a.fVec, b.fVec); Max -> vmaxq_f64(a.fVec, b.fVec).
// The diff viewer wrapped each long row, so the trailing "; }" of every
// definition appears on its own line.
92 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { return vminq_f64(a.fVec, b.fVec)
; } | 101 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { return vminq_f64(a.fVec, b.fVec)
; } |
93 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return vmaxq_f64(a.fVec, b.fVec)
; } | 102 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return vmaxq_f64(a.fVec, b.fVec)
; } |
94 | 103 |
// Sk2d::rsqrt(), float64x2_t path (code unchanged by this diff).
//   est0 = vrsqrteq_f64(fVec)                               hardware 1/sqrt estimate
//   est1 = est0 * vrsqrtsq_f64(fVec, est0 * est0)           one refinement step
// Per ARM's intrinsics reference vrsqrtsq_f64(a, b) yields (3 - a*b) / 2, so est1
// is one Newton-Raphson iteration for 1/sqrt(x).
// NOTE(review): only a single refinement step is applied here, the same count as
// the float rsqrt() earlier in this file; presumably rsqrt() is intended to be an
// approximation rather than a full-precision result - confirm against the
// Sk2x interface documentation.
95 M(Sk2d) rsqrt() const { | 104 M(Sk2d) rsqrt() const { |
96 float64x2_t est0 = vrsqrteq_f64(fVec), | 105 float64x2_t est0 = vrsqrteq_f64(fVec), |
97 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); | 106 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); |
98 return est1; | 107 return est1; |
99 } | 108 } |
// Sk2d::sqrt(), float64x2_t path: lane-wise square root via the vsqrtq_f64
// intrinsic (no estimate/refinement sequence). Code unchanged by this diff.
100 M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } | 109 M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } |
101 | 110 |
// NEW in this diff: Sk2d::approxInvert(), float64x2_t path.
// Same shape as the float version: vrecpeq_f64 reciprocal estimate followed by
// one vrecpsq_f64 refinement step (est1 = est0 * vrecpsq_f64(est0, fVec)).
| 111 M(Sk2d) approxInvert() const { |
| 112 float64x2_t est0 = vrecpeq_f64(fVec), |
| 113 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); |
| 114 return est1; |
| 115 } |
| 116 |
// NEW in this diff: Sk2d::invert(), float64x2_t path.
// Starts from approxInvert() (estimate + one refinement) and applies two further
// vrecpsq_f64 refinement steps, i.e. three refinements in total - one more than
// the float invert() uses.
// NOTE(review): the extra step is presumably there for the wider double
// mantissa; the diff itself does not state the accuracy target.
| 117 M(Sk2d) invert() const { |
| 118 float64x2_t est1 = this->approxInvert().fVec, |
| 119 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
| 120 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
| 121 return est3; |
| 122 } |
| 123 |
102 #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. | 124 #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. |
// Scalar fallback for Sk2d: constructors and copy-assignment (code unchanged).
// In this branch fVec is indexed as fVec[0] / fVec[1] instead of being a NEON vector.
//  - Sk2x():                 empty body; fVec is not assigned here.
//  - Sk2x(double val):       both elements set to val.
//  - Sk2x(double a, b):      fVec[0] = a, fVec[1] = b.
//  - operator=(const Sk2d&): element-wise copy, returns *this.
103 M() Sk2x() {} | 125 M() Sk2x() {} |
104 M() Sk2x(double val) { fVec[0] = fVec[1] = val; } | 126 M() Sk2x(double val) { fVec[0] = fVec[1] = val; } |
105 M() Sk2x(double a, double b) { fVec[0] = a; fVec[1] = b; } | 127 M() Sk2x(double a, double b) { fVec[0] = a; fVec[1] = b; } |
106 M(Sk2d&) operator=(const Sk2d& o) { | 128 M(Sk2d&) operator=(const Sk2d& o) { |
107 fVec[0] = o.fVec[0]; | 129 fVec[0] = o.fVec[0]; |
108 fVec[1] = o.fVec[1]; | 130 fVec[1] = o.fVec[1]; |
109 return *this; | 131 return *this; |
110 } | 132 } |
111 | 133 |
// Scalar fallback for Sk2d: memory transfer (code unchanged by this diff).
//  - Load(vals):  builds an Sk2d from vals[0] and vals[1].
//  - store(vals): copies fVec[0] and fVec[1] into vals[0] and vals[1].
// The store() row is wrapped by the diff viewer; its closing "}" is on the next line.
112 M(Sk2d) Load(const double vals[2]) { return Sk2d(vals[0], vals[1]); } | 134 M(Sk2d) Load(const double vals[2]) { return Sk2d(vals[0], vals[1]); } |
113 M(void) store(double vals[2]) const { vals[0] = fVec[0]; vals[1] = fVec[1];
} | 135 M(void) store(double vals[2]) const { vals[0] = fVec[0]; vals[1] = fVec[1];
} |
114 | 136 |
// Scalar fallback for Sk2d: element-wise add / subtract / multiply / divide
// (code unchanged by this diff). Each method applies the plain C++ operator to
// element 0 and element 1 independently and returns a new Sk2d.
// divide() here uses the built-in "/" operator rather than a reciprocal estimate.
// Every row is wrapped by the diff viewer in the middle of the identifier "fVec"
// ("fVe" + "c[1] ..."); the underlying code is a single line per method.
115 M(Sk2d) add(const Sk2d& o) const { return Sk2d(fVec[0] + o.fVec[0], fVe
c[1] + o.fVec[1]); } | 137 M(Sk2d) add(const Sk2d& o) const { return Sk2d(fVec[0] + o.fVec[0], fVe
c[1] + o.fVec[1]); } |
116 M(Sk2d) subtract(const Sk2d& o) const { return Sk2d(fVec[0] - o.fVec[0], fVe
c[1] - o.fVec[1]); } | 138 M(Sk2d) subtract(const Sk2d& o) const { return Sk2d(fVec[0] - o.fVec[0], fVe
c[1] - o.fVec[1]); } |
117 M(Sk2d) multiply(const Sk2d& o) const { return Sk2d(fVec[0] * o.fVec[0], fVe
c[1] * o.fVec[1]); } | 139 M(Sk2d) multiply(const Sk2d& o) const { return Sk2d(fVec[0] * o.fVec[0], fVe
c[1] * o.fVec[1]); } |
118 M(Sk2d) divide(const Sk2d& o) const { return Sk2d(fVec[0] / o.fVec[0], fVe
c[1] / o.fVec[1]); } | 140 M(Sk2d) divide(const Sk2d& o) const { return Sk2d(fVec[0] / o.fVec[0], fVe
c[1] / o.fVec[1]); } |
119 | 141 |
// Scalar fallback for Sk2d: element-wise minimum / maximum (code unchanged).
// Each element is computed independently with SkTMin / SkTMax, helpers that are
// declared outside this view.
120 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { | 142 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { |
121 return Sk2d(SkTMin(a.fVec[0], b.fVec[0]), SkTMin(a.fVec[1], b.fVec[1])); | 143 return Sk2d(SkTMin(a.fVec[0], b.fVec[0]), SkTMin(a.fVec[1], b.fVec[1])); |
122 } | 144 } |
123 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { | 145 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { |
124 return Sk2d(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); | 146 return Sk2d(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); |
125 } | 147 } |
126 | 148 |
// Scalar fallback for Sk2d: rsqrt() and sqrt() (code unchanged by this diff).
//  - rsqrt(): 1.0 / ::sqrt(x) per element, so it is computed with a real square
//             root and division, unlike the estimate-based vector rsqrt().
//  - sqrt():  ::sqrt(x) per element.
// Both rows are wrapped by the diff viewer; the trailing "); }" sits on its own line.
127 M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])
); } | 149 M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])
); } |
128 M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])
); } | 150 M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])
); } |
| 151 |
// NEW in this diff: scalar fallback for Sk2d reciprocal.
//  - invert():       1.0 / x per element using ordinary division.
//  - approxInvert(): simply forwards to invert(); this path has no cheaper
//                    approximation, so both methods return the same value.
| 152 M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); } |
| 153 M(Sk2d) approxInvert() const { return this->invert(); } |
129 #endif | 154 #endif |
130 | 155 |
131 #undef M | 156 #undef M |
132 | 157 |
133 #endif | 158 #endif |
OLD | NEW |