OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2015 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 // It is important _not_ to put header guards here. | |
9 // This file will be intentionally included three times. | |
10 | |
11 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 | |
12 | |
13 #if defined(SK2X_PREAMBLE) | |
14 #include <arm_neon.h> | |
15 #include <math.h> | |
16 template <typename T> struct SkScalarToSIMD; | |
17 template <> struct SkScalarToSIMD< float> { typedef float32x2_t Type; }; | |
18 #if defined(SK_CPU_ARM64) | |
19 template <> struct SkScalarToSIMD<double> { typedef float64x2_t Type; }; | |
20 #else | |
21 template <> struct SkScalarToSIMD<double> { typedef double Type[2]; }; | |
22 #endif | |
23 | |
24 | |
25 #elif defined(SK2X_PRIVATE) | |
26 typename SkScalarToSIMD<T>::Type fVec; | |
27 /*implicit*/ Sk2x(const typename SkScalarToSIMD<T>::Type vec) { fVec = vec;
} | |
28 | |
29 #else | |
30 | |
31 #define M(...) template <> inline __VA_ARGS__ Sk2x<float>:: | |
32 | |
33 M() Sk2x() {} | |
34 M() Sk2x(float val) { fVec = vdup_n_f32(val); } | |
35 M() Sk2x(float a, float b) { fVec = (float32x2_t) { a, b }; } | |
36 M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } | |
37 | |
38 M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } | |
39 M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } | |
40 | |
41 M(Sk2f) approxInvert() const { | |
42 float32x2_t est0 = vrecpe_f32(fVec), | |
43 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); | |
44 return est1; | |
45 } | |
46 | |
47 M(Sk2f) invert() const { | |
48 float32x2_t est1 = this->approxInvert().fVec, | |
49 est2 = vmul_f32(vrecps_f32(est1, fVec), est1); | |
50 return est2; | |
51 } | |
52 | |
53 M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } | |
54 M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } | |
55 M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } | |
56 M(Sk2f) divide(const Sk2f& o) const { | |
57 #if defined(SK_CPU_ARM64) | |
58 return vdiv_f32(fVec, o.fVec); | |
59 #else | |
60 return vmul_f32(fVec, o.invert().fVec); | |
61 #endif | |
62 } | |
63 | |
64 M(Sk2f) Min(const Sk2f& a, const Sk2f& b) { return vmin_f32(a.fVec, b.fVec); } | |
65 M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return vmax_f32(a.fVec, b.fVec); } | |
66 | |
67 M(Sk2f) rsqrt() const { | |
68 float32x2_t est0 = vrsqrte_f32(fVec), | |
69 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | |
70 return est1; | |
71 } | |
72 M(Sk2f) sqrt() const { | |
73 #if defined(SK_CPU_ARM64) | |
74 return vsqrt_f32(fVec); | |
75 #else | |
76 float32x2_t est1 = this->rsqrt().fVec, | |
77 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). | |
78 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); | |
79 return vmul_f32(fVec, est2); | |
80 #endif | |
81 } | |
82 | |
83 #undef M | |
84 | |
85 #define M(...) template <> inline __VA_ARGS__ Sk2x<double>:: | |
86 | |
87 #if defined(SK_CPU_ARM64) | |
88 M() Sk2x() {} | |
89 M() Sk2x(double val) { fVec = vdupq_n_f64(val); } | |
90 M() Sk2x(double a, double b) { fVec = (float64x2_t) { a, b }; } | |
91 M(Sk2d&) operator=(const Sk2d& o) { fVec = o.fVec; return *this; } | |
92 | |
93 M(Sk2d) Load(const double vals[2]) { return vld1q_f64(vals); } | |
94 M(void) store(double vals[2]) const { vst1q_f64(vals, fVec); } | |
95 | |
96 M(Sk2d) add(const Sk2d& o) const { return vaddq_f64(fVec, o.fVec); } | |
97 M(Sk2d) subtract(const Sk2d& o) const { return vsubq_f64(fVec, o.fVec); } | |
98 M(Sk2d) multiply(const Sk2d& o) const { return vmulq_f64(fVec, o.fVec); } | |
99 M(Sk2d) divide(const Sk2d& o) const { return vdivq_f64(fVec, o.fVec); } | |
100 | |
101 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { return vminq_f64(a.fVec, b.fVec)
; } | |
102 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return vmaxq_f64(a.fVec, b.fVec)
; } | |
103 | |
104 M(Sk2d) rsqrt() const { | |
105 float64x2_t est0 = vrsqrteq_f64(fVec), | |
106 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); | |
107 return est1; | |
108 } | |
109 M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } | |
110 | |
111 M(Sk2d) approxInvert() const { | |
112 float64x2_t est0 = vrecpeq_f64(fVec), | |
113 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); | |
114 return est1; | |
115 } | |
116 | |
117 M(Sk2d) invert() const { | |
118 float64x2_t est1 = this->approxInvert().fVec, | |
119 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), | |
120 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); | |
121 return est3; | |
122 } | |
123 | |
124 #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. | |
125 M() Sk2x() {} | |
126 M() Sk2x(double val) { fVec[0] = fVec[1] = val; } | |
127 M() Sk2x(double a, double b) { fVec[0] = a; fVec[1] = b; } | |
128 M(Sk2d&) operator=(const Sk2d& o) { | |
129 fVec[0] = o.fVec[0]; | |
130 fVec[1] = o.fVec[1]; | |
131 return *this; | |
132 } | |
133 | |
134 M(Sk2d) Load(const double vals[2]) { return Sk2d(vals[0], vals[1]); } | |
135 M(void) store(double vals[2]) const { vals[0] = fVec[0]; vals[1] = fVec[1];
} | |
136 | |
137 M(Sk2d) add(const Sk2d& o) const { return Sk2d(fVec[0] + o.fVec[0], fVe
c[1] + o.fVec[1]); } | |
138 M(Sk2d) subtract(const Sk2d& o) const { return Sk2d(fVec[0] - o.fVec[0], fVe
c[1] - o.fVec[1]); } | |
139 M(Sk2d) multiply(const Sk2d& o) const { return Sk2d(fVec[0] * o.fVec[0], fVe
c[1] * o.fVec[1]); } | |
140 M(Sk2d) divide(const Sk2d& o) const { return Sk2d(fVec[0] / o.fVec[0], fVe
c[1] / o.fVec[1]); } | |
141 | |
142 M(Sk2d) Min(const Sk2d& a, const Sk2d& b) { | |
143 return Sk2d(SkTMin(a.fVec[0], b.fVec[0]), SkTMin(a.fVec[1], b.fVec[1])); | |
144 } | |
145 M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { | |
146 return Sk2d(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); | |
147 } | |
148 | |
149 M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])
); } | |
150 M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])
); } | |
151 | |
152 M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); } | |
153 M(Sk2d) approxInvert() const { return this->invert(); } | |
154 #endif | |
155 | |
156 #undef M | |
157 | |
158 #endif | |
OLD | NEW |