OLD | NEW |
1 | 1 |
2 /* | 2 /* |
3 * Copyright 2006 The Android Open Source Project | 3 * Copyright 2006 The Android Open Source Project |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
7 */ | 7 */ |
8 | 8 |
9 | 9 |
10 #ifndef SkFloatingPoint_DEFINED | 10 #ifndef SkFloatingPoint_DEFINED |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 #define sk_double_ceil2int(x) (int)ceil(x) | 136 #define sk_double_ceil2int(x) (int)ceil(x) |
137 | 137 |
138 extern const uint32_t gIEEENotANumber; | 138 extern const uint32_t gIEEENotANumber; |
139 extern const uint32_t gIEEEInfinity; | 139 extern const uint32_t gIEEEInfinity; |
140 extern const uint32_t gIEEENegativeInfinity; | 140 extern const uint32_t gIEEENegativeInfinity; |
141 | 141 |
142 #define SK_FloatNaN (*SkTCast<const float*>(&gIEEENotANumber)) | 142 #define SK_FloatNaN (*SkTCast<const float*>(&gIEEENotANumber)) |
143 #define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity)) | 143 #define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity)) |
144 #define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfini
ty)) | 144 #define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfini
ty)) |
145 | 145 |
146 #if defined(__SSE__) | |
147 #include <xmmintrin.h> | |
148 #elif defined(SK_ARM_HAS_NEON) | |
149 #include <arm_neon.h> | |
150 #endif | |
151 | |
152 // Fast, approximate inverse square root. | 146 // Fast, approximate inverse square root. |
153 // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. | 147 // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. |
154 static inline float sk_float_rsqrt(const float x) { | 148 static inline float sk_float_rsqrt(const float x) { |
155 // We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got | 149 // We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got |
156 // it at compile time. This is going to be too fast to productively hide behind a function pointer. | 150 // it at compile time. This is going to be too fast to productively hide behind a function pointer. |
157 // | 151 // |
158 // We do one step of Newton's method to refine the estimates in the NEON and null paths. No | 152 // We do one step of Newton's method to refine the estimates in the NEON and null paths. No |
159 // refinement is faster, but very inaccurate. Two steps is more accurate, but slower than 1/sqrt. | 153 // refinement is faster, but very inaccurate. Two steps is more accurate, but slower than 1/sqrt. |
160 #if defined(__SSE__) | 154 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
161 float result; | 155 return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); |
162 _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x))); | |
163 return result; | |
164 #elif defined(SK_ARM_HAS_NEON) | 156 #elif defined(SK_ARM_HAS_NEON) |
165 // Get initial estimate. | 157 // Get initial estimate. |
166 const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doi
ng everything 2x. | 158 const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doi
ng everything 2x. |
167 float32x2_t estimate = vrsqrte_f32(xx); | 159 float32x2_t estimate = vrsqrte_f32(xx); |
168 | 160 |
169 // One step of Newton's method to refine. | 161 // One step of Newton's method to refine. |
170 const float32x2_t estimate_sq = vmul_f32(estimate, estimate); | 162 const float32x2_t estimate_sq = vmul_f32(estimate, estimate); |
171 estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); | 163 estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); |
172 return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in
both places. | 164 return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in
both places. |
173 #else | 165 #else |
174 // Get initial estimate. | 166 // Get initial estimate. |
175 int i = *SkTCast<int*>(&x); | 167 int i = *SkTCast<int*>(&x); |
176 i = 0x5f3759df - (i>>1); | 168 i = 0x5f3759df - (i>>1); |
177 float estimate = *SkTCast<float*>(&i); | 169 float estimate = *SkTCast<float*>(&i); |
178 | 170 |
179 // One step of Newton's method to refine. | 171 // One step of Newton's method to refine. |
180 const float estimate_sq = estimate*estimate; | 172 const float estimate_sq = estimate*estimate; |
181 estimate *= (1.5f-0.5f*x*estimate_sq); | 173 estimate *= (1.5f-0.5f*x*estimate_sq); |
182 return estimate; | 174 return estimate; |
183 #endif | 175 #endif |
184 } | 176 } |
185 | 177 |
186 #endif | 178 #endif |
OLD | NEW |