OLD | NEW |
1 | 1 |
2 /* | 2 /* |
3 * Copyright 2006 The Android Open Source Project | 3 * Copyright 2006 The Android Open Source Project |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
7 */ | 7 */ |
8 | 8 |
9 | 9 |
10 #ifndef SkMath_DEFINED | 10 #ifndef SkMath_DEFINED |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
/**
 *  Returns true if value is a power of 2. Does not explicitly check for
 *  value <= 0; in particular, zero is reported as a power of 2 because
 *  (0 & (0 - 1)) == 0.
 *
 *  @param value  The integer to test.
 *  @return       true when (value & (value - 1)) is zero.
 */
template <typename T> inline bool SkIsPow2(T value) {
    // Subtracting 1 clears the lowest set bit and sets every bit below it, so
    // the AND is zero exactly when value has at most one bit set.
    return (value & (value - 1)) == 0;
}
156 | 156 |
157 /////////////////////////////////////////////////////////////////////////////// | 157 /////////////////////////////////////////////////////////////////////////////// |
158 | 158 |
/**
 *  SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.
 *  With this requirement, we can generate faster instructions on some
 *  architectures.
 *
 *  Three variants are selected at compile time:
 *    - SK_ARM_HAS_EDSP: an inline function that issues the ARM "smulbb"
 *      instruction through inline assembly.
 *    - SK_DEBUG (without SK_ARM_HAS_EDSP): an inline function that asserts
 *      both operands fit in int16_t and then multiplies them normally.
 *    - otherwise: a plain macro expanding to ((x) * (y)), with no range check.
 */
#ifdef SK_ARM_HAS_EDSP
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        int32_t product;
        asm("smulbb %0, %1, %2 \n"
            : "=r"(product)
            : "r"(x), "r"(y)
            );
        return product;
    }
#else
    #ifdef SK_DEBUG
        static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
            // Round-tripping through int16_t catches operands outside the
            // promised 16-bit range.
            SkASSERT((int16_t)x == x);
            SkASSERT((int16_t)y == y);
            return x * y;
        }
    #else
        #define SkMulS16(x, y)  ((x) * (y))
    #endif
#endif
186 | |
187 /** | |
188 * Return a*b/((1 << shift) - 1), rounding any fractional bits. | 160 * Return a*b/((1 << shift) - 1), rounding any fractional bits. |
189 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 | 161 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 |
190 */ | 162 */ |
191 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { | 163 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { |
192 SkASSERT(a <= 32767); | 164 SkASSERT(a <= 32767); |
193 SkASSERT(b <= 32767); | 165 SkASSERT(b <= 32767); |
194 SkASSERT(shift > 0 && shift <= 8); | 166 SkASSERT(shift > 0 && shift <= 8); |
195 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); | 167 unsigned prod = a*b + (1 << (shift - 1)); |
196 return (prod + (prod >> shift)) >> shift; | 168 return (prod + (prod >> shift)) >> shift; |
197 } | 169 } |
198 | 170 |
199 /** | 171 /** |
200 * Return a*b/255, rounding any fractional bits. | 172 * Return a*b/255, rounding any fractional bits. |
201 * Only valid if a and b are unsigned and <= 32767. | 173 * Only valid if a and b are unsigned and <= 32767. |
202 */ | 174 */ |
203 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { | 175 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { |
204 SkASSERT(a <= 32767); | 176 SkASSERT(a <= 32767); |
205 SkASSERT(b <= 32767); | 177 SkASSERT(b <= 32767); |
206 unsigned prod = SkMulS16(a, b) + 128; | 178 unsigned prod = a*b + 128; |
207 return (prod + (prod >> 8)) >> 8; | 179 return (prod + (prod >> 8)) >> 8; |
208 } | 180 } |
209 | 181 |
/**
 *  Stores numer/denom and numer%denom into div and mod respectively.
 *
 *  @param numer  Dividend.
 *  @param denom  Divisor; no zero check is performed here.
 *  @param div    Receives the quotient, converted to Out.
 *  @param mod    Receives the remainder, converted to Out.
 */
template <typename In, typename Out>
inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
#ifdef SK_CPU_ARM32
    // If we wrote this as in the else branch, GCC won't fuse the two into one
    // divmod call, but rather a div call followed by a divmod.  Silly!  This
    // version is just as fast as calling __aeabi_[u]idivmod manually, but with
    // prettier code.
    //
    // This benches as around 2x faster than the code in the else branch.
    const In d = numer/denom;
    *div = static_cast<Out>(d);
    // Rebuild the remainder from the quotient instead of issuing a second
    // division.
    *mod = static_cast<Out>(numer-d*denom);
#else
    // On x86 this will just be a single idiv.
    *div = static_cast<Out>(numer/denom);
    *mod = static_cast<Out>(numer%denom);
#endif
}
231 | 203 |
232 #endif | 204 #endif |
OLD | NEW |