Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(188)

Side by Side Diff: include/core/SkMath.h

Issue 1273203002: The compiler can generate smulbb perfectly well nowadays. (Closed) | Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: parens | Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/core/SkColorPriv.h ('k') | include/core/SkPreConfig.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #ifndef SkMath_DEFINED 10 #ifndef SkMath_DEFINED
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
150 * Returns true if value is a power of 2. Does not explicitly check for 150 * Returns true if value is a power of 2. Does not explicitly check for
151 * value <= 0. 151 * value <= 0.
152 */ 152 */
153 template <typename T> inline bool SkIsPow2(T value) { 153 template <typename T> inline bool SkIsPow2(T value) {
154 return (value & (value - 1)) == 0; 154 return (value & (value - 1)) == 0;
155 } 155 }
156 156
157 /////////////////////////////////////////////////////////////////////////////// 157 ///////////////////////////////////////////////////////////////////////////////
158 158
159 /** 159 /**
160 * SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.
161 * With this requirement, we can generate faster instructions on some
162 * architectures.
163 */
164 #ifdef SK_ARM_HAS_EDSP
165 static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
166 SkASSERT((int16_t)x == x);
167 SkASSERT((int16_t)y == y);
168 int32_t product;
169 asm("smulbb %0, %1, %2 \n"
170 : "=r"(product)
171 : "r"(x), "r"(y)
172 );
173 return product;
174 }
175 #else
176 #ifdef SK_DEBUG
177 static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
178 SkASSERT((int16_t)x == x);
179 SkASSERT((int16_t)y == y);
180 return x * y;
181 }
182 #else
183 #define SkMulS16(x, y) ((x) * (y))
184 #endif
185 #endif
186
187 /**
188 * Return a*b/((1 << shift) - 1), rounding any fractional bits. 160 * Return a*b/((1 << shift) - 1), rounding any fractional bits.
189 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 161 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8
190 */ 162 */
191 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { 163 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) {
192 SkASSERT(a <= 32767); 164 SkASSERT(a <= 32767);
193 SkASSERT(b <= 32767); 165 SkASSERT(b <= 32767);
194 SkASSERT(shift > 0 && shift <= 8); 166 SkASSERT(shift > 0 && shift <= 8);
195 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); 167 unsigned prod = a*b + (1 << (shift - 1));
196 return (prod + (prod >> shift)) >> shift; 168 return (prod + (prod >> shift)) >> shift;
197 } 169 }
198 170
199 /** 171 /**
200 * Return a*b/255, rounding any fractional bits. 172 * Return a*b/255, rounding any fractional bits.
201 * Only valid if a and b are unsigned and <= 32767. 173 * Only valid if a and b are unsigned and <= 32767.
202 */ 174 */
203 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { 175 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) {
204 SkASSERT(a <= 32767); 176 SkASSERT(a <= 32767);
205 SkASSERT(b <= 32767); 177 SkASSERT(b <= 32767);
206 unsigned prod = SkMulS16(a, b) + 128; 178 unsigned prod = a*b + 128;
207 return (prod + (prod >> 8)) >> 8; 179 return (prod + (prod >> 8)) >> 8;
208 } 180 }
209 181
210 /** 182 /**
211 * Stores numer/denom and numer%denom into div and mod respectively. 183 * Stores numer/denom and numer%denom into div and mod respectively.
212 */ 184 */
213 template <typename In, typename Out> 185 template <typename In, typename Out>
214 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) { 186 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
215 #ifdef SK_CPU_ARM32 187 #ifdef SK_CPU_ARM32
216 // If we wrote this as in the else branch, GCC won't fuse the two into one 188 // If we wrote this as in the else branch, GCC won't fuse the two into one
217 // divmod call, but rather a div call followed by a divmod. Silly! This 189 // divmod call, but rather a div call followed by a divmod. Silly! This
218 // version is just as fast as calling __aeabi_[u]idivmod manually, but with 190 // version is just as fast as calling __aeabi_[u]idivmod manually, but with
219 // prettier code. 191 // prettier code.
220 // 192 //
221 // This benches as around 2x faster than the code in the else branch. 193 // This benches as around 2x faster than the code in the else branch.
222 const In d = numer/denom; 194 const In d = numer/denom;
223 *div = static_cast<Out>(d); 195 *div = static_cast<Out>(d);
224 *mod = static_cast<Out>(numer-d*denom); 196 *mod = static_cast<Out>(numer-d*denom);
225 #else 197 #else
226 // On x86 this will just be a single idiv. 198 // On x86 this will just be a single idiv.
227 *div = static_cast<Out>(numer/denom); 199 *div = static_cast<Out>(numer/denom);
228 *mod = static_cast<Out>(numer%denom); 200 *mod = static_cast<Out>(numer%denom);
229 #endif 201 #endif
230 } 202 }
231 203
232 #endif 204 #endif
OLD | NEW
« no previous file with comments | « include/core/SkColorPriv.h ('k') | include/core/SkPreConfig.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698