include/core/SkMath.h - Issue 1273203002: The compiler can generate smulbb perfectly well nowadays.

Side by Side Diff: include/core/SkMath.h

Issue 1273203002: The compiler can generate smulbb perfectly well nowadays. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: parens Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1	1

2 /*	2 /*

3 * Copyright 2006 The Android Open Source Project	3 * Copyright 2006 The Android Open Source Project

4 *	4 *

5 * Use of this source code is governed by a BSD-style license that can be	5 * Use of this source code is governed by a BSD-style license that can be

6 * found in the LICENSE file.	6 * found in the LICENSE file.

7 */	7 */

8	8

9	9

10 #ifndef SkMath_DEFINED	10 #ifndef SkMath_DEFINED

(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
150 * Returns true if value is a power of 2. Does not explicitly check for	150 * Returns true if value is a power of 2. Does not explicitly check for

151 * value <= 0.	151 * value <= 0.

152 */	152 */

153 template <typename T> inline bool SkIsPow2(T value) {	153 template <typename T> inline bool SkIsPow2(T value) {

154 return (value & (value - 1)) == 0;	154 return (value & (value - 1)) == 0;

155 }	155 }

156	156

157 ///////////////////////////////////////////////////////////////////////////////	157 ///////////////////////////////////////////////////////////////////////////////

158	158

159 /**	159 /**

160 * SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.

161 * With this requirement, we can generate faster instructions on some

162 * architectures.

163 */

164 #ifdef SK_ARM_HAS_EDSP

165 static inline int32_t SkMulS16(S16CPU x, S16CPU y) {

166 SkASSERT((int16_t)x == x);

167 SkASSERT((int16_t)y == y);

168 int32_t product;

169 asm("smulbb %0, %1, %2 \n"

170 : "=r"(product)

171 : "r"(x), "r"(y)

172 );

173 return product;

174 }

175 #else

176 #ifdef SK_DEBUG

177 static inline int32_t SkMulS16(S16CPU x, S16CPU y) {

178 SkASSERT((int16_t)x == x);

179 SkASSERT((int16_t)y == y);

180 return x * y;

181 }

182 #else

183 #define SkMulS16(x, y) ((x) * (y))

184 #endif

185 #endif

186

187 /**

188 * Return a*b/((1 << shift) - 1), rounding any fractional bits.	160 * Return a*b/((1 << shift) - 1), rounding any fractional bits.

189 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8	161 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8

190 */	162 */

191 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) {	163 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) {

192 SkASSERT(a <= 32767);	164 SkASSERT(a <= 32767);

193 SkASSERT(b <= 32767);	165 SkASSERT(b <= 32767);

194 SkASSERT(shift > 0 && shift <= 8);	166 SkASSERT(shift > 0 && shift <= 8);

195 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1));	167 unsigned prod = a*b + (1 << (shift - 1));

196 return (prod + (prod >> shift)) >> shift;	168 return (prod + (prod >> shift)) >> shift;

197 }	169 }

198	170

199 /**	171 /**

200 * Return a*b/255, rounding any fractional bits.	172 * Return a*b/255, rounding any fractional bits.

201 * Only valid if a and b are unsigned and <= 32767.	173 * Only valid if a and b are unsigned and <= 32767.

202 */	174 */

203 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) {	175 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) {

204 SkASSERT(a <= 32767);	176 SkASSERT(a <= 32767);

205 SkASSERT(b <= 32767);	177 SkASSERT(b <= 32767);

206 unsigned prod = SkMulS16(a, b) + 128;	178 unsigned prod = a*b + 128;

207 return (prod + (prod >> 8)) >> 8;	179 return (prod + (prod >> 8)) >> 8;

208 }	180 }

209	181

210 /**	182 /**

211 * Stores numer/denom and numer%denom into div and mod respectively.	183 * Stores numer/denom and numer%denom into div and mod respectively.

212 */	184 */

213 template <typename In, typename Out>	185 template <typename In, typename Out>

214 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {	186 inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {

215 #ifdef SK_CPU_ARM32	187 #ifdef SK_CPU_ARM32

216 // If we wrote this as in the else branch, GCC won't fuse the two into one	188 // If we wrote this as in the else branch, GCC won't fuse the two into one

217 // divmod call, but rather a div call followed by a divmod. Silly! This	189 // divmod call, but rather a div call followed by a divmod. Silly! This

218 // version is just as fast as calling __aeabi_[u]idivmod manually, but with	190 // version is just as fast as calling __aeabi_[u]idivmod manually, but with

219 // prettier code.	191 // prettier code.

220 //	192 //

221 // This benches as around 2x faster than the code in the else branch.	193 // This benches as around 2x faster than the code in the else branch.

222 const In d = numer/denom;	194 const In d = numer/denom;

223 *div = static_cast<Out>(d);	195 *div = static_cast<Out>(d);

224 *mod = static_cast<Out>(numer-d*denom);	196 *mod = static_cast<Out>(numer-d*denom);

225 #else	197 #else

226 // On x86 this will just be a single idiv.	198 // On x86 this will just be a single idiv.

227 *div = static_cast<Out>(numer/denom);	199 *div = static_cast<Out>(numer/denom);

228 *mod = static_cast<Out>(numer%denom);	200 *mod = static_cast<Out>(numer%denom);

229 #endif	201 #endif

230 }	202 }

231	203

232 #endif	204 #endif

OLD	NEW

« no previous file with comments | « include/core/SkColorPriv.h ('k') | include/core/SkPreConfig.h » ('j') | no next file with comments »