include/core/SkColorPriv.h - Issue 100923003: Refactor FourByteInterps. Add 64-bit Fast version. Add tests.

Side by Side Diff: include/core/SkColorPriv.h

Issue 100923003: Refactor FourByteInterps. Add 64-bit Fast version. Add tests. (Closed) Base URL: https://skia.googlecode.com/svn/trunk

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1	1

2 /*	2 /*

3 * Copyright 2006 The Android Open Source Project	3 * Copyright 2006 The Android Open Source Project

4 *	4 *

5 * Use of this source code is governed by a BSD-style license that can be	5 * Use of this source code is governed by a BSD-style license that can be

6 * found in the LICENSE file.	6 * found in the LICENSE file.

7 */	7 */

8	8

9	9

10 #ifndef SkColorPriv_DEFINED	10 #ifndef SkColorPriv_DEFINED

(...skipping 251 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
262 * (src, dst, 0) returns dst	262 * (src, dst, 0) returns dst

263 * (src, dst, 0xFF) returns src	263 * (src, dst, 0xFF) returns src

264 */	264 */

265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,	265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,

266 U8CPU srcWeight) {	266 U8CPU srcWeight) {

267 unsigned scale = SkAlpha255To256(srcWeight);	267 unsigned scale = SkAlpha255To256(srcWeight);

268 return SkFourByteInterp256(src, dst, scale);	268 return SkFourByteInterp256(src, dst, scale);

269 }	269 }

270	270

271 /**	271 /**

272 * 32b optimized version; currently appears to be 10% faster even on 64b	272 * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB

273 * architectures than an equivalent 64b version and 30% faster than

274 * SkFourByteInterp(). Third parameter controls blending of the first two:

275 * (src, dst, 0) returns dst

276 * (src, dst, 256) returns src

277 * ** Does not match the results of SkFourByteInterp256() because we use

278 * a more accurate scale computation!

279 * TODO: migrate Skia function to using an accurate 255->256 alpha

280 * conversion.

281 */	273 */

282 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src,	274 static inline void SkSplay(SkPMColor color, uint32_t* ag, uint32_t* rb) {

283 SkPMColor dst,	275 const uint32_t mask = 0x00FF00FF;

284 unsigned scale) {	276 *ag = (color >> 8) & mask;

	277 *rb = color & mask;

	278 }

	279

	280 /**

	281 * 0xAARRGGBB -> 0x00AA00GG00RR00BB

	282 * (note, ARGB -> AGRB)

	283 */

	284 static inline uint64_t SkSplay(SkPMColor color) {

	285 const uint32_t mask = 0x00FF00FF;

	286 uint64_t agrb = (color >> 8) & mask; // 0x0000000000AA00GG

	287 agrb <<= 32; // 0x00AA00GG00000000

	288 agrb |= color & mask; // 0x00AA00GG00RR00BB

	289 return agrb;

	290 }

	291

	292 /**

	293 * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB

	294 */

	295 static inline SkPMColor SkUnsplay(uint32_t ag, uint32_t rb) {

	296 const uint32_t mask = 0xFF00FF00;

	297 return (ag & mask) | ((rb & mask) >> 8);

	298 }

	299

	300 /**

	301 * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB

	302 * (note, AGRB -> ARGB)

	303 */

	304 static inline SkPMColor SkUnsplay(uint64_t agrb) {

	305 const uint32_t mask = 0xFF00FF00;

	306 return ((agrb & mask) >> 8) | // 0x00RR00BB

	307 ((agrb >> 32) & mask); // 0xAARRGGBB

	308 }

	309

	310 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {

285 SkASSERT(scale <= 256);	311 SkASSERT(scale <= 256);

286	312

287 // Reorders ARGB to AG-RB in order to reduce the number of operations.	313 // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.

288 const uint32_t mask = 0xFF00FF;	314 uint32_t src_ag, src_rb, dst_ag, dst_rb;

289 uint32_t src_rb = src & mask;	315 SkSplay(src, &src_ag, &src_rb);

290 uint32_t src_ag = (src >> 8) & mask;	316 SkSplay(dst, &dst_ag, &dst_rb);

291 uint32_t dst_rb = dst & mask;

292 uint32_t dst_ag = (dst >> 8) & mask;

293	317

294 uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;	318 const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;

295 uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;	319 const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;

296	320

297 return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);	321 return SkUnsplay(ret_ag, ret_rb);

298 }	322 }

299	323

	324 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {

	325 SkASSERT(scale <= 256);

	326 // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.

	327 return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));

	328 }

	329

	330 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale >>7) everywhere.

	331

	332 /**

	333 * Same as SkFourByteInterp256, but faster.

	334 */

	335 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {

	336 // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.

	337 if (sizeof(void*) == 4) {

	338 return SkFastFourByteInterp256_32(src, dst, scale);

	339 } else {

	340 return SkFastFourByteInterp256_64(src, dst, scale);

	341 }

	342 }

	343

	344 /**

	345 * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better

	346 * srcWeight scaling to [0, 256].

	347 */

300 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,	348 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,

301 SkPMColor dst,	349 SkPMColor dst,

302 U8CPU srcWeight) {	350 U8CPU srcWeight) {

303 SkASSERT(srcWeight <= 255);	351 SkASSERT(srcWeight <= 255);

304 // scale = srcWeight + (srcWeight >> 7) is more accurate than	352 // scale = srcWeight + (srcWeight >> 7) is more accurate than

305 // scale = srcWeight + 1, but 7% slower	353 // scale = srcWeight + 1, but 7% slower

306 return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));	354 return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));

307 }	355 }

308	356

309 /**	357 /**

(...skipping 548 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
858 int srcG = SkColorGetG(src);	906 int srcG = SkColorGetG(src);

859 int srcB = SkColorGetB(src);	907 int srcB = SkColorGetB(src);

860	908

861 for (int i = 0; i < width; i++) {	909 for (int i = 0; i < width; i++) {

862 dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],	910 dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],

863 opaqueDst);	911 opaqueDst);

864 }	912 }

865 }	913 }

866	914

867 #endif	915 #endif

OLD	NEW

« no previous file with comments | « gyp/tests.gyp ('k') | tests/ColorPrivTest.cpp » ('j') | no next file with comments »