Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: include/core/SkColorPriv.h

Issue 100923003: Refactor FourByteInterps. Add 64-bit Fast version. Add tests. (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gyp/tests.gyp ('k') | tests/ColorPrivTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #ifndef SkColorPriv_DEFINED 10 #ifndef SkColorPriv_DEFINED
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
262 * (src, dst, 0) returns dst 262 * (src, dst, 0) returns dst
263 * (src, dst, 0xFF) returns src 263 * (src, dst, 0xFF) returns src
264 */ 264 */
265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst, 265 static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
266 U8CPU srcWeight) { 266 U8CPU srcWeight) {
267 unsigned scale = SkAlpha255To256(srcWeight); 267 unsigned scale = SkAlpha255To256(srcWeight);
268 return SkFourByteInterp256(src, dst, scale); 268 return SkFourByteInterp256(src, dst, scale);
269 } 269 }
270 270
271 /** 271 /**
272 * 32b optimized version; currently appears to be 10% faster even on 64b 272 * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
273 * architectures than an equivalent 64b version and 30% faster than
274 * SkFourByteInterp(). Third parameter controls blending of the first two:
275 * (src, dst, 0) returns dst
276 * (src, dst, 256) returns src
277 * ** Does not match the results of SkFourByteInterp256() because we use
278 * a more accurate scale computation!
279 * TODO: migrate Skia function to using an accurate 255->256 alpha
280 * conversion.
281 */ 273 */
282 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, 274 static inline void SkSplay(SkPMColor color, uint32_t* ag, uint32_t* rb) {
283 SkPMColor dst, 275 const uint32_t mask = 0x00FF00FF;
284 unsigned scale) { 276 *ag = (color >> 8) & mask;
277 *rb = color & mask;
278 }
279
280 /**
281 * 0xAARRGGBB -> 0x00AA00GG00RR00BB
282 * (note, ARGB -> AGRB)
283 */
284 static inline uint64_t SkSplay(SkPMColor color) {
285 const uint32_t mask = 0x00FF00FF;
286 uint64_t agrb = (color >> 8) & mask; // 0x0000000000AA00GG
287 agrb <<= 32; // 0x00AA00GG00000000
288 agrb |= color & mask; // 0x00AA00GG00RR00BB
289 return agrb;
290 }
291
292 /**
293 * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
294 */
295 static inline SkPMColor SkUnsplay(uint32_t ag, uint32_t rb) {
296 const uint32_t mask = 0xFF00FF00;
297 return (ag & mask) | ((rb & mask) >> 8);
298 }
299
300 /**
301 * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
302 * (note, AGRB -> ARGB)
303 */
304 static inline SkPMColor SkUnsplay(uint64_t agrb) {
305 const uint32_t mask = 0xFF00FF00;
306 return ((agrb & mask) >> 8) | // 0x00RR00BB
307 ((agrb >> 32) & mask); // 0xAARRGGBB
308 }
309
310 static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
285 SkASSERT(scale <= 256); 311 SkASSERT(scale <= 256);
286 312
287 // Reorders ARGB to AG-RB in order to reduce the number of operations. 313 // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide.
288 const uint32_t mask = 0xFF00FF; 314 uint32_t src_ag, src_rb, dst_ag, dst_rb;
289 uint32_t src_rb = src & mask; 315 SkSplay(src, &src_ag, &src_rb);
290 uint32_t src_ag = (src >> 8) & mask; 316 SkSplay(dst, &dst_ag, &dst_rb);
291 uint32_t dst_rb = dst & mask;
292 uint32_t dst_ag = (dst >> 8) & mask;
293 317
294 uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb; 318 const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
295 uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag; 319 const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
296 320
297 return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8); 321 return SkUnsplay(ret_ag, ret_rb);
298 } 322 }
299 323
324 static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
325 SkASSERT(scale <= 256);
326 // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide.
327 return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst));
328 }
329
330 // TODO(mtklein): Replace slow versions with fast versions, using scale + (scale >>7) everywhere.
331
332 /**
333 * Same as SkFourByteInterp256, but faster.
334 */
335 static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
336 // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine.
337 if (sizeof(void*) == 4) {
338 return SkFastFourByteInterp256_32(src, dst, scale);
339 } else {
340 return SkFastFourByteInterp256_64(src, dst, scale);
341 }
342 }
343
344 /**
345 * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
346 * srcWeight scaling to [0, 256].
347 */
300 static inline SkPMColor SkFastFourByteInterp(SkPMColor src, 348 static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
301 SkPMColor dst, 349 SkPMColor dst,
302 U8CPU srcWeight) { 350 U8CPU srcWeight) {
303 SkASSERT(srcWeight <= 255); 351 SkASSERT(srcWeight <= 255);
304 // scale = srcWeight + (srcWeight >> 7) is more accurate than 352 // scale = srcWeight + (srcWeight >> 7) is more accurate than
305 // scale = srcWeight + 1, but 7% slower 353 // scale = srcWeight + 1, but 7% slower
306 return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7)); 354 return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7));
307 } 355 }
308 356
309 /** 357 /**
(...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after
858 int srcG = SkColorGetG(src); 906 int srcG = SkColorGetG(src);
859 int srcB = SkColorGetB(src); 907 int srcB = SkColorGetB(src);
860 908
861 for (int i = 0; i < width; i++) { 909 for (int i = 0; i < width; i++) {
862 dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i], 910 dst[i] = SkBlendLCD16Opaque(srcR, srcG, srcB, dst[i], mask[i],
863 opaqueDst); 911 opaqueDst);
864 } 912 }
865 } 913 }
866 914
867 #endif 915 #endif
OLDNEW
« no previous file with comments | « gyp/tests.gyp ('k') | tests/ColorPrivTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698