Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(603)

Side by Side Diff: src/core/SkMipMap.cpp

Issue 1593073002: speedup mip builders (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: 2. remove redundant load/expand of 1 pixel per-row Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 Google Inc. 2 * Copyright 2013 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkMipMap.h" 8 #include "SkMipMap.h"
9 #include "SkBitmap.h" 9 #include "SkBitmap.h"
10 #include "SkColorPriv.h" 10 #include "SkColorPriv.h"
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after
376 // 376 //
377 // To produce each mip level, we need to filter down by 1/2 (e.g. 100x100 -> 50,50) 377 // To produce each mip level, we need to filter down by 1/2 (e.g. 100x100 -> 50,50)
378 // If the starting dimension is odd, we floor the size of the lower level (e.g. 101 -> 50) 378 // If the starting dimension is odd, we floor the size of the lower level (e.g. 101 -> 50)
379 // In those (odd) cases, we use a triangle filter, with 1-pixel overlap between samplings, 379 // In those (odd) cases, we use a triangle filter, with 1-pixel overlap between samplings,
380 // else for even cases, we just use a 2x box filter. 380 // else for even cases, we just use a 2x box filter.
381 // 381 //
382 // This produces 4 possible filters: 2x2 2x3 3x2 3x3 where WxH indicates the number of src pixels 382 // This produces 4 possible filters: 2x2 2x3 3x2 3x3 where WxH indicates the number of src pixels
383 // we need to sample in each dimension to produce 1 dst pixel. 383 // we need to sample in each dimension to produce 1 dst pixel.
384 // 384 //
385 385
386 template <typename F> void downsample_2_2(void* dst, const void* src, size_t srcRB) { 386 template <typename F> void downsample_2_2(void* dst, const void* src, size_t srcRB, int count) {
387 auto p0 = static_cast<const typename F::Type*>(src); 387 auto p0 = static_cast<const typename F::Type*>(src);
388 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); 388 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB);
389 auto d = static_cast<typename F::Type*>(dst);
390
391 for (int i = 0; i < count; ++i) {
392 auto c00 = F::Expand(p0[0]);
393 auto c01 = F::Expand(p0[1]);
394 auto c10 = F::Expand(p1[0]);
395 auto c11 = F::Expand(p1[1]);
396 auto c = c00 + c10 + c01 + c11;
397
398 d[i] = F::Compact(c >> 2);
399 p0 += 2;
400 p1 += 2;
401 }
402 }
403
404 template <typename F> void downsample_3_2(void* dst, const void* src, size_t srcRB, int count) {
405 SkASSERT(count > 0);
406 auto p0 = static_cast<const typename F::Type*>(src);
407 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB);
408 auto d = static_cast<typename F::Type*>(dst);
389 409
390 auto c00 = F::Expand(p0[0]); 410 auto c00 = F::Expand(p0[0]);
391 auto c01 = F::Expand(p0[1]); 411 auto c01 = F::Expand(p0[1]);
392 auto c10 = F::Expand(p1[0]);
393 auto c11 = F::Expand(p1[1]);
394
395 auto c = c00 + c10 + c01 + c11;
396 *(typename F::Type*)dst = F::Compact(c >> 2);
397 }
398
399 template <typename F> void downsample_3_2(void* dst, const void* src, size_t srcRB) {
400 auto p0 = static_cast<const typename F::Type*>(src);
401 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB);
402
403 auto c00 = F::Expand(p0[0]);
404 auto c01 = F::Expand(p0[1]);
405 auto c02 = F::Expand(p0[2]); 412 auto c02 = F::Expand(p0[2]);
406 auto c10 = F::Expand(p1[0]); 413 auto c10 = F::Expand(p1[0]);
407 auto c11 = F::Expand(p1[1]); 414 auto c11 = F::Expand(p1[1]);
408 auto c12 = F::Expand(p1[2]); 415 auto c12 = F::Expand(p1[2]);
416 auto c = add_121(c00, c01, c02) + add_121(c10, c11, c12);
417 d[0] = F::Compact(c >> 3);
409 418
410 auto c = add_121(c00, c01, c02) + add_121(c10, c11, c12); 419 for (int i = 1; i < count; ++i) {
411 *(typename F::Type*)dst = F::Compact(c >> 3); 420 p0 += 2;
421 p1 += 2;
422 c00 = c02;
423 c01 = F::Expand(p0[1]);
424 c02 = F::Expand(p0[2]);
425 c10 = c12;
426 c11 = F::Expand(p1[1]);
427 c12 = F::Expand(p1[2]);
428 auto c = add_121(c00, c01, c02) + add_121(c10, c11, c12);
429 d[i] = F::Compact(c >> 3);
430 }
f(malita) 2016/01/16 16:46:10 Instead of unrolling a full iteration, could we se
reed1 2016/01/16 17:03:10 Good catch! Thanks.
412 } 431 }
413 432
414 template <typename F> void downsample_2_3(void* dst, const void* src, size_t srcRB) { 433 template <typename F> void downsample_2_3(void* dst, const void* src, size_t srcRB, int count) {
415 auto p0 = static_cast<const typename F::Type*>(src); 434 auto p0 = static_cast<const typename F::Type*>(src);
416 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); 435 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB);
417 auto p2 = (const typename F::Type*)((const char*)p1 + srcRB); 436 auto p2 = (const typename F::Type*)((const char*)p1 + srcRB);
437 auto d = static_cast<typename F::Type*>(dst);
438
439 for (int i = 0; i < count; ++i) {
440 auto c00 = F::Expand(p0[0]);
441 auto c01 = F::Expand(p0[1]);
442 auto c10 = F::Expand(p1[0]);
443 auto c11 = F::Expand(p1[1]);
444 auto c20 = F::Expand(p2[0]);
445 auto c21 = F::Expand(p2[1]);
446 auto c = add_121(c00, c10, c20) + add_121(c01, c11, c21);
418 447
419 auto c00 = F::Expand(p0[0]); 448 d[i] = F::Compact(c >> 3);
420 auto c01 = F::Expand(p0[1]); 449 p0 += 2;
421 auto c10 = F::Expand(p1[0]); 450 p1 += 2;
422 auto c11 = F::Expand(p1[1]); 451 p2 += 2;
423 auto c20 = F::Expand(p2[0]); 452 }
424 auto c21 = F::Expand(p2[1]);
425 auto c = add_121(c00, c10, c20) + add_121(c01, c11, c21);
426 *(typename F::Type*)dst = F::Compact(c >> 3);
427 } 453 }
428 454
429 template <typename F> void downsample_3_3(void* dst, const void* src, size_t srcRB) { 455 template <typename F> void downsample_3_3(void* dst, const void* src, size_t srcRB, int count) {
430 auto p0 = static_cast<const typename F::Type*>(src); 456 auto p0 = static_cast<const typename F::Type*>(src);
431 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB); 457 auto p1 = (const typename F::Type*)((const char*)p0 + srcRB);
432 auto p2 = (const typename F::Type*)((const char*)p1 + srcRB); 458 auto p2 = (const typename F::Type*)((const char*)p1 + srcRB);
433 459 auto d = static_cast<typename F::Type*>(dst);
460
434 auto c00 = F::Expand(p0[0]); 461 auto c00 = F::Expand(p0[0]);
435 auto c01 = F::Expand(p0[1]); 462 auto c01 = F::Expand(p0[1]);
436 auto c02 = F::Expand(p0[2]); 463 auto c02 = F::Expand(p0[2]);
437 auto c10 = F::Expand(p1[0]); 464 auto c10 = F::Expand(p1[0]);
438 auto c11 = F::Expand(p1[1]); 465 auto c11 = F::Expand(p1[1]);
439 auto c12 = F::Expand(p1[2]); 466 auto c12 = F::Expand(p1[2]);
440 auto c20 = F::Expand(p2[0]); 467 auto c20 = F::Expand(p2[0]);
441 auto c21 = F::Expand(p2[1]); 468 auto c21 = F::Expand(p2[1]);
442 auto c22 = F::Expand(p2[2]); 469 auto c22 = F::Expand(p2[2]);
470 auto c = add_121(c00, c01, c02) + (add_121(c10, c11, c12) << 1) + add_121(c20, c21, c22);
471 d[0] = F::Compact(c >> 4);
443 472
444 auto c = add_121(c00, c01, c02) + (add_121(c10, c11, c12) << 1) + add_121(c20, c21, c22); 473 for (int i = 1; i < count; ++i) {
445 *(typename F::Type*)dst = F::Compact(c >> 4); 474 c00 = c02;
475 c01 = F::Expand(p0[1]);
476 c02 = F::Expand(p0[2]);
477 c10 = c12;
478 c11 = F::Expand(p1[1]);
479 c12 = F::Expand(p1[2]);
480 c20 = c22;
481 c21 = F::Expand(p2[1]);
482 c22 = F::Expand(p2[2]);
483 c = add_121(c00, c01, c02) + (add_121(c10, c11, c12) << 1) + add_121(c20, c21, c22);
484
485 d[i] = F::Compact(c >> 4);
486 p0 += 2;
487 p1 += 2;
488 p2 += 2;
489 }
f(malita) 2016/01/16 16:46:10 Ditto.
reed1 2016/01/16 17:03:10 Done.
446 } 490 }
447 491
448 //////////////////////////////////////////////////////////////////////////////// /////////////////// 492 //////////////////////////////////////////////////////////////////////////////// ///////////////////
449 493
450 size_t SkMipMap::AllocLevelsSize(int levelCount, size_t pixelSize) { 494 size_t SkMipMap::AllocLevelsSize(int levelCount, size_t pixelSize) {
451 if (levelCount < 0) { 495 if (levelCount < 0) {
452 return 0; 496 return 0;
453 } 497 }
454 int64_t size = sk_64_mul(levelCount + 1, sizeof(Level)) + pixelSize; 498 int64_t size = sk_64_mul(levelCount + 1, sizeof(Level)) + pixelSize;
455 if (!sk_64_isS32(size)) { 499 if (!sk_64_isS32(size)) {
456 return 0; 500 return 0;
457 } 501 }
458 return sk_64_asS32(size); 502 return sk_64_asS32(size);
459 } 503 }
460 504
461 SkMipMap* SkMipMap::Build(const SkBitmap& src, SkDiscardableFactoryProc fact) { 505 SkMipMap* SkMipMap::Build(const SkBitmap& src, SkDiscardableFactoryProc fact) {
462 typedef void FilterProc(void*, const void* srcPtr, size_t srcRB); 506 typedef void FilterProc(void*, const void* srcPtr, size_t srcRB, int count);
463 507
464 FilterProc* proc_2_2 = nullptr; 508 FilterProc* proc_2_2 = nullptr;
465 FilterProc* proc_2_3 = nullptr; 509 FilterProc* proc_2_3 = nullptr;
466 FilterProc* proc_3_2 = nullptr; 510 FilterProc* proc_3_2 = nullptr;
467 FilterProc* proc_3_3 = nullptr; 511 FilterProc* proc_3_3 = nullptr;
468 512
469 const SkColorType ct = src.colorType(); 513 const SkColorType ct = src.colorType();
470 const SkAlphaType at = src.alphaType(); 514 const SkAlphaType at = src.alphaType();
471 switch (ct) { 515 switch (ct) {
472 case kRGBA_8888_SkColorType: 516 case kRGBA_8888_SkColorType:
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
567 rowBytes = SkToU32(SkColorTypeMinRowBytes(ct, width)); 611 rowBytes = SkToU32(SkColorTypeMinRowBytes(ct, width));
568 612
569 levels[i].fPixels = addr; 613 levels[i].fPixels = addr;
570 levels[i].fWidth = width; 614 levels[i].fWidth = width;
571 levels[i].fHeight = height; 615 levels[i].fHeight = height;
572 levels[i].fRowBytes = rowBytes; 616 levels[i].fRowBytes = rowBytes;
573 levels[i].fScale = (float)width / src.width(); 617 levels[i].fScale = (float)width / src.width();
574 618
575 SkPixmap dstPM(SkImageInfo::Make(width, height, ct, at), addr, rowBytes); 619 SkPixmap dstPM(SkImageInfo::Make(width, height, ct, at), addr, rowBytes);
576 620
577 const size_t pixelSize = srcPM.info().bytesPerPixel();
578
579 const void* srcBasePtr = srcPM.addr(); 621 const void* srcBasePtr = srcPM.addr();
580 void* dstBasePtr = dstPM.writable_addr(); 622 void* dstBasePtr = dstPM.writable_addr();
581 623
582 FilterProc* proc; 624 FilterProc* proc;
583 if (prevH & 1) { // src-height is 3 625 if (prevH & 1) { // src-height is 3
584 if (prevW & 1) { // src-width is 3 626 if (prevW & 1) { // src-width is 3
585 proc = proc_3_3; 627 proc = proc_3_3;
586 } else { // src-width is 2 628 } else { // src-width is 2
587 proc = proc_2_3; 629 proc = proc_2_3;
588 } 630 }
589 } else { // src-height is 2 631 } else { // src-height is 2
590 if (prevW & 1) { // src-width is 3 632 if (prevW & 1) { // src-width is 3
591 proc = proc_3_2; 633 proc = proc_3_2;
592 } else { // src-width is 2 634 } else { // src-width is 2
593 proc = proc_2_2; 635 proc = proc_2_2;
594 } 636 }
595 } 637 }
596 638
597 const size_t srcRB = srcPM.rowBytes(); 639 const size_t srcRB = srcPM.rowBytes();
598 for (int y = 0; y < height; y++) { 640 for (int y = 0; y < height; y++) {
599 const void* srcPtr = srcBasePtr; 641 proc(dstBasePtr, srcBasePtr, srcRB, width);
600 void* dstPtr = dstBasePtr;
601
602 for (int x = 0; x < width; x++) {
603 proc(dstPtr, srcPtr, srcRB);
604 srcPtr = (char*)srcPtr + pixelSize * 2;
605 dstPtr = (char*)dstPtr + pixelSize;
606 }
607
608 srcBasePtr = (char*)srcBasePtr + srcRB * 2; // jump two rows 642 srcBasePtr = (char*)srcBasePtr + srcRB * 2; // jump two rows
609 dstBasePtr = (char*)dstBasePtr + dstPM.rowBytes(); 643 dstBasePtr = (char*)dstBasePtr + dstPM.rowBytes();
610 } 644 }
611 srcPM = dstPM; 645 srcPM = dstPM;
612 addr += height * rowBytes; 646 addr += height * rowBytes;
613 prevW = width; 647 prevW = width;
614 prevH = height; 648 prevH = height;
615 } 649 }
616 SkASSERT(addr == baseAddr + size); 650 SkASSERT(addr == baseAddr + size);
617 651
(...skipping 27 matching lines...) Expand all
645 } 679 }
646 680
647 if (level > fCount) { 681 if (level > fCount) {
648 level = fCount; 682 level = fCount;
649 } 683 }
650 if (levelPtr) { 684 if (levelPtr) {
651 *levelPtr = fLevels[level - 1]; 685 *levelPtr = fLevels[level - 1];
652 } 686 }
653 return true; 687 return true;
654 } 688 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698