Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(420)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 13060004: Partial reapply of r5364 minus the non-neon code path. (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « bench/BitmapBench.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm.h" 8 #include "SkBlitRow_opts_arm.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 499 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 510
511 /* do any residual iterations */ 511 /* do any residual iterations */
512 while (--count >= 0) { 512 while (--count >= 0) {
513 *dst = SkPMSrcOver(*src, *dst); 513 *dst = SkPMSrcOver(*src, *dst);
514 src += 1; 514 src += 1;
515 dst += 1; 515 dst += 1;
516 } 516 }
517 } 517 }
518 } 518 }
519 519
520 void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
521 const SkPMColor* SK_RESTRICT src,
522 int count, U8CPU alpha) {
523 SkASSERT(255 == alpha);
524
525 if (count <= 0)
526 return;
527
528 /* Use these to check if src is transparent or opaque */
529 const unsigned int ALPHA_OPAQ = 0xFF000000;
530 const unsigned int ALPHA_TRANS = 0x00FFFFFF;
531
532 #define UNROLL 4
533 const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1);
534 const SkPMColor* SK_RESTRICT src_temp = src;
535
536 /* set up the NEON variables */
537 uint8x8_t alpha_mask;
538 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
539 alpha_mask = vld1_u8(alpha_mask_setup);
540
541 uint8x8_t src_raw, dst_raw, dst_final;
542 uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
543 uint8x8_t dst_cooked;
544 uint16x8_t dst_wide;
545 uint8x8_t alpha_narrow;
546 uint16x8_t alpha_wide;
547
548 /* choose the first processing type */
549 if( src >= src_end)
550 goto TAIL;
551 if(*src <= ALPHA_TRANS)
552 goto ALPHA_0;
553 if(*src >= ALPHA_OPAQ)
554 goto ALPHA_255;
555 /* fall-thru */
556
557 ALPHA_1_TO_254:
558 do {
559
560 /* get the source */
561 src_raw = vreinterpret_u8_u32(vld1_u32(src));
562 src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
563
564 /* get and hold the dst too */
565 dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
566 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
567
568
569 /* get the alphas spread out properly */
570 alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
571 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
572 /* we collapsed (255-a)+1 ... */
573 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
574
575 /* spread the dest */
576 dst_wide = vmovl_u8(dst_raw);
577
578 /* alpha mul the dest */
579 dst_wide = vmulq_u16 (dst_wide, alpha_wide);
580 dst_cooked = vshrn_n_u16(dst_wide, 8);
581
582 /* sum -- ignoring any byte lane overflows */
583 dst_final = vadd_u8(src_raw, dst_cooked);
584
585 alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
586 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
587 /* we collapsed (255-a)+1 ... */
588 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
589
590 /* spread the dest */
591 dst_wide = vmovl_u8(dst_raw_2);
592
593 /* alpha mul the dest */
594 dst_wide = vmulq_u16 (dst_wide, alpha_wide);
595 dst_cooked = vshrn_n_u16(dst_wide, 8);
596
597 /* sum -- ignoring any byte lane overflows */
598 dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
599
600 vst1_u32(dst, vreinterpret_u32_u8(dst_final));
601 vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
602
603 src += UNROLL;
604 dst += UNROLL;
605
606 /* if 2 of the next pixels aren't between 1 and 254
607 it might make sense to go to the optimized loops */
608 if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ))
609 break;
610
611 } while(src < src_end);
612
613 if (src >= src_end)
614 goto TAIL;
615
616 if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)
617 goto ALPHA_255;
618
619 /*fall-thru*/
620
621 ALPHA_0:
622
623 /*In this state, we know the current alpha is 0 and
624 we optimize for the next alpha also being zero. */
625 src_temp = src; //so we don't have to increment dst every time
626 do {
627 if(*(++src) > ALPHA_TRANS)
628 break;
629 if(*(++src) > ALPHA_TRANS)
630 break;
631 if(*(++src) > ALPHA_TRANS)
632 break;
633 if(*(++src) > ALPHA_TRANS)
634 break;
635 } while(src < src_end);
636
637 dst += (src - src_temp);
638
639 /* no longer alpha 0, so determine where to go next. */
640 if( src >= src_end)
641 goto TAIL;
642 if(*src >= ALPHA_OPAQ)
643 goto ALPHA_255;
644 else
645 goto ALPHA_1_TO_254;
646
647 ALPHA_255:
648 while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
649 dst[0]=src[0];
650 dst[1]=src[1];
651 dst[2]=src[2];
652 dst[3]=src[3];
653 src+=UNROLL;
654 dst+=UNROLL;
655 if(src >= src_end)
656 goto TAIL;
657 }
658
659 //Handle remainder.
660 if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
661 if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
662 if(*src >= ALPHA_OPAQ) { *dst++ = *src++; }
663 }
664 }
665
666 if( src >= src_end)
667 goto TAIL;
668 if(*src <= ALPHA_TRANS)
669 goto ALPHA_0;
670 else
671 goto ALPHA_1_TO_254;
672
673 TAIL:
674 /* do any residual iterations */
675 src_end += UNROLL + 1; //goto the real end
676 while(src != src_end) {
677 if( *src != 0 ) {
678 if( *src >= ALPHA_OPAQ ) {
679 *dst = *src;
680 }
681 else {
682 *dst = SkPMSrcOver(*src, *dst);
683 }
684 }
685 src++;
686 dst++;
687 }
688
689 #undef UNROLL
690 return;
691 }
520 692
521 /* Neon version of S32_Blend_BlitRow32() 693 /* Neon version of S32_Blend_BlitRow32()
522 * portable version is in src/core/SkBlitRow_D32.cpp 694 * portable version is in src/core/SkBlitRow_D32.cpp
523 */ 695 */
524 void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 696 void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
525 const SkPMColor* SK_RESTRICT src, 697 const SkPMColor* SK_RESTRICT src,
526 int count, U8CPU alpha) { 698 int count, U8CPU alpha) {
527 SkASSERT(alpha <= 255); 699 SkASSERT(alpha <= 255);
528 if (count > 0) { 700 if (count > 0) {
529 uint16_t src_scale = SkAlpha255To256(alpha); 701 uint16_t src_scale = SkAlpha255To256(alpha);
(...skipping 570 matching lines...) Expand 10 before | Expand all | Expand 10 after
1100 // dither 1272 // dither
1101 NULL, // S32_D4444_Opaque_Dither, 1273 NULL, // S32_D4444_Opaque_Dither,
1102 NULL, // S32_D4444_Blend_Dither, 1274 NULL, // S32_D4444_Blend_Dither,
1103 NULL, // S32A_D4444_Opaque_Dither, 1275 NULL, // S32A_D4444_Opaque_Dither,
1104 NULL, // S32A_D4444_Blend_Dither 1276 NULL, // S32A_D4444_Blend_Dither
1105 }; 1277 };
1106 1278
1107 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 1279 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
1108 NULL, // S32_Opaque, 1280 NULL, // S32_Opaque,
1109 S32_Blend_BlitRow32_neon, // S32_Blend, 1281 S32_Blend_BlitRow32_neon, // S32_Blend,
1110 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1282 /*
1283 * We have two choices for S32A_Opaque procs. One of them reads the src alpha
1284 * value and attempts to optimize accordingly. The optimization is
1285 * sensitive to the source content and is not a win in all cases. For
1286 * example, if there are a lot of transitions between the alpha states,
1287 * the performance will almost certainly be worse. However, for many
1288 * common cases the performance is equivalent or better than the standard
1289 * case where we do not inspect the src alpha.
1290 */
1291 #if SK_A32_SHIFT == 24
1292 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor
1293 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1294 #else
1295 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1296 #endif
1111 S32A_Blend_BlitRow32_arm // S32A_Blend 1297 S32A_Blend_BlitRow32_arm // S32A_Blend
1112 }; 1298 };
OLDNEW
« no previous file with comments | « bench/BitmapBench.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698