OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 633 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
644 const uint8* src_v0, int src_stride_v0, | 644 const uint8* src_v0, int src_stride_v0, |
645 const uint8* src_y1, int src_stride_y1, | 645 const uint8* src_y1, int src_stride_y1, |
646 const uint8* src_u1, int src_stride_u1, | 646 const uint8* src_u1, int src_stride_u1, |
647 const uint8* src_v1, int src_stride_v1, | 647 const uint8* src_v1, int src_stride_v1, |
648 const uint8* alpha, int alpha_stride, | 648 const uint8* alpha, int alpha_stride, |
649 uint8* dst_y, int dst_stride_y, | 649 uint8* dst_y, int dst_stride_y, |
650 uint8* dst_u, int dst_stride_u, | 650 uint8* dst_u, int dst_stride_u, |
651 uint8* dst_v, int dst_stride_v, | 651 uint8* dst_v, int dst_stride_v, |
652 int width, int height) { | 652 int width, int height) { |
653 int y; | 653 int y; |
| 654 // Half width/height for UV. |
| 655 int halfwidth = (width + 1) >> 1; |
654 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, | 656 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1, |
655 const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C; | 657 const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C; |
656 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, | 658 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, |
657 uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C; | 659 uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C; |
658 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || | 660 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || |
659 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { | 661 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { |
660 return -1; | 662 return -1; |
661 } | 663 } |
662 | 664 |
663 // Negative height means invert the image. | 665 // Negative height means invert the image. |
664 if (height < 0) { | 666 if (height < 0) { |
665 height = -height; | 667 height = -height; |
666 dst_y = dst_y + (height - 1) * dst_stride_y; | 668 dst_y = dst_y + (height - 1) * dst_stride_y; |
667 dst_stride_y = -dst_stride_y; | 669 dst_stride_y = -dst_stride_y; |
668 } | 670 } |
669 | 671 |
670 // Blend Y plane. | 672 // Blend Y plane. |
671 BlendPlane(src_y0, src_stride_y0, | 673 BlendPlane(src_y0, src_stride_y0, |
672 src_y1, src_stride_y1, | 674 src_y1, src_stride_y1, |
673 alpha, alpha_stride, | 675 alpha, alpha_stride, |
674 dst_y, dst_stride_y, | 676 dst_y, dst_stride_y, |
675 width, height); | 677 width, height); |
676 | 678 |
677 // Half width/height for UV. | |
678 width = (width + 1) >> 1; | |
679 height = (height + 1) >> 1; | |
680 | |
681 #if defined(HAS_BLENDPLANEROW_SSSE3) | 679 #if defined(HAS_BLENDPLANEROW_SSSE3) |
682 if (TestCpuFlag(kCpuHasSSSE3)) { | 680 if (TestCpuFlag(kCpuHasSSSE3)) { |
683 // TODO(fbarchard): Implement any versions for odd width. | 681 // TODO(fbarchard): Implement any versions for odd width. |
684 // BlendPlaneRow = BlendPlaneRow_Any_SSSE3; | 682 // BlendPlaneRow = BlendPlaneRow_Any_SSSE3; |
685 if (IS_ALIGNED(width, 8)) { | 683 if (IS_ALIGNED(halfwidth, 8)) { |
686 BlendPlaneRow = BlendPlaneRow_SSSE3; | 684 BlendPlaneRow = BlendPlaneRow_SSSE3; |
687 } | 685 } |
688 } | 686 } |
689 #endif | 687 #endif |
690 #if defined(HAS_BLENDPLANEROW_AVX2) | 688 #if defined(HAS_BLENDPLANEROW_AVX2) |
691 if (TestCpuFlag(kCpuHasAVX2)) { | 689 if (TestCpuFlag(kCpuHasAVX2)) { |
692 // BlendPlaneRow = BlendPlaneRow_Any_AVX2; | 690 // BlendPlaneRow = BlendPlaneRow_Any_AVX2; |
693 if (IS_ALIGNED(width, 16)) { | 691 if (IS_ALIGNED(halfwidth, 16)) { |
694 BlendPlaneRow = BlendPlaneRow_AVX2; | 692 BlendPlaneRow = BlendPlaneRow_AVX2; |
695 } | 693 } |
696 } | 694 } |
697 #endif | 695 #endif |
698 #if defined(HAS_SCALEROWDOWN2_NEON) | 696 #if defined(HAS_SCALEROWDOWN2_NEON) |
699 if (TestCpuFlag(kCpuHasNEON)) { | 697 if (TestCpuFlag(kCpuHasNEON)) { |
700 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; | 698 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; |
701 if (IS_ALIGNED(width, 16)) { | 699 if (IS_ALIGNED(halfwidth, 16)) { |
702 ScaleRowDown2 = ScaleRowDown2Box_NEON; | 700 ScaleRowDown2 = ScaleRowDown2Box_NEON; |
703 } | 701 } |
704 } | 702 } |
705 #endif | 703 #endif |
706 #if defined(HAS_SCALEROWDOWN2_SSE2) | 704 #if defined(HAS_SCALEROWDOWN2_SSE2) |
707 if (TestCpuFlag(kCpuHasSSE2)) { | 705 if (TestCpuFlag(kCpuHasSSE2)) { |
708 ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2; | 706 ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2; |
709 if (IS_ALIGNED(width, 16)) { | 707 if (IS_ALIGNED(halfwidth, 16)) { |
710 ScaleRowDown2 = ScaleRowDown2Box_SSE2; | 708 ScaleRowDown2 = ScaleRowDown2Box_SSE2; |
711 } | 709 } |
712 } | 710 } |
713 #endif | 711 #endif |
714 #if defined(HAS_SCALEROWDOWN2_AVX2) | 712 #if defined(HAS_SCALEROWDOWN2_AVX2) |
715 if (TestCpuFlag(kCpuHasAVX2)) { | 713 if (TestCpuFlag(kCpuHasAVX2)) { |
716 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; | 714 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; |
717 if (IS_ALIGNED(width, 32)) { | 715 if (IS_ALIGNED(halfwidth, 32)) { |
718 ScaleRowDown2 = ScaleRowDown2Box_AVX2; | 716 ScaleRowDown2 = ScaleRowDown2Box_AVX2; |
719 } | 717 } |
720 } | 718 } |
721 #endif | 719 #endif |
722 | 720 |
723 // Row buffer for intermediate alpha pixels. | 721 // Row buffer for intermediate alpha pixels. |
724 align_buffer_64(halfalpha, width); | 722 align_buffer_64(halfalpha, halfwidth); |
725 for (y = 0; y < height; ++y) { | 723 for (y = 0; y < height; y += 2) { |
| 724 // The last row of an odd-height image uses 1 row of alpha instead of 2. |
| 725 if (y == (height - 1)) { |
| 726 alpha_stride = 0; |
| 727 } |
726 // Subsample 2 rows of alpha to half width and half height. | 728 // Subsample 2 rows of alpha to half width and half height. |
727 ScaleRowDown2(alpha, alpha_stride, halfalpha, width); | 729 ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth); |
728 alpha += alpha_stride * 2; | 730 alpha += alpha_stride * 2; |
729 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width); | 731 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth); |
730 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width); | 732 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth); |
731 src_u0 += src_stride_u0; | 733 src_u0 += src_stride_u0; |
732 src_u1 += src_stride_u1; | 734 src_u1 += src_stride_u1; |
733 dst_u += dst_stride_u; | 735 dst_u += dst_stride_u; |
734 src_v0 += src_stride_v0; | 736 src_v0 += src_stride_v0; |
735 src_v1 += src_stride_v1; | 737 src_v1 += src_stride_v1; |
736 dst_v += dst_stride_v; | 738 dst_v += dst_stride_v; |
737 } | 739 } |
738 free_aligned_buffer_64(halfalpha); | 740 free_aligned_buffer_64(halfalpha); |
739 return 0; | 741 return 0; |
740 } | 742 } |
(...skipping 1884 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2625 } | 2627 } |
2626 free_aligned_buffer_64(rows); | 2628 free_aligned_buffer_64(rows); |
2627 } | 2629 } |
2628 return 0; | 2630 return 0; |
2629 } | 2631 } |
2630 | 2632 |
2631 #ifdef __cplusplus | 2633 #ifdef __cplusplus |
2632 } // extern "C" | 2634 } // extern "C" |
2633 } // namespace libyuv | 2635 } // namespace libyuv |
2634 #endif | 2636 #endif |
OLD | NEW |