Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(406)

Side by Side Diff: source/planar_functions.cc

Issue 1505433002: AVX2 YUV alpha blender and improved unittests (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: off by 1 fix on win Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | source/row_gcc.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "libyuv/planar_functions.h" 11 #include "libyuv/planar_functions.h"
12 12
13 #include <string.h> // for memset() 13 #include <string.h> // for memset()
14 14
15 #include "libyuv/cpu_id.h" 15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG 16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h" 17 #include "libyuv/mjpeg_decoder.h"
18 #endif 18 #endif
19 #include "libyuv/row.h" 19 #include "libyuv/row.h"
20 #include "libyuv/scale_row.h" // for ScaleRowDown2
20 21
21 #ifdef __cplusplus 22 #ifdef __cplusplus
22 namespace libyuv { 23 namespace libyuv {
23 extern "C" { 24 extern "C" {
24 #endif 25 #endif
25 26
26 // Copy a plane of data 27 // Copy a plane of data
27 LIBYUV_API 28 LIBYUV_API
28 void CopyPlane(const uint8* src_y, int src_stride_y, 29 void CopyPlane(const uint8* src_y, int src_stride_y,
29 uint8* dst_y, int dst_stride_y, 30 uint8* dst_y, int dst_stride_y,
(...skipping 540 matching lines...) Expand 10 before | Expand all | Expand 10 after
570 571
571 for (y = 0; y < height; ++y) { 572 for (y = 0; y < height; ++y) {
572 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); 573 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
573 src_argb0 += src_stride_argb0; 574 src_argb0 += src_stride_argb0;
574 src_argb1 += src_stride_argb1; 575 src_argb1 += src_stride_argb1;
575 dst_argb += dst_stride_argb; 576 dst_argb += dst_stride_argb;
576 } 577 }
577 return 0; 578 return 0;
578 } 579 }
579 580
581 // Alpha Blend plane and store to destination.
582 LIBYUV_API
583 int BlendPlane(const uint8* src_y0, int src_stride_y0,
584 const uint8* src_y1, int src_stride_y1,
585 const uint8* alpha, int alpha_stride,
586 uint8* dst_y, int dst_stride_y,
587 int width, int height) {
588 int y;
589 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
590 const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
591 if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
592 return -1;
593 }
594 // Negative height means invert the image.
595 if (height < 0) {
596 height = -height;
597 dst_y = dst_y + (height - 1) * dst_stride_y;
598 dst_stride_y = -dst_stride_y;
599 }
600
601 // Coalesce rows for Y plane.
602 if (src_stride_y0 == width &&
603 src_stride_y1 == width &&
604 alpha_stride == width &&
605 dst_stride_y == width) {
606 width *= height;
607 height = 1;
608 src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
609 }
610
611 #if defined(HAS_BLENDPLANEROW_SSSE3)
612 if (TestCpuFlag(kCpuHasSSSE3)) {
613 // TODO(fbarchard): Implement any versions for odd width.
614 // BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
615 if (IS_ALIGNED(width, 8)) {
616 BlendPlaneRow = BlendPlaneRow_SSSE3;
617 }
618 }
619 #endif
620 #if defined(HAS_BLENDPLANEROW_AVX2)
621 if (TestCpuFlag(kCpuHasAVX2)) {
622 // BlendPlaneRow = BlendPlaneRow_Any_AVX2;
623 if (IS_ALIGNED(width, 16)) {
624 BlendPlaneRow = BlendPlaneRow_AVX2;
625 }
626 }
627 #endif
628
629 for (y = 0; y < height; ++y) {
630 BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
631 src_y0 += src_stride_y0;
632 src_y1 += src_stride_y1;
633 alpha += alpha_stride;
634 dst_y += dst_stride_y;
635 }
636 return 0;
637 }
638
639 #define MAXTWIDTH 2048
640 // Alpha Blend YUV images and store to destination.
641 LIBYUV_API
642 int I420Blend(const uint8* src_y0, int src_stride_y0,
643 const uint8* src_u0, int src_stride_u0,
644 const uint8* src_v0, int src_stride_v0,
645 const uint8* src_y1, int src_stride_y1,
646 const uint8* src_u1, int src_stride_u1,
647 const uint8* src_v1, int src_stride_v1,
648 const uint8* alpha, int alpha_stride,
649 uint8* dst_y, int dst_stride_y,
650 uint8* dst_u, int dst_stride_u,
651 uint8* dst_v, int dst_stride_v,
652 int width, int height) {
653 int y;
654 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
655 const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
656 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
657 uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
658 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
659 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
660 return -1;
661 }
662
663 // Negative height means invert the image.
664 if (height < 0) {
665 height = -height;
666 dst_y = dst_y + (height - 1) * dst_stride_y;
667 dst_stride_y = -dst_stride_y;
668 }
669
670 // Blend Y plane.
671 BlendPlane(src_y0, src_stride_y0,
672 src_y1, src_stride_y1,
673 alpha, alpha_stride,
674 dst_y, dst_stride_y,
675 width, height);
676
677 // Half width/height for UV.
678 width = (width + 1) >> 1;
679 height = (height + 1) >> 1;
680
681 #if defined(HAS_BLENDPLANEROW_SSSE3)
682 if (TestCpuFlag(kCpuHasSSSE3)) {
683 // TODO(fbarchard): Implement any versions for odd width.
684 // BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
685 if (IS_ALIGNED(width, 8)) {
686 BlendPlaneRow = BlendPlaneRow_SSSE3;
687 }
688 }
689 #endif
690 #if defined(HAS_BLENDPLANEROW_AVX2)
691 if (TestCpuFlag(kCpuHasAVX2)) {
692 // BlendPlaneRow = BlendPlaneRow_Any_AVX2;
693 if (IS_ALIGNED(width, 16)) {
694 BlendPlaneRow = BlendPlaneRow_AVX2;
695 }
696 }
697 #endif
698 #if defined(HAS_SCALEROWDOWN2_NEON)
699 if (TestCpuFlag(kCpuHasNEON)) {
700 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
701 if (IS_ALIGNED(width, 16)) {
702 ScaleRowDown2 = ScaleRowDown2Box_NEON;
703 }
704 }
705 #endif
706 #if defined(HAS_SCALEROWDOWN2_SSE2)
707 if (TestCpuFlag(kCpuHasSSE2)) {
708 ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
709 if (IS_ALIGNED(width, 16)) {
710 ScaleRowDown2 = ScaleRowDown2Box_SSE2;
711 }
712 }
713 #endif
714 #if defined(HAS_SCALEROWDOWN2_AVX2)
715 if (TestCpuFlag(kCpuHasAVX2)) {
716 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
717 if (IS_ALIGNED(width, 32)) {
718 ScaleRowDown2 = ScaleRowDown2Box_AVX2;
719 }
720 }
721 #endif
722
723 // Row buffer for intermediate alpha pixels.
724 align_buffer_64(halfalpha, width);
725 for (y = 0; y < height; ++y) {
726 // Subsample 2 rows of UV to half width and half height.
727 ScaleRowDown2(alpha, alpha_stride, halfalpha, width);
728 alpha += alpha_stride * 2;
729 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width);
730 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width);
731 src_u0 += src_stride_u0;
732 src_u1 += src_stride_u1;
733 dst_u += dst_stride_u;
734 src_v0 += src_stride_v0;
735 src_v1 += src_stride_v1;
736 dst_v += dst_stride_v;
737 }
738 free_aligned_buffer_64(halfalpha);
739 return 0;
740 }
741
580 // Multiply 2 ARGB images and store to destination. 742 // Multiply 2 ARGB images and store to destination.
581 LIBYUV_API 743 LIBYUV_API
582 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, 744 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
583 const uint8* src_argb1, int src_stride_argb1, 745 const uint8* src_argb1, int src_stride_argb1,
584 uint8* dst_argb, int dst_stride_argb, 746 uint8* dst_argb, int dst_stride_argb,
585 int width, int height) { 747 int width, int height) {
586 int y; 748 int y;
587 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, 749 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
588 int width) = ARGBMultiplyRow_C; 750 int width) = ARGBMultiplyRow_C;
589 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 751 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
(...skipping 1873 matching lines...) Expand 10 before | Expand all | Expand 10 after
2463 } 2625 }
2464 free_aligned_buffer_64(rows); 2626 free_aligned_buffer_64(rows);
2465 } 2627 }
2466 return 0; 2628 return 0;
2467 } 2629 }
2468 2630
2469 #ifdef __cplusplus 2631 #ifdef __cplusplus
2470 } // extern "C" 2632 } // extern "C"
2471 } // namespace libyuv 2633 } // namespace libyuv
2472 #endif 2634 #endif
OLDNEW
« no previous file with comments | « include/libyuv/version.h ('k') | source/row_gcc.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698