Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Unified Diff: source/planar_functions.cc

Issue 1505433002: AVX2 YUV alpha blender and improved unittests (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: off by 1 fix on win Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | source/row_gcc.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/planar_functions.cc
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index b15b6e523b35229a7e162a1b239b8ef23194b718..85425feaf98f09c90fb980f437a81e0954b33ae7 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -17,6 +17,7 @@
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
+#include "libyuv/scale_row.h" // for ScaleRowDown2
#ifdef __cplusplus
namespace libyuv {
@@ -577,6 +578,167 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
return 0;
}
+// Alpha blend two planes row by row and store the result to the destination.
+//
+// src_y0, src_y1: the two source planes, with row strides src_stride_y0 and
+//   src_stride_y1.
+// alpha: alpha plane with row stride alpha_stride; each alpha row is handed
+//   to the row function together with the matching rows of both sources.
+// dst_y: destination plane with row stride dst_stride_y.
+// width, height: elements per row and number of rows. A negative height
+//   writes the destination rows bottom-up.
+//
+// Returns 0 on success, or -1 if any pointer is NULL, width <= 0 or
+// height == 0.
+LIBYUV_API
+int BlendPlane(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ int width, int height) {
+ int y;
+ // Row function used for every row. Starts as the _C version and may be
+ // replaced by an SSSE3 or AVX2 version below.
+ void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
+ if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ // Only the destination is flipped: it is pointed at its last row and walked
+ // with a negated stride, while both sources and alpha are read top-down.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Coalesce rows for Y plane.
+ // When all four strides equal width the rows are contiguous, so the whole
+ // plane is handled by a single row-function call of width * height elements.
+ // After a negative-height flip dst_stride_y is negative and cannot equal the
+ // (positive) width, so flipped images are never coalesced.
+ // NOTE(review): width *= height is plain int arithmetic; confirm callers
+ // cannot pass planes large enough to overflow it.
+ if (src_stride_y0 == width &&
+ src_stride_y1 == width &&
+ alpha_stride == width &&
+ dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
+ }
+
+ // The alignment checks below see the coalesced width when rows were merged.
+ // A width that fails a check keeps whichever function was selected before it.
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+// TODO(fbarchard): Implement any versions for odd width.
+// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+ // AVX2 is tested after SSSE3, so it wins when both checks pass.
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+
+ // Blend one row per iteration, then step all four pointers by their strides
+ // (all zero when the rows were coalesced above).
+ for (y = 0; y < height; ++y) {
+ BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
+ src_y0 += src_stride_y0;
+ src_y1 += src_stride_y1;
+ alpha += alpha_stride;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
+// NOTE(review): MAXTWIDTH is not referenced anywhere in the visible part of
+// this diff; confirm whether it is still needed.
+#define MAXTWIDTH 2048
+// Alpha blend two I420 (Y, U, V plane) images and store to the destination.
+//
+// The Y planes are blended at full size by BlendPlane() using alpha as given.
+// The U and V planes are blended at ((width + 1) / 2) x ((height + 1) / 2);
+// for each chroma row, alpha is first reduced to that width into a temporary
+// row buffer by a ScaleRowDown2Box row function, and that reduced row is used
+// for both U and V.
+//
+// Returns 0 on success, or -1 if any pointer is NULL, width <= 0 or
+// height == 0.
+LIBYUV_API
+int I420Blend(const uint8* src_y0, int src_stride_y0,
+ const uint8* src_u0, int src_stride_u0,
+ const uint8* src_v0, int src_stride_v0,
+ const uint8* src_y1, int src_stride_y1,
+ const uint8* src_u1, int src_stride_u1,
+ const uint8* src_v1, int src_stride_v1,
+ const uint8* alpha, int alpha_stride,
+ uint8* dst_y, int dst_stride_y,
+ uint8* dst_u, int dst_stride_u,
+ uint8* dst_v, int dst_stride_v,
+ int width, int height) {
+ int y;
+ // Row functions for the chroma planes. Both start as the _C versions and may
+ // be replaced by CPU-specific versions below.
+ void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
+ const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
+ void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
+ uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
+ if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
+ !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ // NOTE(review): only dst_y is flipped here. dst_u and dst_v keep their
+ // original pointers and strides and are written top-down in the loop below,
+ // so a negative height produces a flipped Y plane with unflipped chroma.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Blend Y plane.
+ // The return value is not checked: the pointers, width and height passed
+ // here were already validated above, and height is positive at this point.
+ BlendPlane(src_y0, src_stride_y0,
+ src_y1, src_stride_y1,
+ alpha, alpha_stride,
+ dst_y, dst_stride_y,
+ width, height);
+
+ // Half width/height for UV.
+ // From here on width and height are the chroma plane dimensions, rounded up.
+ width = (width + 1) >> 1;
+ height = (height + 1) >> 1;
+
+ // All alignment checks below are made against the halved width. Within each
+ // family a later block overrides the choice made by an earlier one.
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+// TODO(fbarchard): Implement any versions for odd width.
+// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+ // Unlike BlendPlaneRow, the scaler has _Any_ variants that are selected
+ // whenever the CPU flag is set; the non-Any variant replaces it only when
+ // the alignment check passes.
+#if defined(HAS_SCALEROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ScaleRowDown2 = ScaleRowDown2Box_AVX2;
+ }
+ }
+#endif
+
+ // Row buffer for intermediate alpha pixels.
+ // Sized to the halved width and released by free_aligned_buffer_64 below.
+ // NOTE(review): no allocation-failure check is visible here; confirm whether
+ // align_buffer_64 can fail and how that should be handled.
+ align_buffer_64(halfalpha, width);
+ for (y = 0; y < height; ++y) {
+ // Subsample alpha (not UV) into halfalpha at the halved width. The row
+ // function also receives alpha_stride so it can reach the next alpha row.
+ // NOTE(review): if the Box scaler reads the row at alpha + alpha_stride,
+ // the last iteration for an odd image height reads one row past the end
+ // of the alpha plane - confirm and special-case the final row if so.
+ // NOTE(review): likewise, for an odd image width the scaler presumably
+ // consumes 2 * width source bytes, one more than the alpha row holds -
+ // confirm against the ScaleRowDown2Box implementations.
+ ScaleRowDown2(alpha, alpha_stride, halfalpha, width);
+ // Each chroma row consumes two alpha rows.
+ alpha += alpha_stride * 2;
+ // The same reduced alpha row blends both chroma planes.
+ BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width);
+ BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width);
+ src_u0 += src_stride_u0;
+ src_u1 += src_stride_u1;
+ dst_u += dst_stride_u;
+ src_v0 += src_stride_v0;
+ src_v1 += src_stride_v1;
+ dst_v += dst_stride_v;
+ }
+ free_aligned_buffer_64(halfalpha);
+ return 0;
+}
+
// Multiply 2 ARGB images and store to destination.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
« no previous file with comments | « include/libyuv/version.h ('k') | source/row_gcc.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698