| Index: source/planar_functions.cc
|
| diff --git a/source/planar_functions.cc b/source/planar_functions.cc
|
| index b15b6e523b35229a7e162a1b239b8ef23194b718..85425feaf98f09c90fb980f437a81e0954b33ae7 100644
|
| --- a/source/planar_functions.cc
|
| +++ b/source/planar_functions.cc
|
| @@ -17,6 +17,7 @@
|
| #include "libyuv/mjpeg_decoder.h"
|
| #endif
|
| #include "libyuv/row.h"
|
| +#include "libyuv/scale_row.h" // for ScaleRowDown2
|
|
|
| #ifdef __cplusplus
|
| namespace libyuv {
|
| @@ -577,6 +578,167 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
| return 0;
|
| }
|
|
|
| +// Alpha-blends two single planes into dst_y, one row at a time.

| +// src_y0 and src_y1 are the two inputs; alpha is the per-pixel alpha plane.

| +// Returns 0 on success, or -1 if any pointer is NULL, width is not

| +// positive, or height is zero.

| +LIBYUV_API

| +int BlendPlane(const uint8* src_y0, int src_stride_y0,

| +               const uint8* src_y1, int src_stride_y1,

| +               const uint8* alpha, int alpha_stride,

| +               uint8* dst_y, int dst_stride_y,

| +               int width, int height) {

| +  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,

| +                        const uint8* alpha, uint8* dst, int width) =

| +      BlendPlaneRow_C;

| +  int rows_left;

| +

| +  // Reject missing planes and empty dimensions.

| +  if (!(src_y0 && src_y1 && alpha && dst_y)) {

| +    return -1;

| +  }

| +  if (width < 1 || height == 0) {

| +    return -1;

| +  }

| +

| +  // A negative height requests an inverted image: start writing at the

| +  // last destination row and step backwards.

| +  if (height < 0) {

| +    height = -height;

| +    dst_y += (height - 1) * dst_stride_y;

| +    dst_stride_y = -dst_stride_y;

| +  }

| +

| +  // If all four planes are tightly packed, process them as one long row.

| +  if (src_stride_y0 == width && src_stride_y1 == width &&

| +      alpha_stride == width && dst_stride_y == width) {

| +    width *= height;

| +    height = 1;

| +    src_stride_y0 = 0;

| +    src_stride_y1 = 0;

| +    alpha_stride = 0;

| +    dst_stride_y = 0;

| +  }

| +

| +  // Pick the fastest row function the CPU supports for this width.

| +#if defined(HAS_BLENDPLANEROW_SSSE3)

| +  // TODO(fbarchard): Implement BlendPlaneRow_Any_SSSE3 for odd widths.

| +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {

| +    BlendPlaneRow = BlendPlaneRow_SSSE3;

| +  }

| +#endif

| +#if defined(HAS_BLENDPLANEROW_AVX2)

| +  // TODO(fbarchard): Implement BlendPlaneRow_Any_AVX2 for odd widths.

| +  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {

| +    BlendPlaneRow = BlendPlaneRow_AVX2;

| +  }

| +#endif

| +

| +  for (rows_left = height; rows_left > 0; --rows_left) {

| +    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);

| +    src_y0 += src_stride_y0;

| +    src_y1 += src_stride_y1;

| +    alpha += alpha_stride;

| +    dst_y += dst_stride_y;

| +  }

| +  return 0;

| +}
|
| +
|
| +#define MAXTWIDTH 2048
|
| +// Alpha Blend YUV images and store to destination.

| +//

| +// The Y planes are blended with BlendPlane using the alpha plane as given

| +// (one alpha value per Y pixel). For the half-size U and V planes, each

| +// 2x2 block of alpha is box-filtered into a temporary row that is then used

| +// to blend both chroma planes.

| +//

| +// width and height are the dimensions of the Y plane; odd values are

| +// supported. A negative height inverts the destination image (all three

| +// planes).

| +// Returns 0 on success, or -1 if any pointer is NULL, width is not

| +// positive, or height is zero.

| +LIBYUV_API

| +int I420Blend(const uint8* src_y0, int src_stride_y0,

| +              const uint8* src_u0, int src_stride_u0,

| +              const uint8* src_v0, int src_stride_v0,

| +              const uint8* src_y1, int src_stride_y1,

| +              const uint8* src_u1, int src_stride_u1,

| +              const uint8* src_v1, int src_stride_v1,

| +              const uint8* alpha, int alpha_stride,

| +              uint8* dst_y, int dst_stride_y,

| +              uint8* dst_u, int dst_stride_u,

| +              uint8* dst_v, int dst_stride_v,

| +              int width, int height) {

| +  int y;

| +  int halfwidth;    // Chroma width: (width + 1) / 2.

| +  int halfheight;   // Chroma height: (height + 1) / 2.

| +  int alpha_pairs;  // Complete 2-pixel alpha column pairs per row.

| +  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,

| +                        const uint8* alpha, uint8* dst, int width) =

| +      BlendPlaneRow_C;

| +  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,

| +                        uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;

| +  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||

| +      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {

| +    return -1;

| +  }

| +

| +  // Negative height means invert the image. All three destination planes

| +  // must be flipped, otherwise inverted luma is paired with upright chroma.

| +  if (height < 0) {

| +    height = -height;

| +    halfheight = (height + 1) >> 1;

| +    dst_y = dst_y + (height - 1) * dst_stride_y;

| +    dst_stride_y = -dst_stride_y;

| +    dst_u = dst_u + (halfheight - 1) * dst_stride_u;

| +    dst_stride_u = -dst_stride_u;

| +    dst_v = dst_v + (halfheight - 1) * dst_stride_v;

| +    dst_stride_v = -dst_stride_v;

| +  }

| +

| +  // Blend Y plane. Arguments were validated above, so the result is 0.

| +  BlendPlane(src_y0, src_stride_y0,

| +             src_y1, src_stride_y1,

| +             alpha, alpha_stride,

| +             dst_y, dst_stride_y,

| +             width, height);

| +

| +  // Half width/height for UV, rounding odd dimensions up. width and height

| +  // keep their luma values so the alpha plane bounds stay known.

| +  halfwidth = (width + 1) >> 1;

| +  halfheight = (height + 1) >> 1;

| +  alpha_pairs = width >> 1;

| +

| +#if defined(HAS_BLENDPLANEROW_SSSE3)

| +  if (TestCpuFlag(kCpuHasSSSE3)) {

| +// TODO(fbarchard): Implement any versions for odd width.

| +//    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;

| +    if (IS_ALIGNED(halfwidth, 8)) {

| +      BlendPlaneRow = BlendPlaneRow_SSSE3;

| +    }

| +  }

| +#endif

| +#if defined(HAS_BLENDPLANEROW_AVX2)

| +  if (TestCpuFlag(kCpuHasAVX2)) {

| +//    BlendPlaneRow = BlendPlaneRow_Any_AVX2;

| +    if (IS_ALIGNED(halfwidth, 16)) {

| +      BlendPlaneRow = BlendPlaneRow_AVX2;

| +    }

| +  }

| +#endif

| +  // The alpha scaler only produces alpha_pairs pixels per row (the last

| +  // column of an odd-width image is handled separately below), so the

| +  // alignment checks use alpha_pairs.

| +#if defined(HAS_SCALEROWDOWN2_NEON)

| +  if (TestCpuFlag(kCpuHasNEON)) {

| +    ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;

| +    if (IS_ALIGNED(alpha_pairs, 16)) {

| +      ScaleRowDown2 = ScaleRowDown2Box_NEON;

| +    }

| +  }

| +#endif

| +#if defined(HAS_SCALEROWDOWN2_SSE2)

| +  if (TestCpuFlag(kCpuHasSSE2)) {

| +    ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;

| +    if (IS_ALIGNED(alpha_pairs, 16)) {

| +      ScaleRowDown2 = ScaleRowDown2Box_SSE2;

| +    }

| +  }

| +#endif

| +#if defined(HAS_SCALEROWDOWN2_AVX2)

| +  if (TestCpuFlag(kCpuHasAVX2)) {

| +    ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;

| +    if (IS_ALIGNED(alpha_pairs, 32)) {

| +      ScaleRowDown2 = ScaleRowDown2Box_AVX2;

| +    }

| +  }

| +#endif

| +

| +  // Row buffer for intermediate alpha pixels.

| +  align_buffer_64(halfalpha, halfwidth);

| +  // y counts luma rows; each iteration consumes two of them and produces

| +  // one chroma row, giving halfheight iterations in total.

| +  for (y = 0; y < height; y += 2) {

| +    // The last row of an odd height image has no row below it: filter it

| +    // with itself instead of reading past the end of the alpha plane.

| +    if (y == height - 1) {

| +      alpha_stride = 0;

| +    }

| +    // Subsample 2 rows of alpha to half width and half height.

| +    if (alpha_pairs > 0) {

| +      ScaleRowDown2(alpha, alpha_stride, halfalpha, alpha_pairs);

| +    }

| +    // The last column of an odd width image has no right neighbour:

| +    // average it vertically only, instead of reading past the row end.

| +    if (width & 1) {

| +      halfalpha[alpha_pairs] = (uint8)((alpha[width - 1] +

| +          alpha[width - 1 + alpha_stride] + 1) >> 1);

| +    }

| +    alpha += alpha_stride * 2;

| +    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);

| +    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);

| +    src_u0 += src_stride_u0;

| +    src_u1 += src_stride_u1;

| +    dst_u += dst_stride_u;

| +    src_v0 += src_stride_v0;

| +    src_v1 += src_stride_v1;

| +    dst_v += dst_stride_v;

| +  }

| +  free_aligned_buffer_64(halfalpha);

| +  return 0;

| +}
|
| +
|
| // Multiply 2 ARGB images and store to destination.
|
| LIBYUV_API
|
| int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
|
|