| Index: source/planar_functions.cc
|
| diff --git a/source/planar_functions.cc b/source/planar_functions.cc
|
| index 85425feaf98f09c90fb980f437a81e0954b33ae7..28750566ef339a37b21c632b1039f6f06835628d 100644
|
| --- a/source/planar_functions.cc
|
| +++ b/source/planar_functions.cc
|
| @@ -651,6 +651,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| uint8* dst_v, int dst_stride_v,
|
| int width, int height) {
|
| int y;
|
| + // Half width/height for UV.
|
| + int halfwidth = (width + 1) >> 1;
|
| void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
|
| const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
|
| void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
|
| @@ -674,15 +676,11 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| dst_y, dst_stride_y,
|
| width, height);
|
|
|
| - // Half width/height for UV.
|
| - width = (width + 1) >> 1;
|
| - height = (height + 1) >> 1;
|
| -
|
| #if defined(HAS_BLENDPLANEROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| // TODO(fbarchard): Implement any versions for odd width.
|
| // BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
|
| - if (IS_ALIGNED(width, 8)) {
|
| + if (IS_ALIGNED(halfwidth, 8)) {
|
| BlendPlaneRow = BlendPlaneRow_SSSE3;
|
| }
|
| }
|
| @@ -690,7 +688,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| #if defined(HAS_BLENDPLANEROW_AVX2)
|
| if (TestCpuFlag(kCpuHasAVX2)) {
|
| // BlendPlaneRow = BlendPlaneRow_Any_AVX2;
|
| - if (IS_ALIGNED(width, 16)) {
|
| + if (IS_ALIGNED(halfwidth, 16)) {
|
| BlendPlaneRow = BlendPlaneRow_AVX2;
|
| }
|
| }
|
| @@ -698,7 +696,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| #if defined(HAS_SCALEROWDOWN2_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| + if (IS_ALIGNED(halfwidth, 16)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_NEON;
|
| }
|
| }
|
| @@ -706,7 +704,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| #if defined(HAS_SCALEROWDOWN2_SSE2)
|
| if (TestCpuFlag(kCpuHasSSE2)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
|
| - if (IS_ALIGNED(width, 16)) {
|
| + if (IS_ALIGNED(halfwidth, 16)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_SSE2;
|
| }
|
| }
|
| @@ -714,20 +712,24 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
|
| #if defined(HAS_SCALEROWDOWN2_AVX2)
|
| if (TestCpuFlag(kCpuHasAVX2)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
|
| - if (IS_ALIGNED(width, 32)) {
|
| + if (IS_ALIGNED(halfwidth, 32)) {
|
| ScaleRowDown2 = ScaleRowDown2Box_AVX2;
|
| }
|
| }
|
| #endif
|
|
|
| // Row buffer for intermediate alpha pixels.
|
| - align_buffer_64(halfalpha, width);
|
| - for (y = 0; y < height; ++y) {
|
| + align_buffer_64(halfalpha, halfwidth);
|
| + for (y = 0; y < height; y += 2) {
|
| + // last row of odd height image use 1 row of alpha instead of 2.
|
| + if (y == (height - 1)) {
|
| + alpha_stride = 0;
|
| + }
|
| // Subsample 2 rows of UV to half width and half height.
|
| - ScaleRowDown2(alpha, alpha_stride, halfalpha, width);
|
| + ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
|
| alpha += alpha_stride * 2;
|
| - BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width);
|
| - BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width);
|
| + BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
|
| + BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
|
| src_u0 += src_stride_u0;
|
| src_u1 += src_stride_u1;
|
| dst_u += dst_stride_u;
|
|
|