| Index: source/libvpx/third_party/libyuv/source/convert.cc
|
| diff --git a/source/libvpx/third_party/libyuv/source/convert.cc b/source/libvpx/third_party/libyuv/source/convert.cc
|
| index 41696c18f87866ecf9745ceb044ed3e97e1a1486..3ad6bd7a4b6eada9c7da37814d6269a98722745c 100644
|
| --- a/source/libvpx/third_party/libyuv/source/convert.cc
|
| +++ b/source/libvpx/third_party/libyuv/source/convert.cc
|
| @@ -817,22 +817,20 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
| src_stride_rgb24 = -src_stride_rgb24;
|
| }
|
|
|
| +// Neon version does direct RGB24 to YUV.
|
| #if defined(HAS_RGB24TOYROW_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| + RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
|
| RGB24ToYRow = RGB24ToYRow_Any_NEON;
|
| if (IS_ALIGNED(width, 8)) {
|
| RGB24ToYRow = RGB24ToYRow_NEON;
|
| + if (IS_ALIGNED(width, 16)) {
|
| + RGB24ToUVRow = RGB24ToUVRow_NEON;
|
| + }
|
| }
|
| }
|
| -#endif
|
| -#if defined(HAS_RGB24TOUVROW_NEON)
|
| - if (TestCpuFlag(kCpuHasNEON)) {
|
| - RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - RGB24ToUVRow = RGB24ToUVRow_NEON;
|
| - }
|
| - }
|
| -#endif
|
| +// Other platforms do intermediate conversion from RGB24 to ARGB.
|
| +#else
|
| #if defined(HAS_RGB24TOARGBROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
|
| @@ -841,27 +839,29 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| + ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| if (IS_ALIGNED(width, 16)) {
|
| ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| + ARGBToYRow = ARGBToYRow_SSSE3;
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| - if (TestCpuFlag(kCpuHasSSSE3)) {
|
| - ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - ARGBToYRow = ARGBToYRow_SSSE3;
|
| +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_AVX2;
|
| + ARGBToYRow = ARGBToYRow_Any_AVX2;
|
| + if (IS_ALIGNED(width, 32)) {
|
| + ARGBToUVRow = ARGBToUVRow_AVX2;
|
| + ARGBToYRow = ARGBToYRow_AVX2;
|
| }
|
| }
|
| -#endif // HAS_ARGBTOUVROW_SSSE3
|
| -
|
| +#endif
|
| {
|
| -#if !defined(HAS_RGB24TOYROW_NEON)
|
| // Allocate 2 rows of ARGB.
|
| - const int kRowSize = (width * 4 + 15) & ~15;
|
| + const int kRowSize = (width * 4 + 31) & ~31;
|
| align_buffer_64(row, kRowSize * 2);
|
| #endif
|
|
|
| @@ -894,8 +894,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
|
| }
|
| #if !defined(HAS_RGB24TOYROW_NEON)
|
| free_aligned_buffer_64(row);
|
| -#endif
|
| }
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -931,22 +931,20 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
| src_stride_raw = -src_stride_raw;
|
| }
|
|
|
| +// Neon version does direct RAW to YUV.
|
| #if defined(HAS_RAWTOYROW_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| + RAWToUVRow = RAWToUVRow_Any_NEON;
|
| RAWToYRow = RAWToYRow_Any_NEON;
|
| if (IS_ALIGNED(width, 8)) {
|
| RAWToYRow = RAWToYRow_NEON;
|
| + if (IS_ALIGNED(width, 16)) {
|
| + RAWToUVRow = RAWToUVRow_NEON;
|
| + }
|
| }
|
| }
|
| -#endif
|
| -#if defined(HAS_RAWTOUVROW_NEON)
|
| - if (TestCpuFlag(kCpuHasNEON)) {
|
| - RAWToUVRow = RAWToUVRow_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - RAWToUVRow = RAWToUVRow_NEON;
|
| - }
|
| - }
|
| -#endif
|
| +// Other platforms do intermediate conversion from RAW to ARGB.
|
| +#else
|
| #if defined(HAS_RAWTOARGBROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
|
| @@ -955,59 +953,63 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| + ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| if (IS_ALIGNED(width, 16)) {
|
| ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| + ARGBToYRow = ARGBToYRow_SSSE3;
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| - if (TestCpuFlag(kCpuHasSSSE3)) {
|
| - ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - ARGBToYRow = ARGBToYRow_SSSE3;
|
| +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_AVX2;
|
| + ARGBToYRow = ARGBToYRow_Any_AVX2;
|
| + if (IS_ALIGNED(width, 32)) {
|
| + ARGBToUVRow = ARGBToUVRow_AVX2;
|
| + ARGBToYRow = ARGBToYRow_AVX2;
|
| }
|
| }
|
| -#endif // HAS_ARGBTOUVROW_SSSE3
|
| -
|
| +#endif
|
| {
|
| // Allocate 2 rows of ARGB.
|
| - const int kRowSize = (width * 4 + 15) & ~15;
|
| + const int kRowSize = (width * 4 + 31) & ~31;
|
| align_buffer_64(row, kRowSize * 2);
|
| +#endif
|
|
|
| for (y = 0; y < height - 1; y += 2) {
|
| - #if defined(HAS_RAWTOYROW_NEON)
|
| +#if defined(HAS_RAWTOYROW_NEON)
|
| RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
|
| RAWToYRow(src_raw, dst_y, width);
|
| RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
|
| - #else
|
| +#else
|
| RAWToARGBRow(src_raw, row, width);
|
| RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
|
| ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
| ARGBToYRow(row, dst_y, width);
|
| ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
| - #endif
|
| +#endif
|
| src_raw += src_stride_raw * 2;
|
| dst_y += dst_stride_y * 2;
|
| dst_u += dst_stride_u;
|
| dst_v += dst_stride_v;
|
| }
|
| if (height & 1) {
|
| - #if defined(HAS_RAWTOYROW_NEON)
|
| +#if defined(HAS_RAWTOYROW_NEON)
|
| RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
|
| RAWToYRow(src_raw, dst_y, width);
|
| - #else
|
| +#else
|
| RAWToARGBRow(src_raw, row, width);
|
| ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
| ARGBToYRow(row, dst_y, width);
|
| - #endif
|
| +#endif
|
| }
|
| - #if !defined(HAS_RAWTOYROW_NEON)
|
| +#if !defined(HAS_RAWTOYROW_NEON)
|
| free_aligned_buffer_64(row);
|
| - #endif
|
| }
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -1043,19 +1045,20 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
| src_stride_rgb565 = -src_stride_rgb565;
|
| }
|
|
|
| +// Neon version does direct RGB565 to YUV.
|
| #if defined(HAS_RGB565TOYROW_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| + RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
|
| RGB565ToYRow = RGB565ToYRow_Any_NEON;
|
| if (IS_ALIGNED(width, 8)) {
|
| RGB565ToYRow = RGB565ToYRow_NEON;
|
| - }
|
| - RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - RGB565ToUVRow = RGB565ToUVRow_NEON;
|
| + if (IS_ALIGNED(width, 16)) {
|
| + RGB565ToUVRow = RGB565ToUVRow_NEON;
|
| + }
|
| }
|
| }
|
| -#else // HAS_RGB565TOYROW_NEON
|
| -
|
| +// Other platforms do intermediate conversion from RGB565 to ARGB.
|
| +#else
|
| #if defined(HAS_RGB565TOARGBROW_SSE2)
|
| if (TestCpuFlag(kCpuHasSSE2)) {
|
| RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
|
| @@ -1064,28 +1067,37 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| - if (TestCpuFlag(kCpuHasSSSE3)) {
|
| - ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| +#if defined(HAS_RGB565TOARGBROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
|
| if (IS_ALIGNED(width, 16)) {
|
| - ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| + RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| if (IS_ALIGNED(width, 16)) {
|
| + ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| ARGBToYRow = ARGBToYRow_SSSE3;
|
| }
|
| }
|
| -#endif // HAS_ARGBTOUVROW_SSSE3
|
| -#endif // HAS_RGB565TOYROW_NEON
|
| -
|
| +#endif
|
| +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_AVX2;
|
| + ARGBToYRow = ARGBToYRow_Any_AVX2;
|
| + if (IS_ALIGNED(width, 32)) {
|
| + ARGBToUVRow = ARGBToUVRow_AVX2;
|
| + ARGBToYRow = ARGBToYRow_AVX2;
|
| + }
|
| + }
|
| +#endif
|
| {
|
| -#if !defined(HAS_RGB565TOYROW_NEON)
|
| // Allocate 2 rows of ARGB.
|
| - const int kRowSize = (width * 4 + 15) & ~15;
|
| + const int kRowSize = (width * 4 + 31) & ~31;
|
| align_buffer_64(row, kRowSize * 2);
|
| #endif
|
|
|
| @@ -1118,8 +1130,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
|
| }
|
| #if !defined(HAS_RGB565TOYROW_NEON)
|
| free_aligned_buffer_64(row);
|
| -#endif
|
| }
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -1155,19 +1167,20 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
| src_stride_argb1555 = -src_stride_argb1555;
|
| }
|
|
|
| +// Neon version does direct ARGB1555 to YUV.
|
| #if defined(HAS_ARGB1555TOYROW_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
|
| ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
|
| if (IS_ALIGNED(width, 8)) {
|
| ARGB1555ToYRow = ARGB1555ToYRow_NEON;
|
| - }
|
| - ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
|
| + if (IS_ALIGNED(width, 16)) {
|
| + ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
|
| + }
|
| }
|
| }
|
| -#else // HAS_ARGB1555TOYROW_NEON
|
| -
|
| +// Other platforms do intermediate conversion from ARGB1555 to ARGB.
|
| +#else
|
| #if defined(HAS_ARGB1555TOARGBROW_SSE2)
|
| if (TestCpuFlag(kCpuHasSSE2)) {
|
| ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
|
| @@ -1176,30 +1189,40 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| - if (TestCpuFlag(kCpuHasSSSE3)) {
|
| - ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| +#if defined(HAS_ARGB1555TOARGBROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
|
| if (IS_ALIGNED(width, 16)) {
|
| - ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| + ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| if (IS_ALIGNED(width, 16)) {
|
| + ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| ARGBToYRow = ARGBToYRow_SSSE3;
|
| }
|
| }
|
| -#endif // HAS_ARGBTOUVROW_SSSE3
|
| -#endif // HAS_ARGB1555TOYROW_NEON
|
| -
|
| +#endif
|
| +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_AVX2;
|
| + ARGBToYRow = ARGBToYRow_Any_AVX2;
|
| + if (IS_ALIGNED(width, 32)) {
|
| + ARGBToUVRow = ARGBToUVRow_AVX2;
|
| + ARGBToYRow = ARGBToYRow_AVX2;
|
| + }
|
| + }
|
| +#endif
|
| {
|
| -#if !defined(HAS_ARGB1555TOYROW_NEON)
|
| // Allocate 2 rows of ARGB.
|
| - const int kRowSize = (width * 4 + 15) & ~15;
|
| + const int kRowSize = (width * 4 + 31) & ~31;
|
| align_buffer_64(row, kRowSize * 2);
|
| #endif
|
| +
|
| for (y = 0; y < height - 1; y += 2) {
|
| #if defined(HAS_ARGB1555TOYROW_NEON)
|
| ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
| @@ -1230,9 +1253,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
|
| #endif
|
| }
|
| #if !defined(HAS_ARGB1555TOYROW_NEON)
|
| - free_aligned_buffer_64(row);
|
| -#endif
|
| + free_aligned_buffer_64(row);
|
| }
|
| +#endif
|
| return 0;
|
| }
|
|
|
| @@ -1268,19 +1291,20 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
| src_stride_argb4444 = -src_stride_argb4444;
|
| }
|
|
|
| +// Neon version does direct ARGB4444 to YUV.
|
| #if defined(HAS_ARGB4444TOYROW_NEON)
|
| if (TestCpuFlag(kCpuHasNEON)) {
|
| + ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
|
| ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
|
| if (IS_ALIGNED(width, 8)) {
|
| ARGB4444ToYRow = ARGB4444ToYRow_NEON;
|
| - }
|
| - ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
|
| - if (IS_ALIGNED(width, 16)) {
|
| - ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
|
| + if (IS_ALIGNED(width, 16)) {
|
| + ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
|
| + }
|
| }
|
| }
|
| -#else // HAS_ARGB4444TOYROW_NEON
|
| -
|
| +// Other platforms do intermediate conversion from ARGB4444 to ARGB.
|
| +#else
|
| #if defined(HAS_ARGB4444TOARGBROW_SSE2)
|
| if (TestCpuFlag(kCpuHasSSE2)) {
|
| ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
|
| @@ -1289,28 +1313,37 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| - if (TestCpuFlag(kCpuHasSSSE3)) {
|
| - ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| +#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
|
| if (IS_ALIGNED(width, 16)) {
|
| - ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| + ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
|
| }
|
| }
|
| #endif
|
| -#if defined(HAS_ARGBTOUVROW_SSSE3)
|
| +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
| if (TestCpuFlag(kCpuHasSSSE3)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
| ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
| if (IS_ALIGNED(width, 16)) {
|
| + ARGBToUVRow = ARGBToUVRow_SSSE3;
|
| ARGBToYRow = ARGBToYRow_SSSE3;
|
| }
|
| }
|
| -#endif // HAS_ARGBTOUVROW_SSSE3
|
| -#endif // HAS_ARGB4444TOYROW_NEON
|
| -
|
| +#endif
|
| +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
|
| + if (TestCpuFlag(kCpuHasAVX2)) {
|
| + ARGBToUVRow = ARGBToUVRow_Any_AVX2;
|
| + ARGBToYRow = ARGBToYRow_Any_AVX2;
|
| + if (IS_ALIGNED(width, 32)) {
|
| + ARGBToUVRow = ARGBToUVRow_AVX2;
|
| + ARGBToYRow = ARGBToYRow_AVX2;
|
| + }
|
| + }
|
| +#endif
|
| {
|
| -#if !defined(HAS_ARGB4444TOYROW_NEON)
|
| // Allocate 2 rows of ARGB.
|
| - const int kRowSize = (width * 4 + 15) & ~15;
|
| + const int kRowSize = (width * 4 + 31) & ~31;
|
| align_buffer_64(row, kRowSize * 2);
|
| #endif
|
|
|
| @@ -1345,8 +1378,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
|
| }
|
| #if !defined(HAS_ARGB4444TOYROW_NEON)
|
| free_aligned_buffer_64(row);
|
| -#endif
|
| }
|
| +#endif
|
| return 0;
|
| }
|
|
|
|
|