Index: source/libvpx/third_party/libyuv/source/convert.cc |
diff --git a/source/libvpx/third_party/libyuv/source/convert.cc b/source/libvpx/third_party/libyuv/source/convert.cc |
index 41696c18f87866ecf9745ceb044ed3e97e1a1486..3ad6bd7a4b6eada9c7da37814d6269a98722745c 100644 |
--- a/source/libvpx/third_party/libyuv/source/convert.cc |
+++ b/source/libvpx/third_party/libyuv/source/convert.cc |
@@ -817,22 +817,20 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, |
src_stride_rgb24 = -src_stride_rgb24; |
} |
+// Neon version does direct RGB24 to YUV. |
#if defined(HAS_RGB24TOYROW_NEON) |
if (TestCpuFlag(kCpuHasNEON)) { |
+ RGB24ToUVRow = RGB24ToUVRow_Any_NEON; |
RGB24ToYRow = RGB24ToYRow_Any_NEON; |
if (IS_ALIGNED(width, 8)) { |
RGB24ToYRow = RGB24ToYRow_NEON; |
+ if (IS_ALIGNED(width, 16)) { |
+ RGB24ToUVRow = RGB24ToUVRow_NEON; |
+ } |
} |
} |
-#endif |
-#if defined(HAS_RGB24TOUVROW_NEON) |
- if (TestCpuFlag(kCpuHasNEON)) { |
- RGB24ToUVRow = RGB24ToUVRow_Any_NEON; |
- if (IS_ALIGNED(width, 16)) { |
- RGB24ToUVRow = RGB24ToUVRow_NEON; |
- } |
- } |
-#endif |
+// Other platforms do intermediate conversion from RGB24 to ARGB. |
+#else |
#if defined(HAS_RGB24TOARGBROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; |
@@ -841,27 +839,29 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
+ ARGBToYRow = ARGBToYRow_Any_SSSE3; |
if (IS_ALIGNED(width, 16)) { |
ARGBToUVRow = ARGBToUVRow_SSSE3; |
+ ARGBToYRow = ARGBToYRow_SSSE3; |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3)) { |
- ARGBToYRow = ARGBToYRow_Any_SSSE3; |
- if (IS_ALIGNED(width, 16)) { |
- ARGBToYRow = ARGBToYRow_SSSE3; |
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2; |
+ ARGBToYRow = ARGBToYRow_Any_AVX2; |
+ if (IS_ALIGNED(width, 32)) { |
+ ARGBToUVRow = ARGBToUVRow_AVX2; |
+ ARGBToYRow = ARGBToYRow_AVX2; |
} |
} |
-#endif // HAS_ARGBTOUVROW_SSSE3 |
- |
+#endif |
{ |
-#if !defined(HAS_RGB24TOYROW_NEON) |
// Allocate 2 rows of ARGB. |
- const int kRowSize = (width * 4 + 15) & ~15; |
+ const int kRowSize = (width * 4 + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
#endif |
@@ -894,8 +894,8 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, |
} |
#if !defined(HAS_RGB24TOYROW_NEON) |
free_aligned_buffer_64(row); |
-#endif |
} |
+#endif |
return 0; |
} |
@@ -931,22 +931,20 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, |
src_stride_raw = -src_stride_raw; |
} |
+// Neon version does direct RAW to YUV. |
#if defined(HAS_RAWTOYROW_NEON) |
if (TestCpuFlag(kCpuHasNEON)) { |
+ RAWToUVRow = RAWToUVRow_Any_NEON; |
RAWToYRow = RAWToYRow_Any_NEON; |
if (IS_ALIGNED(width, 8)) { |
RAWToYRow = RAWToYRow_NEON; |
+ if (IS_ALIGNED(width, 16)) { |
+ RAWToUVRow = RAWToUVRow_NEON; |
+ } |
} |
} |
-#endif |
-#if defined(HAS_RAWTOUVROW_NEON) |
- if (TestCpuFlag(kCpuHasNEON)) { |
- RAWToUVRow = RAWToUVRow_Any_NEON; |
- if (IS_ALIGNED(width, 16)) { |
- RAWToUVRow = RAWToUVRow_NEON; |
- } |
- } |
-#endif |
+// Other platforms do intermediate conversion from RAW to ARGB. |
+#else |
#if defined(HAS_RAWTOARGBROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
RAWToARGBRow = RAWToARGBRow_Any_SSSE3; |
@@ -955,59 +953,63 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
+ ARGBToYRow = ARGBToYRow_Any_SSSE3; |
if (IS_ALIGNED(width, 16)) { |
ARGBToUVRow = ARGBToUVRow_SSSE3; |
+ ARGBToYRow = ARGBToYRow_SSSE3; |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3)) { |
- ARGBToYRow = ARGBToYRow_Any_SSSE3; |
- if (IS_ALIGNED(width, 16)) { |
- ARGBToYRow = ARGBToYRow_SSSE3; |
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2; |
+ ARGBToYRow = ARGBToYRow_Any_AVX2; |
+ if (IS_ALIGNED(width, 32)) { |
+ ARGBToUVRow = ARGBToUVRow_AVX2; |
+ ARGBToYRow = ARGBToYRow_AVX2; |
} |
} |
-#endif // HAS_ARGBTOUVROW_SSSE3 |
- |
+#endif |
{ |
// Allocate 2 rows of ARGB. |
- const int kRowSize = (width * 4 + 15) & ~15; |
+ const int kRowSize = (width * 4 + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
+#endif |
for (y = 0; y < height - 1; y += 2) { |
- #if defined(HAS_RAWTOYROW_NEON) |
+#if defined(HAS_RAWTOYROW_NEON) |
RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); |
RAWToYRow(src_raw, dst_y, width); |
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); |
- #else |
+#else |
RAWToARGBRow(src_raw, row, width); |
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); |
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); |
ARGBToYRow(row, dst_y, width); |
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); |
- #endif |
+#endif |
src_raw += src_stride_raw * 2; |
dst_y += dst_stride_y * 2; |
dst_u += dst_stride_u; |
dst_v += dst_stride_v; |
} |
if (height & 1) { |
- #if defined(HAS_RAWTOYROW_NEON) |
+#if defined(HAS_RAWTOYROW_NEON) |
RAWToUVRow(src_raw, 0, dst_u, dst_v, width); |
RAWToYRow(src_raw, dst_y, width); |
- #else |
+#else |
RAWToARGBRow(src_raw, row, width); |
ARGBToUVRow(row, 0, dst_u, dst_v, width); |
ARGBToYRow(row, dst_y, width); |
- #endif |
+#endif |
} |
- #if !defined(HAS_RAWTOYROW_NEON) |
+#if !defined(HAS_RAWTOYROW_NEON) |
free_aligned_buffer_64(row); |
- #endif |
} |
+#endif |
return 0; |
} |
@@ -1043,19 +1045,20 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, |
src_stride_rgb565 = -src_stride_rgb565; |
} |
+// Neon version does direct RGB565 to YUV. |
#if defined(HAS_RGB565TOYROW_NEON) |
if (TestCpuFlag(kCpuHasNEON)) { |
+ RGB565ToUVRow = RGB565ToUVRow_Any_NEON; |
RGB565ToYRow = RGB565ToYRow_Any_NEON; |
if (IS_ALIGNED(width, 8)) { |
RGB565ToYRow = RGB565ToYRow_NEON; |
- } |
- RGB565ToUVRow = RGB565ToUVRow_Any_NEON; |
- if (IS_ALIGNED(width, 16)) { |
- RGB565ToUVRow = RGB565ToUVRow_NEON; |
+ if (IS_ALIGNED(width, 16)) { |
+ RGB565ToUVRow = RGB565ToUVRow_NEON; |
+ } |
} |
} |
-#else // HAS_RGB565TOYROW_NEON |
- |
+// Other platforms do intermediate conversion from RGB565 to ARGB. |
+#else |
#if defined(HAS_RGB565TOARGBROW_SSE2) |
if (TestCpuFlag(kCpuHasSSE2)) { |
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; |
@@ -1064,28 +1067,37 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3)) { |
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
+#if defined(HAS_RGB565TOARGBROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2; |
if (IS_ALIGNED(width, 16)) { |
- ARGBToUVRow = ARGBToUVRow_SSSE3; |
+ RGB565ToARGBRow = RGB565ToARGBRow_AVX2; |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
ARGBToYRow = ARGBToYRow_Any_SSSE3; |
if (IS_ALIGNED(width, 16)) { |
+ ARGBToUVRow = ARGBToUVRow_SSSE3; |
ARGBToYRow = ARGBToYRow_SSSE3; |
} |
} |
-#endif // HAS_ARGBTOUVROW_SSSE3 |
-#endif // HAS_RGB565TOYROW_NEON |
- |
+#endif |
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2; |
+ ARGBToYRow = ARGBToYRow_Any_AVX2; |
+ if (IS_ALIGNED(width, 32)) { |
+ ARGBToUVRow = ARGBToUVRow_AVX2; |
+ ARGBToYRow = ARGBToYRow_AVX2; |
+ } |
+ } |
+#endif |
{ |
-#if !defined(HAS_RGB565TOYROW_NEON) |
// Allocate 2 rows of ARGB. |
- const int kRowSize = (width * 4 + 15) & ~15; |
+ const int kRowSize = (width * 4 + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
#endif |
@@ -1118,8 +1130,8 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, |
} |
#if !defined(HAS_RGB565TOYROW_NEON) |
free_aligned_buffer_64(row); |
-#endif |
} |
+#endif |
return 0; |
} |
@@ -1155,19 +1167,20 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, |
src_stride_argb1555 = -src_stride_argb1555; |
} |
+// Neon version does direct ARGB1555 to YUV. |
#if defined(HAS_ARGB1555TOYROW_NEON) |
if (TestCpuFlag(kCpuHasNEON)) { |
+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; |
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; |
if (IS_ALIGNED(width, 8)) { |
ARGB1555ToYRow = ARGB1555ToYRow_NEON; |
- } |
- ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; |
- if (IS_ALIGNED(width, 16)) { |
- ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; |
+ if (IS_ALIGNED(width, 16)) { |
+ ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; |
+ } |
} |
} |
-#else // HAS_ARGB1555TOYROW_NEON |
- |
+// Other platforms do intermediate conversion from ARGB1555 to ARGB. |
+#else |
#if defined(HAS_ARGB1555TOARGBROW_SSE2) |
if (TestCpuFlag(kCpuHasSSE2)) { |
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; |
@@ -1176,30 +1189,40 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3)) { |
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
+#if defined(HAS_ARGB1555TOARGBROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2; |
if (IS_ALIGNED(width, 16)) { |
- ARGBToUVRow = ARGBToUVRow_SSSE3; |
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2; |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
ARGBToYRow = ARGBToYRow_Any_SSSE3; |
if (IS_ALIGNED(width, 16)) { |
+ ARGBToUVRow = ARGBToUVRow_SSSE3; |
ARGBToYRow = ARGBToYRow_SSSE3; |
} |
} |
-#endif // HAS_ARGBTOUVROW_SSSE3 |
-#endif // HAS_ARGB1555TOYROW_NEON |
- |
+#endif |
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2; |
+ ARGBToYRow = ARGBToYRow_Any_AVX2; |
+ if (IS_ALIGNED(width, 32)) { |
+ ARGBToUVRow = ARGBToUVRow_AVX2; |
+ ARGBToYRow = ARGBToYRow_AVX2; |
+ } |
+ } |
+#endif |
{ |
-#if !defined(HAS_ARGB1555TOYROW_NEON) |
// Allocate 2 rows of ARGB. |
- const int kRowSize = (width * 4 + 15) & ~15; |
+ const int kRowSize = (width * 4 + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
#endif |
+ |
for (y = 0; y < height - 1; y += 2) { |
#if defined(HAS_ARGB1555TOYROW_NEON) |
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); |
@@ -1230,9 +1253,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, |
#endif |
} |
#if !defined(HAS_ARGB1555TOYROW_NEON) |
- free_aligned_buffer_64(row); |
-#endif |
+ free_aligned_buffer_64(row); |
} |
+#endif |
return 0; |
} |
@@ -1268,19 +1291,20 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, |
src_stride_argb4444 = -src_stride_argb4444; |
} |
+// Neon version does direct ARGB4444 to YUV. |
#if defined(HAS_ARGB4444TOYROW_NEON) |
if (TestCpuFlag(kCpuHasNEON)) { |
+ ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; |
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; |
if (IS_ALIGNED(width, 8)) { |
ARGB4444ToYRow = ARGB4444ToYRow_NEON; |
- } |
- ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; |
- if (IS_ALIGNED(width, 16)) { |
- ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; |
+ if (IS_ALIGNED(width, 16)) { |
+ ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; |
+ } |
} |
} |
-#else // HAS_ARGB4444TOYROW_NEON |
- |
+// Other platforms do intermediate conversion from ARGB4444 to ARGB. |
+#else |
#if defined(HAS_ARGB4444TOARGBROW_SSE2) |
if (TestCpuFlag(kCpuHasSSE2)) { |
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; |
@@ -1289,28 +1313,37 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3)) { |
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
+#if defined(HAS_ARGB4444TOARGBROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2; |
if (IS_ALIGNED(width, 16)) { |
- ARGBToUVRow = ARGBToUVRow_SSSE3; |
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2; |
} |
} |
#endif |
-#if defined(HAS_ARGBTOUVROW_SSSE3) |
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) |
if (TestCpuFlag(kCpuHasSSSE3)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3; |
ARGBToYRow = ARGBToYRow_Any_SSSE3; |
if (IS_ALIGNED(width, 16)) { |
+ ARGBToUVRow = ARGBToUVRow_SSSE3; |
ARGBToYRow = ARGBToYRow_SSSE3; |
} |
} |
-#endif // HAS_ARGBTOUVROW_SSSE3 |
-#endif // HAS_ARGB4444TOYROW_NEON |
- |
+#endif |
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2; |
+ ARGBToYRow = ARGBToYRow_Any_AVX2; |
+ if (IS_ALIGNED(width, 32)) { |
+ ARGBToUVRow = ARGBToUVRow_AVX2; |
+ ARGBToYRow = ARGBToYRow_AVX2; |
+ } |
+ } |
+#endif |
{ |
-#if !defined(HAS_ARGB4444TOYROW_NEON) |
// Allocate 2 rows of ARGB. |
- const int kRowSize = (width * 4 + 15) & ~15; |
+ const int kRowSize = (width * 4 + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
#endif |
@@ -1345,8 +1378,8 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, |
} |
#if !defined(HAS_ARGB4444TOYROW_NEON) |
free_aligned_buffer_64(row); |
-#endif |
} |
+#endif |
return 0; |
} |