Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: source/libvpx/third_party/libyuv/source/rotate.cc

Issue 996503002: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/third_party/libyuv/source/rotate.cc
diff --git a/source/libvpx/third_party/libyuv/source/rotate.cc b/source/libvpx/third_party/libyuv/source/rotate.cc
index 2ef3228cb80219ff6693fd944637b979d0f19e0a..5acaccfd89d3323b1c1f0770fc73e2b053b901d6 100644
--- a/source/libvpx/third_party/libyuv/source/rotate.cc
+++ b/source/libvpx/third_party/libyuv/source/rotate.cc
@@ -42,11 +42,7 @@ extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
@@ -55,7 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
-#endif // defined(__ARM_NEON__)
+#endif
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
@@ -194,31 +190,31 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
convertloop:
// Read in the data from the source pointer.
// First round of bit swap.
- movdqa xmm0, [eax]
- movdqa xmm1, [eax + edi]
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm0 // use xmm7 as temp register.
punpcklbw xmm0, xmm1
punpckhbw xmm7, xmm1
movdqa xmm1, xmm7
- movdqa xmm2, [eax]
- movdqa xmm3, [eax + edi]
+ movdqu xmm2, [eax]
+ movdqu xmm3, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm2
punpcklbw xmm2, xmm3
punpckhbw xmm7, xmm3
movdqa xmm3, xmm7
- movdqa xmm4, [eax]
- movdqa xmm5, [eax + edi]
+ movdqu xmm4, [eax]
+ movdqu xmm5, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm4
punpcklbw xmm4, xmm5
punpckhbw xmm7, xmm5
movdqa xmm5, xmm7
- movdqa xmm6, [eax]
- movdqa xmm7, [eax + edi]
+ movdqu xmm6, [eax]
+ movdqu xmm7, [eax + edi]
lea eax, [eax + 2 * edi]
- movdqa [esp], xmm5 // backup xmm5
+ movdqu [esp], xmm5 // backup xmm5
neg edi
movdqa xmm5, xmm6 // use xmm5 as temp register.
punpcklbw xmm6, xmm7
@@ -239,8 +235,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
punpcklwd xmm4, xmm6
punpckhwd xmm5, xmm6
movdqa xmm6, xmm5
- movdqa xmm5, [esp] // restore xmm5
- movdqa [esp], xmm6 // backup xmm6
+ movdqu xmm5, [esp] // restore xmm5
+ movdqu [esp], xmm6 // backup xmm6
movdqa xmm6, xmm5 // use xmm6 as temp register.
punpcklwd xmm5, xmm7
punpckhwd xmm6, xmm7
@@ -251,7 +247,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
punpckldq xmm0, xmm4
punpckhdq xmm6, xmm4
movdqa xmm4, xmm6
- movdqa xmm6, [esp] // restore xmm6
+ movdqu xmm6, [esp] // restore xmm6
movlpd qword ptr [edx], xmm0
movhpd qword ptr [ebx], xmm0
movlpd qword ptr [edx + esi], xmm4
@@ -296,7 +292,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
ret
}
}
-#elif !defined(LIBYUV_DISABLE_X86) && \
+#endif
+#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
@@ -379,10 +376,8 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
"+r"(width) // %2
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc"
- #if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- #endif
+ : "memory", "cc",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
@@ -411,31 +406,31 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"mov 0x2c(%ecx),%ecx \n"
"1: \n"
- "movdqa (%eax),%xmm0 \n"
- "movdqa (%eax,%edi,1),%xmm1 \n"
+ "movdqu (%eax),%xmm0 \n"
+ "movdqu (%eax,%edi,1),%xmm1 \n"
"lea (%eax,%edi,2),%eax \n"
"movdqa %xmm0,%xmm7 \n"
"punpcklbw %xmm1,%xmm0 \n"
"punpckhbw %xmm1,%xmm7 \n"
"movdqa %xmm7,%xmm1 \n"
- "movdqa (%eax),%xmm2 \n"
- "movdqa (%eax,%edi,1),%xmm3 \n"
+ "movdqu (%eax),%xmm2 \n"
+ "movdqu (%eax,%edi,1),%xmm3 \n"
"lea (%eax,%edi,2),%eax \n"
"movdqa %xmm2,%xmm7 \n"
"punpcklbw %xmm3,%xmm2 \n"
"punpckhbw %xmm3,%xmm7 \n"
"movdqa %xmm7,%xmm3 \n"
- "movdqa (%eax),%xmm4 \n"
- "movdqa (%eax,%edi,1),%xmm5 \n"
+ "movdqu (%eax),%xmm4 \n"
+ "movdqu (%eax,%edi,1),%xmm5 \n"
"lea (%eax,%edi,2),%eax \n"
"movdqa %xmm4,%xmm7 \n"
"punpcklbw %xmm5,%xmm4 \n"
"punpckhbw %xmm5,%xmm7 \n"
"movdqa %xmm7,%xmm5 \n"
- "movdqa (%eax),%xmm6 \n"
- "movdqa (%eax,%edi,1),%xmm7 \n"
+ "movdqu (%eax),%xmm6 \n"
+ "movdqu (%eax,%edi,1),%xmm7 \n"
"lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm5,(%esp) \n"
+ "movdqu %xmm5,(%esp) \n"
"neg %edi \n"
"movdqa %xmm6,%xmm5 \n"
"punpcklbw %xmm7,%xmm6 \n"
@@ -455,8 +450,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"punpcklwd %xmm6,%xmm4 \n"
"punpckhwd %xmm6,%xmm5 \n"
"movdqa %xmm5,%xmm6 \n"
- "movdqa (%esp),%xmm5 \n"
- "movdqa %xmm6,(%esp) \n"
+ "movdqu (%esp),%xmm5 \n"
+ "movdqu %xmm6,(%esp) \n"
"movdqa %xmm5,%xmm6 \n"
"punpcklwd %xmm7,%xmm5 \n"
"punpckhwd %xmm7,%xmm6 \n"
@@ -465,7 +460,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"punpckldq %xmm4,%xmm0 \n"
"punpckhdq %xmm4,%xmm6 \n"
"movdqa %xmm6,%xmm4 \n"
- "movdqa (%esp),%xmm6 \n"
+ "movdqu (%esp),%xmm6 \n"
"movlpd %xmm0,(%edx) \n"
"movhpd %xmm0,(%ebx) \n"
"movlpd %xmm4,(%edx,%esi,1) \n"
@@ -514,7 +509,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
"ret \n"
#endif
);
-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
+#endif
+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3
@@ -525,38 +521,38 @@ static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
// First round of bit swap.
".p2align 2 \n"
"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%3),%%xmm1 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%3),%%xmm1 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa (%0),%%xmm2 \n"
+ "movdqu (%0),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm8,%%xmm9 \n"
"palignr $0x8,%%xmm1,%%xmm1 \n"
"palignr $0x8,%%xmm9,%%xmm9 \n"
- "movdqa (%0,%3),%%xmm3 \n"
+ "movdqu (%0,%3),%%xmm3 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm2,%%xmm10 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm10 \n"
"movdqa %%xmm2,%%xmm3 \n"
"movdqa %%xmm10,%%xmm11 \n"
- "movdqa (%0),%%xmm4 \n"
+ "movdqu (%0),%%xmm4 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"palignr $0x8,%%xmm11,%%xmm11 \n"
- "movdqa (%0,%3),%%xmm5 \n"
+ "movdqu (%0,%3),%%xmm5 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm4,%%xmm12 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm12 \n"
"movdqa %%xmm4,%%xmm5 \n"
"movdqa %%xmm12,%%xmm13 \n"
- "movdqa (%0),%%xmm6 \n"
+ "movdqu (%0),%%xmm6 \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"palignr $0x8,%%xmm13,%%xmm13 \n"
- "movdqa (%0,%3),%%xmm7 \n"
+ "movdqu (%0,%3),%%xmm7 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm6,%%xmm14 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
@@ -666,29 +662,29 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
// First round of bit swap.
".p2align 2 \n"
"1: \n"
- "movdqa (%0),%%xmm0 \n"
- "movdqa (%0,%4),%%xmm1 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%4),%%xmm1 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
"movdqa %%xmm8,%%xmm1 \n"
- "movdqa (%0),%%xmm2 \n"
- "movdqa (%0,%4),%%xmm3 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu (%0,%4),%%xmm3 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm2,%%xmm8 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm8 \n"
"movdqa %%xmm8,%%xmm3 \n"
- "movdqa (%0),%%xmm4 \n"
- "movdqa (%0,%4),%%xmm5 \n"
+ "movdqu (%0),%%xmm4 \n"
+ "movdqu (%0,%4),%%xmm5 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm4,%%xmm8 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm8 \n"
"movdqa %%xmm8,%%xmm5 \n"
- "movdqa (%0),%%xmm6 \n"
- "movdqa (%0,%4),%%xmm7 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu (%0,%4),%%xmm7 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm6,%%xmm8 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
@@ -818,9 +814,7 @@ void TransposePlane(const uint8* src, int src_stride,
}
#endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) &&
- IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
TransposeWx8 = TransposeWx8_FAST_SSSE3;
}
#endif
@@ -883,29 +877,38 @@ void RotatePlane180(const uint8* src, int src_stride,
void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MirrorRow = MirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_NEON;
+ }
}
#endif
#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MirrorRow = MirrorRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSE2;
+ }
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- MirrorRow = MirrorRow_SSSE3;
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MirrorRow = MirrorRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSSE3;
+ }
}
#endif
#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MirrorRow = MirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_AVX2;
+ }
}
#endif
+// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
@@ -913,21 +916,14 @@ void RotatePlane180(const uint8* src, int src_stride,
MirrorRow = MirrorRow_MIPS_DSPR2;
}
#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_X86)
- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
- CopyRow = CopyRow_X86;
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
- CopyRow = CopyRow_SSE2;
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
@@ -935,6 +931,11 @@ void RotatePlane180(const uint8* src, int src_stride,
CopyRow = CopyRow_ERMS;
}
#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
@@ -1010,13 +1011,13 @@ void TransposeUV(const uint8* src, int src_stride,
if (TestCpuFlag(kCpuHasNEON)) {
TransposeUVWx8 = TransposeUVWx8_NEON;
}
-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) &&
- IS_ALIGNED(width, 8) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_TRANSPOSE_UVWX8_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
TransposeUVWx8 = TransposeUVWx8_SSE2;
}
-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
+#endif
+#if defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
@@ -1084,12 +1085,13 @@ void RotateUV180(const uint8* src, int src_stride,
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorRowUV = MirrorUVRow_NEON;
}
-#elif defined(HAS_MIRRORROW_UV_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_MIRRORROW_UV_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3;
}
-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
+#endif
+#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
« no previous file with comments | « source/libvpx/third_party/libyuv/source/planar_functions.cc ('k') | source/libvpx/third_party/libyuv/source/rotate_argb.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698