source/libvpx/third_party/libyuv/source/rotate.cc - Issue 996503002: libvpx: Pull from upstream

Unified Diff: source/libvpx/third_party/libyuv/source/rotate.cc

Issue 996503002: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « source/libvpx/third_party/libyuv/source/planar_functions.cc ('k') | source/libvpx/third_party/libyuv/source/rotate_argb.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: source/libvpx/third_party/libyuv/source/rotate.cc

diff --git a/source/libvpx/third_party/libyuv/source/rotate.cc b/source/libvpx/third_party/libyuv/source/rotate.cc

index 2ef3228cb80219ff6693fd944637b979d0f19e0a..5acaccfd89d3323b1c1f0770fc73e2b053b901d6 100644

--- a/source/libvpx/third_party/libyuv/source/rotate.cc

+++ b/source/libvpx/third_party/libyuv/source/rotate.cc

@@ -42,11 +42,7 @@ extern "C" {

#endif

#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \

- (defined(__ARM_NEON__) || defined(LIBYUV_NEON))

-#define HAS_MIRRORROW_NEON

-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);

-#define HAS_MIRRORROW_UV_NEON

-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);

+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))

#define HAS_TRANSPOSE_WX8_NEON

void TransposeWx8_NEON(const uint8* src, int src_stride,

uint8* dst, int dst_stride, int width);

@@ -55,7 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,

uint8* dst_a, int dst_stride_a,

uint8* dst_b, int dst_stride_b,

int width);

-#endif // defined(__ARM_NEON__)

+#endif

#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \

defined(__mips__) && \

@@ -194,31 +190,31 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

convertloop:

// Read in the data from the source pointer.

// First round of bit swap.

- movdqa xmm0, [eax]

- movdqa xmm1, [eax + edi]

+ movdqu xmm0, [eax]

+ movdqu xmm1, [eax + edi]

lea eax, [eax + 2 * edi]

movdqa xmm7, xmm0 // use xmm7 as temp register.

punpcklbw xmm0, xmm1

punpckhbw xmm7, xmm1

movdqa xmm1, xmm7

- movdqa xmm2, [eax]

- movdqa xmm3, [eax + edi]

+ movdqu xmm2, [eax]

+ movdqu xmm3, [eax + edi]

lea eax, [eax + 2 * edi]

movdqa xmm7, xmm2

punpcklbw xmm2, xmm3

punpckhbw xmm7, xmm3

movdqa xmm3, xmm7

- movdqa xmm4, [eax]

- movdqa xmm5, [eax + edi]

+ movdqu xmm4, [eax]

+ movdqu xmm5, [eax + edi]

lea eax, [eax + 2 * edi]

movdqa xmm7, xmm4

punpcklbw xmm4, xmm5

punpckhbw xmm7, xmm5

movdqa xmm5, xmm7

- movdqa xmm6, [eax]

- movdqa xmm7, [eax + edi]

+ movdqu xmm6, [eax]

+ movdqu xmm7, [eax + edi]

lea eax, [eax + 2 * edi]

- movdqa [esp], xmm5 // backup xmm5

+ movdqu [esp], xmm5 // backup xmm5

neg edi

movdqa xmm5, xmm6 // use xmm5 as temp register.

punpcklbw xmm6, xmm7

@@ -239,8 +235,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

punpcklwd xmm4, xmm6

punpckhwd xmm5, xmm6

movdqa xmm6, xmm5

- movdqa xmm5, [esp] // restore xmm5

- movdqa [esp], xmm6 // backup xmm6

+ movdqu xmm5, [esp] // restore xmm5

+ movdqu [esp], xmm6 // backup xmm6

movdqa xmm6, xmm5 // use xmm6 as temp register.

punpcklwd xmm5, xmm7

punpckhwd xmm6, xmm7

@@ -251,7 +247,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

punpckldq xmm0, xmm4

punpckhdq xmm6, xmm4

movdqa xmm4, xmm6

- movdqa xmm6, [esp] // restore xmm6

+ movdqu xmm6, [esp] // restore xmm6

movlpd qword ptr [edx], xmm0

movhpd qword ptr [ebx], xmm0

movlpd qword ptr [edx + esi], xmm4

@@ -296,7 +292,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

ret

}

-#elif !defined(LIBYUV_DISABLE_X86) && \

+#endif

+#if !defined(LIBYUV_DISABLE_X86) && \

(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))

#define HAS_TRANSPOSE_WX8_SSSE3

static void TransposeWx8_SSSE3(const uint8* src, int src_stride,

@@ -379,10 +376,8 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,

"+r"(width) // %2

: "r"((intptr_t)(src_stride)), // %3

"r"((intptr_t)(dst_stride)) // %4

- : "memory", "cc"

- #if defined(__SSE2__)

- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"

- #endif

+ : "memory", "cc",

+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"

);

}

@@ -411,31 +406,31 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

"mov 0x2c(%ecx),%ecx \n"

"1: \n"

- "movdqa (%eax),%xmm0 \n"

- "movdqa (%eax,%edi,1),%xmm1 \n"

+ "movdqu (%eax),%xmm0 \n"

+ "movdqu (%eax,%edi,1),%xmm1 \n"

"lea (%eax,%edi,2),%eax \n"

"movdqa %xmm0,%xmm7 \n"

"punpcklbw %xmm1,%xmm0 \n"

"punpckhbw %xmm1,%xmm7 \n"

"movdqa %xmm7,%xmm1 \n"

- "movdqa (%eax),%xmm2 \n"

- "movdqa (%eax,%edi,1),%xmm3 \n"

+ "movdqu (%eax),%xmm2 \n"

+ "movdqu (%eax,%edi,1),%xmm3 \n"

"lea (%eax,%edi,2),%eax \n"

"movdqa %xmm2,%xmm7 \n"

"punpcklbw %xmm3,%xmm2 \n"

"punpckhbw %xmm3,%xmm7 \n"

"movdqa %xmm7,%xmm3 \n"

- "movdqa (%eax),%xmm4 \n"

- "movdqa (%eax,%edi,1),%xmm5 \n"

+ "movdqu (%eax),%xmm4 \n"

+ "movdqu (%eax,%edi,1),%xmm5 \n"

"lea (%eax,%edi,2),%eax \n"

"movdqa %xmm4,%xmm7 \n"

"punpcklbw %xmm5,%xmm4 \n"

"punpckhbw %xmm5,%xmm7 \n"

"movdqa %xmm7,%xmm5 \n"

- "movdqa (%eax),%xmm6 \n"

- "movdqa (%eax,%edi,1),%xmm7 \n"

+ "movdqu (%eax),%xmm6 \n"

+ "movdqu (%eax,%edi,1),%xmm7 \n"

"lea (%eax,%edi,2),%eax \n"

- "movdqa %xmm5,(%esp) \n"

+ "movdqu %xmm5,(%esp) \n"

"neg %edi \n"

"movdqa %xmm6,%xmm5 \n"

"punpcklbw %xmm7,%xmm6 \n"

@@ -455,8 +450,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

"punpcklwd %xmm6,%xmm4 \n"

"punpckhwd %xmm6,%xmm5 \n"

"movdqa %xmm5,%xmm6 \n"

- "movdqa (%esp),%xmm5 \n"

- "movdqa %xmm6,(%esp) \n"

+ "movdqu (%esp),%xmm5 \n"

+ "movdqu %xmm6,(%esp) \n"

"movdqa %xmm5,%xmm6 \n"

"punpcklwd %xmm7,%xmm5 \n"

"punpckhwd %xmm7,%xmm6 \n"

@@ -465,7 +460,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

"punpckldq %xmm4,%xmm0 \n"

"punpckhdq %xmm4,%xmm6 \n"

"movdqa %xmm6,%xmm4 \n"

- "movdqa (%esp),%xmm6 \n"

+ "movdqu (%esp),%xmm6 \n"

"movlpd %xmm0,(%edx) \n"

"movhpd %xmm0,(%ebx) \n"

"movlpd %xmm4,(%edx,%esi,1) \n"

@@ -514,7 +509,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

"ret \n"

#endif

);

-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \

+#endif

+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \

defined(__x86_64__)

// 64 bit version has enough registers to do 16x8 to 8x16 at a time.

#define HAS_TRANSPOSE_WX8_FAST_SSSE3

@@ -525,38 +521,38 @@ static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,

// First round of bit swap.

".p2align 2 \n"

"1: \n"

- "movdqa (%0),%%xmm0 \n"

- "movdqa (%0,%3),%%xmm1 \n"

+ "movdqu (%0),%%xmm0 \n"

+ "movdqu (%0,%3),%%xmm1 \n"

"lea (%0,%3,2),%0 \n"

"movdqa %%xmm0,%%xmm8 \n"

"punpcklbw %%xmm1,%%xmm0 \n"

"punpckhbw %%xmm1,%%xmm8 \n"

- "movdqa (%0),%%xmm2 \n"

+ "movdqu (%0),%%xmm2 \n"

"movdqa %%xmm0,%%xmm1 \n"

"movdqa %%xmm8,%%xmm9 \n"

"palignr $0x8,%%xmm1,%%xmm1 \n"

"palignr $0x8,%%xmm9,%%xmm9 \n"

- "movdqa (%0,%3),%%xmm3 \n"

+ "movdqu (%0,%3),%%xmm3 \n"

"lea (%0,%3,2),%0 \n"

"movdqa %%xmm2,%%xmm10 \n"

"punpcklbw %%xmm3,%%xmm2 \n"

"punpckhbw %%xmm3,%%xmm10 \n"

"movdqa %%xmm2,%%xmm3 \n"

"movdqa %%xmm10,%%xmm11 \n"

- "movdqa (%0),%%xmm4 \n"

+ "movdqu (%0),%%xmm4 \n"

"palignr $0x8,%%xmm3,%%xmm3 \n"

"palignr $0x8,%%xmm11,%%xmm11 \n"

- "movdqa (%0,%3),%%xmm5 \n"

+ "movdqu (%0,%3),%%xmm5 \n"

"lea (%0,%3,2),%0 \n"

"movdqa %%xmm4,%%xmm12 \n"

"punpcklbw %%xmm5,%%xmm4 \n"

"punpckhbw %%xmm5,%%xmm12 \n"

"movdqa %%xmm4,%%xmm5 \n"

"movdqa %%xmm12,%%xmm13 \n"

- "movdqa (%0),%%xmm6 \n"

+ "movdqu (%0),%%xmm6 \n"

"palignr $0x8,%%xmm5,%%xmm5 \n"

"palignr $0x8,%%xmm13,%%xmm13 \n"

- "movdqa (%0,%3),%%xmm7 \n"

+ "movdqu (%0,%3),%%xmm7 \n"

"lea (%0,%3,2),%0 \n"

"movdqa %%xmm6,%%xmm14 \n"

"punpcklbw %%xmm7,%%xmm6 \n"

@@ -666,29 +662,29 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

// First round of bit swap.

".p2align 2 \n"

"1: \n"

- "movdqa (%0),%%xmm0 \n"

- "movdqa (%0,%4),%%xmm1 \n"

+ "movdqu (%0),%%xmm0 \n"

+ "movdqu (%0,%4),%%xmm1 \n"

"lea (%0,%4,2),%0 \n"

"movdqa %%xmm0,%%xmm8 \n"

"punpcklbw %%xmm1,%%xmm0 \n"

"punpckhbw %%xmm1,%%xmm8 \n"

"movdqa %%xmm8,%%xmm1 \n"

- "movdqa (%0),%%xmm2 \n"

- "movdqa (%0,%4),%%xmm3 \n"

+ "movdqu (%0),%%xmm2 \n"

+ "movdqu (%0,%4),%%xmm3 \n"

"lea (%0,%4,2),%0 \n"

"movdqa %%xmm2,%%xmm8 \n"

"punpcklbw %%xmm3,%%xmm2 \n"

"punpckhbw %%xmm3,%%xmm8 \n"

"movdqa %%xmm8,%%xmm3 \n"

- "movdqa (%0),%%xmm4 \n"

- "movdqa (%0,%4),%%xmm5 \n"

+ "movdqu (%0),%%xmm4 \n"

+ "movdqu (%0,%4),%%xmm5 \n"

"lea (%0,%4,2),%0 \n"

"movdqa %%xmm4,%%xmm8 \n"

"punpcklbw %%xmm5,%%xmm4 \n"

"punpckhbw %%xmm5,%%xmm8 \n"

"movdqa %%xmm8,%%xmm5 \n"

- "movdqa (%0),%%xmm6 \n"

- "movdqa (%0,%4),%%xmm7 \n"

+ "movdqu (%0),%%xmm6 \n"

+ "movdqu (%0,%4),%%xmm7 \n"

"lea (%0,%4,2),%0 \n"

"movdqa %%xmm6,%%xmm8 \n"

"punpcklbw %%xmm7,%%xmm6 \n"

@@ -818,9 +814,7 @@ void TransposePlane(const uint8* src, int src_stride,

}

#endif

#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)

- if (TestCpuFlag(kCpuHasSSSE3) &&

- IS_ALIGNED(width, 16) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {

+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {

TransposeWx8 = TransposeWx8_FAST_SSSE3;

}

#endif

@@ -883,29 +877,38 @@ void RotatePlane180(const uint8* src, int src_stride,

void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;

void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;

#if defined(HAS_MIRRORROW_NEON)

- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {

- MirrorRow = MirrorRow_NEON;

+ if (TestCpuFlag(kCpuHasNEON)) {

+ MirrorRow = MirrorRow_Any_NEON;

+ if (IS_ALIGNED(width, 16)) {

+ MirrorRow = MirrorRow_NEON;

+ }

}

#endif

#if defined(HAS_MIRRORROW_SSE2)

- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&

- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {

- MirrorRow = MirrorRow_SSE2;

+ if (TestCpuFlag(kCpuHasSSE2)) {

+ MirrorRow = MirrorRow_Any_SSE2;

+ if (IS_ALIGNED(width, 16)) {

+ MirrorRow = MirrorRow_SSE2;

+ }

}

#endif

#if defined(HAS_MIRRORROW_SSSE3)

- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&

- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {

- MirrorRow = MirrorRow_SSSE3;

+ if (TestCpuFlag(kCpuHasSSSE3)) {

+ MirrorRow = MirrorRow_Any_SSSE3;

+ if (IS_ALIGNED(width, 16)) {

+ MirrorRow = MirrorRow_SSSE3;

+ }

}

#endif

#if defined(HAS_MIRRORROW_AVX2)

- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {

- MirrorRow = MirrorRow_AVX2;

+ if (TestCpuFlag(kCpuHasAVX2)) {

+ MirrorRow = MirrorRow_Any_AVX2;

+ if (IS_ALIGNED(width, 32)) {

+ MirrorRow = MirrorRow_AVX2;

+ }

}

#endif

+// TODO(fbarchard): Make Mirror on MIPS handle unaligned memory.

#if defined(HAS_MIRRORROW_MIPS_DSPR2)

if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&

IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&

@@ -913,21 +916,14 @@ void RotatePlane180(const uint8* src, int src_stride,

MirrorRow = MirrorRow_MIPS_DSPR2;

}

#endif

-#if defined(HAS_COPYROW_NEON)

- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {

- CopyRow = CopyRow_NEON;

- }

-#endif

-#if defined(HAS_COPYROW_X86)

- if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {

- CopyRow = CopyRow_X86;

+#if defined(HAS_COPYROW_SSE2)

+ if (TestCpuFlag(kCpuHasSSE2)) {

+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;

}

#endif

-#if defined(HAS_COPYROW_SSE2)

- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&

- IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {

- CopyRow = CopyRow_SSE2;

+#if defined(HAS_COPYROW_AVX)

+ if (TestCpuFlag(kCpuHasAVX)) {

+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;

}

#endif

#if defined(HAS_COPYROW_ERMS)

@@ -935,6 +931,11 @@ void RotatePlane180(const uint8* src, int src_stride,

CopyRow = CopyRow_ERMS;

}

#endif

+#if defined(HAS_COPYROW_NEON)

+ if (TestCpuFlag(kCpuHasNEON)) {

+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;

+ }

+#endif

#if defined(HAS_COPYROW_MIPS)

if (TestCpuFlag(kCpuHasMIPS)) {

CopyRow = CopyRow_MIPS;

@@ -1010,13 +1011,13 @@ void TransposeUV(const uint8* src, int src_stride,

if (TestCpuFlag(kCpuHasNEON)) {

TransposeUVWx8 = TransposeUVWx8_NEON;

}

-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)

- if (TestCpuFlag(kCpuHasSSE2) &&

- IS_ALIGNED(width, 8) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {

+#endif

+#if defined(HAS_TRANSPOSE_UVWX8_SSE2)

+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {

TransposeUVWx8 = TransposeUVWx8_SSE2;

}

-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)

+#endif

+#if defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)

if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&

IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {

TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;

@@ -1084,12 +1085,13 @@ void RotateUV180(const uint8* src, int src_stride,

if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {

MirrorRowUV = MirrorUVRow_NEON;

}

-#elif defined(HAS_MIRRORROW_UV_SSSE3)

- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&

- IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {

+#endif

+#if defined(HAS_MIRRORROW_UV_SSSE3)

+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {

MirrorRowUV = MirrorUVRow_SSSE3;

}

-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)

+#endif

+#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)

if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&

IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {

MirrorRowUV = MirrorUVRow_MIPS_DSPR2;