Index: source/libvpx/third_party/libyuv/source/rotate_win.cc
diff --git a/source/libvpx/third_party/libyuv/source/rotate_win.cc b/source/libvpx/third_party/libyuv/source/rotate_win.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2760066dfd050d4e932a0ff7891cd0e0e7ea0ca7
--- /dev/null
+++ b/source/libvpx/third_party/libyuv/source/rotate_win.cc
@@ -0,0 +1,248 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/rotate_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for Visual C x86.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
+    defined(_MSC_VER) && !defined(__clang__)
+
+__declspec(naked)
+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+                        uint8* dst, int dst_stride, int width) {
+  __asm {
+    push      edi
+    push      esi
+    push      ebp
+    mov       eax, [esp + 12 + 4]   // src
+    mov       edi, [esp + 12 + 8]   // src_stride
+    mov       edx, [esp + 12 + 12]  // dst
+    mov       esi, [esp + 12 + 16]  // dst_stride
+    mov       ecx, [esp + 12 + 20]  // width
+
+    // Read in the data from the source pointer.
+    // First round of bit swap.
+    align      4
+ convertloop:
+    movq      xmm0, qword ptr [eax]
+    lea       ebp, [eax + 8]
+    movq      xmm1, qword ptr [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    punpcklbw xmm0, xmm1
+    movq      xmm2, qword ptr [eax]
+    movdqa    xmm1, xmm0
+    palignr   xmm1, xmm1, 8
+    movq      xmm3, qword ptr [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    punpcklbw xmm2, xmm3
+    movdqa    xmm3, xmm2
+    movq      xmm4, qword ptr [eax]
+    palignr   xmm3, xmm3, 8
+    movq      xmm5, qword ptr [eax + edi]
+    punpcklbw xmm4, xmm5
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm5, xmm4
+    movq      xmm6, qword ptr [eax]
+    palignr   xmm5, xmm5, 8
+    movq      xmm7, qword ptr [eax + edi]
+    punpcklbw xmm6, xmm7
+    mov       eax, ebp
+    movdqa    xmm7, xmm6
+    palignr   xmm7, xmm7, 8
+    // Second round of bit swap.
+    punpcklwd xmm0, xmm2
+    punpcklwd xmm1, xmm3
+    movdqa    xmm2, xmm0
+    movdqa    xmm3, xmm1
+    palignr   xmm2, xmm2, 8
+    palignr   xmm3, xmm3, 8
+    punpcklwd xmm4, xmm6
+    punpcklwd xmm5, xmm7
+    movdqa    xmm6, xmm4
+    movdqa    xmm7, xmm5
+    palignr   xmm6, xmm6, 8
+    palignr   xmm7, xmm7, 8
+    // Third round of bit swap.
+    // Write to the destination pointer.
+    punpckldq xmm0, xmm4
+    movq      qword ptr [edx], xmm0
+    movdqa    xmm4, xmm0
+    palignr   xmm4, xmm4, 8
+    movq      qword ptr [edx + esi], xmm4
+    lea       edx, [edx + 2 * esi]
+    punpckldq xmm2, xmm6
+    movdqa    xmm6, xmm2
+    palignr   xmm6, xmm6, 8
+    movq      qword ptr [edx], xmm2
+    punpckldq xmm1, xmm5
+    movq      qword ptr [edx + esi], xmm6
+    lea       edx, [edx + 2 * esi]
+    movdqa    xmm5, xmm1
+    movq      qword ptr [edx], xmm1
+    palignr   xmm5, xmm5, 8
+    punpckldq xmm3, xmm7
+    movq      qword ptr [edx + esi], xmm5
+    lea       edx, [edx + 2 * esi]
+    movq      qword ptr [edx], xmm3
+    movdqa    xmm7, xmm3
+    palignr   xmm7, xmm7, 8
+    sub       ecx, 8
+    movq      qword ptr [edx + esi], xmm7
+    lea       edx, [edx + 2 * esi]
+    jg        convertloop
+
+    pop       ebp
+    pop       esi
+    pop       edi
+    ret
+  }
+}
+
+__declspec(naked)
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int w) {
+  __asm {
+    push      ebx
+    push      esi
+    push      edi
+    push      ebp
+    mov       eax, [esp + 16 + 4]   // src
+    mov       edi, [esp + 16 + 8]   // src_stride
+    mov       edx, [esp + 16 + 12]  // dst_a
+    mov       esi, [esp + 16 + 16]  // dst_stride_a
+    mov       ebx, [esp + 16 + 20]  // dst_b
+    mov       ebp, [esp + 16 + 24]  // dst_stride_b
+    mov       ecx, esp
+    sub       esp, 4 + 16
+    and       esp, ~15
+    mov       [esp + 16], ecx
+    mov       ecx, [ecx + 16 + 28]  // w
+
+    align      4
+ convertloop:
+    // Read in the data from the source pointer.
+    // First round of bit swap.
+    movdqu    xmm0, [eax]
+    movdqu    xmm1, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm0  // use xmm7 as temp register.
+    punpcklbw xmm0, xmm1
+    punpckhbw xmm7, xmm1
+    movdqa    xmm1, xmm7
+    movdqu    xmm2, [eax]
+    movdqu    xmm3, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm2
+    punpcklbw xmm2, xmm3
+    punpckhbw xmm7, xmm3
+    movdqa    xmm3, xmm7
+    movdqu    xmm4, [eax]
+    movdqu    xmm5, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm4
+    punpcklbw xmm4, xmm5
+    punpckhbw xmm7, xmm5
+    movdqa    xmm5, xmm7
+    movdqu    xmm6, [eax]
+    movdqu    xmm7, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqu    [esp], xmm5  // backup xmm5
+    neg       edi
+    movdqa    xmm5, xmm6   // use xmm5 as temp register.
+    punpcklbw xmm6, xmm7
+    punpckhbw xmm5, xmm7
+    movdqa    xmm7, xmm5
+    lea       eax, [eax + 8 * edi + 16]
+    neg       edi
+    // Second round of bit swap.
+    movdqa    xmm5, xmm0
+    punpcklwd xmm0, xmm2
+    punpckhwd xmm5, xmm2
+    movdqa    xmm2, xmm5
+    movdqa    xmm5, xmm1
+    punpcklwd xmm1, xmm3
+    punpckhwd xmm5, xmm3
+    movdqa    xmm3, xmm5
+    movdqa    xmm5, xmm4
+    punpcklwd xmm4, xmm6
+    punpckhwd xmm5, xmm6
+    movdqa    xmm6, xmm5
+    movdqu    xmm5, [esp]  // restore xmm5
+    movdqu    [esp], xmm6  // backup xmm6
+    movdqa    xmm6, xmm5   // use xmm6 as temp register.
+    punpcklwd xmm5, xmm7
+    punpckhwd xmm6, xmm7
+    movdqa    xmm7, xmm6
+    // Third round of bit swap.
+    // Write to the destination pointer.
+    movdqa    xmm6, xmm0
+    punpckldq xmm0, xmm4
+    punpckhdq xmm6, xmm4
+    movdqa    xmm4, xmm6
+    movdqu    xmm6, [esp]  // restore xmm6
+    movlpd    qword ptr [edx], xmm0
+    movhpd    qword ptr [ebx], xmm0
+    movlpd    qword ptr [edx + esi], xmm4
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm4
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm2   // use xmm0 as the temp register.
+    punpckldq xmm2, xmm6
+    movlpd    qword ptr [edx], xmm2
+    movhpd    qword ptr [ebx], xmm2
+    punpckhdq xmm0, xmm6
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm1   // use xmm0 as the temp register.
+    punpckldq xmm1, xmm5
+    movlpd    qword ptr [edx], xmm1
+    movhpd    qword ptr [ebx], xmm1
+    punpckhdq xmm0, xmm5
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm3   // use xmm0 as the temp register.
+    punpckldq xmm3, xmm7
+    movlpd    qword ptr [edx], xmm3
+    movhpd    qword ptr [ebx], xmm3
+    punpckhdq xmm0, xmm7
+    sub       ecx, 8
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    jg        convertloop
+
+    mov       esp, [esp + 16]
+    pop       ebp
+    pop       edi
+    pop       esi
+    pop       ebx
+    ret
+  }
+}
+
+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
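
Note on what the first routine computes: TransposeWx8_SSSE3 transposes an 8-row-by-width block, 8 columns per loop iteration, using three rounds of punpck interleaves (bytes, then words, then dwords) with palignr peeling off the upper halves. A minimal scalar sketch of the same src-to-dst mapping follows; the helper name and the use of uint8_t (rather than libyuv's uint8 typedef) are mine, not part of the patch.

#include <stdint.h>

// Scalar reference: each source column of 8 bytes becomes one
// contiguous 8-byte destination row.
static void TransposeWx8_Reference(const uint8_t* src, int src_stride,
                                   uint8_t* dst, int dst_stride, int width) {
  for (int x = 0; x < width; ++x) {
    for (int y = 0; y < 8; ++y) {
      dst[x * dst_stride + y] = src[y * src_stride + x];
    }
  }
}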
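TransposeUVWx8_SSE2 applies the same transpose to interleaved UV data, splitting the two channels into separate destination planes as it writes. The kernel needs one more 16-byte temporary than the eight XMM registers available on 32-bit x86, so it spills a register to a stack slot reserved and aligned by the sub esp, 4 + 16 / and esp, ~15 prologue (see the backup/restore comments in the asm). A scalar sketch of the input/output relationship, again with an illustrative name:

static void TransposeUVWx8_Reference(const uint8_t* src, int src_stride,
                                     uint8_t* dst_a, int dst_stride_a,
                                     uint8_t* dst_b, int dst_stride_b,
                                     int w) {
  for (int x = 0; x < w; ++x) {
    for (int y = 0; y < 8; ++y) {
      // Even bytes (U) land in plane A, odd bytes (V) in plane B.
      dst_a[x * dst_stride_a + y] = src[y * src_stride + x * 2];
      dst_b[x * dst_stride_b + y] = src[y * src_stride + x * 2 + 1];
    }
  }
}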