Index: source/libvpx/third_party/libyuv/source/row_x86.asm |
=================================================================== |
--- source/libvpx/third_party/libyuv/source/row_x86.asm (revision 0) |
+++ source/libvpx/third_party/libyuv/source/row_x86.asm (revision 0) |
@@ -0,0 +1,164 @@
+; |
+; Copyright 2012 The LibYuv Project Authors. All rights reserved. |
+; |
+; Use of this source code is governed by a BSD-style license |
+; that can be found in the LICENSE file in the root of the source |
+; tree. An additional intellectual property rights grant can be found |
+; in the file PATENTS. All contributing project authors may |
+; be found in the AUTHORS file in the root of the source tree. |
+; |
+ |
+%ifdef __YASM_VERSION_ID__ |
+%if __YASM_VERSION_ID__ < 01020000h |
+%error AVX2 is supported only by yasm 1.2.0 or later. |
+%endif |
+%endif |
+%include "x86inc.asm" |
+ |
+SECTION .text |
+ |
+; cglobal's numeric arguments are: number of parameters, GPRs used, and
+; vector (mm/xmm/ymm) registers used.
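+; For example, "cglobal YUY2ToYRow, 3, 3, 3, src_yuy2, dst_y, pix" declares
+; 3 parameters, 3 GPRs and 3 vector registers, then names the parameters.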
+ |
+; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) |
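+;
+; YUY2 stores Y in the even bytes; UYVY stores Y in the odd bytes. The loop
+; masks with 0x00ff (YUY2) or shifts right by 8 (UYVY), then packs the words
+; back to bytes. Each iteration consumes 2 * mmsize source bytes and writes
+; mmsize Y bytes, so pix is processed in chunks of mmsize pixels.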
+ |
+%macro YUY2TOYROW 2-3 |
+cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix |
+%ifidn %1,YUY2 |
+ pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff |
+ psrlw m2, m2, 8 |
+%endif |
+ |
+ ALIGN 4 |
+.convertloop: |
+ mov%2 m0, [src_yuy2q] |
+ mov%2 m1, [src_yuy2q + mmsize] |
+ lea src_yuy2q, [src_yuy2q + mmsize * 2] |
+%ifidn %1,YUY2 |
+ pand m0, m0, m2 ; YUY2 even bytes are Y |
+ pand m1, m1, m2 |
+%else |
+ psrlw m0, m0, 8 ; UYVY odd bytes are Y |
+ psrlw m1, m1, 8 |
+%endif |
+ packuswb m0, m0, m1 |
+%if cpuflag(AVX2) |
+ vpermq m0, m0, 0xd8 ; pick qwords 0,2,1,3 to undo the in-lane pack
+%endif |
+ sub pixd, mmsize ; mov/lea below leave flags intact for jg
+ mov%2 [dst_yq], m0 |
+ lea dst_yq, [dst_yq + mmsize] |
+ jg .convertloop |
+ REP_RET ; x86inc: emits "rep ret" where needed for AMD branch predictors
+%endmacro |
+ |
+; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version. |
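+; Instantiate aligned (mova) and unaligned (movu) variants per instruction
+; set; INIT_MMX/INIT_XMM/INIT_YMM set mmsize and the register names.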
+INIT_MMX MMX |
+YUY2TOYROW YUY2,a, |
+YUY2TOYROW YUY2,u,_Unaligned |
+YUY2TOYROW UYVY,a, |
+YUY2TOYROW UYVY,u,_Unaligned |
+INIT_XMM SSE2 |
+YUY2TOYROW YUY2,a, |
+YUY2TOYROW YUY2,u,_Unaligned |
+YUY2TOYROW UYVY,a, |
+YUY2TOYROW UYVY,u,_Unaligned |
+INIT_YMM AVX2 |
+YUY2TOYROW YUY2,a, |
+YUY2TOYROW UYVY,a, |
+ |
+; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) |
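+;
+; Deinterleaves packed UV pairs: even bytes go to dst_u, odd bytes to dst_v.
+; dst_v is rewritten as an offset from dst_u ("sub dst_vq, dst_uq") so that
+; advancing dst_uq alone steps both output pointers.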
+ |
+%macro SplitUVRow 1-2 |
+cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix |
+ pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff |
+ psrlw m4, m4, 8 |
+ sub dst_vq, dst_uq |
+ |
+ ALIGN 4 |
+.convertloop: |
+ mov%1 m0, [src_uvq] |
+ mov%1 m1, [src_uvq + mmsize] |
+ lea src_uvq, [src_uvq + mmsize * 2] |
+ psrlw m2, m0, 8 ; odd bytes |
+ psrlw m3, m1, 8 |
+ pand m0, m0, m4 ; even bytes |
+ pand m1, m1, m4 |
+ packuswb m0, m0, m1 |
+ packuswb m2, m2, m3 |
+%if cpuflag(AVX2) |
+ vpermq m0, m0, 0xd8 ; pick qwords 0,2,1,3 to undo the in-lane pack
+ vpermq m2, m2, 0xd8
+%endif |
+ mov%1 [dst_uq], m0 |
+ mov%1 [dst_uq + dst_vq], m2 |
+ lea dst_uq, [dst_uq + mmsize] |
+ sub pixd, mmsize |
+ jg .convertloop |
+ REP_RET |
+%endmacro |
+ |
+INIT_MMX MMX |
+SplitUVRow a, |
+SplitUVRow u,_Unaligned |
+INIT_XMM SSE2 |
+SplitUVRow a, |
+SplitUVRow u,_Unaligned |
+INIT_YMM AVX2 |
+SplitUVRow a, |
+ |
+; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, |
+; int width); |
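+;
+; Interleaves planar U and V rows into packed UV pairs via punpcklbw /
+; punpckhbw. src_v is addressed relative to src_u, so only src_uq advances.
+; On AVX2 the unpacks operate within each 128-bit lane, so vperm2i128
+; reassembles the lane halves into sequential order before the stores.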
+ |
+%macro MergeUVRow_ 1-2 |
+cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix |
+ sub src_vq, src_uq |
+ |
+ ALIGN 4 |
+.convertloop: |
+ mov%1 m0, [src_uq] |
+ mov%1 m1, [src_vq] |
+ lea src_uq, [src_uq + mmsize] |
+ punpcklbw m2, m0, m1 ; first 8 UV pairs
+ punpckhbw m0, m0, m1 ; next 8 UV pairs
+%if cpuflag(AVX2) |
+ vperm2i128 m1, m2, m0, 0x20 ; low 128 of ymm2 and low 128 of ymm0
+ vperm2i128 m2, m2, m0, 0x31 ; high 128 of ymm2 and high 128 of ymm0
+ mov%1 [dst_uvq], m1 |
+ mov%1 [dst_uvq + mmsize], m2 |
+%else |
+ mov%1 [dst_uvq], m2 |
+ mov%1 [dst_uvq + mmsize], m0 |
+%endif |
+ lea dst_uvq, [dst_uvq + mmsize * 2] |
+ sub pixd, mmsize |
+ jg .convertloop |
+ REP_RET |
+%endmacro |
+ |
+INIT_MMX MMX |
+MergeUVRow_ a, |
+MergeUVRow_ u,_Unaligned |
+INIT_XMM SSE2 |
+MergeUVRow_ a, |
+MergeUVRow_ u,_Unaligned |
+INIT_YMM AVX2 |
+MergeUVRow_ a, |
+ |