source/libvpx/third_party/libyuv/source/rotate_win.cc - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/third_party/libyuv/source/rotate_win.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 #include "libyuv/row.h"

	12 #include "libyuv/rotate_row.h"

	13

	14 #ifdef __cplusplus

	15 namespace libyuv {

	16 extern "C" {

	17 #endif

	18

	19 // This module is for 32-bit x86 builds with Visual C++ (MSVC inline assembly; clang is excluded).

	20 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \

	21 defined(_MSC_VER) && !defined(__clang__)

	22

	// Transposes 8 source rows of bytes, one 8x8 byte tile per loop iteration,
	// using SSE2 unpack instructions plus SSSE3 palignr.
	//   src, src_stride: first source row and the byte step between source rows.
	//   dst, dst_stride: first destination row and the byte step between
	//                    destination rows.
	//   width:           source column count; the counter drops by 8 per tile.
	// Within a tile, source column c (8 bytes, one from each row) is stored as
	// destination row c.
	// The loop is bottom-tested, so one full tile is always processed, and a
	// width that is not a multiple of 8 is rounded up to whole tiles.
	// The function is naked (no compiler-generated prologue/epilogue): edi, esi
	// and ebp are saved/restored by hand and arguments are read relative to esp.
	23 __declspec(naked)

	24 void TransposeWx8_SSSE3(const uint8* src, int src_stride,

	25 uint8* dst, int dst_stride, int width) {

	26 __asm {

	27 push edi

	28 push esi

	29 push ebp

	30 mov eax, [esp + 12 + 4] // src (12 = bytes pushed above, +4 skips the return address)

	31 mov edi, [esp + 12 + 8] // src_stride

	32 mov edx, [esp + 12 + 12] // dst

	33 mov esi, [esp + 12 + 16] // dst_stride

	34 mov ecx, [esp + 12 + 20] // width

	35

	36 // Read 8 bytes from each of 8 consecutive source rows.

	37 // First round: interleave the bytes of row pairs (0,1) (2,3) (4,5) (6,7).

	38 align 4

	39 convertloop:

	40 movq xmm0, qword ptr [eax] // row 0

	41 lea ebp, [eax + 8] // ebp = src of the next tile (8 bytes further along row 0)

	42 movq xmm1, qword ptr [eax + edi] // row 1

	43 lea eax, [eax + 2 * edi]

	44 punpcklbw xmm0, xmm1 // xmm0 = rows 0/1 interleaved, columns 0..7

	45 movq xmm2, qword ptr [eax] // row 2

	46 movdqa xmm1, xmm0

	47 palignr xmm1, xmm1, 8 // rotate by 8 bytes: columns 4..7 now in the low half of xmm1

	48 movq xmm3, qword ptr [eax + edi] // row 3

	49 lea eax, [eax + 2 * edi]

	50 punpcklbw xmm2, xmm3 // xmm2 = rows 2/3 interleaved

	51 movdqa xmm3, xmm2

	52 movq xmm4, qword ptr [eax] // row 4

	53 palignr xmm3, xmm3, 8 // xmm3 low half = rows 2/3, columns 4..7

	54 movq xmm5, qword ptr [eax + edi] // row 5

	55 punpcklbw xmm4, xmm5 // xmm4 = rows 4/5 interleaved

	56 lea eax, [eax + 2 * edi]

	57 movdqa xmm5, xmm4

	58 movq xmm6, qword ptr [eax] // row 6

	59 palignr xmm5, xmm5, 8 // xmm5 low half = rows 4/5, columns 4..7

	60 movq xmm7, qword ptr [eax + edi] // row 7

	61 punpcklbw xmm6, xmm7 // xmm6 = rows 6/7 interleaved

	62 mov eax, ebp // advance src to the next tile

	63 movdqa xmm7, xmm6

	64 palignr xmm7, xmm7, 8 // xmm7 low half = rows 6/7, columns 4..7

	65 // Second round: interleave 16-bit words so each dword holds one column of 4 rows.

	66 punpcklwd xmm0, xmm2 // rows 0..3, columns 0..3

	67 punpcklwd xmm1, xmm3 // rows 0..3, columns 4..7

	68 movdqa xmm2, xmm0

	69 movdqa xmm3, xmm1

	70 palignr xmm2, xmm2, 8 // low half = rows 0..3, columns 2..3

	71 palignr xmm3, xmm3, 8 // low half = rows 0..3, columns 6..7

	72 punpcklwd xmm4, xmm6 // rows 4..7, columns 0..3

	73 punpcklwd xmm5, xmm7 // rows 4..7, columns 4..7

	74 movdqa xmm6, xmm4

	75 movdqa xmm7, xmm5

	76 palignr xmm6, xmm6, 8 // low half = rows 4..7, columns 2..3

	77 palignr xmm7, xmm7, 8 // low half = rows 4..7, columns 6..7

	78 // Third round: interleave dwords so each qword is one complete 8-byte output row.

	79 // Write to the destination pointer, two rows at a time.

	80 punpckldq xmm0, xmm4 // low qword = column 0, high qword = column 1

	81 movq qword ptr [edx], xmm0 // dst row 0

	82 movdqa xmm4, xmm0

	83 palignr xmm4, xmm4, 8 // bring the high qword down for the movq store

	84 movq qword ptr [edx + esi], xmm4 // dst row 1

	85 lea edx, [edx + 2 * esi]

	86 punpckldq xmm2, xmm6 // columns 2 and 3

	87 movdqa xmm6, xmm2

	88 palignr xmm6, xmm6, 8

	89 movq qword ptr [edx], xmm2 // dst row 2

	90 punpckldq xmm1, xmm5 // columns 4 and 5

	91 movq qword ptr [edx + esi], xmm6 // dst row 3

	92 lea edx, [edx + 2 * esi]

	93 movdqa xmm5, xmm1

	94 movq qword ptr [edx], xmm1 // dst row 4

	95 palignr xmm5, xmm5, 8

	96 punpckldq xmm3, xmm7 // columns 6 and 7

	97 movq qword ptr [edx + esi], xmm5 // dst row 5

	98 lea edx, [edx + 2 * esi]

	99 movq qword ptr [edx], xmm3 // dst row 6

	100 movdqa xmm7, xmm3

	101 palignr xmm7, xmm7, 8

	102 sub ecx, 8 // sets the flags tested by jg; the movq and lea below leave them untouched

	103 movq qword ptr [edx + esi], xmm7 // dst row 7

	104 lea edx, [edx + 2 * esi] // dst now points at the first row of the next tile

	105 jg convertloop // repeat while the remaining width is > 0

	106

	107 pop ebp

	108 pop esi

	109 pop edi

	110 ret

	111 }

	112 }

	113

	// Transposes 8 source rows of byte-interleaved two-channel data and splits the
	// channels: even-indexed source bytes end up in dst_a, odd-indexed bytes in
	// dst_b.
	//   src, src_stride:     first source row and the byte step between rows.
	//   dst_a, dst_stride_a: first destination row / row step for even bytes.
	//   dst_b, dst_stride_b: first destination row / row step for odd bytes.
	//   w:                   loop counter; drops by 8 per iteration.
	// Each iteration loads 16 bytes (8 byte pairs) from each of the 8 source rows
	// and stores 8 rows of 8 bytes to each destination.
	// The loop is bottom-tested, so one iteration always runs.
	// All eight xmm registers hold data during the unpacks, so a 16-byte aligned
	// stack slot at [esp] is used to spill one register at a time; the esp value
	// from before the alignment is kept at [esp + 16] and restored before the pops.
	// The function is naked (no compiler-generated prologue/epilogue).
	114 __declspec(naked)

	115 void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

	116 uint8* dst_a, int dst_stride_a,

	117 uint8* dst_b, int dst_stride_b,

	118 int w) {

	119 __asm {

	120 push ebx

	121 push esi

	122 push edi

	123 push ebp

	124 mov eax, [esp + 16 + 4] // src (16 = bytes pushed above, +4 skips the return address)

	125 mov edi, [esp + 16 + 8] // src_stride

	126 mov edx, [esp + 16 + 12] // dst_a

	127 mov esi, [esp + 16 + 16] // dst_stride_a

	128 mov ebx, [esp + 16 + 20] // dst_b

	129 mov ebp, [esp + 16 + 24] // dst_stride_b

	130 mov ecx, esp // remember esp before it is realigned

	131 sub esp, 4 + 16 // room for the saved esp (4) and one xmm spill slot (16)

	132 and esp, ~15 // round down so the spill slot at [esp] is 16-byte aligned

	133 mov [esp + 16], ecx // saved esp lives just above the spill slot

	134 mov ecx, [ecx + 16 + 28] // w (read through the saved esp)

	135

	136 align 4

	137 convertloop:

	138 // Read 16 bytes from each of 8 consecutive source rows.

	139 // First round: interleave the bytes of row pairs (0,1) (2,3) (4,5) (6,7).

	140 movdqu xmm0, [eax] // row 0

	141 movdqu xmm1, [eax + edi] // row 1

	142 lea eax, [eax + 2 * edi]

	143 movdqa xmm7, xmm0 // use xmm7 as temp register.

	144 punpcklbw xmm0, xmm1 // xmm0 = rows 0/1, source bytes 0..7

	145 punpckhbw xmm7, xmm1

	146 movdqa xmm1, xmm7 // xmm1 = rows 0/1, source bytes 8..15

	147 movdqu xmm2, [eax] // row 2

	148 movdqu xmm3, [eax + edi] // row 3

	149 lea eax, [eax + 2 * edi]

	150 movdqa xmm7, xmm2

	151 punpcklbw xmm2, xmm3 // xmm2 = rows 2/3, source bytes 0..7

	152 punpckhbw xmm7, xmm3

	153 movdqa xmm3, xmm7 // xmm3 = rows 2/3, source bytes 8..15

	154 movdqu xmm4, [eax] // row 4

	155 movdqu xmm5, [eax + edi] // row 5

	156 lea eax, [eax + 2 * edi]

	157 movdqa xmm7, xmm4

	158 punpcklbw xmm4, xmm5 // xmm4 = rows 4/5, source bytes 0..7

	159 punpckhbw xmm7, xmm5

	160 movdqa xmm5, xmm7 // xmm5 = rows 4/5, source bytes 8..15

	161 movdqu xmm6, [eax] // row 6

	162 movdqu xmm7, [eax + edi] // row 7

	163 lea eax, [eax + 2 * edi] // eax = src + 8 * src_stride

	164 movdqu [esp], xmm5 // backup xmm5

	165 neg edi // edi = -src_stride for the rewind below

	166 movdqa xmm5, xmm6 // use xmm5 as temp register.

	167 punpcklbw xmm6, xmm7 // xmm6 = rows 6/7, source bytes 0..7

	168 punpckhbw xmm5, xmm7

	169 movdqa xmm7, xmm5 // xmm7 = rows 6/7, source bytes 8..15

	170 lea eax, [eax + 8 * edi + 16] // back up 8 rows and step 16 bytes right: src of the next iteration

	171 neg edi // restore src_stride

	172 // Second round: interleave 16-bit words so each dword holds one byte column of 4 rows.

	173 movdqa xmm5, xmm0

	174 punpcklwd xmm0, xmm2 // rows 0..3, source bytes 0..3

	175 punpckhwd xmm5, xmm2

	176 movdqa xmm2, xmm5 // rows 0..3, source bytes 4..7

	177 movdqa xmm5, xmm1

	178 punpcklwd xmm1, xmm3 // rows 0..3, source bytes 8..11

	179 punpckhwd xmm5, xmm3

	180 movdqa xmm3, xmm5 // rows 0..3, source bytes 12..15

	181 movdqa xmm5, xmm4

	182 punpcklwd xmm4, xmm6 // rows 4..7, source bytes 0..3

	183 punpckhwd xmm5, xmm6

	184 movdqa xmm6, xmm5 // rows 4..7, source bytes 4..7

	185 movdqu xmm5, [esp] // restore xmm5

	186 movdqu [esp], xmm6 // backup xmm6

	187 movdqa xmm6, xmm5 // use xmm6 as temp register.

	188 punpcklwd xmm5, xmm7 // rows 4..7, source bytes 8..11

	189 punpckhwd xmm6, xmm7

	190 movdqa xmm7, xmm6 // rows 4..7, source bytes 12..15

	191 // Third round: interleave dwords so each qword is one 8-row byte column.

	192 // Write to the destination pointers: low qword to dst_a, high qword to dst_b.

	193 movdqa xmm6, xmm0

	194 punpckldq xmm0, xmm4 // source bytes 0 (low qword) and 1 (high qword)

	195 punpckhdq xmm6, xmm4

	196 movdqa xmm4, xmm6 // source bytes 2 and 3

	197 movdqu xmm6, [esp] // restore xmm6

	198 movlpd qword ptr [edx], xmm0 // dst_a row 0

	199 movhpd qword ptr [ebx], xmm0 // dst_b row 0

	200 movlpd qword ptr [edx + esi], xmm4 // dst_a row 1

	201 lea edx, [edx + 2 * esi]

	202 movhpd qword ptr [ebx + ebp], xmm4 // dst_b row 1

	203 lea ebx, [ebx + 2 * ebp]

	204 movdqa xmm0, xmm2 // use xmm0 as the temp register.

	205 punpckldq xmm2, xmm6 // source bytes 4 and 5

	206 movlpd qword ptr [edx], xmm2 // dst_a row 2

	207 movhpd qword ptr [ebx], xmm2 // dst_b row 2

	208 punpckhdq xmm0, xmm6 // source bytes 6 and 7

	209 movlpd qword ptr [edx + esi], xmm0 // dst_a row 3

	210 lea edx, [edx + 2 * esi]

	211 movhpd qword ptr [ebx + ebp], xmm0 // dst_b row 3

	212 lea ebx, [ebx + 2 * ebp]

	213 movdqa xmm0, xmm1 // use xmm0 as the temp register.

	214 punpckldq xmm1, xmm5 // source bytes 8 and 9

	215 movlpd qword ptr [edx], xmm1 // dst_a row 4

	216 movhpd qword ptr [ebx], xmm1 // dst_b row 4

	217 punpckhdq xmm0, xmm5 // source bytes 10 and 11

	218 movlpd qword ptr [edx + esi], xmm0 // dst_a row 5

	219 lea edx, [edx + 2 * esi]

	220 movhpd qword ptr [ebx + ebp], xmm0 // dst_b row 5

	221 lea ebx, [ebx + 2 * ebp]

	222 movdqa xmm0, xmm3 // use xmm0 as the temp register.

	223 punpckldq xmm3, xmm7 // source bytes 12 and 13

	224 movlpd qword ptr [edx], xmm3 // dst_a row 6

	225 movhpd qword ptr [ebx], xmm3 // dst_b row 6

	226 punpckhdq xmm0, xmm7 // source bytes 14 and 15

	227 sub ecx, 8 // sets the flags tested by jg; the stores and lea below leave them untouched

	228 movlpd qword ptr [edx + esi], xmm0 // dst_a row 7

	229 lea edx, [edx + 2 * esi]

	230 movhpd qword ptr [ebx + ebp], xmm0 // dst_b row 7

	231 lea ebx, [ebx + 2 * ebp]

	232 jg convertloop // repeat while the remaining count is > 0

	233

	234 mov esp, [esp + 16] // restore the esp saved before alignment

	235 pop ebp

	236 pop edi

	237 pop esi

	238 pop ebx

	239 ret

	240 }

	241 }

	242

	243 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

	244

	245 #ifdef __cplusplus

	246 } // extern "C"

	247 } // namespace libyuv

	248 #endif

OLD	NEW

« no previous file with comments | « source/libvpx/third_party/libyuv/source/rotate_neon64.cc ('k') | source/libvpx/third_party/libyuv/source/row_any.cc » ('j') | no next file with comments »