Chromium Code Reviews

Side by Side Diff: source/libvpx/third_party/libyuv/source/rotate_gcc.cc (new file)

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
/*
 * Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"
#include "libyuv/rotate_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))

#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movq (%0),%%xmm0 \n"
    "movq (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "movq (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "movq (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movq (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "movq (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movq (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    // Rewind src by the eight rows just read and step 8 bytes right to the
    // next tile (the stride is negated around the lea, then restored).
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "lea 0x8(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    // Eight columns transposed; loop until width is consumed.
    "sub $0x8,%2 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
    : "+r"(src),    // %0
      "+r"(dst),    // %1
      "+r"(width)   // %2
    : "r"((intptr_t)(src_stride)),  // %3
      "r"((intptr_t)(dst_stride))   // %4
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}

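The three "rounds of bit swap" above interleave at progressively wider granularity (bytes, then 16-bit words, then 32-bit dwords), so that after three rounds each output row holds one source column. A scalar sketch of the kernel's contract (the helper name is illustrative; libyuv's portable equivalents live in rotate_common.cc):

// Reference-only sketch: byte (j, i) of an 8-row-tall, width-column-wide
// source tile lands at (i, j) in the destination, 8 bytes per output row.
static void TransposeWx8_Sketch(const uint8* src, int src_stride,
                                uint8* dst, int dst_stride, int width) {
  for (int i = 0; i < width; ++i) {
    for (int j = 0; j < 8; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}

The asm computes the same permutation eight columns at a time, then steps src right by 8 bytes until width is consumed.
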
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b, int width);
asm (
  DECLARE_FUNCTION(TransposeUVWx8_SSE2)
    // Save callee-saved registers.
    "push %ebx \n"
    "push %esi \n"
    "push %edi \n"
    "push %ebp \n"
    // Load arguments: eax = src, edi = src_stride, edx = dst_a,
    // esi = dst_stride_a, ebx = dst_b, ebp = dst_stride_b.
    "mov 0x14(%esp),%eax \n"
    "mov 0x18(%esp),%edi \n"
    "mov 0x1c(%esp),%edx \n"
    "mov 0x20(%esp),%esi \n"
    "mov 0x24(%esp),%ebx \n"
    "mov 0x28(%esp),%ebp \n"
    // Carve out a 16-byte-aligned scratch slot for spilling a register;
    // the pre-alignment stack pointer is saved at 0x10(%esp).
    "mov %esp,%ecx \n"
    "sub $0x14,%esp \n"
    "and $0xfffffff0,%esp \n"
    "mov %ecx,0x10(%esp) \n"
    // ecx = width, read through the saved, pre-alignment stack pointer.
    "mov 0x2c(%ecx),%ecx \n"

    "1: \n"
    "movdqu (%eax),%xmm0 \n"
    "movdqu (%eax,%edi,1),%xmm1 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm0,%xmm7 \n"
    "punpcklbw %xmm1,%xmm0 \n"
    "punpckhbw %xmm1,%xmm7 \n"
    "movdqa %xmm7,%xmm1 \n"
    "movdqu (%eax),%xmm2 \n"
    "movdqu (%eax,%edi,1),%xmm3 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm2,%xmm7 \n"
    "punpcklbw %xmm3,%xmm2 \n"
    "punpckhbw %xmm3,%xmm7 \n"
    "movdqa %xmm7,%xmm3 \n"
    "movdqu (%eax),%xmm4 \n"
    "movdqu (%eax,%edi,1),%xmm5 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm4,%xmm7 \n"
    "punpcklbw %xmm5,%xmm4 \n"
    "punpckhbw %xmm5,%xmm7 \n"
    "movdqa %xmm7,%xmm5 \n"
    "movdqu (%eax),%xmm6 \n"
    "movdqu (%eax,%edi,1),%xmm7 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqu %xmm5,(%esp) \n"
    "neg %edi \n"
    "movdqa %xmm6,%xmm5 \n"
    "punpcklbw %xmm7,%xmm6 \n"
    "punpckhbw %xmm7,%xmm5 \n"
    "movdqa %xmm5,%xmm7 \n"
    "lea 0x10(%eax,%edi,8),%eax \n"
    "neg %edi \n"
    "movdqa %xmm0,%xmm5 \n"
    "punpcklwd %xmm2,%xmm0 \n"
    "punpckhwd %xmm2,%xmm5 \n"
    "movdqa %xmm5,%xmm2 \n"
    "movdqa %xmm1,%xmm5 \n"
    "punpcklwd %xmm3,%xmm1 \n"
    "punpckhwd %xmm3,%xmm5 \n"
    "movdqa %xmm5,%xmm3 \n"
    "movdqa %xmm4,%xmm5 \n"
    "punpcklwd %xmm6,%xmm4 \n"
    "punpckhwd %xmm6,%xmm5 \n"
    "movdqa %xmm5,%xmm6 \n"
    "movdqu (%esp),%xmm5 \n"
    "movdqu %xmm6,(%esp) \n"
    "movdqa %xmm5,%xmm6 \n"
    "punpcklwd %xmm7,%xmm5 \n"
    "punpckhwd %xmm7,%xmm6 \n"
    "movdqa %xmm6,%xmm7 \n"
    "movdqa %xmm0,%xmm6 \n"
    "punpckldq %xmm4,%xmm0 \n"
    "punpckhdq %xmm4,%xmm6 \n"
    "movdqa %xmm6,%xmm4 \n"
    "movdqu (%esp),%xmm6 \n"
    "movlpd %xmm0,(%edx) \n"
    "movhpd %xmm0,(%ebx) \n"
    "movlpd %xmm4,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm4,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm2,%xmm0 \n"
    "punpckldq %xmm6,%xmm2 \n"
    "movlpd %xmm2,(%edx) \n"
    "movhpd %xmm2,(%ebx) \n"
    "punpckhdq %xmm6,%xmm0 \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm1,%xmm0 \n"
    "punpckldq %xmm5,%xmm1 \n"
    "movlpd %xmm1,(%edx) \n"
    "movhpd %xmm1,(%ebx) \n"
    "punpckhdq %xmm5,%xmm0 \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm3,%xmm0 \n"
    "punpckldq %xmm7,%xmm3 \n"
    "movlpd %xmm3,(%edx) \n"
    "movhpd %xmm3,(%ebx) \n"
    "punpckhdq %xmm7,%xmm0 \n"
    "sub $0x8,%ecx \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "jg 1b \n"
    // Restore the original stack pointer and callee-saved registers.
    "mov 0x10(%esp),%esp \n"
    "pop %ebp \n"
    "pop %edi \n"
    "pop %esi \n"
    "pop %ebx \n"
#if defined(__native_client__)
    // Native Client: return via a bundle-aligned indirect jump.
    "pop %ecx \n"
    "and $0xffffffe0,%ecx \n"
    "jmp *%ecx \n"
#else
    "ret \n"
#endif
);
#endif
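Unlike the other kernels in this file, the 32-bit variant above is a free-standing assembly function rather than GCC extended asm: i386 has only eight general-purpose registers, so the six pointer/stride arguments plus the width counter cannot all be carried as inline-asm operands, and the prologue loads them from the stack by hand instead. DECLARE_FUNCTION comes from libyuv's headers and emits the label for the symbol; a hypothetical, simplified expansion for ELF/i386 (the real macro varies with each platform's name-mangling and visibility conventions):

// Hypothetical illustration only -- not libyuv's actual macro definition.
#define DECLARE_FUNCTION(name) \
    ".text      \n" \
    #name ":    \n"
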
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
    defined(__x86_64__)
// The 64-bit version has enough registers to do 16x8 to 8x16 at a time.
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movdqu (%0),%%xmm0 \n"
    "movdqu (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqu (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm9 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "palignr $0x8,%%xmm9,%%xmm9 \n"
    "movdqu (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm2,%%xmm10 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm10 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movdqa %%xmm10,%%xmm11 \n"
    "movdqu (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "movdqu (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm4,%%xmm12 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm12 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movdqa %%xmm12,%%xmm13 \n"
    "movdqu (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movdqu (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm6,%%xmm14 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "punpckhbw %%xmm7,%%xmm14 \n"
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "movdqa %%xmm14,%%xmm15 \n"
    "lea 0x10(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "punpcklwd %%xmm10,%%xmm8 \n"
    "punpcklwd %%xmm11,%%xmm9 \n"
    "movdqa %%xmm8,%%xmm10 \n"
    "movdqa %%xmm9,%%xmm11 \n"
    "palignr $0x8,%%xmm10,%%xmm10 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "punpcklwd %%xmm14,%%xmm12 \n"
    "punpcklwd %%xmm15,%%xmm13 \n"
    "movdqa %%xmm12,%%xmm14 \n"
    "movdqa %%xmm13,%%xmm15 \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm12,%%xmm8 \n"
    "movq %%xmm8,(%1) \n"
    "movdqa %%xmm8,%%xmm12 \n"
    "palignr $0x8,%%xmm12,%%xmm12 \n"
    "movq %%xmm12,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm14,%%xmm10 \n"
    "movdqa %%xmm10,%%xmm14 \n"
    "movq %%xmm10,(%1) \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "punpckldq %%xmm13,%%xmm9 \n"
    "movq %%xmm14,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm9,%%xmm13 \n"
    "movq %%xmm9,(%1) \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movq %%xmm13,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm15,%%xmm11 \n"
    "movq %%xmm11,(%1) \n"
    "movdqa %%xmm11,%%xmm15 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    // Sixteen columns transposed per iteration in this variant.
    "sub $0x10,%2 \n"
    "movq %%xmm15,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
    : "+r"(src),    // %0
      "+r"(dst),    // %1
      "+r"(width)   // %2
    : "r"((intptr_t)(src_stride)),  // %3
      "r"((intptr_t)(dst_stride))   // %4
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
      "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
  );
}

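Each "round of bit swap" is an interleave of progressively wider lanes. As an illustration only (SSE2 intrinsics; the file itself uses raw asm), the byte-level round performed by punpcklbw corresponds to:

#include <emmintrin.h>  // SSE2; illustration only, not part of this file.

// Bytes a0..a7 and b0..b7 become a0 b0 a1 b1 ... a7 b7 -- the effect of
// "punpcklbw %%xmm1,%%xmm0" above, with row0 in xmm0 and row1 in xmm1.
static inline __m128i InterleaveLowBytes(__m128i row0, __m128i row1) {
  return _mm_unpacklo_epi8(row0, row1);
}

Repeating the interleave at word (punpcklwd) and dword (punpckldq) width completes the transpose; the Fast variant relies on xmm8-xmm15, which exist only in 64-bit mode, to carry a 16-column tile through the same three rounds.
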
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movdqu (%0),%%xmm0 \n"
    "movdqu (%0,%4),%%xmm1 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm1 \n"
    "movdqu (%0),%%xmm2 \n"
    "movdqu (%0,%4),%%xmm3 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm3 \n"
    "movdqu (%0),%%xmm4 \n"
    "movdqu (%0,%4),%%xmm5 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm5 \n"
    "movdqu (%0),%%xmm6 \n"
    "movdqu (%0,%4),%%xmm7 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm6,%%xmm8 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "neg %4 \n"
    "lea 0x10(%0,%4,8),%0 \n"
    "punpckhbw %%xmm7,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm7 \n"
    "neg %4 \n"
    // Second round of bit swap.
    "movdqa %%xmm0,%%xmm8 \n"
    "movdqa %%xmm1,%%xmm9 \n"
    "punpckhwd %%xmm2,%%xmm8 \n"
    "punpckhwd %%xmm3,%%xmm9 \n"
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm2 \n"
    "movdqa %%xmm9,%%xmm3 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "movdqa %%xmm5,%%xmm9 \n"
    "punpckhwd %%xmm6,%%xmm8 \n"
    "punpckhwd %%xmm7,%%xmm9 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm8,%%xmm6 \n"
    "movdqa %%xmm9,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "movdqa %%xmm0,%%xmm8 \n"
    "punpckldq %%xmm4,%%xmm0 \n"
    "movlpd %%xmm0,(%1) \n" // Write back U channel
    "movhpd %%xmm0,(%2) \n" // Write back V channel
    "punpckhdq %%xmm4,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movlpd %%xmm2,(%1) \n"
    "movhpd %%xmm2,(%2) \n"
    "punpckhdq %%xmm6,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm1,%%xmm8 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movlpd %%xmm1,(%1) \n"
    "movhpd %%xmm1,(%2) \n"
    "punpckhdq %%xmm5,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm3,%%xmm8 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movlpd %%xmm3,(%1) \n"
    "movhpd %%xmm3,(%2) \n"
    "punpckhdq %%xmm7,%%xmm8 \n"
    "sub $0x8,%3 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "jg 1b \n"
    : "+r"(src),    // %0
      "+r"(dst_a),  // %1
      "+r"(dst_b),  // %2
      "+r"(width)   // %3
    : "r"((intptr_t)(src_stride)),    // %4
      "r"((intptr_t)(dst_stride_a)),  // %5
      "r"((intptr_t)(dst_stride_b))   // %6
    : "memory", "cc",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
      "xmm8", "xmm9"
  );
}
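The UV kernel has the same shape but de-interleaves as it transposes: the source rows hold U,V byte pairs, and transposed U samples go to dst_a while V samples go to dst_b (movlpd writes the low 8 bytes of each result, movhpd the high 8). A scalar sketch with an illustrative name:

// Reference-only sketch of the TransposeUVWx8 contract.
static void TransposeUVWx8_Sketch(const uint8* src, int src_stride,
                                  uint8* dst_a, int dst_stride_a,
                                  uint8* dst_b, int dst_stride_b, int width) {
  for (int i = 0; i < width; ++i) {  // i indexes U,V pairs across a row.
    for (int j = 0; j < 8; ++j) {    // j indexes the 8 source rows.
      dst_a[i * dst_stride_a + j] = src[j * src_stride + i * 2 + 0];  // U
      dst_b[i * dst_stride_b + j] = src[j * src_stride + i * 2 + 1];  // V
    }
  }
}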
#endif
#endif

#endif // defined(__x86_64__) || defined(__i386__)

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
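
Each Wx8 kernel consumes the full row width internally (the sub/jg loop) but only eight source rows per call, so a caller walks down the image eight rows at a time. A minimal, hypothetical driver sketch (libyuv's real dispatch adds runtime CPU detection and handles the height remainder with a portable path):

// Hypothetical driver, for illustration only; assumes height is handled
// in multiples of 8 and a scalar fallback covers the remainder.
static void TransposePlaneSketch(const uint8* src, int src_stride,
                                 uint8* dst, int dst_stride,
                                 int width, int height) {
  int i = height;
  while (i >= 8) {
    TransposeWx8_SSSE3(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Eight rows down in the source...
    dst += 8;               // ...is eight columns across in the destination.
    i -= 8;
  }
}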