Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(438)

Side by Side Diff: source/libvpx/vp8/encoder/x86/sad_sse2.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 %include "vpx_ports/x86_abi_support.asm" 12 %include "vpx_ports/x86_abi_support.asm"
13 13
14 ;unsigned int vp8_sad16x16_wmt( 14 ;unsigned int vp8_sad16x16_wmt(
15 ; unsigned char *src_ptr, 15 ; unsigned char *src_ptr,
16 ; int src_stride, 16 ; int src_stride,
17 ; unsigned char *ref_ptr, 17 ; unsigned char *ref_ptr,
18 ; int ref_stride) 18 ; int ref_stride)
; Sum of absolute differences between a 16x16 byte block at src_ptr and one at
; ref_ptr; the total is returned in rax.
; What this patch changes (old column vs. new column): the running total moves
; from xmm7 to xmm6, the second-row scratch register moves from xmm6 to xmm2,
; and SAVE_XMM 6 / RESTORE_XMM are added around the body. The new column no
; longer touches xmm7.
; NOTE(review): SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM, UNSHADOW_ARGS, arg()
; and sym() are not defined on this page; presumably they come from the included
; vpx_ports/x86_abi_support.asm, and SAVE_XMM 6 presumably preserves xmm6 for
; ABIs where it is callee-saved - confirm against that file.
19 global sym(vp8_sad16x16_wmt) 19 global sym(vp8_sad16x16_wmt)
20 sym(vp8_sad16x16_wmt): 20 sym(vp8_sad16x16_wmt):
21 push rbp 21 push rbp
22 mov rbp, rsp 22 mov rbp, rsp
23 SHADOW_ARGS_TO_STACK 4 23 SHADOW_ARGS_TO_STACK 4
24 SAVE_XMM 6
24 push rsi 25 push rsi
25 push rdi 26 push rdi
26 ; end prolog 27 ; end prolog
27 28
28 mov rsi, arg(0) ;src_ptr 29 mov rsi, arg(0) ;src_ptr
29 mov rdi, arg(2) ;ref_ptr 30 mov rdi, arg(2) ;ref_ptr
30 31
; Both strides are 32-bit ints, sign-extended so they can be used in 64-bit
; address arithmetic.
31 movsxd rax, dword ptr arg(1) ;src_stride 32 movsxd rax, dword ptr arg(1) ;src_stride
32 movsxd rdx, dword ptr arg(3) ;ref_stride 33 movsxd rdx, dword ptr arg(3) ;ref_stride
33 34
; rcx = rsi + 16*src_stride, built in two steps because the largest address
; scale factor is 8. It is the value rsi has after 16 rows and serves as the
; loop-exit sentinel.
34 lea rcx, [rsi+rax*8] 35 lea rcx, [rsi+rax*8]
35 36
36 lea rcx, [rcx+rax*8] 37 lea rcx, [rcx+rax*8]
; Zero the running total (xmm7 before the patch, xmm6 after).
37 pxor xmm7, xmm7 38 pxor xmm6, xmm6
38 39
; Each pass handles two rows, so the loop body runs 8 times for 16 rows.
39 x16x16sad_wmt_loop: 40 x16x16sad_wmt_loop:
40 41
; Row 0: the 16 source bytes are loaded as two 8-byte halves (xmm0, xmm2), and
; likewise the reference bytes (xmm1, xmm3).
41 movq xmm0, QWORD PTR [rsi] 42 movq xmm0, QWORD PTR [rsi]
42 movq xmm2, QWORD PTR [rsi+8] 43 movq xmm2, QWORD PTR [rsi+8]
43 44
44 movq xmm1, QWORD PTR [rdi] 45 movq xmm1, QWORD PTR [rdi]
45 movq xmm3, QWORD PTR [rdi+8] 46 movq xmm3, QWORD PTR [rdi+8]
46 47
; Row 1, low 8 bytes of source and reference.
47 movq xmm4, QWORD PTR [rsi+rax] 48 movq xmm4, QWORD PTR [rsi+rax]
48 movq xmm5, QWORD PTR [rdi+rdx] 49 movq xmm5, QWORD PTR [rdi+rdx]
49 50
50 51
; Interleave the two halves so all 16 bytes of the row sit in one register.
; Source and reference get the same byte permutation, so the sum of absolute
; differences is unchanged by it.
51 punpcklbw xmm0, xmm2 52 punpcklbw xmm0, xmm2
52 punpcklbw xmm1, xmm3 53 punpcklbw xmm1, xmm3
53 54
; psadbw yields one partial sum per 64-bit half of xmm0.
54 psadbw xmm0, xmm1 55 psadbw xmm0, xmm1
; Row 1, high 8 source bytes. After the patch this reuses xmm2, which is no
; longer read once the punpcklbw above has consumed it.
55 movq xmm6, QWORD PTR [rsi+rax+8] 56 movq xmm2, QWORD PTR [rsi+rax+8]
56 57
57 movq xmm3, QWORD PTR [rdi+rdx+8] 58 movq xmm3, QWORD PTR [rdi+rdx+8]
; Advance both pointers by two rows; the row-1 loads above were already issued
; relative to the old pointer values.
58 lea rsi, [rsi+rax*2] 59 lea rsi, [rsi+rax*2]
59 60
60 lea rdi, [rdi+rdx*2] 61 lea rdi, [rdi+rdx*2]
61 punpcklbw xmm4, xmm6 62 punpcklbw xmm4, xmm2
62 63
63 punpcklbw xmm5, xmm3 64 punpcklbw xmm5, xmm3
64 psadbw xmm4, xmm5 65 psadbw xmm4, xmm5
65 66
; Accumulate both rows. Word adds cannot overflow here: each half-register sum
; is at most 8*255 = 2040 per row, so at most 16*2040 = 32640 after 16 rows.
66 paddw xmm7, xmm0 67 paddw xmm6, xmm0
67 paddw xmm7, xmm4 68 paddw xmm6, xmm4
68 69
; Leave the loop once the source pointer reaches the 16-row sentinel.
69 cmp rsi, rcx 70 cmp rsi, rcx
70 jne x16x16sad_wmt_loop 71 jne x16x16sad_wmt_loop
71 72
; Fold the two partial sums: copy the low half, shift the high half down by
; 8 bytes, add. The combined total is at most 65280 and still fits in 16 bits.
72 movq xmm0, xmm7 73 movq xmm0, xmm6
73 psrldq xmm7, 8 74 psrldq xmm6, 8
74 75
75 paddw xmm0, xmm7 76 paddw xmm0, xmm6
; Return value: low 64 bits of xmm0 moved into rax.
76 movq rax, xmm0 77 movq rax, xmm0
77 78
; Epilogue mirrors the prologue in reverse order; RESTORE_XMM (new column only)
; sits between the register pops and UNSHADOW_ARGS, matching where SAVE_XMM was
; placed relative to the pushes.
78 ; begin epilog 79 ; begin epilog
79 pop rdi 80 pop rdi
80 pop rsi 81 pop rsi
82 RESTORE_XMM
81 UNSHADOW_ARGS 83 UNSHADOW_ARGS
82 pop rbp 84 pop rbp
83 ret 85 ret
84 86
85 ;unsigned int vp8_sad8x16_wmt( 87 ;unsigned int vp8_sad8x16_wmt(
86 ; unsigned char *src_ptr, 88 ; unsigned char *src_ptr,
87 ; int src_stride, 89 ; int src_stride,
88 ; unsigned char *ref_ptr, 90 ; unsigned char *ref_ptr,
89 ; int ref_stride, 91 ; int ref_stride,
90 ; int max_err) 92 ; int max_err)
(...skipping 14 matching lines...) Expand all
105 movsxd rdx, dword ptr arg(3) ;ref_stride 107 movsxd rdx, dword ptr arg(3) ;ref_stride
106 108
107 lea rcx, [rsi+rbx*8] 109 lea rcx, [rsi+rbx*8]
108 110
109 lea rcx, [rcx+rbx*8] 111 lea rcx, [rcx+rbx*8]
110 pxor mm7, mm7 112 pxor mm7, mm7
111 113
112 x8x16sad_wmt_loop: 114 x8x16sad_wmt_loop:
113 115
114 movq rax, mm7 116 movq rax, mm7
115 cmp rax, arg(4) 117 cmp eax, arg(4)
116 jg x8x16sad_wmt_early_exit 118 jg x8x16sad_wmt_early_exit
117 119
118 movq mm0, QWORD PTR [rsi] 120 movq mm0, QWORD PTR [rsi]
119 movq mm1, QWORD PTR [rdi] 121 movq mm1, QWORD PTR [rdi]
120 122
121 movq mm2, QWORD PTR [rsi+rbx] 123 movq mm2, QWORD PTR [rsi+rbx]
122 movq mm3, QWORD PTR [rdi+rdx] 124 movq mm3, QWORD PTR [rdi+rdx]
123 125
124 psadbw mm0, mm1 126 psadbw mm0, mm1
125 psadbw mm2, mm3 127 psadbw mm2, mm3
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 168
167 movsxd rbx, dword ptr arg(1) ;src_stride 169 movsxd rbx, dword ptr arg(1) ;src_stride
168 movsxd rdx, dword ptr arg(3) ;ref_stride 170 movsxd rdx, dword ptr arg(3) ;ref_stride
169 171
170 lea rcx, [rsi+rbx*8] 172 lea rcx, [rsi+rbx*8]
171 pxor mm7, mm7 173 pxor mm7, mm7
172 174
173 x8x8sad_wmt_loop: 175 x8x8sad_wmt_loop:
174 176
175 movq rax, mm7 177 movq rax, mm7
176 cmp rax, arg(4) 178 cmp eax, arg(4)
177 jg x8x8sad_wmt_early_exit 179 jg x8x8sad_wmt_early_exit
178 180
179 movq mm0, QWORD PTR [rsi] 181 movq mm0, QWORD PTR [rsi]
180 movq mm1, QWORD PTR [rdi] 182 movq mm1, QWORD PTR [rdi]
181 183
182 psadbw mm0, mm1 184 psadbw mm0, mm1
183 lea rsi, [rsi+rbx] 185 lea rsi, [rsi+rbx]
184 186
185 add rdi, rdx 187 add rdi, rdx
186 paddw mm7, mm0 188 paddw mm7, mm0
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 277
276 movsxd rbx, dword ptr arg(1) ;src_stride 278 movsxd rbx, dword ptr arg(1) ;src_stride
277 movsxd rdx, dword ptr arg(3) ;ref_stride 279 movsxd rdx, dword ptr arg(3) ;ref_stride
278 280
279 lea rcx, [rsi+rbx*8] 281 lea rcx, [rsi+rbx*8]
280 pxor mm7, mm7 282 pxor mm7, mm7
281 283
282 x16x8sad_wmt_loop: 284 x16x8sad_wmt_loop:
283 285
284 movq rax, mm7 286 movq rax, mm7
285 cmp rax, arg(4) 287 cmp eax, arg(4)
286 jg x16x8sad_wmt_early_exit 288 jg x16x8sad_wmt_early_exit
287 289
288 movq mm0, QWORD PTR [rsi] 290 movq mm0, QWORD PTR [rsi]
289 movq mm2, QWORD PTR [rsi+8] 291 movq mm2, QWORD PTR [rsi+8]
290 292
291 movq mm1, QWORD PTR [rdi] 293 movq mm1, QWORD PTR [rdi]
292 movq mm3, QWORD PTR [rdi+8] 294 movq mm3, QWORD PTR [rdi+8]
293 295
294 movq mm4, QWORD PTR [rsi+rbx] 296 movq mm4, QWORD PTR [rsi+rbx]
295 movq mm5, QWORD PTR [rdi+rdx] 297 movq mm5, QWORD PTR [rdi+rdx]
(...skipping 23 matching lines...) Expand all
319 321
320 x16x8sad_wmt_early_exit: 322 x16x8sad_wmt_early_exit:
321 323
322 ; begin epilog 324 ; begin epilog
323 pop rdi 325 pop rdi
324 pop rsi 326 pop rsi
325 pop rbx 327 pop rbx
326 UNSHADOW_ARGS 328 UNSHADOW_ARGS
327 pop rbp 329 pop rbp
328 ret 330 ret
331
332 ;void vp8_copy32xn_sse2(
333 ; unsigned char *src_ptr,
334 ; int src_stride,
335 ; unsigned char *dst_ptr,
336 ; int dst_stride,
337 ; int height);
; New function added by this patch (it appears only in the right-hand column).
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr, stepping the
; source by src_stride and the destination by dst_stride per row.
; Loads use movdqu (no alignment requirement); stores use movdqa, which faults
; on an address that is not 16-byte aligned.
; NOTE(review): this assumes dst_ptr is 16-byte aligned and dst_stride is a
; multiple of 16 - confirm against the callers.
; NOTE(review): SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM, UNSHADOW_ARGS, arg()
; and sym() are not defined on this page; presumably they come from the included
; vpx_ports/x86_abi_support.asm - confirm there what SAVE_XMM 7 preserves.
338 global sym(vp8_copy32xn_sse2)
339 sym(vp8_copy32xn_sse2):
340 push rbp
341 mov rbp, rsp
342 SHADOW_ARGS_TO_STACK 5
343 SAVE_XMM 7
344 push rsi
345 push rdi
346 ; end prolog
347
348 mov rsi, arg(0) ;src_ptr
349 mov rdi, arg(2) ;dst_ptr
350
; The int arguments are sign-extended to 64 bits; rcx is the count of rows
; still to copy.
351 movsxd rax, dword ptr arg(1) ;src_stride
352 movsxd rdx, dword ptr arg(3) ;dst_stride
353 movsxd rcx, dword ptr arg(4) ;height
354
; Main loop: four rows per pass, held in xmm0-xmm7 (two registers per row).
; NOTE(review): the body runs once before rcx is tested. With height < 4 it
; still reads and writes four rows and leaves rcx negative; the `je
; copy_is_done` test below then fails and the one-row loop counts down from a
; negative value. Callers presumably always pass height >= 4 - confirm.
355 block_copy_sse2_loopx4:
356 movdqu xmm0, XMMWORD PTR [rsi]
357 movdqu xmm1, XMMWORD PTR [rsi + 16]
358 movdqu xmm2, XMMWORD PTR [rsi + rax]
359 movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
360
361 lea rsi, [rsi+rax*2]
362
363 movdqu xmm4, XMMWORD PTR [rsi]
364 movdqu xmm5, XMMWORD PTR [rsi + 16]
365 movdqu xmm6, XMMWORD PTR [rsi + rax]
366 movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
367
368 lea rsi, [rsi+rax*2]
369
; All eight loads are issued before the first store.
370 movdqa XMMWORD PTR [rdi], xmm0
371 movdqa XMMWORD PTR [rdi + 16], xmm1
372 movdqa XMMWORD PTR [rdi + rdx], xmm2
373 movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
374
375 lea rdi, [rdi+rdx*2]
376
377 movdqa XMMWORD PTR [rdi], xmm4
378 movdqa XMMWORD PTR [rdi + 16], xmm5
379 movdqa XMMWORD PTR [rdi + rdx], xmm6
380 movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
381
382 lea rdi, [rdi+rdx*2]
383
; Four rows done; repeat while at least four remain (signed comparison).
384 sub rcx, 4
385 cmp rcx, 4
386 jge block_copy_sse2_loopx4
387
; Skip the tail loop when no rows are left over.
388 cmp rcx, 0
389 je copy_is_done
390
; Tail loop: copies the leftover rows one at a time.
391 block_copy_sse2_loop:
392 movdqu xmm0, XMMWORD PTR [rsi]
393 movdqu xmm1, XMMWORD PTR [rsi + 16]
394 lea rsi, [rsi+rax]
395
396 movdqa XMMWORD PTR [rdi], xmm0
397 movdqa XMMWORD PTR [rdi + 16], xmm1
398 lea rdi, [rdi+rdx]
399
; sub sets ZF, so no separate compare is needed; lea above leaves flags alone.
400 sub rcx, 1
401 jne block_copy_sse2_loop
402
403 copy_is_done:
404 ; begin epilog
405 pop rdi
406 pop rsi
407 RESTORE_XMM
408 UNSHADOW_ARGS
409 pop rbp
410 ret
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/quantize_x86.h ('k') | source/libvpx/vp8/encoder/x86/sad_sse3.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698