Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: source/libvpx/vp8/common/x86/idctllm_sse2.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 14 matching lines...) Expand all
25 sym(idct_dequant_0_2x_sse2): 25 sym(idct_dequant_0_2x_sse2):
26 push rbp 26 push rbp
27 mov rbp, rsp 27 mov rbp, rsp
28 SHADOW_ARGS_TO_STACK 6 28 SHADOW_ARGS_TO_STACK 6
29 GET_GOT rbx 29 GET_GOT rbx
30 ; end prolog 30 ; end prolog
31 31
32 mov rdx, arg(1) ; dequant 32 mov rdx, arg(1) ; dequant
33 mov rax, arg(0) ; qcoeff 33 mov rax, arg(0) ; qcoeff
34 34
35 ; Zero out xmm7, for use unpacking
36 pxor xmm7, xmm7
37
38 movd xmm4, [rax] 35 movd xmm4, [rax]
39 movd xmm5, [rdx] 36 movd xmm5, [rdx]
40 37
41 pinsrw xmm4, [rax+32], 4 38 pinsrw xmm4, [rax+32], 4
42 pinsrw xmm5, [rdx], 4 39 pinsrw xmm5, [rdx], 4
43 40
44 pmullw xmm4, xmm5 41 pmullw xmm4, xmm5
45 42
43 ; Zero out xmm5, for use unpacking
44 pxor xmm5, xmm5
45
46 ; clear coeffs 46 ; clear coeffs
47 movd [rax], xmm7 47 movd [rax], xmm5
48 movd [rax+32], xmm7 48 movd [rax+32], xmm5
49 ;pshufb 49 ;pshufb
50 pshuflw xmm4, xmm4, 00000000b 50 pshuflw xmm4, xmm4, 00000000b
51 pshufhw xmm4, xmm4, 00000000b 51 pshufhw xmm4, xmm4, 00000000b
52 52
53 mov rax, arg(2) ; pre 53 mov rax, arg(2) ; pre
54 paddw xmm4, [GLOBAL(fours)] 54 paddw xmm4, [GLOBAL(fours)]
55 55
56 movsxd rcx, dword ptr arg(5) ; blk_stride 56 movsxd rcx, dword ptr arg(5) ; blk_stride
57 psraw xmm4, 3 57 psraw xmm4, 3
58 58
59 movq xmm0, [rax] 59 movq xmm0, [rax]
60 movq xmm1, [rax+rcx] 60 movq xmm1, [rax+rcx]
61 movq xmm2, [rax+2*rcx] 61 movq xmm2, [rax+2*rcx]
62 lea rcx, [3*rcx] 62 lea rcx, [3*rcx]
63 movq xmm3, [rax+rcx] 63 movq xmm3, [rax+rcx]
64 64
65 punpcklbw xmm0, xmm7 65 punpcklbw xmm0, xmm5
66 punpcklbw xmm1, xmm7 66 punpcklbw xmm1, xmm5
67 punpcklbw xmm2, xmm7 67 punpcklbw xmm2, xmm5
68 punpcklbw xmm3, xmm7 68 punpcklbw xmm3, xmm5
69 69
70 mov rax, arg(3) ; dst 70 mov rax, arg(3) ; dst
71 movsxd rdx, dword ptr arg(4) ; dst_stride 71 movsxd rdx, dword ptr arg(4) ; dst_stride
72 72
73 ; Add to predict buffer 73 ; Add to predict buffer
74 paddw xmm0, xmm4 74 paddw xmm0, xmm4
75 paddw xmm1, xmm4 75 paddw xmm1, xmm4
76 paddw xmm2, xmm4 76 paddw xmm2, xmm4
77 paddw xmm3, xmm4 77 paddw xmm3, xmm4
78 78
79 ; pack up before storing 79 ; pack up before storing
80 packuswb xmm0, xmm7 80 packuswb xmm0, xmm5
81 packuswb xmm1, xmm7 81 packuswb xmm1, xmm5
82 packuswb xmm2, xmm7 82 packuswb xmm2, xmm5
83 packuswb xmm3, xmm7 83 packuswb xmm3, xmm5
84 84
85 ; store blocks back out 85 ; store blocks back out
86 movq [rax], xmm0 86 movq [rax], xmm0
87 movq [rax + rdx], xmm1 87 movq [rax + rdx], xmm1
88 88
89 lea rax, [rax + 2*rdx] 89 lea rax, [rax + 2*rdx]
90 90
91 movq [rax], xmm2 91 movq [rax], xmm2
92 movq [rax + rdx], xmm3 92 movq [rax + rdx], xmm3
93 93
94 ; begin epilog 94 ; begin epilog
95 RESTORE_GOT 95 RESTORE_GOT
96 UNSHADOW_ARGS 96 UNSHADOW_ARGS
97 pop rbp 97 pop rbp
98 ret 98 ret
99 99
100 global sym(idct_dequant_full_2x_sse2) 100 global sym(idct_dequant_full_2x_sse2)
101 sym(idct_dequant_full_2x_sse2): 101 sym(idct_dequant_full_2x_sse2):
102 push rbp 102 push rbp
103 mov rbp, rsp 103 mov rbp, rsp
104 SHADOW_ARGS_TO_STACK 7 104 SHADOW_ARGS_TO_STACK 7
105 SAVE_XMM 7
105 GET_GOT rbx 106 GET_GOT rbx
106 push rsi 107 push rsi
107 push rdi 108 push rdi
108 ; end prolog 109 ; end prolog
109 110
110 ; special case when 2 blocks have 0 or 1 coeffs 111 ; special case when 2 blocks have 0 or 1 coeffs
111 ; dc is set as first coeff, so no need to load qcoeff 112 ; dc is set as first coeff, so no need to load qcoeff
112 mov rax, arg(0) ; qcoeff 113 mov rax, arg(0) ; qcoeff
113 mov rsi, arg(2) ; pre 114 mov rsi, arg(2) ; pre
114 mov rdi, arg(3) ; dst 115 mov rdi, arg(3) ; dst
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 341
341 lea rdi, [rdi + 2*rdx] 342 lea rdi, [rdi + 2*rdx]
342 343
343 movq [rdi], xmm2 344 movq [rdi], xmm2
344 movq [rdi + rdx], xmm3 345 movq [rdi + rdx], xmm3
345 346
346 ; begin epilog 347 ; begin epilog
347 pop rdi 348 pop rdi
348 pop rsi 349 pop rsi
349 RESTORE_GOT 350 RESTORE_GOT
351 RESTORE_XMM
350 UNSHADOW_ARGS 352 UNSHADOW_ARGS
351 pop rbp 353 pop rbp
352 ret 354 ret
353 355
354 ;void idct_dequant_dc_0_2x_sse2 356 ;void idct_dequant_dc_0_2x_sse2
355 ; ( 357 ; (
356 ; short *qcoeff - 0 358 ; short *qcoeff - 0
357 ; short *dequant - 1 359 ; short *dequant - 1
358 ; unsigned char *pre - 2 360 ; unsigned char *pre - 2
359 ; unsigned char *dst - 3 361 ; unsigned char *dst - 3
(...skipping 10 matching lines...) Expand all
370 push rdi 372 push rdi
371 ; end prolog 373 ; end prolog
372 374
373 ; special case when 2 blocks have 0 or 1 coeffs 375 ; special case when 2 blocks have 0 or 1 coeffs
374 ; dc is set as first coeff, so no need to load qcoeff 376 ; dc is set as first coeff, so no need to load qcoeff
375 mov rax, arg(0) ; qcoeff 377 mov rax, arg(0) ; qcoeff
376 mov rsi, arg(2) ; pre 378 mov rsi, arg(2) ; pre
377 mov rdi, arg(3) ; dst 379 mov rdi, arg(3) ; dst
378 mov rdx, arg(5) ; dc 380 mov rdx, arg(5) ; dc
379 381
380 ; Zero out xmm7, for use unpacking 382 ; Zero out xmm5, for use unpacking
381 pxor xmm7, xmm7 383 pxor xmm5, xmm5
382 384
383 ; load both dc words at once (2 * 16 bits == one doubleword) 385 ; load both dc words at once (2 * 16 bits == one doubleword)
384 movd xmm4, [rdx] 386 movd xmm4, [rdx]
385 387
386 ; Load up predict blocks 388 ; Load up predict blocks
387 movq xmm0, [rsi] 389 movq xmm0, [rsi]
388 movq xmm1, [rsi+16] 390 movq xmm1, [rsi+16]
389 movq xmm2, [rsi+32] 391 movq xmm2, [rsi+32]
390 movq xmm3, [rsi+48] 392 movq xmm3, [rsi+48]
391 393
392 ; Duplicate and expand dc across 394 ; Duplicate and expand dc across
393 punpcklwd xmm4, xmm4 395 punpcklwd xmm4, xmm4
394 punpckldq xmm4, xmm4 396 punpckldq xmm4, xmm4
395 397
396 ; Rounding to dequant and downshift 398 ; Rounding to dequant and downshift
397 paddw xmm4, [GLOBAL(fours)] 399 paddw xmm4, [GLOBAL(fours)]
398 psraw xmm4, 3 400 psraw xmm4, 3
399 401
400 ; Predict buffer needs to be expanded from bytes to words 402 ; Predict buffer needs to be expanded from bytes to words
401 punpcklbw xmm0, xmm7 403 punpcklbw xmm0, xmm5
402 punpcklbw xmm1, xmm7 404 punpcklbw xmm1, xmm5
403 punpcklbw xmm2, xmm7 405 punpcklbw xmm2, xmm5
404 punpcklbw xmm3, xmm7 406 punpcklbw xmm3, xmm5
405 407
406 ; Add to predict buffer 408 ; Add to predict buffer
407 paddw xmm0, xmm4 409 paddw xmm0, xmm4
408 paddw xmm1, xmm4 410 paddw xmm1, xmm4
409 paddw xmm2, xmm4 411 paddw xmm2, xmm4
410 paddw xmm3, xmm4 412 paddw xmm3, xmm4
411 413
412 ; pack up before storing 414 ; pack up before storing
413 packuswb xmm0, xmm7 415 packuswb xmm0, xmm5
414 packuswb xmm1, xmm7 416 packuswb xmm1, xmm5
415 packuswb xmm2, xmm7 417 packuswb xmm2, xmm5
416 packuswb xmm3, xmm7 418 packuswb xmm3, xmm5
417 419
418 ; Load destination stride before writing out, 420 ; Load destination stride before writing out,
419 ; doesn't need to persist 421 ; doesn't need to persist
420 movsxd rdx, dword ptr arg(4) ; dst_stride 422 movsxd rdx, dword ptr arg(4) ; dst_stride
421 423
422 ; store blocks back out 424 ; store blocks back out
423 movq [rdi], xmm0 425 movq [rdi], xmm0
424 movq [rdi + rdx], xmm1 426 movq [rdi + rdx], xmm1
425 427
426 lea rdi, [rdi + 2*rdx] 428 lea rdi, [rdi + 2*rdx]
427 429
428 movq [rdi], xmm2 430 movq [rdi], xmm2
429 movq [rdi + rdx], xmm3 431 movq [rdi + rdx], xmm3
430 432
431 ; begin epilog 433 ; begin epilog
432 pop rdi 434 pop rdi
433 pop rsi 435 pop rsi
434 RESTORE_GOT 436 RESTORE_GOT
435 UNSHADOW_ARGS 437 UNSHADOW_ARGS
436 pop rbp 438 pop rbp
437 ret 439 ret
438 440
439 global sym(idct_dequant_dc_full_2x_sse2) 441 global sym(idct_dequant_dc_full_2x_sse2)
440 sym(idct_dequant_dc_full_2x_sse2): 442 sym(idct_dequant_dc_full_2x_sse2):
441 push rbp 443 push rbp
442 mov rbp, rsp 444 mov rbp, rsp
443 SHADOW_ARGS_TO_STACK 7 445 SHADOW_ARGS_TO_STACK 7
446 SAVE_XMM 7
444 GET_GOT rbx 447 GET_GOT rbx
445 push rsi 448 push rsi
446 push rdi 449 push rdi
447 ; end prolog 450 ; end prolog
448 451
449 ; special case when 2 blocks have 0 or 1 coeffs 452 ; special case when 2 blocks have 0 or 1 coeffs
450 ; dc is set as first coeff, so no need to load qcoeff 453 ; dc is set as first coeff, so no need to load qcoeff
451 mov rax, arg(0) ; qcoeff 454 mov rax, arg(0) ; qcoeff
452 mov rsi, arg(2) ; pre 455 mov rsi, arg(2) ; pre
453 mov rdi, arg(3) ; dst 456 mov rdi, arg(3) ; dst
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
685 lea rdi, [rdi + 2*rdx] 688 lea rdi, [rdi + 2*rdx]
686 689
687 movq [rdi], xmm2 690 movq [rdi], xmm2
688 movq [rdi + rdx], xmm3 691 movq [rdi + rdx], xmm3
689 692
690 693
691 ; begin epilog 694 ; begin epilog
692 pop rdi 695 pop rdi
693 pop rsi 696 pop rsi
694 RESTORE_GOT 697 RESTORE_GOT
698 RESTORE_XMM
695 UNSHADOW_ARGS 699 UNSHADOW_ARGS
696 pop rbp 700 pop rbp
697 ret 701 ret
698 702
699 SECTION_RODATA 703 SECTION_RODATA
700 align 16 704 align 16
701 fours: 705 fours:
702 times 8 dw 0x0004 706 times 8 dw 0x0004
703 align 16 707 align 16
704 x_s1sqr2: 708 x_s1sqr2:
705 times 8 dw 0x8A8C 709 times 8 dw 0x8A8C
706 align 16 710 align 16
707 x_c1sqr2less1: 711 x_c1sqr2less1:
708 times 8 dw 0x4E7B 712 times 8 dw 0x4E7B
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/x86/idctllm_mmx.asm ('k') | source/libvpx/vp8/common/x86/iwalsh_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698