Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: source/libvpx/vp8/encoder/x86/quantize_ssse3.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license and patent 4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source 5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS 6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree. 7 ; file in the root of the source tree.
8 ; 8 ;
9 9
10 10
11 %include "vpx_ports/x86_abi_support.asm" 11 %include "vpx_ports/x86_abi_support.asm"
12 %include "asm_enc_offsets.asm"
12 13
13 14
14 ;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr 15 ; void vp8_fast_quantize_b_ssse3 | arg
15 ; short *qcoeff_ptr,short *dequant_ptr, 16 ; (BLOCK *b, | 0
16 ; short *round_ptr, 17 ; BLOCKD *d) | 1
17 ; short *quant_ptr, short *dqcoeff_ptr);
18 ; 18 ;
19 global sym(vp8_fast_quantize_b_impl_ssse3) 19
20 sym(vp8_fast_quantize_b_impl_ssse3): 20 global sym(vp8_fast_quantize_b_ssse3)
21 sym(vp8_fast_quantize_b_ssse3):
21 push rbp 22 push rbp
22 mov rbp, rsp 23 mov rbp, rsp
23 SHADOW_ARGS_TO_STACK 6
24 GET_GOT rbx 24 GET_GOT rbx
25
26 %if ABI_IS_32BIT
27 push rdi
25 push rsi 28 push rsi
29 %else
30 %ifidn __OUTPUT_FORMAT__,x64
26 push rdi 31 push rdi
32 push rsi
33 %endif
34 %endif
27 ; end prolog 35 ; end prolog
28 36
29 mov rdx, arg(0) ;coeff_ptr 37 %if ABI_IS_32BIT
30 mov rdi, arg(3) ;round_ptr 38 mov rdi, arg(0) ; BLOCK *b
31 mov rsi, arg(4) ;quant_ptr 39 mov rsi, arg(1) ; BLOCKD *d
40 %else
41 %ifidn __OUTPUT_FORMAT__,x64
42 mov rdi, rcx ; BLOCK *b
43 mov rsi, rdx ; BLOCKD *d
44 %else
45 ;mov rdi, rdi ; BLOCK *b
46 ;mov rsi, rsi ; BLOCKD *d
47 %endif
48 %endif
32 49
33 movdqa xmm0, [rdx] 50 mov rax, [rdi + vp8_block_coeff]
34 movdqa xmm4, [rdx + 16] 51 mov rcx, [rdi + vp8_block_round]
52 mov rdx, [rdi + vp8_block_quant_fast]
35 53
36 movdqa xmm2, [rdi] ;round lo 54 ; coeff
37 movdqa xmm3, [rdi + 16] ;round hi 55 movdqa xmm0, [rax]
56 movdqa xmm4, [rax + 16]
57
58 ; round
59 movdqa xmm2, [rcx]
60 movdqa xmm3, [rcx + 16]
38 61
39 movdqa xmm1, xmm0 62 movdqa xmm1, xmm0
40 movdqa xmm5, xmm4 63 movdqa xmm5, xmm4
41 64
42 psraw xmm0, 15 ;sign of z (aka sz) 65 ; sz = z >> 15
43 psraw xmm4, 15 ;sign of z (aka sz) 66 psraw xmm0, 15
67 psraw xmm4, 15
44 68
45 pabsw xmm1, xmm1 69 pabsw xmm1, xmm1
46 pabsw xmm5, xmm5 70 pabsw xmm5, xmm5
47 71
48 paddw xmm1, xmm2 72 paddw xmm1, xmm2
49 paddw xmm5, xmm3 73 paddw xmm5, xmm3
50 74
51 pmulhw xmm1, [rsi] 75 ; quant_fast
52 pmulhw xmm5, [rsi + 16] 76 pmulhw xmm1, [rdx]
77 pmulhw xmm5, [rdx + 16]
53 78
54 mov rdi, arg(1) ;qcoeff_ptr 79 mov rax, [rsi + vp8_blockd_qcoeff]
55 mov rcx, arg(2) ;dequant_ptr 80 mov rdi, [rsi + vp8_blockd_dequant]
56 mov rsi, arg(5) ;dqcoeff_ptr 81 mov rcx, [rsi + vp8_blockd_dqcoeff]
57 82
58 pxor xmm1, xmm0 83 pxor xmm1, xmm0
59 pxor xmm5, xmm4 84 pxor xmm5, xmm4
60 psubw xmm1, xmm0 85 psubw xmm1, xmm0
61 psubw xmm5, xmm4 86 psubw xmm5, xmm4
62 87
63 movdqa [rdi], xmm1 88 movdqa [rax], xmm1
64 movdqa [rdi + 16], xmm5 89 movdqa [rax + 16], xmm5
65 90
66 movdqa xmm2, [rcx] 91 movdqa xmm2, [rdi]
67 movdqa xmm3, [rcx + 16] 92 movdqa xmm3, [rdi + 16]
68 93
69 pxor xmm4, xmm4 94 pxor xmm4, xmm4
70 pmullw xmm2, xmm1 95 pmullw xmm2, xmm1
71 pmullw xmm3, xmm5 96 pmullw xmm3, xmm5
72 97
73 pcmpeqw xmm1, xmm4 ;non zero mask 98 pcmpeqw xmm1, xmm4 ;non zero mask
74 pcmpeqw xmm5, xmm4 ;non zero mask 99 pcmpeqw xmm5, xmm4 ;non zero mask
75 packsswb xmm1, xmm5 100 packsswb xmm1, xmm5
76 pshufb xmm1, [ GLOBAL(zz_shuf)] 101 pshufb xmm1, [GLOBAL(zz_shuf)]
77 102
78 pmovmskb edx, xmm1 103 pmovmskb edx, xmm1
79 104
80 ; xor ecx, ecx
81 ; mov eax, -1
82 ;find_eob_loop:
83 ; shr edx, 1
84 ; jc fq_skip
85 ; mov eax, ecx
86 ;fq_skip:
87 ; inc ecx
88 ; cmp ecx, 16
89 ; jne find_eob_loop
90 xor rdi, rdi 105 xor rdi, rdi
91 mov eax, -1 106 mov eax, -1
92 xor dx, ax ;flip the bits for bsr 107 xor dx, ax ;flip the bits for bsr
93 bsr eax, edx 108 bsr eax, edx
94 109
95 movdqa [rsi], xmm2 ;store dqcoeff 110 movdqa [rcx], xmm2 ;store dqcoeff
96 movdqa [rsi + 16], xmm3 ;store dqcoeff 111 movdqa [rcx + 16], xmm3 ;store dqcoeff
97 112
98 sub edi, edx ;check for all zeros in bit mask 113 sub edi, edx ;check for all zeros in bit mask
99 sar edi, 31 ;0 or -1 114 sar edi, 31 ;0 or -1
100 add eax, 1 115 add eax, 1
101 and eax, edi ;if the bit mask was all zero, 116 and eax, edi ;if the bit mask was all zero,
102 ;then eob = 0 117 ;then eob = 0
118 mov [rsi + vp8_blockd_eob], eax
119
103 ; begin epilog 120 ; begin epilog
121 %if ABI_IS_32BIT
122 pop rsi
104 pop rdi 123 pop rdi
124 %else
125 %ifidn __OUTPUT_FORMAT__,x64
105 pop rsi 126 pop rsi
127 pop rdi
128 %endif
129 %endif
130
106 RESTORE_GOT 131 RESTORE_GOT
107 UNSHADOW_ARGS
108 pop rbp 132 pop rbp
109 ret 133 ret
110 134
111 SECTION_RODATA 135 SECTION_RODATA
112 align 16 136 align 16
113 zz_shuf: 137 zz_shuf:
114 db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 138 db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/quantize_sse4.asm ('k') | source/libvpx/vp8/encoder/x86/quantize_x86.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698