Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Side by Side Diff: source/libvpx/vp8/encoder/x86/ssim_opt.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 %include "vpx_ports/x86_abi_support.asm" 11 %include "vpx_ports/x86_abi_support.asm"
12 12
13 ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr 13 ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
14 %macro TABULATE_SSIM 0 14 %macro TABULATE_SSIM 0
15 paddusw xmm15, xmm3 ; sum_s 15 paddusw xmm15, xmm3 ; sum_s
16 paddusw xmm14, xmm4 ; sum_r 16 paddusw xmm14, xmm4 ; sum_r
17 movdqa xmm1, xmm3 17 movdqa xmm1, xmm3
18 pmaddwd xmm1, xmm1 18 pmaddwd xmm1, xmm1
19 paddq xmm13, xmm1 ; sum_sq_s 19 paddd xmm13, xmm1 ; sum_sq_s
20 movdqa xmm2, xmm4 20 movdqa xmm2, xmm4
21 pmaddwd xmm2, xmm2 21 pmaddwd xmm2, xmm2
22 paddq xmm12, xmm2 ; sum_sq_r 22 paddd xmm12, xmm2 ; sum_sq_r
23 pmaddwd xmm3, xmm4 23 pmaddwd xmm3, xmm4
24 paddq xmm11, xmm3 ; sum_sxr 24 paddd xmm11, xmm3 ; sum_sxr
25 %endmacro 25 %endmacro
26 26
27 ; Sum across the register %1 starting with q words 27 ; Sum across the register %1 starting with q words
28 %macro SUM_ACROSS_Q 1 28 %macro SUM_ACROSS_Q 1
29 movdqa xmm2,%1 29 movdqa xmm2,%1
30 punpckldq %1,xmm0 30 punpckldq %1,xmm0
31 punpckhdq xmm2,xmm0 31 punpckhdq xmm2,xmm0
32 paddq %1,xmm2 32 paddq %1,xmm2
33 movdqa xmm2,%1 33 movdqa xmm2,%1
34 punpcklqdq %1,xmm0 34 punpcklqdq %1,xmm0
(...skipping 24 matching lines...) Expand all
59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2
60 ; without too much hassle, and can probably do better estimates with psadbw 60 ; without too much hassle, and can probably do better estimates with psadbw
61 ; or pavgb. At this point this is just meant to be a first pass for calculating 61 ; or pavgb. At this point this is just meant to be a first pass for calculating
62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion 62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
63 ; in mode selection code. 63 ; in mode selection code.
64 global sym(vp8_ssim_parms_16x16_sse3) 64 global sym(vp8_ssim_parms_16x16_sse3)
65 sym(vp8_ssim_parms_16x16_sse3): 65 sym(vp8_ssim_parms_16x16_sse3):
66 push rbp 66 push rbp
67 mov rbp, rsp 67 mov rbp, rsp
68 SHADOW_ARGS_TO_STACK 9 68 SHADOW_ARGS_TO_STACK 9
69 SAVE_XMM 15
69 push rsi 70 push rsi
70 push rdi 71 push rdi
71 ; end prolog 72 ; end prolog
72 73
73 mov rsi, arg(0) ;s 74 mov rsi, arg(0) ;s
74 mov rcx, arg(1) ;sp 75 mov rcx, arg(1) ;sp
75 mov rdi, arg(2) ;r 76 mov rdi, arg(2) ;r
76 mov rax, arg(3) ;rp 77 mov rax, arg(3) ;rp
77 78
78 pxor xmm0, xmm0 79 pxor xmm0, xmm0
(...skipping 29 matching lines...) Expand all
108 dec rdx ; counter 109 dec rdx ; counter
109 jnz NextRow 110 jnz NextRow
110 111
111 SUM_ACROSS_W xmm15 112 SUM_ACROSS_W xmm15
112 SUM_ACROSS_W xmm14 113 SUM_ACROSS_W xmm14
113 SUM_ACROSS_Q xmm13 114 SUM_ACROSS_Q xmm13
114 SUM_ACROSS_Q xmm12 115 SUM_ACROSS_Q xmm12
115 SUM_ACROSS_Q xmm11 116 SUM_ACROSS_Q xmm11
116 117
117 mov rdi,arg(4) 118 mov rdi,arg(4)
118 movq [rdi], xmm15; 119 movd [rdi], xmm15;
119 mov rdi,arg(5) 120 mov rdi,arg(5)
120 movq [rdi], xmm14; 121 movd [rdi], xmm14;
121 mov rdi,arg(6) 122 mov rdi,arg(6)
122 movq [rdi], xmm13; 123 movd [rdi], xmm13;
123 mov rdi,arg(7) 124 mov rdi,arg(7)
124 movq [rdi], xmm12; 125 movd [rdi], xmm12;
125 mov rdi,arg(8) 126 mov rdi,arg(8)
126 movq [rdi], xmm11; 127 movd [rdi], xmm11;
127 128
128 ; begin epilog 129 ; begin epilog
129 pop rdi 130 pop rdi
130 pop rsi 131 pop rsi
132 RESTORE_XMM
131 UNSHADOW_ARGS 133 UNSHADOW_ARGS
132 pop rbp 134 pop rbp
133 ret 135 ret
134 136
135 ;void ssim_parms_sse3( 137 ;void ssim_parms_sse3(
136 ; unsigned char *s, 138 ; unsigned char *s,
137 ; int sp, 139 ; int sp,
138 ; unsigned char *r, 140 ; unsigned char *r,
139 ; int rp, 141 ; int rp,
140 ; unsigned long *sum_s, 142 ; unsigned long *sum_s,
141 ; unsigned long *sum_r, 143 ; unsigned long *sum_r,
142 ; unsigned long *sum_sq_s, 144 ; unsigned long *sum_sq_s,
143 ; unsigned long *sum_sq_r, 145 ; unsigned long *sum_sq_r,
144 ; unsigned long *sum_sxr); 146 ; unsigned long *sum_sxr);
145 ; 147 ;
146 ; TODO: Use parm passing through structure, probably don't need the pxors 148 ; TODO: Use parm passing through structure, probably don't need the pxors
147 ; ( calling app will initialize to 0 ) could easily fit everything in sse2 149 ; ( calling app will initialize to 0 ) could easily fit everything in sse2
148 ; without too much hassle, and can probably do better estimates with psadbw 150 ; without too much hassle, and can probably do better estimates with psadbw
149 ; or pavgb. At this point this is just meant to be a first pass for calculating 151 ; or pavgb. At this point this is just meant to be a first pass for calculating
150 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion 152 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
151 ; in mode selection code. 153 ; in mode selection code.
152 global sym(vp8_ssim_parms_8x8_sse3) 154 global sym(vp8_ssim_parms_8x8_sse3)
153 sym(vp8_ssim_parms_8x8_sse3): 155 sym(vp8_ssim_parms_8x8_sse3):
154 push rbp 156 push rbp
155 mov rbp, rsp 157 mov rbp, rsp
156 SHADOW_ARGS_TO_STACK 9 158 SHADOW_ARGS_TO_STACK 9
159 SAVE_XMM 15
157 push rsi 160 push rsi
158 push rdi 161 push rdi
159 ; end prolog 162 ; end prolog
160 163
161 mov rsi, arg(0) ;s 164 mov rsi, arg(0) ;s
162 mov rcx, arg(1) ;sp 165 mov rcx, arg(1) ;sp
163 mov rdi, arg(2) ;r 166 mov rdi, arg(2) ;r
164 mov rax, arg(3) ;rp 167 mov rax, arg(3) ;rp
165 168
166 pxor xmm0, xmm0 169 pxor xmm0, xmm0
167 pxor xmm15,xmm15 ;sum_s 170 pxor xmm15,xmm15 ;sum_s
168 pxor xmm14,xmm14 ;sum_r 171 pxor xmm14,xmm14 ;sum_r
169 pxor xmm13,xmm13 ;sum_sq_s 172 pxor xmm13,xmm13 ;sum_sq_s
170 pxor xmm12,xmm12 ;sum_sq_r 173 pxor xmm12,xmm12 ;sum_sq_r
171 pxor xmm11,xmm11 ;sum_sxr 174 pxor xmm11,xmm11 ;sum_sxr
172 175
173 mov rdx, 8 ;row counter 176 mov rdx, 8 ;row counter
174 NextRow2: 177 NextRow2:
175 178
176 ;grab source and reference pixels 179 ;grab source and reference pixels
177 movq xmm5, [rsi] 180 movq xmm3, [rsi]
178 movq xmm6, [rdi] 181 movq xmm4, [rdi]
179
180 movdqa xmm3, xmm5
181 movdqa xmm4, xmm6
182 punpcklbw xmm3, xmm0 ; low_s 182 punpcklbw xmm3, xmm0 ; low_s
183 punpcklbw xmm4, xmm0 ; low_r 183 punpcklbw xmm4, xmm0 ; low_r
184 184
185 TABULATE_SSIM 185 TABULATE_SSIM
186 186
187 add rsi, rcx ; next s row 187 add rsi, rcx ; next s row
188 add rdi, rax ; next r row 188 add rdi, rax ; next r row
189 189
190 dec rdx ; counter 190 dec rdx ; counter
191 jnz NextRow2 191 jnz NextRow2
192 192
193 SUM_ACROSS_W xmm15 193 SUM_ACROSS_W xmm15
194 SUM_ACROSS_W xmm14 194 SUM_ACROSS_W xmm14
195 SUM_ACROSS_Q xmm13 195 SUM_ACROSS_Q xmm13
196 SUM_ACROSS_Q xmm12 196 SUM_ACROSS_Q xmm12
197 SUM_ACROSS_Q xmm11 197 SUM_ACROSS_Q xmm11
198 198
199 mov rdi,arg(4) 199 mov rdi,arg(4)
200 movq [rdi], xmm15; 200 movd [rdi], xmm15;
201 mov rdi,arg(5) 201 mov rdi,arg(5)
202 movq [rdi], xmm14; 202 movd [rdi], xmm14;
203 mov rdi,arg(6) 203 mov rdi,arg(6)
204 movq [rdi], xmm13; 204 movd [rdi], xmm13;
205 mov rdi,arg(7) 205 mov rdi,arg(7)
206 movq [rdi], xmm12; 206 movd [rdi], xmm12;
207 mov rdi,arg(8) 207 mov rdi,arg(8)
208 movq [rdi], xmm11; 208 movd [rdi], xmm11;
209 209
210 ; begin epilog 210 ; begin epilog
211 pop rdi 211 pop rdi
212 pop rsi 212 pop rsi
213 RESTORE_XMM
213 UNSHADOW_ARGS 214 UNSHADOW_ARGS
214 pop rbp 215 pop rbp
215 ret 216 ret
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/sad_ssse3.asm ('k') | source/libvpx/vp8/encoder/x86/subtract_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698