Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Side by Side Diff: source/libvpx/vp8/encoder/x86/encodeopt.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp8/encoder/x86/dct_x86.h ('k') | source/libvpx/vp8/encoder/x86/fwalsh_sse2.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 %include "vpx_ports/x86_abi_support.asm" 12 %include "vpx_ports/x86_abi_support.asm"
13 13
14 ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) 14 ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
15 global sym(vp8_block_error_xmm) 15 global sym(vp8_block_error_xmm)
16 sym(vp8_block_error_xmm): 16 sym(vp8_block_error_xmm):
17 push rbp 17 push rbp
18 mov rbp, rsp 18 mov rbp, rsp
19 SHADOW_ARGS_TO_STACK 2 19 SHADOW_ARGS_TO_STACK 2
20 push rsi 20 push rsi
21 push rdi 21 push rdi
22 ; end prologue 22 ; end prologue
23 23
24 mov rsi, arg(0) ;coeff_ptr 24 mov rsi, arg(0) ;coeff_ptr
25 mov rdi, arg(1) ;dcoef_ptr
25 26
26 mov rdi, arg(1) ;dcoef_ptr 27 movdqa xmm0, [rsi]
27 movdqa xmm3, [rsi] 28 movdqa xmm1, [rdi]
28 29
29 movdqa xmm4, [rdi] 30 movdqa xmm2, [rsi+16]
30 movdqa xmm5, [rsi+16] 31 movdqa xmm3, [rdi+16]
31 32
32 movdqa xmm6, [rdi+16] 33 psubw xmm0, xmm1
33 psubw xmm3, xmm4 34 psubw xmm2, xmm3
34 35
35 psubw xmm5, xmm6 36 pmaddwd xmm0, xmm0
36 pmaddwd xmm3, xmm3 37 pmaddwd xmm2, xmm2
37 pmaddwd xmm5, xmm5
38 38
39 paddd xmm3, xmm5 39 paddd xmm0, xmm2
40 40
41 pxor xmm7, xmm7 41 pxor xmm5, xmm5
42 movdqa xmm0, xmm3 42 movdqa xmm1, xmm0
43 43
44 punpckldq xmm0, xmm7 44 punpckldq xmm0, xmm5
45 punpckhdq xmm3, xmm7 45 punpckhdq xmm1, xmm5
46 46
47 paddd xmm0, xmm3 47 paddd xmm0, xmm1
48 movdqa xmm3, xmm0 48 movdqa xmm1, xmm0
49 49
50 psrldq xmm0, 8 50 psrldq xmm0, 8
51 paddd xmm0, xmm3 51 paddd xmm0, xmm1
52 52
53 movq rax, xmm0 53 movq rax, xmm0
54 54
55 pop rdi 55 pop rdi
56 pop rsi 56 pop rsi
57 ; begin epilog 57 ; begin epilog
58 UNSHADOW_ARGS 58 UNSHADOW_ARGS
59 pop rbp 59 pop rbp
60 ret 60 ret
61 61
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
201 pop rbp 201 pop rbp
202 ret 202 ret
203 203
204 204
205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
206 global sym(vp8_mbblock_error_xmm_impl) 206 global sym(vp8_mbblock_error_xmm_impl)
207 sym(vp8_mbblock_error_xmm_impl): 207 sym(vp8_mbblock_error_xmm_impl):
208 push rbp 208 push rbp
209 mov rbp, rsp 209 mov rbp, rsp
210 SHADOW_ARGS_TO_STACK 3 210 SHADOW_ARGS_TO_STACK 3
211 SAVE_XMM 6
211 push rsi 212 push rsi
212 push rdi 213 push rdi
213 ; end prolog 214 ; end prolog
214 215
215 216
216 mov rsi, arg(0) ;coeff_ptr 217 mov rsi, arg(0) ;coeff_ptr
217 pxor xmm7, xmm7 218 pxor xmm6, xmm6
218 219
219 mov rdi, arg(1) ;dcoef_ptr 220 mov rdi, arg(1) ;dcoef_ptr
220 pxor xmm2, xmm2 221 pxor xmm4, xmm4
221 222
222 movd xmm1, dword ptr arg(2) ;dc 223 movd xmm5, dword ptr arg(2) ;dc
223 por xmm1, xmm2 224 por xmm5, xmm4
224 225
225 pcmpeqw xmm1, xmm7 226 pcmpeqw xmm5, xmm6
226 mov rcx, 16 227 mov rcx, 16
227 228
228 mberror_loop: 229 mberror_loop:
229 movdqa xmm3, [rsi] 230 movdqa xmm0, [rsi]
230 movdqa xmm4, [rdi] 231 movdqa xmm1, [rdi]
231 232
232 movdqa xmm5, [rsi+16] 233 movdqa xmm2, [rsi+16]
233 movdqa xmm6, [rdi+16] 234 movdqa xmm3, [rdi+16]
234 235
235 236
236 psubw xmm5, xmm6 237 psubw xmm2, xmm3
237 pmaddwd xmm5, xmm5 238 pmaddwd xmm2, xmm2
238 239
239 psubw xmm3, xmm4 240 psubw xmm0, xmm1
240 pand xmm3, xmm1 241 pand xmm0, xmm5
241 242
242 pmaddwd xmm3, xmm3 243 pmaddwd xmm0, xmm0
243 add rsi, 32 244 add rsi, 32
244 245
245 add rdi, 32 246 add rdi, 32
246 247
247 sub rcx, 1 248 sub rcx, 1
248 paddd xmm2, xmm5 249 paddd xmm4, xmm2
249 250
250 paddd xmm2, xmm3 251 paddd xmm4, xmm0
251 jnz mberror_loop 252 jnz mberror_loop
252 253
253 movdqa xmm0, xmm2 254 movdqa xmm0, xmm4
254 punpckldq xmm0, xmm7 255 punpckldq xmm0, xmm6
255 256
256 punpckhdq xmm2, xmm7 257 punpckhdq xmm4, xmm6
257 paddd xmm0, xmm2 258 paddd xmm0, xmm4
258 259
259 movdqa xmm1, xmm0 260 movdqa xmm1, xmm0
260 psrldq xmm0, 8 261 psrldq xmm0, 8
261 262
262 paddd xmm0, xmm1 263 paddd xmm0, xmm1
263 movq rax, xmm0 264 movq rax, xmm0
264 265
265 pop rdi 266 pop rdi
266 pop rsi 267 pop rsi
267 ; begin epilog 268 ; begin epilog
269 RESTORE_XMM
268 UNSHADOW_ARGS 270 UNSHADOW_ARGS
269 pop rbp 271 pop rbp
270 ret 272 ret
271 273
272 274
273 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); 275 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
274 global sym(vp8_mbuverror_mmx_impl) 276 global sym(vp8_mbuverror_mmx_impl)
275 sym(vp8_mbuverror_mmx_impl): 277 sym(vp8_mbuverror_mmx_impl):
276 push rbp 278 push rbp
277 mov rbp, rsp 279 mov rbp, rsp
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
335 SHADOW_ARGS_TO_STACK 2 337 SHADOW_ARGS_TO_STACK 2
336 push rsi 338 push rsi
337 push rdi 339 push rdi
338 ; end prolog 340 ; end prolog
339 341
340 342
341 mov rsi, arg(0) ;s_ptr 343 mov rsi, arg(0) ;s_ptr
342 mov rdi, arg(1) ;d_ptr 344 mov rdi, arg(1) ;d_ptr
343 345
344 mov rcx, 16 346 mov rcx, 16
345 pxor xmm7, xmm7 347 pxor xmm3, xmm3
346 348
347 mbuverror_loop: 349 mbuverror_loop:
348 350
349 movdqa xmm1, [rsi] 351 movdqa xmm1, [rsi]
350 movdqa xmm2, [rdi] 352 movdqa xmm2, [rdi]
351 353
352 psubw xmm1, xmm2 354 psubw xmm1, xmm2
353 pmaddwd xmm1, xmm1 355 pmaddwd xmm1, xmm1
354 356
355 paddd xmm7, xmm1 357 paddd xmm3, xmm1
356 358
357 add rsi, 16 359 add rsi, 16
358 add rdi, 16 360 add rdi, 16
359 361
360 dec rcx 362 dec rcx
361 jnz mbuverror_loop 363 jnz mbuverror_loop
362 364
363 pxor xmm0, xmm0 365 pxor xmm0, xmm0
364 movdqa xmm1, xmm7 366 movdqa xmm1, xmm3
365 367
366 movdqa xmm2, xmm1 368 movdqa xmm2, xmm1
367 punpckldq xmm1, xmm0 369 punpckldq xmm1, xmm0
368 370
369 punpckhdq xmm2, xmm0 371 punpckhdq xmm2, xmm0
370 paddd xmm1, xmm2 372 paddd xmm1, xmm2
371 373
372 movdqa xmm2, xmm1 374 movdqa xmm2, xmm1
373 375
374 psrldq xmm1, 8 376 psrldq xmm1, 8
375 paddd xmm1, xmm2 377 paddd xmm1, xmm2
376 378
377 movq rax, xmm1 379 movq rax, xmm1
378 380
379 pop rdi 381 pop rdi
380 pop rsi 382 pop rsi
381 ; begin epilog 383 ; begin epilog
382 UNSHADOW_ARGS 384 UNSHADOW_ARGS
383 pop rbp 385 pop rbp
384 ret 386 ret
OLD | NEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/dct_x86.h ('k') | source/libvpx/vp8/encoder/x86/fwalsh_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698