| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" |
| 13 | 13 |
| 14 ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) | 14 ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) |
| 15 global sym(vp8_block_error_xmm) | 15 global sym(vp8_block_error_xmm) |
| 16 sym(vp8_block_error_xmm): | 16 sym(vp8_block_error_xmm): |
| 17 push rbp | 17 push rbp |
| 18 mov rbp, rsp | 18 mov rbp, rsp |
| 19 SHADOW_ARGS_TO_STACK 2 | 19 SHADOW_ARGS_TO_STACK 2 |
| 20 push rsi | 20 push rsi |
| 21 push rdi | 21 push rdi |
| 22 ; end prologue | 22 ; end prologue |
| 23 | 23 |
| 24 mov rsi, arg(0) ;coeff_ptr | 24 mov rsi, arg(0) ;coeff_ptr |
| 25 mov rdi, arg(1) ;dcoef_ptr |
| 25 | 26 |
| 26 mov rdi, arg(1) ;dcoef_ptr | 27 movdqa xmm0, [rsi] |
| 27 movdqa xmm3, [rsi] | 28 movdqa xmm1, [rdi] |
| 28 | 29 |
| 29 movdqa xmm4, [rdi] | 30 movdqa xmm2, [rsi+16] |
| 30 movdqa xmm5, [rsi+16] | 31 movdqa xmm3, [rdi+16] |
| 31 | 32 |
| 32 movdqa xmm6, [rdi+16] | 33 psubw xmm0, xmm1 |
| 33 psubw xmm3, xmm4 | 34 psubw xmm2, xmm3 |
| 34 | 35 |
| 35 psubw xmm5, xmm6 | 36 pmaddwd xmm0, xmm0 |
| 36 pmaddwd xmm3, xmm3 | 37 pmaddwd xmm2, xmm2 |
| 37 pmaddwd xmm5, xmm5 | |
| 38 | 38 |
| 39 paddd xmm3, xmm5 | 39 paddd xmm0, xmm2 |
| 40 | 40 |
| 41 pxor xmm7, xmm7 | 41 pxor xmm5, xmm5 |
| 42 movdqa xmm0, xmm3 | 42 movdqa xmm1, xmm0 |
| 43 | 43 |
| 44 punpckldq xmm0, xmm7 | 44 punpckldq xmm0, xmm5 |
| 45 punpckhdq xmm3, xmm7 | 45 punpckhdq xmm1, xmm5 |
| 46 | 46 |
| 47 paddd xmm0, xmm3 | 47 paddd xmm0, xmm1 |
| 48 movdqa xmm3, xmm0 | 48 movdqa xmm1, xmm0 |
| 49 | 49 |
| 50 psrldq xmm0, 8 | 50 psrldq xmm0, 8 |
| 51 paddd xmm0, xmm3 | 51 paddd xmm0, xmm1 |
| 52 | 52 |
| 53 movq rax, xmm0 | 53 movq rax, xmm0 |
| 54 | 54 |
| 55 pop rdi | 55 pop rdi |
| 56 pop rsi | 56 pop rsi |
| 57 ; begin epilog | 57 ; begin epilog |
| 58 UNSHADOW_ARGS | 58 UNSHADOW_ARGS |
| 59 pop rbp | 59 pop rbp |
| 60 ret | 60 ret |
| 61 | 61 |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 201 pop rbp | 201 pop rbp |
| 202 ret | 202 ret |
| 203 | 203 |
| 204 | 204 |
| 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); |
| 206 global sym(vp8_mbblock_error_xmm_impl) | 206 global sym(vp8_mbblock_error_xmm_impl) |
| 207 sym(vp8_mbblock_error_xmm_impl): | 207 sym(vp8_mbblock_error_xmm_impl): |
| 208 push rbp | 208 push rbp |
| 209 mov rbp, rsp | 209 mov rbp, rsp |
| 210 SHADOW_ARGS_TO_STACK 3 | 210 SHADOW_ARGS_TO_STACK 3 |
| 211 SAVE_XMM 6 |
| 211 push rsi | 212 push rsi |
| 212 push rdi | 213 push rdi |
| 213 ; end prolog | 214 ; end prolog |
| 214 | 215 |
| 215 | 216 |
| 216 mov rsi, arg(0) ;coeff_ptr | 217 mov rsi, arg(0) ;coeff_ptr |
| 217 pxor xmm7, xmm7 | 218 pxor xmm6, xmm6 |
| 218 | 219 |
| 219 mov rdi, arg(1) ;dcoef_ptr | 220 mov rdi, arg(1) ;dcoef_ptr |
| 220 pxor xmm2, xmm2 | 221 pxor xmm4, xmm4 |
| 221 | 222 |
| 222 movd xmm1, dword ptr arg(2) ;dc | 223 movd xmm5, dword ptr arg(2) ;dc |
| 223 por xmm1, xmm2 | 224 por xmm5, xmm4 |
| 224 | 225 |
| 225 pcmpeqw xmm1, xmm7 | 226 pcmpeqw xmm5, xmm6 |
| 226 mov rcx, 16 | 227 mov rcx, 16 |
| 227 | 228 |
| 228 mberror_loop: | 229 mberror_loop: |
| 229 movdqa xmm3, [rsi] | 230 movdqa xmm0, [rsi] |
| 230 movdqa xmm4, [rdi] | 231 movdqa xmm1, [rdi] |
| 231 | 232 |
| 232 movdqa xmm5, [rsi+16] | 233 movdqa xmm2, [rsi+16] |
| 233 movdqa xmm6, [rdi+16] | 234 movdqa xmm3, [rdi+16] |
| 234 | 235 |
| 235 | 236 |
| 236 psubw xmm5, xmm6 | 237 psubw xmm2, xmm3 |
| 237 pmaddwd xmm5, xmm5 | 238 pmaddwd xmm2, xmm2 |
| 238 | 239 |
| 239 psubw xmm3, xmm4 | 240 psubw xmm0, xmm1 |
| 240 pand xmm3, xmm1 | 241 pand xmm0, xmm5 |
| 241 | 242 |
| 242 pmaddwd xmm3, xmm3 | 243 pmaddwd xmm0, xmm0 |
| 243 add rsi, 32 | 244 add rsi, 32 |
| 244 | 245 |
| 245 add rdi, 32 | 246 add rdi, 32 |
| 246 | 247 |
| 247 sub rcx, 1 | 248 sub rcx, 1 |
| 248 paddd xmm2, xmm5 | 249 paddd xmm4, xmm2 |
| 249 | 250 |
| 250 paddd xmm2, xmm3 | 251 paddd xmm4, xmm0 |
| 251 jnz mberror_loop | 252 jnz mberror_loop |
| 252 | 253 |
| 253 movdqa xmm0, xmm2 | 254 movdqa xmm0, xmm4 |
| 254 punpckldq xmm0, xmm7 | 255 punpckldq xmm0, xmm6 |
| 255 | 256 |
| 256 punpckhdq xmm2, xmm7 | 257 punpckhdq xmm4, xmm6 |
| 257 paddd xmm0, xmm2 | 258 paddd xmm0, xmm4 |
| 258 | 259 |
| 259 movdqa xmm1, xmm0 | 260 movdqa xmm1, xmm0 |
| 260 psrldq xmm0, 8 | 261 psrldq xmm0, 8 |
| 261 | 262 |
| 262 paddd xmm0, xmm1 | 263 paddd xmm0, xmm1 |
| 263 movq rax, xmm0 | 264 movq rax, xmm0 |
| 264 | 265 |
| 265 pop rdi | 266 pop rdi |
| 266 pop rsi | 267 pop rsi |
| 267 ; begin epilog | 268 ; begin epilog |
| 269 RESTORE_XMM |
| 268 UNSHADOW_ARGS | 270 UNSHADOW_ARGS |
| 269 pop rbp | 271 pop rbp |
| 270 ret | 272 ret |
| 271 | 273 |
| 272 | 274 |
| 273 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 275 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); |
| 274 global sym(vp8_mbuverror_mmx_impl) | 276 global sym(vp8_mbuverror_mmx_impl) |
| 275 sym(vp8_mbuverror_mmx_impl): | 277 sym(vp8_mbuverror_mmx_impl): |
| 276 push rbp | 278 push rbp |
| 277 mov rbp, rsp | 279 mov rbp, rsp |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 335 SHADOW_ARGS_TO_STACK 2 | 337 SHADOW_ARGS_TO_STACK 2 |
| 336 push rsi | 338 push rsi |
| 337 push rdi | 339 push rdi |
| 338 ; end prolog | 340 ; end prolog |
| 339 | 341 |
| 340 | 342 |
| 341 mov rsi, arg(0) ;s_ptr | 343 mov rsi, arg(0) ;s_ptr |
| 342 mov rdi, arg(1) ;d_ptr | 344 mov rdi, arg(1) ;d_ptr |
| 343 | 345 |
| 344 mov rcx, 16 | 346 mov rcx, 16 |
| 345 pxor xmm7, xmm7 | 347 pxor xmm3, xmm3 |
| 346 | 348 |
| 347 mbuverror_loop: | 349 mbuverror_loop: |
| 348 | 350 |
| 349 movdqa xmm1, [rsi] | 351 movdqa xmm1, [rsi] |
| 350 movdqa xmm2, [rdi] | 352 movdqa xmm2, [rdi] |
| 351 | 353 |
| 352 psubw xmm1, xmm2 | 354 psubw xmm1, xmm2 |
| 353 pmaddwd xmm1, xmm1 | 355 pmaddwd xmm1, xmm1 |
| 354 | 356 |
| 355 paddd xmm7, xmm1 | 357 paddd xmm3, xmm1 |
| 356 | 358 |
| 357 add rsi, 16 | 359 add rsi, 16 |
| 358 add rdi, 16 | 360 add rdi, 16 |
| 359 | 361 |
| 360 dec rcx | 362 dec rcx |
| 361 jnz mbuverror_loop | 363 jnz mbuverror_loop |
| 362 | 364 |
| 363 pxor xmm0, xmm0 | 365 pxor xmm0, xmm0 |
| 364 movdqa xmm1, xmm7 | 366 movdqa xmm1, xmm3 |
| 365 | 367 |
| 366 movdqa xmm2, xmm1 | 368 movdqa xmm2, xmm1 |
| 367 punpckldq xmm1, xmm0 | 369 punpckldq xmm1, xmm0 |
| 368 | 370 |
| 369 punpckhdq xmm2, xmm0 | 371 punpckhdq xmm2, xmm0 |
| 370 paddd xmm1, xmm2 | 372 paddd xmm1, xmm2 |
| 371 | 373 |
| 372 movdqa xmm2, xmm1 | 374 movdqa xmm2, xmm1 |
| 373 | 375 |
| 374 psrldq xmm1, 8 | 376 psrldq xmm1, 8 |
| 375 paddd xmm1, xmm2 | 377 paddd xmm1, xmm2 |
| 376 | 378 |
| 377 movq rax, xmm1 | 379 movq rax, xmm1 |
| 378 | 380 |
| 379 pop rdi | 381 pop rdi |
| 380 pop rsi | 382 pop rsi |
| 381 ; begin epilog | 383 ; begin epilog |
| 382 UNSHADOW_ARGS | 384 UNSHADOW_ARGS |
| 383 pop rbp | 385 pop rbp |
| 384 ret | 386 ret |
| OLD | NEW |