| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" | 
| 13 | 13 | 
| 14 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr) | 14 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr) | 
| 15 global sym(vp8_block_error_xmm) | 15 global sym(vp8_block_error_xmm) | 
| 16 sym(vp8_block_error_xmm): | 16 sym(vp8_block_error_xmm): | 
| 17     push        rbp | 17     push        rbp | 
| 18     mov         rbp, rsp | 18     mov         rbp, rsp | 
| 19     SHADOW_ARGS_TO_STACK 2 | 19     SHADOW_ARGS_TO_STACK 2 | 
| 20     push rsi | 20     push rsi | 
| 21     push rdi | 21     push rdi | 
| 22     ; end prologue | 22     ; end prologue | 
| 23 | 23 | 
| 24         mov         rsi,        arg(0) ;coeff_ptr | 24         mov         rsi,        arg(0) ;coeff_ptr | 
|  | 25         mov         rdi,        arg(1) ;dcoef_ptr | 
| 25 | 26 | 
| 26         mov         rdi,        arg(1) ;dcoef_ptr | 27         movdqa      xmm0,       [rsi] | 
| 27         movdqa      xmm3,       [rsi] | 28         movdqa      xmm1,       [rdi] | 
| 28 | 29 | 
| 29         movdqa      xmm4,       [rdi] | 30         movdqa      xmm2,       [rsi+16] | 
| 30         movdqa      xmm5,       [rsi+16] | 31         movdqa      xmm3,       [rdi+16] | 
| 31 | 32 | 
| 32         movdqa      xmm6,       [rdi+16] | 33         psubw       xmm0,       xmm1 | 
| 33         psubw       xmm3,       xmm4 | 34         psubw       xmm2,       xmm3 | 
| 34 | 35 | 
| 35         psubw       xmm5,       xmm6 | 36         pmaddwd     xmm0,       xmm0 | 
| 36         pmaddwd     xmm3,       xmm3 | 37         pmaddwd     xmm2,       xmm2 | 
| 37         pmaddwd     xmm5,       xmm5 |  | 
| 38 | 38 | 
| 39         paddd       xmm3,       xmm5 | 39         paddd       xmm0,       xmm2 | 
| 40 | 40 | 
| 41         pxor        xmm7,       xmm7 | 41         pxor        xmm5,       xmm5 | 
| 42         movdqa      xmm0,       xmm3 | 42         movdqa      xmm1,       xmm0 | 
| 43 | 43 | 
| 44         punpckldq   xmm0,       xmm7 | 44         punpckldq   xmm0,       xmm5 | 
| 45         punpckhdq   xmm3,       xmm7 | 45         punpckhdq   xmm1,       xmm5 | 
| 46 | 46 | 
| 47         paddd       xmm0,       xmm3 | 47         paddd       xmm0,       xmm1 | 
| 48         movdqa      xmm3,       xmm0 | 48         movdqa      xmm1,       xmm0 | 
| 49 | 49 | 
| 50         psrldq      xmm0,       8 | 50         psrldq      xmm0,       8 | 
| 51         paddd       xmm0,       xmm3 | 51         paddd       xmm0,       xmm1 | 
| 52 | 52 | 
| 53         movq        rax,        xmm0 | 53         movq        rax,        xmm0 | 
| 54 | 54 | 
| 55     pop rdi | 55     pop rdi | 
| 56     pop rsi | 56     pop rsi | 
| 57     ; begin epilog | 57     ; begin epilog | 
| 58     UNSHADOW_ARGS | 58     UNSHADOW_ARGS | 
| 59     pop         rbp | 59     pop         rbp | 
| 60     ret | 60     ret | 
| 61 | 61 | 
| (...skipping 139 matching lines...) Expand 10 before \| Expand all \| Expand 10 after  Loading... |  | 
| 201     pop         rbp | 201     pop         rbp | 
| 202     ret | 202     ret | 
| 203 | 203 | 
| 204 | 204 | 
| 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 205 ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); | 
| 206 global sym(vp8_mbblock_error_xmm_impl) | 206 global sym(vp8_mbblock_error_xmm_impl) | 
| 207 sym(vp8_mbblock_error_xmm_impl): | 207 sym(vp8_mbblock_error_xmm_impl): | 
| 208     push        rbp | 208     push        rbp | 
| 209     mov         rbp, rsp | 209     mov         rbp, rsp | 
| 210     SHADOW_ARGS_TO_STACK 3 | 210     SHADOW_ARGS_TO_STACK 3 | 
|  | 211     SAVE_XMM 6 | 
| 211     push rsi | 212     push rsi | 
| 212     push rdi | 213     push rdi | 
| 213     ; end prolog | 214     ; end prolog | 
| 214 | 215 | 
| 215 | 216 | 
| 216         mov         rsi,        arg(0) ;coeff_ptr | 217         mov         rsi,        arg(0) ;coeff_ptr | 
| 217         pxor        xmm7,       xmm7 | 218         pxor        xmm6,       xmm6 | 
| 218 | 219 | 
| 219         mov         rdi,        arg(1) ;dcoef_ptr | 220         mov         rdi,        arg(1) ;dcoef_ptr | 
| 220         pxor        xmm2,       xmm2 | 221         pxor        xmm4,       xmm4 | 
| 221 | 222 | 
| 222         movd        xmm1,       dword ptr arg(2) ;dc | 223         movd        xmm5,       dword ptr arg(2) ;dc | 
| 223         por         xmm1,       xmm2 | 224         por         xmm5,       xmm4 | 
| 224 | 225 | 
| 225         pcmpeqw     xmm1,       xmm7 | 226         pcmpeqw     xmm5,       xmm6 | 
| 226         mov         rcx,        16 | 227         mov         rcx,        16 | 
| 227 | 228 | 
| 228 mberror_loop: | 229 mberror_loop: | 
| 229         movdqa      xmm3,       [rsi] | 230         movdqa      xmm0,       [rsi] | 
| 230         movdqa      xmm4,       [rdi] | 231         movdqa      xmm1,       [rdi] | 
| 231 | 232 | 
| 232         movdqa      xmm5,       [rsi+16] | 233         movdqa      xmm2,       [rsi+16] | 
| 233         movdqa      xmm6,       [rdi+16] | 234         movdqa      xmm3,       [rdi+16] | 
| 234 | 235 | 
| 235 | 236 | 
| 236         psubw       xmm5,       xmm6 | 237         psubw       xmm2,       xmm3 | 
| 237         pmaddwd     xmm5,       xmm5 | 238         pmaddwd     xmm2,       xmm2 | 
| 238 | 239 | 
| 239         psubw       xmm3,       xmm4 | 240         psubw       xmm0,       xmm1 | 
| 240         pand        xmm3,       xmm1 | 241         pand        xmm0,       xmm5 | 
| 241 | 242 | 
| 242         pmaddwd     xmm3,       xmm3 | 243         pmaddwd     xmm0,       xmm0 | 
| 243         add         rsi,        32 | 244         add         rsi,        32 | 
| 244 | 245 | 
| 245         add         rdi,        32 | 246         add         rdi,        32 | 
| 246 | 247 | 
| 247         sub         rcx,        1 | 248         sub         rcx,        1 | 
| 248         paddd       xmm2,       xmm5 | 249         paddd       xmm4,       xmm2 | 
| 249 | 250 | 
| 250         paddd       xmm2,       xmm3 | 251         paddd       xmm4,       xmm0 | 
| 251         jnz         mberror_loop | 252         jnz         mberror_loop | 
| 252 | 253 | 
| 253         movdqa      xmm0,       xmm2 | 254         movdqa      xmm0,       xmm4 | 
| 254         punpckldq   xmm0,       xmm7 | 255         punpckldq   xmm0,       xmm6 | 
| 255 | 256 | 
| 256         punpckhdq   xmm2,       xmm7 | 257         punpckhdq   xmm4,       xmm6 | 
| 257         paddd       xmm0,       xmm2 | 258         paddd       xmm0,       xmm4 | 
| 258 | 259 | 
| 259         movdqa      xmm1,       xmm0 | 260         movdqa      xmm1,       xmm0 | 
| 260         psrldq      xmm0,       8 | 261         psrldq      xmm0,       8 | 
| 261 | 262 | 
| 262         paddd       xmm0,       xmm1 | 263         paddd       xmm0,       xmm1 | 
| 263         movq        rax,        xmm0 | 264         movq        rax,        xmm0 | 
| 264 | 265 | 
| 265     pop rdi | 266     pop rdi | 
| 266     pop rsi | 267     pop rsi | 
| 267     ; begin epilog | 268     ; begin epilog | 
|  | 269     RESTORE_XMM | 
| 268     UNSHADOW_ARGS | 270     UNSHADOW_ARGS | 
| 269     pop         rbp | 271     pop         rbp | 
| 270     ret | 272     ret | 
| 271 | 273 | 
| 272 | 274 | 
| 273 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 275 ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); | 
| 274 global sym(vp8_mbuverror_mmx_impl) | 276 global sym(vp8_mbuverror_mmx_impl) | 
| 275 sym(vp8_mbuverror_mmx_impl): | 277 sym(vp8_mbuverror_mmx_impl): | 
| 276     push        rbp | 278     push        rbp | 
| 277     mov         rbp, rsp | 279     mov         rbp, rsp | 
| (...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after  Loading... |  | 
| 335     SHADOW_ARGS_TO_STACK 2 | 337     SHADOW_ARGS_TO_STACK 2 | 
| 336     push rsi | 338     push rsi | 
| 337     push rdi | 339     push rdi | 
| 338     ; end prolog | 340     ; end prolog | 
| 339 | 341 | 
| 340 | 342 | 
| 341         mov             rsi,        arg(0) ;s_ptr | 343         mov             rsi,        arg(0) ;s_ptr | 
| 342         mov             rdi,        arg(1) ;d_ptr | 344         mov             rdi,        arg(1) ;d_ptr | 
| 343 | 345 | 
| 344         mov             rcx,        16 | 346         mov             rcx,        16 | 
| 345         pxor            xmm7,       xmm7 | 347         pxor            xmm3,       xmm3 | 
| 346 | 348 | 
| 347 mbuverror_loop: | 349 mbuverror_loop: | 
| 348 | 350 | 
| 349         movdqa          xmm1,       [rsi] | 351         movdqa          xmm1,       [rsi] | 
| 350         movdqa          xmm2,       [rdi] | 352         movdqa          xmm2,       [rdi] | 
| 351 | 353 | 
| 352         psubw           xmm1,       xmm2 | 354         psubw           xmm1,       xmm2 | 
| 353         pmaddwd         xmm1,       xmm1 | 355         pmaddwd         xmm1,       xmm1 | 
| 354 | 356 | 
| 355         paddd           xmm7,       xmm1 | 357         paddd           xmm3,       xmm1 | 
| 356 | 358 | 
| 357         add             rsi,        16 | 359         add             rsi,        16 | 
| 358         add             rdi,        16 | 360         add             rdi,        16 | 
| 359 | 361 | 
| 360         dec             rcx | 362         dec             rcx | 
| 361         jnz             mbuverror_loop | 363         jnz             mbuverror_loop | 
| 362 | 364 | 
| 363         pxor        xmm0,           xmm0 | 365         pxor        xmm0,           xmm0 | 
| 364         movdqa      xmm1,           xmm7 | 366         movdqa      xmm1,           xmm3 | 
| 365 | 367 | 
| 366         movdqa      xmm2,           xmm1 | 368         movdqa      xmm2,           xmm1 | 
| 367         punpckldq   xmm1,           xmm0 | 369         punpckldq   xmm1,           xmm0 | 
| 368 | 370 | 
| 369         punpckhdq   xmm2,           xmm0 | 371         punpckhdq   xmm2,           xmm0 | 
| 370         paddd       xmm1,           xmm2 | 372         paddd       xmm1,           xmm2 | 
| 371 | 373 | 
| 372         movdqa      xmm2,           xmm1 | 374         movdqa      xmm2,           xmm1 | 
| 373 | 375 | 
| 374         psrldq      xmm1,           8 | 376         psrldq      xmm1,           8 | 
| 375         paddd       xmm1,           xmm2 | 377         paddd       xmm1,           xmm2 | 
| 376 | 378 | 
| 377         movq            rax,            xmm1 | 379         movq            rax,            xmm1 | 
| 378 | 380 | 
| 379     pop rdi | 381     pop rdi | 
| 380     pop rsi | 382     pop rsi | 
| 381     ; begin epilog | 383     ; begin epilog | 
| 382     UNSHADOW_ARGS | 384     UNSHADOW_ARGS | 
| 383     pop         rbp | 385     pop         rbp | 
| 384     ret | 386     ret | 
| OLD | NEW | 
|---|