| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| (...skipping 21 matching lines...) Expand all  Loading... | 
| 32 ;    unsigned char *output_ptr, | 32 ;    unsigned char *output_ptr, | 
| 33 ;    unsigned int    output_pitch, | 33 ;    unsigned int    output_pitch, | 
| 34 ;    unsigned int    output_height, | 34 ;    unsigned int    output_height, | 
| 35 ;    unsigned int    vp8_filter_index | 35 ;    unsigned int    vp8_filter_index | 
| 36 ;) | 36 ;) | 
| 37 global sym(vp8_filter_block1d8_h6_ssse3) | 37 global sym(vp8_filter_block1d8_h6_ssse3) | 
| 38 sym(vp8_filter_block1d8_h6_ssse3): | 38 sym(vp8_filter_block1d8_h6_ssse3): | 
| 39     push        rbp | 39     push        rbp | 
| 40     mov         rbp, rsp | 40     mov         rbp, rsp | 
| 41     SHADOW_ARGS_TO_STACK 6 | 41     SHADOW_ARGS_TO_STACK 6 | 
|  | 42     SAVE_XMM 7 | 
| 42     GET_GOT     rbx | 43     GET_GOT     rbx | 
| 43     push        rsi | 44     push        rsi | 
| 44     push        rdi | 45     push        rdi | 
| 45     ; end prolog | 46     ; end prolog | 
| 46 | 47 | 
| 47     movsxd      rdx, DWORD PTR arg(5)   ;table index | 48     movsxd      rdx, DWORD PTR arg(5)   ;table index | 
| 48     xor         rsi, rsi | 49     xor         rsi, rsi | 
| 49     shl         rdx, 4 | 50     shl         rdx, 4 | 
| 50 | 51 | 
| 51     movdqa      xmm7, [GLOBAL(rd)] | 52     movdqa      xmm7, [GLOBAL(rd)] | 
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 100 | 101 | 
| 101     packuswb    xmm0,   xmm0 | 102     packuswb    xmm0,   xmm0 | 
| 102 | 103 | 
| 103     movq        MMWORD Ptr [rdi], xmm0 | 104     movq        MMWORD Ptr [rdi], xmm0 | 
| 104     jnz         filter_block1d8_h6_rowloop_ssse3 | 105     jnz         filter_block1d8_h6_rowloop_ssse3 | 
| 105 | 106 | 
| 106     ; begin epilog | 107     ; begin epilog | 
| 107     pop rdi | 108     pop rdi | 
| 108     pop rsi | 109     pop rsi | 
| 109     RESTORE_GOT | 110     RESTORE_GOT | 
|  | 111     RESTORE_XMM | 
| 110     UNSHADOW_ARGS | 112     UNSHADOW_ARGS | 
| 111     pop         rbp | 113     pop         rbp | 
| 112     ret | 114     ret | 
| 113 | 115 | 
| 114 vp8_filter_block1d8_h4_ssse3: | 116 vp8_filter_block1d8_h4_ssse3: | 
| 115     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4 | 117     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4 | 
| 116     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3 | 118     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3 | 
| 117 | 119 | 
| 118     movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] | 120     movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] | 
| 119     movdqa      xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] | 121     movdqa      xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] | 
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 155     packuswb    xmm0,   xmm0 | 157     packuswb    xmm0,   xmm0 | 
| 156 | 158 | 
| 157     movq        MMWORD Ptr [rdi], xmm0 | 159     movq        MMWORD Ptr [rdi], xmm0 | 
| 158 | 160 | 
| 159     jnz         filter_block1d8_h4_rowloop_ssse3 | 161     jnz         filter_block1d8_h4_rowloop_ssse3 | 
| 160 | 162 | 
| 161     ; begin epilog | 163     ; begin epilog | 
| 162     pop rdi | 164     pop rdi | 
| 163     pop rsi | 165     pop rsi | 
| 164     RESTORE_GOT | 166     RESTORE_GOT | 
|  | 167     RESTORE_XMM | 
| 165     UNSHADOW_ARGS | 168     UNSHADOW_ARGS | 
| 166     pop         rbp | 169     pop         rbp | 
| 167     ret | 170     ret | 
| 168 ;void vp8_filter_block1d16_h6_ssse3 | 171 ;void vp8_filter_block1d16_h6_ssse3 | 
| 169 ;( | 172 ;( | 
| 170 ;    unsigned char  *src_ptr, | 173 ;    unsigned char  *src_ptr, | 
| 171 ;    unsigned int    src_pixels_per_line, | 174 ;    unsigned int    src_pixels_per_line, | 
| 172 ;    unsigned char  *output_ptr, | 175 ;    unsigned char  *output_ptr, | 
| 173 ;    unsigned int    output_pitch, | 176 ;    unsigned int    output_pitch, | 
| 174 ;    unsigned int    output_height, | 177 ;    unsigned int    output_height, | 
| 175 ;    unsigned int    vp8_filter_index | 178 ;    unsigned int    vp8_filter_index | 
| 176 ;) | 179 ;) | 
| 177 global sym(vp8_filter_block1d16_h6_ssse3) | 180 global sym(vp8_filter_block1d16_h6_ssse3) | 
| 178 sym(vp8_filter_block1d16_h6_ssse3): | 181 sym(vp8_filter_block1d16_h6_ssse3): | 
| 179     push        rbp | 182     push        rbp | 
| 180     mov         rbp, rsp | 183     mov         rbp, rsp | 
| 181     SHADOW_ARGS_TO_STACK 6 | 184     SHADOW_ARGS_TO_STACK 6 | 
| 182     SAVE_XMM | 185     SAVE_XMM 7 | 
| 183     GET_GOT     rbx | 186     GET_GOT     rbx | 
| 184     push        rsi | 187     push        rsi | 
| 185     push        rdi | 188     push        rdi | 
| 186     ; end prolog | 189     ; end prolog | 
| 187 | 190 | 
| 188     movsxd      rdx, DWORD PTR arg(5)           ;table index | 191     movsxd      rdx, DWORD PTR arg(5)           ;table index | 
| 189     xor         rsi, rsi | 192     xor         rsi, rsi | 
| 190     shl         rdx, 4      ; | 193     shl         rdx, 4      ; | 
| 191 | 194 | 
| 192     lea         rax, [GLOBAL(k0_k5)] | 195     lea         rax, [GLOBAL(k0_k5)] | 
| 193     add         rax, rdx | 196     add         rax, rdx | 
| 194 | 197 | 
| 195     mov         rdi, arg(2)                     ;output_ptr | 198     mov         rdi, arg(2)                     ;output_ptr | 
| 196 | 199 | 
| 197 ;; |  | 
| 198 ;;    cmp         esi, DWORD PTR [rax] |  | 
| 199 ;;    je          vp8_filter_block1d16_h4_ssse3 |  | 
| 200 |  | 
| 201     mov         rsi, arg(0)                     ;src_ptr | 200     mov         rsi, arg(0)                     ;src_ptr | 
| 202 | 201 | 
| 203     movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5 | 202     movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5 | 
| 204     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4 | 203     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4 | 
| 205     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3 | 204     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3 | 
| 206 | 205 | 
| 207     movsxd      rax, dword ptr arg(1)           ;src_pixels_per_line | 206     movsxd      rax, dword ptr arg(1)           ;src_pixels_per_line | 
| 208     movsxd      rcx, dword ptr arg(4)           ;output_height | 207     movsxd      rcx, dword ptr arg(4)           ;output_height | 
| 209     movsxd      rdx, dword ptr arg(3)           ;output_pitch | 208     movsxd      rdx, dword ptr arg(3)           ;output_pitch | 
| 210 | 209 | 
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 264     movdqa      XMMWORD Ptr [rdi], xmm0 | 263     movdqa      XMMWORD Ptr [rdi], xmm0 | 
| 265 | 264 | 
| 266     lea         rdi,    [rdi + rdx] | 265     lea         rdi,    [rdi + rdx] | 
| 267     dec         rcx | 266     dec         rcx | 
| 268     jnz         filter_block1d16_h6_rowloop_ssse3 | 267     jnz         filter_block1d16_h6_rowloop_ssse3 | 
| 269 | 268 | 
| 270     ; begin epilog | 269     ; begin epilog | 
| 271     pop rdi | 270     pop rdi | 
| 272     pop rsi | 271     pop rsi | 
| 273     RESTORE_GOT | 272     RESTORE_GOT | 
| 274     UNSHADOW_ARGS | 273     RESTORE_XMM | 
| 275     pop         rbp |  | 
| 276     ret |  | 
| 277 |  | 
| 278 vp8_filter_block1d16_h4_ssse3: |  | 
| 279     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4 |  | 
| 280     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3 |  | 
| 281 |  | 
| 282     mov         rsi, arg(0)             ;src_ptr |  | 
| 283     movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line |  | 
| 284     movsxd      rcx, dword ptr arg(4)   ;output_height |  | 
| 285     movsxd      rdx, dword ptr arg(3)   ;output_pitch |  | 
| 286 |  | 
| 287 filter_block1d16_h4_rowloop_ssse3: |  | 
| 288     movdqu      xmm1,   XMMWORD PTR [rsi - 2] |  | 
| 289 |  | 
| 290     movdqa      xmm2, xmm1 |  | 
| 291     pshufb      xmm1, [GLOBAL(shuf2b)] |  | 
| 292     pshufb      xmm2, [GLOBAL(shuf3b)] |  | 
| 293     pmaddubsw   xmm1, xmm5 |  | 
| 294 |  | 
| 295     movdqu      xmm3,   XMMWORD PTR [rsi + 6] |  | 
| 296 |  | 
| 297     pmaddubsw   xmm2, xmm6 |  | 
| 298     movdqa      xmm0, xmm3 |  | 
| 299     pshufb      xmm3, [GLOBAL(shuf3b)] |  | 
| 300     pshufb      xmm0, [GLOBAL(shuf2b)] |  | 
| 301 |  | 
| 302     paddsw      xmm1, [GLOBAL(rd)] |  | 
| 303     paddsw      xmm1, xmm2 |  | 
| 304 |  | 
| 305     pmaddubsw   xmm0, xmm5 |  | 
| 306     pmaddubsw   xmm3, xmm6 |  | 
| 307 |  | 
| 308     psraw       xmm1, 7 |  | 
| 309     packuswb    xmm1, xmm1 |  | 
| 310     lea         rsi,    [rsi + rax] |  | 
| 311     paddsw      xmm3, xmm0 |  | 
| 312     paddsw      xmm3, [GLOBAL(rd)] |  | 
| 313     psraw       xmm3, 7 |  | 
| 314     packuswb    xmm3, xmm3 |  | 
| 315 |  | 
| 316     punpcklqdq  xmm1, xmm3 |  | 
| 317 |  | 
| 318     movdqa      XMMWORD Ptr [rdi], xmm1 |  | 
| 319 |  | 
| 320     add         rdi, rdx |  | 
| 321     dec         rcx |  | 
| 322     jnz         filter_block1d16_h4_rowloop_ssse3 |  | 
| 323 |  | 
| 324 |  | 
| 325     ; begin epilog |  | 
| 326     pop rdi |  | 
| 327     pop rsi |  | 
| 328     RESTORE_GOT |  | 
| 329     UNSHADOW_ARGS | 274     UNSHADOW_ARGS | 
| 330     pop         rbp | 275     pop         rbp | 
| 331     ret | 276     ret | 
| 332 | 277 | 
| 333 ;void vp8_filter_block1d4_h6_ssse3 | 278 ;void vp8_filter_block1d4_h6_ssse3 | 
| 334 ;( | 279 ;( | 
| 335 ;    unsigned char  *src_ptr, | 280 ;    unsigned char  *src_ptr, | 
| 336 ;    unsigned int    src_pixels_per_line, | 281 ;    unsigned int    src_pixels_per_line, | 
| 337 ;    unsigned char  *output_ptr, | 282 ;    unsigned char  *output_ptr, | 
| 338 ;    unsigned int    output_pitch, | 283 ;    unsigned int    output_pitch, | 
| 339 ;    unsigned int    output_height, | 284 ;    unsigned int    output_height, | 
| 340 ;    unsigned int    vp8_filter_index | 285 ;    unsigned int    vp8_filter_index | 
| 341 ;) | 286 ;) | 
| 342 global sym(vp8_filter_block1d4_h6_ssse3) | 287 global sym(vp8_filter_block1d4_h6_ssse3) | 
| 343 sym(vp8_filter_block1d4_h6_ssse3): | 288 sym(vp8_filter_block1d4_h6_ssse3): | 
| 344     push        rbp | 289     push        rbp | 
| 345     mov         rbp, rsp | 290     mov         rbp, rsp | 
| 346     SHADOW_ARGS_TO_STACK 6 | 291     SHADOW_ARGS_TO_STACK 6 | 
|  | 292     SAVE_XMM 7 | 
| 347     GET_GOT     rbx | 293     GET_GOT     rbx | 
| 348     push        rsi | 294     push        rsi | 
| 349     push        rdi | 295     push        rdi | 
| 350     ; end prolog | 296     ; end prolog | 
| 351 | 297 | 
| 352     movsxd      rdx, DWORD PTR arg(5)   ;table index | 298     movsxd      rdx, DWORD PTR arg(5)   ;table index | 
| 353     xor         rsi, rsi | 299     xor         rsi, rsi | 
| 354     shl         rdx, 4      ; | 300     shl         rdx, 4      ; | 
| 355 | 301 | 
| 356     lea         rax, [GLOBAL(k0_k5)] | 302     lea         rax, [GLOBAL(k0_k5)] | 
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 444     movd        DWORD PTR [rdi], xmm1 | 390     movd        DWORD PTR [rdi], xmm1 | 
| 445 | 391 | 
| 446     add         rdi, rdx | 392     add         rdi, rdx | 
| 447     dec         rcx | 393     dec         rcx | 
| 448     jnz         filter_block1d4_h4_rowloop_ssse3 | 394     jnz         filter_block1d4_h4_rowloop_ssse3 | 
| 449 | 395 | 
| 450     ; begin epilog | 396     ; begin epilog | 
| 451     pop rdi | 397     pop rdi | 
| 452     pop rsi | 398     pop rsi | 
| 453     RESTORE_GOT | 399     RESTORE_GOT | 
|  | 400     RESTORE_XMM | 
| 454     UNSHADOW_ARGS | 401     UNSHADOW_ARGS | 
| 455     pop         rbp | 402     pop         rbp | 
| 456     ret | 403     ret | 
| 457 | 404 | 
| 458 | 405 | 
| 459 | 406 | 
| 460 ;void vp8_filter_block1d16_v6_ssse3 | 407 ;void vp8_filter_block1d16_v6_ssse3 | 
| 461 ;( | 408 ;( | 
| 462 ;    unsigned char *src_ptr, | 409 ;    unsigned char *src_ptr, | 
| 463 ;    unsigned int   src_pitch, | 410 ;    unsigned int   src_pitch, | 
| 464 ;    unsigned char *output_ptr, | 411 ;    unsigned char *output_ptr, | 
| 465 ;    unsigned int   out_pitch, | 412 ;    unsigned int   out_pitch, | 
| 466 ;    unsigned int   output_height, | 413 ;    unsigned int   output_height, | 
| 467 ;    unsigned int   vp8_filter_index | 414 ;    unsigned int   vp8_filter_index | 
| 468 ;) | 415 ;) | 
| 469 global sym(vp8_filter_block1d16_v6_ssse3) | 416 global sym(vp8_filter_block1d16_v6_ssse3) | 
| 470 sym(vp8_filter_block1d16_v6_ssse3): | 417 sym(vp8_filter_block1d16_v6_ssse3): | 
| 471     push        rbp | 418     push        rbp | 
| 472     mov         rbp, rsp | 419     mov         rbp, rsp | 
| 473     SHADOW_ARGS_TO_STACK 6 | 420     SHADOW_ARGS_TO_STACK 6 | 
|  | 421     SAVE_XMM 7 | 
| 474     GET_GOT     rbx | 422     GET_GOT     rbx | 
| 475     push        rsi | 423     push        rsi | 
| 476     push        rdi | 424     push        rdi | 
| 477     ; end prolog | 425     ; end prolog | 
| 478 | 426 | 
| 479     movsxd      rdx, DWORD PTR arg(5)   ;table index | 427     movsxd      rdx, DWORD PTR arg(5)   ;table index | 
| 480     xor         rsi, rsi | 428     xor         rsi, rsi | 
| 481     shl         rdx, 4      ; | 429     shl         rdx, 4      ; | 
| 482 | 430 | 
| 483     lea         rax, [GLOBAL(k0_k5)] | 431     lea         rax, [GLOBAL(k0_k5)] | 
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 559 %else | 507 %else | 
| 560     add         rdi,        r8 | 508     add         rdi,        r8 | 
| 561 %endif | 509 %endif | 
| 562     dec         rcx | 510     dec         rcx | 
| 563     jnz         vp8_filter_block1d16_v6_ssse3_loop | 511     jnz         vp8_filter_block1d16_v6_ssse3_loop | 
| 564 | 512 | 
| 565     ; begin epilog | 513     ; begin epilog | 
| 566     pop rdi | 514     pop rdi | 
| 567     pop rsi | 515     pop rsi | 
| 568     RESTORE_GOT | 516     RESTORE_GOT | 
|  | 517     RESTORE_XMM | 
| 569     UNSHADOW_ARGS | 518     UNSHADOW_ARGS | 
| 570     pop         rbp | 519     pop         rbp | 
| 571     ret | 520     ret | 
| 572 | 521 | 
| 573 vp8_filter_block1d16_v4_ssse3: | 522 vp8_filter_block1d16_v4_ssse3: | 
| 574     movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4 | 523     movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4 | 
| 575     movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3 | 524     movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3 | 
| 576 | 525 | 
| 577     mov         rsi, arg(0)             ;src_ptr | 526     mov         rsi, arg(0)             ;src_ptr | 
| 578     movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line | 527     movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line | 
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 631 %else | 580 %else | 
| 632     add         rdi,        r8 | 581     add         rdi,        r8 | 
| 633 %endif | 582 %endif | 
| 634     dec         rcx | 583     dec         rcx | 
| 635     jnz         vp8_filter_block1d16_v4_ssse3_loop | 584     jnz         vp8_filter_block1d16_v4_ssse3_loop | 
| 636 | 585 | 
| 637     ; begin epilog | 586     ; begin epilog | 
| 638     pop rdi | 587     pop rdi | 
| 639     pop rsi | 588     pop rsi | 
| 640     RESTORE_GOT | 589     RESTORE_GOT | 
|  | 590     RESTORE_XMM | 
| 641     UNSHADOW_ARGS | 591     UNSHADOW_ARGS | 
| 642     pop         rbp | 592     pop         rbp | 
| 643     ret | 593     ret | 
| 644 | 594 | 
| 645 ;void vp8_filter_block1d8_v6_ssse3 | 595 ;void vp8_filter_block1d8_v6_ssse3 | 
| 646 ;( | 596 ;( | 
| 647 ;    unsigned char *src_ptr, | 597 ;    unsigned char *src_ptr, | 
| 648 ;    unsigned int   src_pitch, | 598 ;    unsigned int   src_pitch, | 
| 649 ;    unsigned char *output_ptr, | 599 ;    unsigned char *output_ptr, | 
| 650 ;    unsigned int   out_pitch, | 600 ;    unsigned int   out_pitch, | 
| 651 ;    unsigned int   output_height, | 601 ;    unsigned int   output_height, | 
| 652 ;    unsigned int   vp8_filter_index | 602 ;    unsigned int   vp8_filter_index | 
| 653 ;) | 603 ;) | 
| 654 global sym(vp8_filter_block1d8_v6_ssse3) | 604 global sym(vp8_filter_block1d8_v6_ssse3) | 
| 655 sym(vp8_filter_block1d8_v6_ssse3): | 605 sym(vp8_filter_block1d8_v6_ssse3): | 
| 656     push        rbp | 606     push        rbp | 
| 657     mov         rbp, rsp | 607     mov         rbp, rsp | 
| 658     SHADOW_ARGS_TO_STACK 6 | 608     SHADOW_ARGS_TO_STACK 6 | 
|  | 609     SAVE_XMM 7 | 
| 659     GET_GOT     rbx | 610     GET_GOT     rbx | 
| 660     push        rsi | 611     push        rsi | 
| 661     push        rdi | 612     push        rdi | 
| 662     ; end prolog | 613     ; end prolog | 
| 663 | 614 | 
| 664     movsxd      rdx, DWORD PTR arg(5)   ;table index | 615     movsxd      rdx, DWORD PTR arg(5)   ;table index | 
| 665     xor         rsi, rsi | 616     xor         rsi, rsi | 
| 666     shl         rdx, 4      ; | 617     shl         rdx, 4      ; | 
| 667 | 618 | 
| 668     lea         rax, [GLOBAL(k0_k5)] | 619     lea         rax, [GLOBAL(k0_k5)] | 
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 721 %else | 672 %else | 
| 722     add         rdi,        r8 | 673     add         rdi,        r8 | 
| 723 %endif | 674 %endif | 
| 724     dec         rcx | 675     dec         rcx | 
| 725     jnz         vp8_filter_block1d8_v6_ssse3_loop | 676     jnz         vp8_filter_block1d8_v6_ssse3_loop | 
| 726 | 677 | 
| 727     ; begin epilog | 678     ; begin epilog | 
| 728     pop rdi | 679     pop rdi | 
| 729     pop rsi | 680     pop rsi | 
| 730     RESTORE_GOT | 681     RESTORE_GOT | 
|  | 682     RESTORE_XMM | 
| 731     UNSHADOW_ARGS | 683     UNSHADOW_ARGS | 
| 732     pop         rbp | 684     pop         rbp | 
| 733     ret | 685     ret | 
| 734 | 686 | 
| 735 vp8_filter_block1d8_v4_ssse3: | 687 vp8_filter_block1d8_v4_ssse3: | 
| 736     movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4 | 688     movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4 | 
| 737     movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3 | 689     movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3 | 
| 738     movdqa      xmm5, [GLOBAL(rd)] | 690     movdqa      xmm5, [GLOBAL(rd)] | 
| 739 | 691 | 
| 740     mov         rsi, arg(0)             ;src_ptr | 692     mov         rsi, arg(0)             ;src_ptr | 
| (...skipping 28 matching lines...) Expand all  Loading... | 
| 769 %else | 721 %else | 
| 770     add         rdi,        r8 | 722     add         rdi,        r8 | 
| 771 %endif | 723 %endif | 
| 772     dec         rcx | 724     dec         rcx | 
| 773     jnz         vp8_filter_block1d8_v4_ssse3_loop | 725     jnz         vp8_filter_block1d8_v4_ssse3_loop | 
| 774 | 726 | 
| 775     ; begin epilog | 727     ; begin epilog | 
| 776     pop rdi | 728     pop rdi | 
| 777     pop rsi | 729     pop rsi | 
| 778     RESTORE_GOT | 730     RESTORE_GOT | 
|  | 731     RESTORE_XMM | 
| 779     UNSHADOW_ARGS | 732     UNSHADOW_ARGS | 
| 780     pop         rbp | 733     pop         rbp | 
| 781     ret | 734     ret | 
| 782 ;void vp8_filter_block1d4_v6_ssse3 | 735 ;void vp8_filter_block1d4_v6_ssse3 | 
| 783 ;( | 736 ;( | 
| 784 ;    unsigned char *src_ptr, | 737 ;    unsigned char *src_ptr, | 
| 785 ;    unsigned int   src_pitch, | 738 ;    unsigned int   src_pitch, | 
| 786 ;    unsigned char *output_ptr, | 739 ;    unsigned char *output_ptr, | 
| 787 ;    unsigned int   out_pitch, | 740 ;    unsigned int   out_pitch, | 
| 788 ;    unsigned int   output_height, | 741 ;    unsigned int   output_height, | 
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 925 ;    int  xoffset, | 878 ;    int  xoffset, | 
| 926 ;    int  yoffset, | 879 ;    int  yoffset, | 
| 927 ;    unsigned char *dst_ptr, | 880 ;    unsigned char *dst_ptr, | 
| 928 ;    int dst_pitch | 881 ;    int dst_pitch | 
| 929 ;) | 882 ;) | 
| 930 global sym(vp8_bilinear_predict16x16_ssse3) | 883 global sym(vp8_bilinear_predict16x16_ssse3) | 
| 931 sym(vp8_bilinear_predict16x16_ssse3): | 884 sym(vp8_bilinear_predict16x16_ssse3): | 
| 932     push        rbp | 885     push        rbp | 
| 933     mov         rbp, rsp | 886     mov         rbp, rsp | 
| 934     SHADOW_ARGS_TO_STACK 6 | 887     SHADOW_ARGS_TO_STACK 6 | 
| 935     SAVE_XMM | 888     SAVE_XMM 7 | 
| 936     GET_GOT     rbx | 889     GET_GOT     rbx | 
| 937     push        rsi | 890     push        rsi | 
| 938     push        rdi | 891     push        rdi | 
| 939     ; end prolog | 892     ; end prolog | 
| 940 | 893 | 
| 941         lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)] | 894         lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)] | 
| 942         movsxd      rax,        dword ptr arg(2)    ; xoffset | 895         movsxd      rax,        dword ptr arg(2)    ; xoffset | 
| 943 | 896 | 
| 944         cmp         rax,        0                   ; skip first_pass filter if xoffset=0 | 897         cmp         rax,        0                   ; skip first_pass filter if xoffset=0 | 
| 945         je          b16x16_sp_only | 898         je          b16x16_sp_only | 
| (...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1188 ;    int  xoffset, | 1141 ;    int  xoffset, | 
| 1189 ;    int  yoffset, | 1142 ;    int  yoffset, | 
| 1190 ;    unsigned char *dst_ptr, | 1143 ;    unsigned char *dst_ptr, | 
| 1191 ;    int dst_pitch | 1144 ;    int dst_pitch | 
| 1192 ;) | 1145 ;) | 
| 1193 global sym(vp8_bilinear_predict8x8_ssse3) | 1146 global sym(vp8_bilinear_predict8x8_ssse3) | 
| 1194 sym(vp8_bilinear_predict8x8_ssse3): | 1147 sym(vp8_bilinear_predict8x8_ssse3): | 
| 1195     push        rbp | 1148     push        rbp | 
| 1196     mov         rbp, rsp | 1149     mov         rbp, rsp | 
| 1197     SHADOW_ARGS_TO_STACK 6 | 1150     SHADOW_ARGS_TO_STACK 6 | 
| 1198     SAVE_XMM | 1151     SAVE_XMM 7 | 
| 1199     GET_GOT     rbx | 1152     GET_GOT     rbx | 
| 1200     push        rsi | 1153     push        rsi | 
| 1201     push        rdi | 1154     push        rdi | 
| 1202     ; end prolog | 1155     ; end prolog | 
| 1203 | 1156 | 
| 1204     ALIGN_STACK 16, rax | 1157     ALIGN_STACK 16, rax | 
| 1205     sub         rsp, 144                         ; reserve 144 bytes | 1158     sub         rsp, 144                         ; reserve 144 bytes | 
| 1206 | 1159 | 
| 1207         lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)] | 1160         lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)] | 
| 1208 | 1161 | 
| (...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1545 vp8_bilinear_filters_ssse3: | 1498 vp8_bilinear_filters_ssse3: | 
| 1546     times 8 db 128, 0 | 1499     times 8 db 128, 0 | 
| 1547     times 8 db 112, 16 | 1500     times 8 db 112, 16 | 
| 1548     times 8 db 96,  32 | 1501     times 8 db 96,  32 | 
| 1549     times 8 db 80,  48 | 1502     times 8 db 80,  48 | 
| 1550     times 8 db 64,  64 | 1503     times 8 db 64,  64 | 
| 1551     times 8 db 48,  80 | 1504     times 8 db 48,  80 | 
| 1552     times 8 db 32,  96 | 1505     times 8 db 32,  96 | 
| 1553     times 8 db 16,  112 | 1506     times 8 db 16,  112 | 
| 1554 | 1507 | 
| OLD | NEW | 
|---|