| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 78 ;    unsigned char   *  ref_ptr, | 78 ;    unsigned char   *  ref_ptr, | 
| 79 ;    int             recon_stride, | 79 ;    int             recon_stride, | 
| 80 ;    unsigned int    *  SSE, | 80 ;    unsigned int    *  SSE, | 
| 81 ;    int             *  Sum | 81 ;    int             *  Sum | 
| 82 ;) | 82 ;) | 
| 83 global sym(vp8_get16x16var_sse2) | 83 global sym(vp8_get16x16var_sse2) | 
| 84 sym(vp8_get16x16var_sse2): | 84 sym(vp8_get16x16var_sse2): | 
| 85     push        rbp | 85     push        rbp | 
| 86     mov         rbp, rsp | 86     mov         rbp, rsp | 
| 87     SHADOW_ARGS_TO_STACK 6 | 87     SHADOW_ARGS_TO_STACK 6 | 
|  | 88     SAVE_XMM 7 | 
| 88     push rbx | 89     push rbx | 
| 89     push rsi | 90     push rsi | 
| 90     push rdi | 91     push rdi | 
| 91     ; end prolog | 92     ; end prolog | 
| 92 | 93 | 
| 93         mov         rsi,            arg(0) ;[src_ptr] | 94         mov         rsi,            arg(0) ;[src_ptr] | 
| 94         mov         rdi,            arg(2) ;[ref_ptr] | 95         mov         rdi,            arg(2) ;[ref_ptr] | 
| 95 | 96 | 
| 96         movsxd      rax,            DWORD PTR arg(1) ;[source_stride] | 97         movsxd      rax,            DWORD PTR arg(1) ;[source_stride] | 
| 97         movsxd      rdx,            DWORD PTR arg(3) ;[recon_stride] | 98         movsxd      rdx,            DWORD PTR arg(3) ;[recon_stride] | 
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 199         mov         rdi,            arg(4) ;[SSE] | 200         mov         rdi,            arg(4) ;[SSE] | 
| 200 | 201 | 
| 201         movd DWORD PTR [rax],       xmm7 | 202         movd DWORD PTR [rax],       xmm7 | 
| 202         movd DWORD PTR [rdi],       xmm1 | 203         movd DWORD PTR [rdi],       xmm1 | 
| 203 | 204 | 
| 204 | 205 | 
| 205     ; begin epilog | 206     ; begin epilog | 
| 206     pop rdi | 207     pop rdi | 
| 207     pop rsi | 208     pop rsi | 
| 208     pop rbx | 209     pop rbx | 
| 209     UNSHADOW_ARGS | 210     RESTORE_XMM | 
| 210     pop         rbp |  | 
| 211     ret |  | 
| 212 |  | 
| 213 |  | 
| 214 ;unsigned int vp8_get16x16pred_error_sse2 |  | 
| 215 ;( |  | 
| 216 ;   unsigned char *src_ptr, |  | 
| 217 ;    int src_stride, |  | 
| 218 ;    unsigned char *ref_ptr, |  | 
| 219 ;    int ref_stride |  | 
| 220 ;) |  | 
| 221 global sym(vp8_get16x16pred_error_sse2) |  | 
| 222 sym(vp8_get16x16pred_error_sse2): |  | 
| 223     push        rbp |  | 
| 224     mov         rbp, rsp |  | 
| 225     SHADOW_ARGS_TO_STACK 4 |  | 
| 226     GET_GOT     rbx |  | 
| 227     push rsi |  | 
| 228     push rdi |  | 
| 229     sub         rsp, 16 |  | 
| 230     ; end prolog |  | 
| 231 |  | 
| 232         mov         rsi,            arg(0) ;[src_ptr] |  | 
| 233         mov         rdi,            arg(2) ;[ref_ptr] |  | 
| 234 |  | 
| 235         movsxd      rax,            DWORD PTR arg(1) ;[src_stride] |  | 
| 236         movsxd      rdx,            DWORD PTR arg(3) ;[ref_stride] |  | 
| 237 |  | 
| 238         pxor        xmm0,           xmm0                        ; clear xmm0 for unpack |  | 
| 239         pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs |  | 
| 240 |  | 
| 241         pxor        xmm6,           xmm6                        ; clear xmm6 for accumulating sse |  | 
| 242         mov         rcx,            16 |  | 
| 243 |  | 
| 244 var16peloop: |  | 
| 245         movdqu      xmm1,           XMMWORD PTR [rsi] |  | 
| 246         movdqu      xmm2,           XMMWORD PTR [rdi] |  | 
| 247 |  | 
| 248         movdqa      xmm3,           xmm1 |  | 
| 249         movdqa      xmm4,           xmm2 |  | 
| 250 |  | 
| 251         punpcklbw   xmm1,           xmm0 |  | 
| 252         punpckhbw   xmm3,           xmm0 |  | 
| 253 |  | 
| 254         punpcklbw   xmm2,           xmm0 |  | 
| 255         punpckhbw   xmm4,           xmm0 |  | 
| 256 |  | 
| 257         psubw       xmm1,           xmm2 |  | 
| 258         psubw       xmm3,           xmm4 |  | 
| 259 |  | 
| 260         paddw       xmm7,           xmm1 |  | 
| 261         pmaddwd     xmm1,           xmm1 |  | 
| 262 |  | 
| 263         paddw       xmm7,           xmm3 |  | 
| 264         pmaddwd     xmm3,           xmm3 |  | 
| 265 |  | 
| 266         paddd       xmm6,           xmm1 |  | 
| 267         paddd       xmm6,           xmm3 |  | 
| 268 |  | 
| 269         add         rsi,            rax |  | 
| 270         add         rdi,            rdx |  | 
| 271 |  | 
| 272         sub         rcx,            1 |  | 
| 273         jnz         var16peloop |  | 
| 274 |  | 
| 275 |  | 
| 276         movdqa      xmm1,           xmm6 |  | 
| 277         pxor        xmm6,           xmm6 |  | 
| 278 |  | 
| 279         pxor        xmm5,           xmm5 |  | 
| 280         punpcklwd   xmm6,           xmm7 |  | 
| 281 |  | 
| 282         punpckhwd   xmm5,           xmm7 |  | 
| 283         psrad       xmm5,           16 |  | 
| 284 |  | 
| 285         psrad       xmm6,           16 |  | 
| 286         paddd       xmm6,           xmm5 |  | 
| 287 |  | 
| 288         movdqa      xmm2,           xmm1 |  | 
| 289         punpckldq   xmm1,           xmm0 |  | 
| 290 |  | 
| 291         punpckhdq   xmm2,           xmm0 |  | 
| 292         movdqa      xmm7,           xmm6 |  | 
| 293 |  | 
| 294         paddd       xmm1,           xmm2 |  | 
| 295         punpckldq   xmm6,           xmm0 |  | 
| 296 |  | 
| 297         punpckhdq   xmm7,           xmm0 |  | 
| 298         paddd       xmm6,           xmm7 |  | 
| 299 |  | 
| 300         movdqa      xmm2,           xmm1 |  | 
| 301         movdqa      xmm7,           xmm6 |  | 
| 302 |  | 
| 303         psrldq      xmm1,           8 |  | 
| 304         psrldq      xmm6,           8 |  | 
| 305 |  | 
| 306         paddd       xmm7,           xmm6 |  | 
| 307         paddd       xmm1,           xmm2 |  | 
| 308 |  | 
| 309         movd DWORD PTR [rsp],       xmm7  ;Sum |  | 
| 310         movd DWORD PTR [rsp+4],     xmm1  ;SSE |  | 
| 311 |  | 
| 312         ; return (SSE-((Sum*Sum)>>8)); |  | 
| 313         movsxd      rdx, dword ptr [rsp] |  | 
| 314         imul        rdx, rdx |  | 
| 315         sar         rdx, 8 |  | 
| 316         movsxd      rax, dword ptr [rsp + 4] |  | 
| 317         sub         rax, rdx |  | 
| 318 |  | 
| 319     ; begin epilog |  | 
| 320     add rsp, 16 |  | 
| 321     pop rdi |  | 
| 322     pop rsi |  | 
| 323     RESTORE_GOT |  | 
| 324     UNSHADOW_ARGS | 211     UNSHADOW_ARGS | 
| 325     pop         rbp | 212     pop         rbp | 
| 326     ret | 213     ret | 
| 327 | 214 | 
| 328 | 215 | 
| 329 | 216 | 
|  | 217 | 
| 330 ;unsigned int vp8_get8x8var_sse2 | 218 ;unsigned int vp8_get8x8var_sse2 | 
| 331 ;( | 219 ;( | 
| 332 ;    unsigned char   *  src_ptr, | 220 ;    unsigned char   *  src_ptr, | 
| 333 ;    int             source_stride, | 221 ;    int             source_stride, | 
| 334 ;    unsigned char   *  ref_ptr, | 222 ;    unsigned char   *  ref_ptr, | 
| 335 ;    int             recon_stride, | 223 ;    int             recon_stride, | 
| 336 ;    unsigned int    *  SSE, | 224 ;    unsigned int    *  SSE, | 
| 337 ;    int             *  Sum | 225 ;    int             *  Sum | 
| 338 ;) | 226 ;) | 
| 339 global sym(vp8_get8x8var_sse2) | 227 global sym(vp8_get8x8var_sse2) | 
| 340 sym(vp8_get8x8var_sse2): | 228 sym(vp8_get8x8var_sse2): | 
| 341     push        rbp | 229     push        rbp | 
| 342     mov         rbp, rsp | 230     mov         rbp, rsp | 
| 343     SHADOW_ARGS_TO_STACK 6 | 231     SHADOW_ARGS_TO_STACK 6 | 
|  | 232     SAVE_XMM 7 | 
| 344     GET_GOT     rbx | 233     GET_GOT     rbx | 
| 345     push rsi | 234     push rsi | 
| 346     push rdi | 235     push rdi | 
| 347     sub         rsp, 16 | 236     sub         rsp, 16 | 
| 348     ; end prolog | 237     ; end prolog | 
| 349 | 238 | 
| 350         mov         rsi,            arg(0) ;[src_ptr] | 239         mov         rsi,            arg(0) ;[src_ptr] | 
| 351         mov         rdi,            arg(2) ;[ref_ptr] | 240         mov         rdi,            arg(2) ;[ref_ptr] | 
| 352 | 241 | 
| 353         movsxd      rax,            DWORD PTR arg(1) ;[source_stride] | 242         movsxd      rax,            DWORD PTR arg(1) ;[source_stride] | 
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 499         movsx       rcx,            dx | 388         movsx       rcx,            dx | 
| 500 | 389 | 
| 501         mov  dword ptr [rax],       ecx | 390         mov  dword ptr [rax],       ecx | 
| 502         movd DWORD PTR [rdi],       xmm1 | 391         movd DWORD PTR [rdi],       xmm1 | 
| 503 | 392 | 
| 504     ; begin epilog | 393     ; begin epilog | 
| 505     add rsp, 16 | 394     add rsp, 16 | 
| 506     pop rdi | 395     pop rdi | 
| 507     pop rsi | 396     pop rsi | 
| 508     RESTORE_GOT | 397     RESTORE_GOT | 
|  | 398     RESTORE_XMM | 
| 509     UNSHADOW_ARGS | 399     UNSHADOW_ARGS | 
| 510     pop         rbp | 400     pop         rbp | 
| 511     ret | 401     ret | 
| 512 | 402 | 
| 513 ;void vp8_filter_block2d_bil_var_sse2 | 403 ;void vp8_filter_block2d_bil_var_sse2 | 
| 514 ;( | 404 ;( | 
| 515 ;    unsigned char *ref_ptr, | 405 ;    unsigned char *ref_ptr, | 
| 516 ;    int ref_pixels_per_line, | 406 ;    int ref_pixels_per_line, | 
| 517 ;    unsigned char *src_ptr, | 407 ;    unsigned char *src_ptr, | 
| 518 ;    int src_pixels_per_line, | 408 ;    int src_pixels_per_line, | 
| 519 ;    unsigned int Height, | 409 ;    unsigned int Height, | 
| 520 ;    int  xoffset, | 410 ;    int  xoffset, | 
| 521 ;    int  yoffset, | 411 ;    int  yoffset, | 
| 522 ;    int *sum, | 412 ;    int *sum, | 
| 523 ;    unsigned int *sumsquared | 413 ;    unsigned int *sumsquared | 
| 524 ; | 414 ; | 
| 525 ;) | 415 ;) | 
| 526 global sym(vp8_filter_block2d_bil_var_sse2) | 416 global sym(vp8_filter_block2d_bil_var_sse2) | 
| 527 sym(vp8_filter_block2d_bil_var_sse2): | 417 sym(vp8_filter_block2d_bil_var_sse2): | 
| 528     push        rbp | 418     push        rbp | 
| 529     mov         rbp, rsp | 419     mov         rbp, rsp | 
| 530     SHADOW_ARGS_TO_STACK 9 | 420     SHADOW_ARGS_TO_STACK 9 | 
| 531     SAVE_XMM | 421     SAVE_XMM 7 | 
| 532     GET_GOT     rbx | 422     GET_GOT     rbx | 
| 533     push rsi | 423     push rsi | 
| 534     push rdi | 424     push rdi | 
| 535     push rbx | 425     push rbx | 
| 536     ; end prolog | 426     ; end prolog | 
| 537 | 427 | 
| 538         pxor            xmm6,           xmm6                 ; | 428         pxor            xmm6,           xmm6                 ; | 
| 539         pxor            xmm7,           xmm7                 ; | 429         pxor            xmm7,           xmm7                 ; | 
| 540 | 430 | 
| 541         lea             rsi,            [GLOBAL(xmm_bi_rd)]  ; rounding | 431         lea             rsi,            [GLOBAL(xmm_bi_rd)]  ; rounding | 
| (...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 798 ;    int src_pixels_per_line, | 688 ;    int src_pixels_per_line, | 
| 799 ;    unsigned int Height, | 689 ;    unsigned int Height, | 
| 800 ;    int *sum, | 690 ;    int *sum, | 
| 801 ;    unsigned int *sumsquared | 691 ;    unsigned int *sumsquared | 
| 802 ;) | 692 ;) | 
| 803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) | 693 global sym(vp8_half_horiz_vert_variance8x_h_sse2) | 
| 804 sym(vp8_half_horiz_vert_variance8x_h_sse2): | 694 sym(vp8_half_horiz_vert_variance8x_h_sse2): | 
| 805     push        rbp | 695     push        rbp | 
| 806     mov         rbp, rsp | 696     mov         rbp, rsp | 
| 807     SHADOW_ARGS_TO_STACK 7 | 697     SHADOW_ARGS_TO_STACK 7 | 
|  | 698     SAVE_XMM 7 | 
| 808     GET_GOT     rbx | 699     GET_GOT     rbx | 
| 809     push rsi | 700     push rsi | 
| 810     push rdi | 701     push rdi | 
| 811     ; end prolog | 702     ; end prolog | 
| 812 | 703 | 
| 813 %if ABI_IS_32BIT=0 | 704 %if ABI_IS_32BIT=0 | 
| 814     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 705     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 
| 815     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 706     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 
| 816 %endif | 707 %endif | 
| 817 | 708 | 
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 899         mov             rdi,            arg(6) ; sumsquared | 790         mov             rdi,            arg(6) ; sumsquared | 
| 900 | 791 | 
| 901         movd            [rsi],          mm2                 ; | 792         movd            [rsi],          mm2                 ; | 
| 902         movd            [rdi],          mm4                 ; | 793         movd            [rdi],          mm4                 ; | 
| 903 | 794 | 
| 904 | 795 | 
| 905     ; begin epilog | 796     ; begin epilog | 
| 906     pop rdi | 797     pop rdi | 
| 907     pop rsi | 798     pop rsi | 
| 908     RESTORE_GOT | 799     RESTORE_GOT | 
|  | 800     RESTORE_XMM | 
| 909     UNSHADOW_ARGS | 801     UNSHADOW_ARGS | 
| 910     pop         rbp | 802     pop         rbp | 
| 911     ret | 803     ret | 
| 912 | 804 | 
| 913 ;void vp8_half_horiz_vert_variance16x_h_sse2 | 805 ;void vp8_half_horiz_vert_variance16x_h_sse2 | 
| 914 ;( | 806 ;( | 
| 915 ;    unsigned char *ref_ptr, | 807 ;    unsigned char *ref_ptr, | 
| 916 ;    int ref_pixels_per_line, | 808 ;    int ref_pixels_per_line, | 
| 917 ;    unsigned char *src_ptr, | 809 ;    unsigned char *src_ptr, | 
| 918 ;    int src_pixels_per_line, | 810 ;    int src_pixels_per_line, | 
| 919 ;    unsigned int Height, | 811 ;    unsigned int Height, | 
| 920 ;    int *sum, | 812 ;    int *sum, | 
| 921 ;    unsigned int *sumsquared | 813 ;    unsigned int *sumsquared | 
| 922 ;) | 814 ;) | 
| 923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) | 815 global sym(vp8_half_horiz_vert_variance16x_h_sse2) | 
| 924 sym(vp8_half_horiz_vert_variance16x_h_sse2): | 816 sym(vp8_half_horiz_vert_variance16x_h_sse2): | 
| 925     push        rbp | 817     push        rbp | 
| 926     mov         rbp, rsp | 818     mov         rbp, rsp | 
| 927     SHADOW_ARGS_TO_STACK 7 | 819     SHADOW_ARGS_TO_STACK 7 | 
| 928     SAVE_XMM | 820     SAVE_XMM 7 | 
| 929     GET_GOT     rbx | 821     GET_GOT     rbx | 
| 930     push rsi | 822     push rsi | 
| 931     push rdi | 823     push rdi | 
| 932     ; end prolog | 824     ; end prolog | 
| 933 | 825 | 
| 934         pxor            xmm6,           xmm6                ;  error accumulator | 826         pxor            xmm6,           xmm6                ;  error accumulator | 
| 935         pxor            xmm7,           xmm7                ;  sse accumulator | 827         pxor            xmm7,           xmm7                ;  sse accumulator | 
| 936         mov             rsi,            arg(0) ;ref_ptr              ; | 828         mov             rsi,            arg(0) ;ref_ptr              ; | 
| 937 | 829 | 
| 938         mov             rdi,            arg(2) ;src_ptr              ; | 830         mov             rdi,            arg(2) ;src_ptr              ; | 
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1034 ;    int src_pixels_per_line, | 926 ;    int src_pixels_per_line, | 
| 1035 ;    unsigned int Height, | 927 ;    unsigned int Height, | 
| 1036 ;    int *sum, | 928 ;    int *sum, | 
| 1037 ;    unsigned int *sumsquared | 929 ;    unsigned int *sumsquared | 
| 1038 ;) | 930 ;) | 
| 1039 global sym(vp8_half_vert_variance8x_h_sse2) | 931 global sym(vp8_half_vert_variance8x_h_sse2) | 
| 1040 sym(vp8_half_vert_variance8x_h_sse2): | 932 sym(vp8_half_vert_variance8x_h_sse2): | 
| 1041     push        rbp | 933     push        rbp | 
| 1042     mov         rbp, rsp | 934     mov         rbp, rsp | 
| 1043     SHADOW_ARGS_TO_STACK 7 | 935     SHADOW_ARGS_TO_STACK 7 | 
|  | 936     SAVE_XMM 7 | 
| 1044     GET_GOT     rbx | 937     GET_GOT     rbx | 
| 1045     push rsi | 938     push rsi | 
| 1046     push rdi | 939     push rdi | 
| 1047     ; end prolog | 940     ; end prolog | 
| 1048 | 941 | 
| 1049 %if ABI_IS_32BIT=0 | 942 %if ABI_IS_32BIT=0 | 
| 1050     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 943     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 
| 1051     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 944     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 
| 1052 %endif | 945 %endif | 
| 1053 | 946 | 
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1120         mov             rdi,            arg(6) ; sumsquared | 1013         mov             rdi,            arg(6) ; sumsquared | 
| 1121 | 1014 | 
| 1122         movd            [rsi],          mm2                 ; | 1015         movd            [rsi],          mm2                 ; | 
| 1123         movd            [rdi],          mm4                 ; | 1016         movd            [rdi],          mm4                 ; | 
| 1124 | 1017 | 
| 1125 | 1018 | 
| 1126     ; begin epilog | 1019     ; begin epilog | 
| 1127     pop rdi | 1020     pop rdi | 
| 1128     pop rsi | 1021     pop rsi | 
| 1129     RESTORE_GOT | 1022     RESTORE_GOT | 
|  | 1023     RESTORE_XMM | 
| 1130     UNSHADOW_ARGS | 1024     UNSHADOW_ARGS | 
| 1131     pop         rbp | 1025     pop         rbp | 
| 1132     ret | 1026     ret | 
| 1133 | 1027 | 
| 1134 ;void vp8_half_vert_variance16x_h_sse2 | 1028 ;void vp8_half_vert_variance16x_h_sse2 | 
| 1135 ;( | 1029 ;( | 
| 1136 ;    unsigned char *ref_ptr, | 1030 ;    unsigned char *ref_ptr, | 
| 1137 ;    int ref_pixels_per_line, | 1031 ;    int ref_pixels_per_line, | 
| 1138 ;    unsigned char *src_ptr, | 1032 ;    unsigned char *src_ptr, | 
| 1139 ;    int src_pixels_per_line, | 1033 ;    int src_pixels_per_line, | 
| 1140 ;    unsigned int Height, | 1034 ;    unsigned int Height, | 
| 1141 ;    int *sum, | 1035 ;    int *sum, | 
| 1142 ;    unsigned int *sumsquared | 1036 ;    unsigned int *sumsquared | 
| 1143 ;) | 1037 ;) | 
| 1144 global sym(vp8_half_vert_variance16x_h_sse2) | 1038 global sym(vp8_half_vert_variance16x_h_sse2) | 
| 1145 sym(vp8_half_vert_variance16x_h_sse2): | 1039 sym(vp8_half_vert_variance16x_h_sse2): | 
| 1146     push        rbp | 1040     push        rbp | 
| 1147     mov         rbp, rsp | 1041     mov         rbp, rsp | 
| 1148     SHADOW_ARGS_TO_STACK 7 | 1042     SHADOW_ARGS_TO_STACK 7 | 
| 1149     SAVE_XMM | 1043     SAVE_XMM 7 | 
| 1150     GET_GOT     rbx | 1044     GET_GOT     rbx | 
| 1151     push rsi | 1045     push rsi | 
| 1152     push rdi | 1046     push rdi | 
| 1153     ; end prolog | 1047     ; end prolog | 
| 1154 | 1048 | 
| 1155         pxor            xmm6,           xmm6                ;  error accumulator | 1049         pxor            xmm6,           xmm6                ;  error accumulator | 
| 1156         pxor            xmm7,           xmm7                ;  sse accumulator | 1050         pxor            xmm7,           xmm7                ;  sse accumulator | 
| 1157         mov             rsi,            arg(0)              ;ref_ptr | 1051         mov             rsi,            arg(0)              ;ref_ptr | 
| 1158 | 1052 | 
| 1159         mov             rdi,            arg(2)              ;src_ptr | 1053         mov             rdi,            arg(2)              ;src_ptr | 
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1247 ;    int src_pixels_per_line, | 1141 ;    int src_pixels_per_line, | 
| 1248 ;    unsigned int Height, | 1142 ;    unsigned int Height, | 
| 1249 ;    int *sum, | 1143 ;    int *sum, | 
| 1250 ;    unsigned int *sumsquared | 1144 ;    unsigned int *sumsquared | 
| 1251 ;) | 1145 ;) | 
| 1252 global sym(vp8_half_horiz_variance8x_h_sse2) | 1146 global sym(vp8_half_horiz_variance8x_h_sse2) | 
| 1253 sym(vp8_half_horiz_variance8x_h_sse2): | 1147 sym(vp8_half_horiz_variance8x_h_sse2): | 
| 1254     push        rbp | 1148     push        rbp | 
| 1255     mov         rbp, rsp | 1149     mov         rbp, rsp | 
| 1256     SHADOW_ARGS_TO_STACK 7 | 1150     SHADOW_ARGS_TO_STACK 7 | 
|  | 1151     SAVE_XMM 7 | 
| 1257     GET_GOT     rbx | 1152     GET_GOT     rbx | 
| 1258     push rsi | 1153     push rsi | 
| 1259     push rdi | 1154     push rdi | 
| 1260     ; end prolog | 1155     ; end prolog | 
| 1261 | 1156 | 
| 1262 %if ABI_IS_32BIT=0 | 1157 %if ABI_IS_32BIT=0 | 
| 1263     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 1158     movsxd          r8, dword ptr arg(1) ;ref_pixels_per_line | 
| 1264     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 1159     movsxd          r9, dword ptr arg(3) ;src_pixels_per_line | 
| 1265 %endif | 1160 %endif | 
| 1266 | 1161 | 
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1331         mov             rdi,            arg(6) ; sumsquared | 1226         mov             rdi,            arg(6) ; sumsquared | 
| 1332 | 1227 | 
| 1333         movd            [rsi],          mm2                 ; | 1228         movd            [rsi],          mm2                 ; | 
| 1334         movd            [rdi],          mm4                 ; | 1229         movd            [rdi],          mm4                 ; | 
| 1335 | 1230 | 
| 1336 | 1231 | 
| 1337     ; begin epilog | 1232     ; begin epilog | 
| 1338     pop rdi | 1233     pop rdi | 
| 1339     pop rsi | 1234     pop rsi | 
| 1340     RESTORE_GOT | 1235     RESTORE_GOT | 
|  | 1236     RESTORE_XMM | 
| 1341     UNSHADOW_ARGS | 1237     UNSHADOW_ARGS | 
| 1342     pop         rbp | 1238     pop         rbp | 
| 1343     ret | 1239     ret | 
| 1344 | 1240 | 
| 1345 ;void vp8_half_horiz_variance16x_h_sse2 | 1241 ;void vp8_half_horiz_variance16x_h_sse2 | 
| 1346 ;( | 1242 ;( | 
| 1347 ;    unsigned char *ref_ptr, | 1243 ;    unsigned char *ref_ptr, | 
| 1348 ;    int ref_pixels_per_line, | 1244 ;    int ref_pixels_per_line, | 
| 1349 ;    unsigned char *src_ptr, | 1245 ;    unsigned char *src_ptr, | 
| 1350 ;    int src_pixels_per_line, | 1246 ;    int src_pixels_per_line, | 
| 1351 ;    unsigned int Height, | 1247 ;    unsigned int Height, | 
| 1352 ;    int *sum, | 1248 ;    int *sum, | 
| 1353 ;    unsigned int *sumsquared | 1249 ;    unsigned int *sumsquared | 
| 1354 ;) | 1250 ;) | 
| 1355 global sym(vp8_half_horiz_variance16x_h_sse2) | 1251 global sym(vp8_half_horiz_variance16x_h_sse2) | 
| 1356 sym(vp8_half_horiz_variance16x_h_sse2): | 1252 sym(vp8_half_horiz_variance16x_h_sse2): | 
| 1357     push        rbp | 1253     push        rbp | 
| 1358     mov         rbp, rsp | 1254     mov         rbp, rsp | 
| 1359     SHADOW_ARGS_TO_STACK 7 | 1255     SHADOW_ARGS_TO_STACK 7 | 
| 1360     SAVE_XMM | 1256     SAVE_XMM 7 | 
| 1361     GET_GOT     rbx | 1257     GET_GOT     rbx | 
| 1362     push rsi | 1258     push rsi | 
| 1363     push rdi | 1259     push rdi | 
| 1364     ; end prolog | 1260     ; end prolog | 
| 1365 | 1261 | 
| 1366         pxor            xmm6,           xmm6                ;  error accumulator | 1262         pxor            xmm6,           xmm6                ;  error accumulator | 
| 1367         pxor            xmm7,           xmm7                ;  sse accumulator | 1263         pxor            xmm7,           xmm7                ;  sse accumulator | 
| 1368         mov             rsi,            arg(0) ;ref_ptr              ; | 1264         mov             rsi,            arg(0) ;ref_ptr              ; | 
| 1369 | 1265 | 
| 1370         mov             rdi,            arg(2) ;src_ptr              ; | 1266         mov             rdi,            arg(2) ;src_ptr              ; | 
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1454 align 16 | 1350 align 16 | 
| 1455 vp8_bilinear_filters_sse2: | 1351 vp8_bilinear_filters_sse2: | 
| 1456     dw 128, 128, 128, 128, 128, 128, 128, 128,  0,  0,  0,  0,  0,  0,  0,  0 | 1352     dw 128, 128, 128, 128, 128, 128, 128, 128,  0,  0,  0,  0,  0,  0,  0,  0 | 
| 1457     dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 | 1353     dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 | 
| 1458     dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 | 1354     dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 | 
| 1459     dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 | 1355     dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 | 
| 1460     dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 | 1356     dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 | 
| 1461     dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 | 1357     dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 | 
| 1462     dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 | 1358     dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 | 
| 1463     dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 | 1359     dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 | 
| OLD | NEW | 
|---|