| OLD | NEW | 
|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" | 
| 13 | 13 | 
| 14 ;unsigned int vp8_sad16x16_wmt( | 14 ;unsigned int vp8_sad16x16_wmt( | ; Sums absolute byte differences between a src block and a ref block, 16 bytes wide; the loop below covers 16 rows and the total is left in rax.
| 15 ;    unsigned char *src_ptr, | 15 ;    unsigned char *src_ptr, | ; arg(0): first byte of the source block
| 16 ;    int  src_stride, | 16 ;    int  src_stride, | ; arg(1): byte offset between consecutive source rows (sign-extended to 64 bits below)
| 17 ;    unsigned char *ref_ptr, | 17 ;    unsigned char *ref_ptr, | ; arg(2): first byte of the reference block
| 18 ;    int  ref_stride) | 18 ;    int  ref_stride) | ; arg(3): byte offset between consecutive reference rows (sign-extended to 64 bits below)
| 19 global sym(vp8_sad16x16_wmt) | 19 global sym(vp8_sad16x16_wmt) | 
| 20 sym(vp8_sad16x16_wmt): | 20 sym(vp8_sad16x16_wmt): | 
| 21     push        rbp | 21     push        rbp | 
| 22     mov         rbp, rsp | 22     mov         rbp, rsp | 
| 23     SHADOW_ARGS_TO_STACK 4 | 23     SHADOW_ARGS_TO_STACK 4 | ; macro from x86_abi_support.asm (not visible here) -- presumably makes the 4 arguments reachable through arg(n); confirm
|  | 24     SAVE_XMM 6 | ; NEW side only: the accumulator moves from xmm7 to xmm6, so xmm6 is the highest XMM register touched -- presumably saved because xmm6-xmm15 are callee-saved on Win64; confirm against the macro definition
| 24     push        rsi | 25     push        rsi | 
| 25     push        rdi | 26     push        rdi | 
| 26     ; end prolog | 27     ; end prolog | 
| 27 | 28 | 
| 28         mov             rsi,        arg(0) ;src_ptr | 29         mov             rsi,        arg(0) ;src_ptr | 
| 29         mov             rdi,        arg(2) ;ref_ptr | 30         mov             rdi,        arg(2) ;ref_ptr | 
| 30 | 31 | 
| 31         movsxd          rax,        dword ptr arg(1) ;src_stride | 32         movsxd          rax,        dword ptr arg(1) ;src_stride | 
| 32         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 33         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 
| 33 | 34 | 
| 34         lea             rcx,        [rsi+rax*8] | 35         lea             rcx,        [rsi+rax*8] | ; rcx = src_ptr + 8*src_stride
| 35 | 36 | 
| 36         lea             rcx,        [rcx+rax*8] | 37         lea             rcx,        [rcx+rax*8] | ; rcx = src_ptr + 16*src_stride: the source address at which the loop stops
| 37         pxor            xmm7,       xmm7 | 38         pxor            xmm6,       xmm6 | ; clear the running SAD accumulator (xmm7 in OLD, xmm6 in NEW)
| 38 | 39 | 
| 39 x16x16sad_wmt_loop: | 40 x16x16sad_wmt_loop: | ; each pass handles two rows of src and ref
| 40 | 41 | 
| 41         movq            xmm0,       QWORD PTR [rsi] | 42         movq            xmm0,       QWORD PTR [rsi] | ; src row 0, bytes 0-7
| 42         movq            xmm2,       QWORD PTR [rsi+8] | 43         movq            xmm2,       QWORD PTR [rsi+8] | ; src row 0, bytes 8-15
| 43 | 44 | 
| 44         movq            xmm1,       QWORD PTR [rdi] | 45         movq            xmm1,       QWORD PTR [rdi] | ; ref row 0, bytes 0-7
| 45         movq            xmm3,       QWORD PTR [rdi+8] | 46         movq            xmm3,       QWORD PTR [rdi+8] | ; ref row 0, bytes 8-15
| 46 | 47 | 
| 47         movq            xmm4,       QWORD PTR [rsi+rax] | 48         movq            xmm4,       QWORD PTR [rsi+rax] | ; src row 1, bytes 0-7
| 48         movq            xmm5,       QWORD PTR [rdi+rdx] | 49         movq            xmm5,       QWORD PTR [rdi+rdx] | ; ref row 1, bytes 0-7
| 49 | 50 | 
| 50 | 51 | 
| 51         punpcklbw       xmm0,       xmm2 | 52         punpcklbw       xmm0,       xmm2 | ; interleave the two 8-byte halves so xmm0 holds all 16 src bytes of row 0
| 52         punpcklbw       xmm1,       xmm3 | 53         punpcklbw       xmm1,       xmm3 | ; same interleave for ref, so src/ref byte pairs still line up for psadbw
| 53 | 54 | 
| 54         psadbw          xmm0,       xmm1 | 55         psadbw          xmm0,       xmm1 | ; xmm0 = two partial SADs for row 0, one per 64-bit half
| 55         movq            xmm6,       QWORD PTR [rsi+rax+8] | 56         movq            xmm2,       QWORD PTR [rsi+rax+8] | ; src row 1, bytes 8-15; NEW reuses xmm2 (already consumed by the punpcklbw above) instead of xmm6
| 56 | 57 | 
| 57         movq            xmm3,       QWORD PTR [rdi+rdx+8] | 58         movq            xmm3,       QWORD PTR [rdi+rdx+8] | ; ref row 1, bytes 8-15
| 58         lea             rsi,        [rsi+rax*2] | 59         lea             rsi,        [rsi+rax*2] | ; src_ptr += 2*src_stride
| 59 | 60 | 
| 60         lea             rdi,        [rdi+rdx*2] | 61         lea             rdi,        [rdi+rdx*2] | ; ref_ptr += 2*ref_stride
| 61         punpcklbw       xmm4,       xmm6 | 62         punpcklbw       xmm4,       xmm2 | ; all 16 src bytes of row 1
| 62 | 63 | 
| 63         punpcklbw       xmm5,       xmm3 | 64         punpcklbw       xmm5,       xmm3 | ; all 16 ref bytes of row 1, same byte order as src
| 64         psadbw          xmm4,       xmm5 | 65         psadbw          xmm4,       xmm5 | ; xmm4 = two partial SADs for row 1
| 65 | 66 | 
| 66         paddw           xmm7,       xmm0 | 67         paddw           xmm6,       xmm0 | ; accumulate row 0
| 67         paddw           xmm7,       xmm4 | 68         paddw           xmm6,       xmm4 | ; accumulate row 1
| 68 | 69 | 
| 69         cmp             rsi,        rcx | 70         cmp             rsi,        rcx | ; stop once src_ptr reaches the end address computed before the loop
| 70         jne             x16x16sad_wmt_loop | 71         jne             x16x16sad_wmt_loop | 
| 71 | 72 | 
| 72         movq            xmm0,       xmm7 | 73         movq            xmm0,       xmm6 | ; copy the low 64-bit partial sum (movq zeroes the upper half of xmm0)
| 73         psrldq          xmm7,       8 | 74         psrldq          xmm6,       8 | ; shift the high 64-bit partial sum down into the low half
| 74 | 75 | 
| 75         paddw           xmm0,       xmm7 | 76         paddw           xmm0,       xmm6 | ; low word of xmm0 = total SAD
| 76         movq            rax,        xmm0 | 77         movq            rax,        xmm0 | ; return value
| 77 | 78 | 
| 78     ; begin epilog | 79     ; begin epilog | 
| 79     pop rdi | 80     pop rdi | 
| 80     pop rsi | 81     pop rsi | 
|  | 82     RESTORE_XMM | ; NEW side only: counterpart of the SAVE_XMM added to the prolog
| 81     UNSHADOW_ARGS | 83     UNSHADOW_ARGS | 
| 82     pop         rbp | 84     pop         rbp | 
| 83     ret | 85     ret | 
| 84 | 86 | 
| 85 ;unsigned int vp8_sad8x16_wmt( | 87 ;unsigned int vp8_sad8x16_wmt( | 
| 86 ;    unsigned char *src_ptr, | 88 ;    unsigned char *src_ptr, | 
| 87 ;    int  src_stride, | 89 ;    int  src_stride, | 
| 88 ;    unsigned char *ref_ptr, | 90 ;    unsigned char *ref_ptr, | 
| 89 ;    int  ref_stride, | 91 ;    int  ref_stride, | 
| 90 ;    int  max_err) | 92 ;    int  max_err) | 
| (...skipping 14 matching lines...) Expand all  Loading... | 
| 105         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 107         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 
| 106 | 108 | 
| 107         lea             rcx,        [rsi+rbx*8] | 109         lea             rcx,        [rsi+rbx*8] | 
| 108 | 110 | 
| 109         lea             rcx,        [rcx+rbx*8] | 111         lea             rcx,        [rcx+rbx*8] | 
| 110         pxor            mm7,        mm7 | 112         pxor            mm7,        mm7 | 
| 111 | 113 | 
| 112 x8x16sad_wmt_loop: | 114 x8x16sad_wmt_loop: | 
| 113 | 115 | 
| 114         movq            rax,        mm7 | 116         movq            rax,        mm7 | 
| 115         cmp             rax,        arg(4) | 117         cmp             eax,        arg(4) | 
| 116         jg              x8x16sad_wmt_early_exit | 118         jg              x8x16sad_wmt_early_exit | 
| 117 | 119 | 
| 118         movq            mm0,        QWORD PTR [rsi] | 120         movq            mm0,        QWORD PTR [rsi] | 
| 119         movq            mm1,        QWORD PTR [rdi] | 121         movq            mm1,        QWORD PTR [rdi] | 
| 120 | 122 | 
| 121         movq            mm2,        QWORD PTR [rsi+rbx] | 123         movq            mm2,        QWORD PTR [rsi+rbx] | 
| 122         movq            mm3,        QWORD PTR [rdi+rdx] | 124         movq            mm3,        QWORD PTR [rdi+rdx] | 
| 123 | 125 | 
| 124         psadbw          mm0,        mm1 | 126         psadbw          mm0,        mm1 | 
| 125         psadbw          mm2,        mm3 | 127         psadbw          mm2,        mm3 | 
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 166 | 168 | 
| 167         movsxd          rbx,        dword ptr arg(1) ;src_stride | 169         movsxd          rbx,        dword ptr arg(1) ;src_stride | 
| 168         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 170         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 
| 169 | 171 | 
| 170         lea             rcx,        [rsi+rbx*8] | 172         lea             rcx,        [rsi+rbx*8] | 
| 171         pxor            mm7,        mm7 | 173         pxor            mm7,        mm7 | 
| 172 | 174 | 
| 173 x8x8sad_wmt_loop: | 175 x8x8sad_wmt_loop: | 
| 174 | 176 | 
| 175         movq            rax,        mm7 | 177         movq            rax,        mm7 | 
| 176         cmp             rax,        arg(4) | 178         cmp             eax,        arg(4) | 
| 177         jg              x8x8sad_wmt_early_exit | 179         jg              x8x8sad_wmt_early_exit | 
| 178 | 180 | 
| 179         movq            mm0,        QWORD PTR [rsi] | 181         movq            mm0,        QWORD PTR [rsi] | 
| 180         movq            mm1,        QWORD PTR [rdi] | 182         movq            mm1,        QWORD PTR [rdi] | 
| 181 | 183 | 
| 182         psadbw          mm0,        mm1 | 184         psadbw          mm0,        mm1 | 
| 183         lea             rsi,        [rsi+rbx] | 185         lea             rsi,        [rsi+rbx] | 
| 184 | 186 | 
| 185         add             rdi,        rdx | 187         add             rdi,        rdx | 
| 186         paddw           mm7,        mm0 | 188         paddw           mm7,        mm0 | 
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 275 | 277 | 
| 276         movsxd          rbx,        dword ptr arg(1) ;src_stride | 278         movsxd          rbx,        dword ptr arg(1) ;src_stride | 
| 277         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 279         movsxd          rdx,        dword ptr arg(3) ;ref_stride | 
| 278 | 280 | 
| 279         lea             rcx,        [rsi+rbx*8] | 281         lea             rcx,        [rsi+rbx*8] | 
| 280         pxor            mm7,        mm7 | 282         pxor            mm7,        mm7 | 
| 281 | 283 | 
| 282 x16x8sad_wmt_loop: | 284 x16x8sad_wmt_loop: | 
| 283 | 285 | 
| 284         movq            rax,        mm7 | 286         movq            rax,        mm7 | 
| 285         cmp             rax,        arg(4) | 287         cmp             eax,        arg(4) | 
| 286         jg              x16x8sad_wmt_early_exit | 288         jg              x16x8sad_wmt_early_exit | 
| 287 | 289 | 
| 288         movq            mm0,        QWORD PTR [rsi] | 290         movq            mm0,        QWORD PTR [rsi] | 
| 289         movq            mm2,        QWORD PTR [rsi+8] | 291         movq            mm2,        QWORD PTR [rsi+8] | 
| 290 | 292 | 
| 291         movq            mm1,        QWORD PTR [rdi] | 293         movq            mm1,        QWORD PTR [rdi] | 
| 292         movq            mm3,        QWORD PTR [rdi+8] | 294         movq            mm3,        QWORD PTR [rdi+8] | 
| 293 | 295 | 
| 294         movq            mm4,        QWORD PTR [rsi+rbx] | 296         movq            mm4,        QWORD PTR [rsi+rbx] | 
| 295         movq            mm5,        QWORD PTR [rdi+rdx] | 297         movq            mm5,        QWORD PTR [rdi+rdx] | 
| (...skipping 23 matching lines...) Expand all  Loading... | 
| 319 | 321 | 
| 320 x16x8sad_wmt_early_exit: | 322 x16x8sad_wmt_early_exit: | 
| 321 | 323 | 
| 322     ; begin epilog | 324     ; begin epilog | 
| 323     pop         rdi | 325     pop         rdi | 
| 324     pop         rsi | 326     pop         rsi | 
| 325     pop         rbx | 327     pop         rbx | 
| 326     UNSHADOW_ARGS | 328     UNSHADOW_ARGS | 
| 327     pop         rbp | 329     pop         rbp | 
| 328     ret | 330     ret | 
|  | 331 | 
|  | 332 ;void vp8_copy32xn_sse2( | 
|  | 333 ;    unsigned char *src_ptr, | 
|  | 334 ;    int  src_stride, | 
|  | 335 ;    unsigned char *dst_ptr, | 
|  | 336 ;    int  dst_stride, | 
|  | 337 ;    int height); | 
|  | 338 ; | 
|  | 339 ;  Copies height rows of 32 bytes each from src_ptr to dst_ptr. | 
|  | 340 ;  Source rows are read with unaligned loads (movdqu). Destination rows | 
|  | 341 ;  are written with aligned stores (movdqa), so dst_ptr and dst_stride | 
|  | 342 ;  must keep every destination row 16-byte aligned. | 
|  | 343 ;  Rows are copied four at a time while at least four remain, then one | 
|  | 344 ;  at a time. A height of zero or less copies nothing. | 
|  | 345 ; | 
|  | 346 ;  Register use: rsi = src, rdi = dst, rax = src_stride, | 
|  | 347 ;                rdx = dst_stride, rcx = rows still to copy. | 
|  | 348 global sym(vp8_copy32xn_sse2) | 
|  | 349 sym(vp8_copy32xn_sse2): | 
|  | 350     push        rbp | 
|  | 351     mov         rbp, rsp | 
|  | 352     SHADOW_ARGS_TO_STACK 5 | 
|  | 353     SAVE_XMM 7 | 
|  | 354     push        rsi | 
|  | 355     push        rdi | 
|  | 356     ; end prolog | 
|  | 357 | 
|  | 358         mov             rsi,        arg(0) ;src_ptr | 
|  | 359         mov             rdi,        arg(2) ;dst_ptr | 
|  | 360 | 
|  | 361         movsxd          rax,        dword ptr arg(1) ;src_stride | 
|  | 362         movsxd          rdx,        dword ptr arg(3) ;dst_stride | 
|  | 363         movsxd          rcx,        dword ptr arg(4) ;height | 
|  | 364 | 
|  | 365         ; The unrolled loop always moves four rows, so it may only be | 
|  | 366         ; entered when at least four rows were requested. | 
|  | 367         cmp             rcx,        4 | 
|  | 368         jl              block_copy_sse2_tail | 
|  | 369 | 
|  | 370 block_copy_sse2_loopx4: | 
|  | 371         ; load rows 0 and 1 | 
|  | 372         movdqu          xmm0,       XMMWORD PTR [rsi] | 
|  | 373         movdqu          xmm1,       XMMWORD PTR [rsi + 16] | 
|  | 374         movdqu          xmm2,       XMMWORD PTR [rsi + rax] | 
|  | 375         movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16] | 
|  | 376 | 
|  | 377         lea             rsi,        [rsi+rax*2] | 
|  | 378 | 
|  | 379         ; load rows 2 and 3 | 
|  | 380         movdqu          xmm4,       XMMWORD PTR [rsi] | 
|  | 381         movdqu          xmm5,       XMMWORD PTR [rsi + 16] | 
|  | 382         movdqu          xmm6,       XMMWORD PTR [rsi + rax] | 
|  | 383         movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16] | 
|  | 384 | 
|  | 385         lea             rsi,    [rsi+rax*2] | 
|  | 386 | 
|  | 387         ; store rows 0 and 1 | 
|  | 388         movdqa          XMMWORD PTR [rdi], xmm0 | 
|  | 389         movdqa          XMMWORD PTR [rdi + 16], xmm1 | 
|  | 390         movdqa          XMMWORD PTR [rdi + rdx], xmm2 | 
|  | 391         movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3 | 
|  | 392 | 
|  | 393         lea             rdi,    [rdi+rdx*2] | 
|  | 394 | 
|  | 395         ; store rows 2 and 3 | 
|  | 396         movdqa          XMMWORD PTR [rdi], xmm4 | 
|  | 397         movdqa          XMMWORD PTR [rdi + 16], xmm5 | 
|  | 398         movdqa          XMMWORD PTR [rdi + rdx], xmm6 | 
|  | 399         movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7 | 
|  | 400 | 
|  | 401         lea             rdi,    [rdi+rdx*2] | 
|  | 402 | 
|  | 403         sub             rcx,     4 | 
|  | 404         cmp             rcx,     4 | 
|  | 405         jge             block_copy_sse2_loopx4 | 
|  | 406 | 
|  | 407 block_copy_sse2_tail: | 
|  | 408         ; fewer than four rows remain here; zero or a negative count means done | 
|  | 409         test            rcx, rcx | 
|  | 410         jle             copy_is_done | 
|  | 411 | 
|  | 412 block_copy_sse2_loop: | 
|  | 413         movdqu          xmm0,       XMMWORD PTR [rsi] | 
|  | 414         movdqu          xmm1,       XMMWORD PTR [rsi + 16] | 
|  | 415         lea             rsi,    [rsi+rax] | 
|  | 416 | 
|  | 417         movdqa          XMMWORD PTR [rdi], xmm0 | 
|  | 418         movdqa          XMMWORD PTR [rdi + 16], xmm1 | 
|  | 419         lea             rdi,    [rdi+rdx] | 
|  | 420 | 
|  | 421         sub             rcx,     1 | 
|  | 422         jne             block_copy_sse2_loop | 
|  | 423 | 
|  | 424 copy_is_done: | 
|  | 425     ; begin epilog | 
|  | 426     pop rdi | 
|  | 427     pop rsi | 
|  | 428     RESTORE_XMM | 
|  | 429     UNSHADOW_ARGS | 
|  | 430     pop         rbp | 
|  | 431     ret | 
| OLD | NEW | 
|---|