| OLD | NEW | 
|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| (...skipping 19 matching lines...) Expand all  Loading... | 
| 30 ;    unsigned int    pixel_step, | 30 ;    unsigned int    pixel_step, | 
| 31 ;    unsigned int    output_height, | 31 ;    unsigned int    output_height, | 
| 32 ;    unsigned int    output_width, | 32 ;    unsigned int    output_width, | 
| 33 ;    short           *vp8_filter | 33 ;    short           *vp8_filter | 
| 34 ;) | 34 ;) | 
| 35 global sym(vp8_filter_block1d8_h6_sse2) | 35 global sym(vp8_filter_block1d8_h6_sse2) | 
| 36 sym(vp8_filter_block1d8_h6_sse2): | 36 sym(vp8_filter_block1d8_h6_sse2): | 
| 37     push        rbp | 37     push        rbp | 
| 38     mov         rbp, rsp | 38     mov         rbp, rsp | 
| 39     SHADOW_ARGS_TO_STACK 7 | 39     SHADOW_ARGS_TO_STACK 7 | 
| 40     SAVE_XMM | 40     SAVE_XMM 7 | 
| 41     GET_GOT     rbx | 41     GET_GOT     rbx | 
| 42     push        rsi | 42     push        rsi | 
| 43     push        rdi | 43     push        rdi | 
| 44     ; end prolog | 44     ; end prolog | 
| 45 | 45 | 
| 46         mov         rdx,        arg(6) ;vp8_filter | 46         mov         rdx,        arg(6) ;vp8_filter | 
| 47         mov         rsi,        arg(0) ;src_ptr | 47         mov         rsi,        arg(0) ;src_ptr | 
| 48 | 48 | 
| 49         mov         rdi,        arg(1) ;output_ptr | 49         mov         rdi,        arg(1) ;output_ptr | 
| 50 | 50 | 
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixe
      ls. The | 150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixe
      ls. The | 
| 151 ; input pixel array has output_height rows. This routine assumes that output_hei
      ght is an | 151 ; input pixel array has output_height rows. This routine assumes that output_hei
      ght is an | 
| 152 ; even number. This function handles 8 pixels in horizontal direction, calculati
      ng ONE | 152 ; even number. This function handles 8 pixels in horizontal direction, calculati
      ng ONE | 
| 153 ; row each iteration to take advantage of the 128-bit operations. | 153 ; row each iteration to take advantage of the 128-bit operations. | 
| 154 ;*******************************************************************************
      ******/ | 154 ;*******************************************************************************
      ******/ | 
| 155 global sym(vp8_filter_block1d16_h6_sse2) | 155 global sym(vp8_filter_block1d16_h6_sse2) | 
| 156 sym(vp8_filter_block1d16_h6_sse2): | 156 sym(vp8_filter_block1d16_h6_sse2): | 
| 157     push        rbp | 157     push        rbp | 
| 158     mov         rbp, rsp | 158     mov         rbp, rsp | 
| 159     SHADOW_ARGS_TO_STACK 7 | 159     SHADOW_ARGS_TO_STACK 7 | 
| 160     SAVE_XMM | 160     SAVE_XMM 7 | 
| 161     GET_GOT     rbx | 161     GET_GOT     rbx | 
| 162     push        rsi | 162     push        rsi | 
| 163     push        rdi | 163     push        rdi | 
| 164     ; end prolog | 164     ; end prolog | 
| 165 | 165 | 
| 166         mov         rdx,        arg(6) ;vp8_filter | 166         mov         rdx,        arg(6) ;vp8_filter | 
| 167         mov         rsi,        arg(0) ;src_ptr | 167         mov         rsi,        arg(0) ;src_ptr | 
| 168 | 168 | 
| 169         mov         rdi,        arg(1) ;output_ptr | 169         mov         rdi,        arg(1) ;output_ptr | 
| 170 | 170 | 
| (...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 326 ;) | 326 ;) | 
| 327 ;/******************************************************************************
      ****** | 327 ;/******************************************************************************
      ****** | 
| 328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixel
      s. The | 328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixel
      s. The | 
| 329 ; input pixel array has output_height rows. | 329 ; input pixel array has output_height rows. | 
| 330 ;*******************************************************************************
      ******/ | 330 ;*******************************************************************************
      ******/ | 
| 331 global sym(vp8_filter_block1d8_v6_sse2) | 331 global sym(vp8_filter_block1d8_v6_sse2) | 
| 332 sym(vp8_filter_block1d8_v6_sse2): | 332 sym(vp8_filter_block1d8_v6_sse2): | 
| 333     push        rbp | 333     push        rbp | 
| 334     mov         rbp, rsp | 334     mov         rbp, rsp | 
| 335     SHADOW_ARGS_TO_STACK 8 | 335     SHADOW_ARGS_TO_STACK 8 | 
| 336     SAVE_XMM | 336     SAVE_XMM 7 | 
| 337     GET_GOT     rbx | 337     GET_GOT     rbx | 
| 338     push        rsi | 338     push        rsi | 
| 339     push        rdi | 339     push        rdi | 
| 340     ; end prolog | 340     ; end prolog | 
| 341 | 341 | 
| 342         mov         rax,        arg(7) ;vp8_filter | 342         mov         rax,        arg(7) ;vp8_filter | 
| 343         movsxd      rdx,        dword ptr arg(3) ;pixels_per_line | 343         movsxd      rdx,        dword ptr arg(3) ;pixels_per_line | 
| 344 | 344 | 
| 345         mov         rdi,        arg(1) ;output_ptr | 345         mov         rdi,        arg(1) ;output_ptr | 
| 346         mov         rsi,        arg(0) ;src_ptr | 346         mov         rsi,        arg(0) ;src_ptr | 
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 421 ;) | 421 ;) | 
| 422 ;/******************************************************************************
      ****** | 422 ;/******************************************************************************
      ****** | 
| 423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixe
      ls. The | 423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixe
      ls. The | 
| 424 ; input pixel array has output_height rows. | 424 ; input pixel array has output_height rows. | 
| 425 ;*******************************************************************************
      ******/ | 425 ;*******************************************************************************
      ******/ | 
| 426 global sym(vp8_filter_block1d16_v6_sse2) | 426 global sym(vp8_filter_block1d16_v6_sse2) | 
| 427 sym(vp8_filter_block1d16_v6_sse2): | 427 sym(vp8_filter_block1d16_v6_sse2): | 
| 428     push        rbp | 428     push        rbp | 
| 429     mov         rbp, rsp | 429     mov         rbp, rsp | 
| 430     SHADOW_ARGS_TO_STACK 8 | 430     SHADOW_ARGS_TO_STACK 8 | 
| 431     SAVE_XMM | 431     SAVE_XMM 7 | 
| 432     GET_GOT     rbx | 432     GET_GOT     rbx | 
| 433     push        rsi | 433     push        rsi | 
| 434     push        rdi | 434     push        rdi | 
| 435     ; end prolog | 435     ; end prolog | 
| 436 | 436 | 
| 437         mov         rax,        arg(7) ;vp8_filter | 437         mov         rax,        arg(7) ;vp8_filter | 
| 438         movsxd      rdx,        dword ptr arg(3) ;pixels_per_line | 438         movsxd      rdx,        dword ptr arg(3) ;pixels_per_line | 
| 439 | 439 | 
| 440         mov         rdi,        arg(1) ;output_ptr | 440         mov         rdi,        arg(1) ;output_ptr | 
| 441         mov         rsi,        arg(0) ;src_ptr | 441         mov         rsi,        arg(0) ;src_ptr | 
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 531 ;    int dst_pitch, | 531 ;    int dst_pitch, | 
| 532 ;    unsigned int    output_height, | 532 ;    unsigned int    output_height, | 
| 533 ;    const short    *vp8_filter | 533 ;    const short    *vp8_filter | 
| 534 ;) | 534 ;) | 
| 535 ; First-pass filter only when yoffset==0 | 535 ; First-pass filter only when yoffset==0 | 
| 536 global sym(vp8_filter_block1d8_h6_only_sse2) | 536 global sym(vp8_filter_block1d8_h6_only_sse2) | 
| 537 sym(vp8_filter_block1d8_h6_only_sse2): | 537 sym(vp8_filter_block1d8_h6_only_sse2): | 
| 538     push        rbp | 538     push        rbp | 
| 539     mov         rbp, rsp | 539     mov         rbp, rsp | 
| 540     SHADOW_ARGS_TO_STACK 6 | 540     SHADOW_ARGS_TO_STACK 6 | 
| 541     SAVE_XMM | 541     SAVE_XMM 7 | 
| 542     GET_GOT     rbx | 542     GET_GOT     rbx | 
| 543     push        rsi | 543     push        rsi | 
| 544     push        rdi | 544     push        rdi | 
| 545     ; end prolog | 545     ; end prolog | 
| 546 | 546 | 
| 547         mov         rdx,        arg(5) ;vp8_filter | 547         mov         rdx,        arg(5) ;vp8_filter | 
| 548         mov         rsi,        arg(0) ;src_ptr | 548         mov         rsi,        arg(0) ;src_ptr | 
| 549 | 549 | 
| 550         mov         rdi,        arg(2) ;output_ptr | 550         mov         rdi,        arg(2) ;output_ptr | 
| 551 | 551 | 
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 644 ;    int dst_pitch, | 644 ;    int dst_pitch, | 
| 645 ;    unsigned int    output_height, | 645 ;    unsigned int    output_height, | 
| 646 ;    const short    *vp8_filter | 646 ;    const short    *vp8_filter | 
| 647 ;) | 647 ;) | 
| 648 ; First-pass filter only when yoffset==0 | 648 ; First-pass filter only when yoffset==0 | 
| 649 global sym(vp8_filter_block1d16_h6_only_sse2) | 649 global sym(vp8_filter_block1d16_h6_only_sse2) | 
| 650 sym(vp8_filter_block1d16_h6_only_sse2): | 650 sym(vp8_filter_block1d16_h6_only_sse2): | 
| 651     push        rbp | 651     push        rbp | 
| 652     mov         rbp, rsp | 652     mov         rbp, rsp | 
| 653     SHADOW_ARGS_TO_STACK 6 | 653     SHADOW_ARGS_TO_STACK 6 | 
| 654     SAVE_XMM | 654     SAVE_XMM 7 | 
| 655     GET_GOT     rbx | 655     GET_GOT     rbx | 
| 656     push        rsi | 656     push        rsi | 
| 657     push        rdi | 657     push        rdi | 
| 658     ; end prolog | 658     ; end prolog | 
| 659 | 659 | 
| 660         mov         rdx,        arg(5) ;vp8_filter | 660         mov         rdx,        arg(5) ;vp8_filter | 
| 661         mov         rsi,        arg(0) ;src_ptr | 661         mov         rsi,        arg(0) ;src_ptr | 
| 662 | 662 | 
| 663         mov         rdi,        arg(2) ;output_ptr | 663         mov         rdi,        arg(2) ;output_ptr | 
| 664 | 664 | 
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 809 ;    int dst_pitch, | 809 ;    int dst_pitch, | 
| 810 ;    unsigned int output_height, | 810 ;    unsigned int output_height, | 
| 811 ;    const short    *vp8_filter | 811 ;    const short    *vp8_filter | 
| 812 ;) | 812 ;) | 
| 813 ; Second-pass filter only when xoffset==0 | 813 ; Second-pass filter only when xoffset==0 | 
| 814 global sym(vp8_filter_block1d8_v6_only_sse2) | 814 global sym(vp8_filter_block1d8_v6_only_sse2) | 
| 815 sym(vp8_filter_block1d8_v6_only_sse2): | 815 sym(vp8_filter_block1d8_v6_only_sse2): | 
| 816     push        rbp | 816     push        rbp | 
| 817     mov         rbp, rsp | 817     mov         rbp, rsp | 
| 818     SHADOW_ARGS_TO_STACK 6 | 818     SHADOW_ARGS_TO_STACK 6 | 
| 819     SAVE_XMM | 819     SAVE_XMM 7 | 
| 820     GET_GOT     rbx | 820     GET_GOT     rbx | 
| 821     push        rsi | 821     push        rsi | 
| 822     push        rdi | 822     push        rdi | 
| 823     ; end prolog | 823     ; end prolog | 
| 824 | 824 | 
| 825         mov         rsi,        arg(0) ;src_ptr | 825         mov         rsi,        arg(0) ;src_ptr | 
| 826         mov         rdi,        arg(2) ;output_ptr | 826         mov         rdi,        arg(2) ;output_ptr | 
| 827 | 827 | 
| 828         movsxd      rcx,        dword ptr arg(4) ;output_height | 828         movsxd      rcx,        dword ptr arg(4) ;output_height | 
| 829         movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line | 829         movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line | 
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 901 ;    unsigned short *output_ptr, | 901 ;    unsigned short *output_ptr, | 
| 902 ;    unsigned int    src_pixels_per_line, | 902 ;    unsigned int    src_pixels_per_line, | 
| 903 ;    unsigned int    output_height, | 903 ;    unsigned int    output_height, | 
| 904 ;    unsigned int    output_width | 904 ;    unsigned int    output_width | 
| 905 ;) | 905 ;) | 
| 906 global sym(vp8_unpack_block1d16_h6_sse2) | 906 global sym(vp8_unpack_block1d16_h6_sse2) | 
| 907 sym(vp8_unpack_block1d16_h6_sse2): | 907 sym(vp8_unpack_block1d16_h6_sse2): | 
| 908     push        rbp | 908     push        rbp | 
| 909     mov         rbp, rsp | 909     mov         rbp, rsp | 
| 910     SHADOW_ARGS_TO_STACK 5 | 910     SHADOW_ARGS_TO_STACK 5 | 
| 911     ;SAVE_XMM                          ;xmm6, xmm7 are not used here. |  | 
| 912     GET_GOT     rbx | 911     GET_GOT     rbx | 
| 913     push        rsi | 912     push        rsi | 
| 914     push        rdi | 913     push        rdi | 
| 915     ; end prolog | 914     ; end prolog | 
| 916 | 915 | 
| 917         mov         rsi,        arg(0) ;src_ptr | 916         mov         rsi,        arg(0) ;src_ptr | 
| 918         mov         rdi,        arg(1) ;output_ptr | 917         mov         rdi,        arg(1) ;output_ptr | 
| 919 | 918 | 
| 920         movsxd      rcx,        dword ptr arg(3) ;output_height | 919         movsxd      rcx,        dword ptr arg(3) ;output_height | 
| 921         movsxd      rax,        dword ptr arg(2) ;src_pixels_per_line           
       ; Pitch for Source | 920         movsxd      rax,        dword ptr arg(2) ;src_pixels_per_line           
       ; Pitch for Source | 
| (...skipping 19 matching lines...) Expand all  Loading... | 
| 941 %else | 940 %else | 
| 942         add         rdi,        r8 | 941         add         rdi,        r8 | 
| 943 %endif | 942 %endif | 
| 944         dec         rcx | 943         dec         rcx | 
| 945         jnz         unpack_block1d16_h6_sse2_rowloop                ; next row | 944         jnz         unpack_block1d16_h6_sse2_rowloop                ; next row | 
| 946 | 945 | 
| 947     ; begin epilog | 946     ; begin epilog | 
| 948     pop rdi | 947     pop rdi | 
| 949     pop rsi | 948     pop rsi | 
| 950     RESTORE_GOT | 949     RESTORE_GOT | 
| 951     ;RESTORE_XMM |  | 
| 952     UNSHADOW_ARGS | 950     UNSHADOW_ARGS | 
| 953     pop         rbp | 951     pop         rbp | 
| 954     ret | 952     ret | 
| 955 | 953 | 
| 956 | 954 | 
| 957 ;void vp8_bilinear_predict16x16_sse2 | 955 ;void vp8_bilinear_predict16x16_sse2 | 
| 958 ;( | 956 ;( | 
| 959 ;    unsigned char  *src_ptr, | 957 ;    unsigned char  *src_ptr, | 
| 960 ;    int   src_pixels_per_line, | 958 ;    int   src_pixels_per_line, | 
| 961 ;    int  xoffset, | 959 ;    int  xoffset, | 
| 962 ;    int  yoffset, | 960 ;    int  yoffset, | 
| 963 ;    unsigned char *dst_ptr, | 961 ;    unsigned char *dst_ptr, | 
| 964 ;    int dst_pitch | 962 ;    int dst_pitch | 
| 965 ;) | 963 ;) | 
| 966 extern sym(vp8_bilinear_filters_mmx) | 964 extern sym(vp8_bilinear_filters_mmx) | 
| 967 global sym(vp8_bilinear_predict16x16_sse2) | 965 global sym(vp8_bilinear_predict16x16_sse2) | 
| 968 sym(vp8_bilinear_predict16x16_sse2): | 966 sym(vp8_bilinear_predict16x16_sse2): | 
| 969     push        rbp | 967     push        rbp | 
| 970     mov         rbp, rsp | 968     mov         rbp, rsp | 
| 971     SHADOW_ARGS_TO_STACK 6 | 969     SHADOW_ARGS_TO_STACK 6 | 
| 972     SAVE_XMM | 970     SAVE_XMM 7 | 
| 973     GET_GOT     rbx | 971     GET_GOT     rbx | 
| 974     push        rsi | 972     push        rsi | 
| 975     push        rdi | 973     push        rdi | 
| 976     ; end prolog | 974     ; end prolog | 
| 977 | 975 | 
| 978     ;const short *HFilter = bilinear_filters_mmx[xoffset] | 976     ;const short *HFilter = bilinear_filters_mmx[xoffset] | 
| 979     ;const short *VFilter = bilinear_filters_mmx[yoffset] | 977     ;const short *VFilter = bilinear_filters_mmx[yoffset] | 
| 980 | 978 | 
| 981         lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))] | 979         lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))] | 
| 982         movsxd      rax,        dword ptr arg(2) ;xoffset | 980         movsxd      rax,        dword ptr arg(2) ;xoffset | 
| (...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1231 ;    int  yoffset, | 1229 ;    int  yoffset, | 
| 1232 ;    unsigned char *dst_ptr, | 1230 ;    unsigned char *dst_ptr, | 
| 1233 ;    int dst_pitch | 1231 ;    int dst_pitch | 
| 1234 ;) | 1232 ;) | 
| 1235 extern sym(vp8_bilinear_filters_mmx) | 1233 extern sym(vp8_bilinear_filters_mmx) | 
| 1236 global sym(vp8_bilinear_predict8x8_sse2) | 1234 global sym(vp8_bilinear_predict8x8_sse2) | 
| 1237 sym(vp8_bilinear_predict8x8_sse2): | 1235 sym(vp8_bilinear_predict8x8_sse2): | 
| 1238     push        rbp | 1236     push        rbp | 
| 1239     mov         rbp, rsp | 1237     mov         rbp, rsp | 
| 1240     SHADOW_ARGS_TO_STACK 6 | 1238     SHADOW_ARGS_TO_STACK 6 | 
| 1241     SAVE_XMM | 1239     SAVE_XMM 7 | 
| 1242     GET_GOT     rbx | 1240     GET_GOT     rbx | 
| 1243     push        rsi | 1241     push        rsi | 
| 1244     push        rdi | 1242     push        rdi | 
| 1245     ; end prolog | 1243     ; end prolog | 
| 1246 | 1244 | 
| 1247     ALIGN_STACK 16, rax | 1245     ALIGN_STACK 16, rax | 
| 1248     sub         rsp, 144                         ; reserve 144 bytes | 1246     sub         rsp, 144                         ; reserve 144 bytes | 
| 1249 | 1247 | 
| 1250     ;const short *HFilter = bilinear_filters_mmx[xoffset] | 1248     ;const short *HFilter = bilinear_filters_mmx[xoffset] | 
| 1251     ;const short *VFilter = bilinear_filters_mmx[yoffset] | 1249     ;const short *VFilter = bilinear_filters_mmx[yoffset] | 
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1365     RESTORE_XMM | 1363     RESTORE_XMM | 
| 1366     UNSHADOW_ARGS | 1364     UNSHADOW_ARGS | 
| 1367     pop         rbp | 1365     pop         rbp | 
| 1368     ret | 1366     ret | 
| 1369 | 1367 | 
| 1370 | 1368 | 
| 1371 SECTION_RODATA | 1369 SECTION_RODATA | 
| 1372 align 16 | 1370 align 16 | 
| 1373 rd: | 1371 rd: | 
| 1374     times 8 dw 0x40 | 1372     times 8 dw 0x40 | 
| OLD | NEW | 
|---|