| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 ; unsigned int pixel_step, | 30 ; unsigned int pixel_step, |
| 31 ; unsigned int output_height, | 31 ; unsigned int output_height, |
| 32 ; unsigned int output_width, | 32 ; unsigned int output_width, |
| 33 ; short *vp8_filter | 33 ; short *vp8_filter |
| 34 ;) | 34 ;) |
| 35 global sym(vp8_filter_block1d8_h6_sse2) | 35 global sym(vp8_filter_block1d8_h6_sse2) |
| 36 sym(vp8_filter_block1d8_h6_sse2): | 36 sym(vp8_filter_block1d8_h6_sse2): |
| 37 push rbp | 37 push rbp |
| 38 mov rbp, rsp | 38 mov rbp, rsp |
| 39 SHADOW_ARGS_TO_STACK 7 | 39 SHADOW_ARGS_TO_STACK 7 |
| 40 SAVE_XMM | 40 SAVE_XMM 7 |
| 41 GET_GOT rbx | 41 GET_GOT rbx |
| 42 push rsi | 42 push rsi |
| 43 push rdi | 43 push rdi |
| 44 ; end prolog | 44 ; end prolog |
| 45 | 45 |
| 46 mov rdx, arg(6) ;vp8_filter | 46 mov rdx, arg(6) ;vp8_filter |
| 47 mov rsi, arg(0) ;src_ptr | 47 mov rsi, arg(0) ;src_ptr |
| 48 | 48 |
| 49 mov rdi, arg(1) ;output_ptr | 49 mov rdi, arg(1) ;output_ptr |
| 50 | 50 |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixe
ls. The | 150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixe
ls. The |
| 151 ; input pixel array has output_height rows. This routine assumes that output_hei
ght is an | 151 ; input pixel array has output_height rows. This routine assumes that output_hei
ght is an |
| 152 ; even number. This function handles 8 pixels in horizontal direction, calculati
ng ONE | 152 ; even number. This function handles 8 pixels in horizontal direction, calculati
ng ONE |
| 153 ; row each iteration to take advantage of the 128-bit operations. | 153 ; row each iteration to take advantage of the 128-bit operations. |
| 154 ;*******************************************************************************
******/ | 154 ;*******************************************************************************
******/ |
| 155 global sym(vp8_filter_block1d16_h6_sse2) | 155 global sym(vp8_filter_block1d16_h6_sse2) |
| 156 sym(vp8_filter_block1d16_h6_sse2): | 156 sym(vp8_filter_block1d16_h6_sse2): |
| 157 push rbp | 157 push rbp |
| 158 mov rbp, rsp | 158 mov rbp, rsp |
| 159 SHADOW_ARGS_TO_STACK 7 | 159 SHADOW_ARGS_TO_STACK 7 |
| 160 SAVE_XMM | 160 SAVE_XMM 7 |
| 161 GET_GOT rbx | 161 GET_GOT rbx |
| 162 push rsi | 162 push rsi |
| 163 push rdi | 163 push rdi |
| 164 ; end prolog | 164 ; end prolog |
| 165 | 165 |
| 166 mov rdx, arg(6) ;vp8_filter | 166 mov rdx, arg(6) ;vp8_filter |
| 167 mov rsi, arg(0) ;src_ptr | 167 mov rsi, arg(0) ;src_ptr |
| 168 | 168 |
| 169 mov rdi, arg(1) ;output_ptr | 169 mov rdi, arg(1) ;output_ptr |
| 170 | 170 |
| (...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 326 ;) | 326 ;) |
| 327 ;/******************************************************************************
****** | 327 ;/******************************************************************************
****** |
| 328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixel
s. The | 328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixel
s. The |
| 329 ; input pixel array has output_height rows. | 329 ; input pixel array has output_height rows. |
| 330 ;*******************************************************************************
******/ | 330 ;*******************************************************************************
******/ |
| 331 global sym(vp8_filter_block1d8_v6_sse2) | 331 global sym(vp8_filter_block1d8_v6_sse2) |
| 332 sym(vp8_filter_block1d8_v6_sse2): | 332 sym(vp8_filter_block1d8_v6_sse2): |
| 333 push rbp | 333 push rbp |
| 334 mov rbp, rsp | 334 mov rbp, rsp |
| 335 SHADOW_ARGS_TO_STACK 8 | 335 SHADOW_ARGS_TO_STACK 8 |
| 336 SAVE_XMM | 336 SAVE_XMM 7 |
| 337 GET_GOT rbx | 337 GET_GOT rbx |
| 338 push rsi | 338 push rsi |
| 339 push rdi | 339 push rdi |
| 340 ; end prolog | 340 ; end prolog |
| 341 | 341 |
| 342 mov rax, arg(7) ;vp8_filter | 342 mov rax, arg(7) ;vp8_filter |
| 343 movsxd rdx, dword ptr arg(3) ;pixels_per_line | 343 movsxd rdx, dword ptr arg(3) ;pixels_per_line |
| 344 | 344 |
| 345 mov rdi, arg(1) ;output_ptr | 345 mov rdi, arg(1) ;output_ptr |
| 346 mov rsi, arg(0) ;src_ptr | 346 mov rsi, arg(0) ;src_ptr |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 421 ;) | 421 ;) |
| 422 ;/******************************************************************************
****** | 422 ;/******************************************************************************
****** |
| 423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixe
ls. The | 423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixe
ls. The |
| 424 ; input pixel array has output_height rows. | 424 ; input pixel array has output_height rows. |
| 425 ;*******************************************************************************
******/ | 425 ;*******************************************************************************
******/ |
| 426 global sym(vp8_filter_block1d16_v6_sse2) | 426 global sym(vp8_filter_block1d16_v6_sse2) |
| 427 sym(vp8_filter_block1d16_v6_sse2): | 427 sym(vp8_filter_block1d16_v6_sse2): |
| 428 push rbp | 428 push rbp |
| 429 mov rbp, rsp | 429 mov rbp, rsp |
| 430 SHADOW_ARGS_TO_STACK 8 | 430 SHADOW_ARGS_TO_STACK 8 |
| 431 SAVE_XMM | 431 SAVE_XMM 7 |
| 432 GET_GOT rbx | 432 GET_GOT rbx |
| 433 push rsi | 433 push rsi |
| 434 push rdi | 434 push rdi |
| 435 ; end prolog | 435 ; end prolog |
| 436 | 436 |
| 437 mov rax, arg(7) ;vp8_filter | 437 mov rax, arg(7) ;vp8_filter |
| 438 movsxd rdx, dword ptr arg(3) ;pixels_per_line | 438 movsxd rdx, dword ptr arg(3) ;pixels_per_line |
| 439 | 439 |
| 440 mov rdi, arg(1) ;output_ptr | 440 mov rdi, arg(1) ;output_ptr |
| 441 mov rsi, arg(0) ;src_ptr | 441 mov rsi, arg(0) ;src_ptr |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 531 ; int dst_pitch, | 531 ; int dst_pitch, |
| 532 ; unsigned int output_height, | 532 ; unsigned int output_height, |
| 533 ; const short *vp8_filter | 533 ; const short *vp8_filter |
| 534 ;) | 534 ;) |
| 535 ; First-pass filter only when yoffset==0 | 535 ; First-pass filter only when yoffset==0 |
| 536 global sym(vp8_filter_block1d8_h6_only_sse2) | 536 global sym(vp8_filter_block1d8_h6_only_sse2) |
| 537 sym(vp8_filter_block1d8_h6_only_sse2): | 537 sym(vp8_filter_block1d8_h6_only_sse2): |
| 538 push rbp | 538 push rbp |
| 539 mov rbp, rsp | 539 mov rbp, rsp |
| 540 SHADOW_ARGS_TO_STACK 6 | 540 SHADOW_ARGS_TO_STACK 6 |
| 541 SAVE_XMM | 541 SAVE_XMM 7 |
| 542 GET_GOT rbx | 542 GET_GOT rbx |
| 543 push rsi | 543 push rsi |
| 544 push rdi | 544 push rdi |
| 545 ; end prolog | 545 ; end prolog |
| 546 | 546 |
| 547 mov rdx, arg(5) ;vp8_filter | 547 mov rdx, arg(5) ;vp8_filter |
| 548 mov rsi, arg(0) ;src_ptr | 548 mov rsi, arg(0) ;src_ptr |
| 549 | 549 |
| 550 mov rdi, arg(2) ;output_ptr | 550 mov rdi, arg(2) ;output_ptr |
| 551 | 551 |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 644 ; int dst_pitch, | 644 ; int dst_pitch, |
| 645 ; unsigned int output_height, | 645 ; unsigned int output_height, |
| 646 ; const short *vp8_filter | 646 ; const short *vp8_filter |
| 647 ;) | 647 ;) |
| 648 ; First-pass filter only when yoffset==0 | 648 ; First-pass filter only when yoffset==0 |
| 649 global sym(vp8_filter_block1d16_h6_only_sse2) | 649 global sym(vp8_filter_block1d16_h6_only_sse2) |
| 650 sym(vp8_filter_block1d16_h6_only_sse2): | 650 sym(vp8_filter_block1d16_h6_only_sse2): |
| 651 push rbp | 651 push rbp |
| 652 mov rbp, rsp | 652 mov rbp, rsp |
| 653 SHADOW_ARGS_TO_STACK 6 | 653 SHADOW_ARGS_TO_STACK 6 |
| 654 SAVE_XMM | 654 SAVE_XMM 7 |
| 655 GET_GOT rbx | 655 GET_GOT rbx |
| 656 push rsi | 656 push rsi |
| 657 push rdi | 657 push rdi |
| 658 ; end prolog | 658 ; end prolog |
| 659 | 659 |
| 660 mov rdx, arg(5) ;vp8_filter | 660 mov rdx, arg(5) ;vp8_filter |
| 661 mov rsi, arg(0) ;src_ptr | 661 mov rsi, arg(0) ;src_ptr |
| 662 | 662 |
| 663 mov rdi, arg(2) ;output_ptr | 663 mov rdi, arg(2) ;output_ptr |
| 664 | 664 |
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 809 ; int dst_pitch, | 809 ; int dst_pitch, |
| 810 ; unsigned int output_height, | 810 ; unsigned int output_height, |
| 811 ; const short *vp8_filter | 811 ; const short *vp8_filter |
| 812 ;) | 812 ;) |
| 813 ; Second-pass filter only when xoffset==0 | 813 ; Second-pass filter only when xoffset==0 |
| 814 global sym(vp8_filter_block1d8_v6_only_sse2) | 814 global sym(vp8_filter_block1d8_v6_only_sse2) |
| 815 sym(vp8_filter_block1d8_v6_only_sse2): | 815 sym(vp8_filter_block1d8_v6_only_sse2): |
| 816 push rbp | 816 push rbp |
| 817 mov rbp, rsp | 817 mov rbp, rsp |
| 818 SHADOW_ARGS_TO_STACK 6 | 818 SHADOW_ARGS_TO_STACK 6 |
| 819 SAVE_XMM | 819 SAVE_XMM 7 |
| 820 GET_GOT rbx | 820 GET_GOT rbx |
| 821 push rsi | 821 push rsi |
| 822 push rdi | 822 push rdi |
| 823 ; end prolog | 823 ; end prolog |
| 824 | 824 |
| 825 mov rsi, arg(0) ;src_ptr | 825 mov rsi, arg(0) ;src_ptr |
| 826 mov rdi, arg(2) ;output_ptr | 826 mov rdi, arg(2) ;output_ptr |
| 827 | 827 |
| 828 movsxd rcx, dword ptr arg(4) ;output_height | 828 movsxd rcx, dword ptr arg(4) ;output_height |
| 829 movsxd rdx, dword ptr arg(1) ;src_pixels_per_line | 829 movsxd rdx, dword ptr arg(1) ;src_pixels_per_line |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 901 ; unsigned short *output_ptr, | 901 ; unsigned short *output_ptr, |
| 902 ; unsigned int src_pixels_per_line, | 902 ; unsigned int src_pixels_per_line, |
| 903 ; unsigned int output_height, | 903 ; unsigned int output_height, |
| 904 ; unsigned int output_width | 904 ; unsigned int output_width |
| 905 ;) | 905 ;) |
| 906 global sym(vp8_unpack_block1d16_h6_sse2) | 906 global sym(vp8_unpack_block1d16_h6_sse2) |
| 907 sym(vp8_unpack_block1d16_h6_sse2): | 907 sym(vp8_unpack_block1d16_h6_sse2): |
| 908 push rbp | 908 push rbp |
| 909 mov rbp, rsp | 909 mov rbp, rsp |
| 910 SHADOW_ARGS_TO_STACK 5 | 910 SHADOW_ARGS_TO_STACK 5 |
| 911 ;SAVE_XMM ;xmm6, xmm7 are not used here. | |
| 912 GET_GOT rbx | 911 GET_GOT rbx |
| 913 push rsi | 912 push rsi |
| 914 push rdi | 913 push rdi |
| 915 ; end prolog | 914 ; end prolog |
| 916 | 915 |
| 917 mov rsi, arg(0) ;src_ptr | 916 mov rsi, arg(0) ;src_ptr |
| 918 mov rdi, arg(1) ;output_ptr | 917 mov rdi, arg(1) ;output_ptr |
| 919 | 918 |
| 920 movsxd rcx, dword ptr arg(3) ;output_height | 919 movsxd rcx, dword ptr arg(3) ;output_height |
| 921 movsxd rax, dword ptr arg(2) ;src_pixels_per_line
; Pitch for Source | 920 movsxd rax, dword ptr arg(2) ;src_pixels_per_line
; Pitch for Source |
| (...skipping 19 matching lines...) Expand all Loading... |
| 941 %else | 940 %else |
| 942 add rdi, r8 | 941 add rdi, r8 |
| 943 %endif | 942 %endif |
| 944 dec rcx | 943 dec rcx |
| 945 jnz unpack_block1d16_h6_sse2_rowloop ; next row | 944 jnz unpack_block1d16_h6_sse2_rowloop ; next row |
| 946 | 945 |
| 947 ; begin epilog | 946 ; begin epilog |
| 948 pop rdi | 947 pop rdi |
| 949 pop rsi | 948 pop rsi |
| 950 RESTORE_GOT | 949 RESTORE_GOT |
| 951 ;RESTORE_XMM | |
| 952 UNSHADOW_ARGS | 950 UNSHADOW_ARGS |
| 953 pop rbp | 951 pop rbp |
| 954 ret | 952 ret |
| 955 | 953 |
| 956 | 954 |
| 957 ;void vp8_bilinear_predict16x16_sse2 | 955 ;void vp8_bilinear_predict16x16_sse2 |
| 958 ;( | 956 ;( |
| 959 ; unsigned char *src_ptr, | 957 ; unsigned char *src_ptr, |
| 960 ; int src_pixels_per_line, | 958 ; int src_pixels_per_line, |
| 961 ; int xoffset, | 959 ; int xoffset, |
| 962 ; int yoffset, | 960 ; int yoffset, |
| 963 ; unsigned char *dst_ptr, | 961 ; unsigned char *dst_ptr, |
| 964 ; int dst_pitch | 962 ; int dst_pitch |
| 965 ;) | 963 ;) |
| 966 extern sym(vp8_bilinear_filters_mmx) | 964 extern sym(vp8_bilinear_filters_mmx) |
| 967 global sym(vp8_bilinear_predict16x16_sse2) | 965 global sym(vp8_bilinear_predict16x16_sse2) |
| 968 sym(vp8_bilinear_predict16x16_sse2): | 966 sym(vp8_bilinear_predict16x16_sse2): |
| 969 push rbp | 967 push rbp |
| 970 mov rbp, rsp | 968 mov rbp, rsp |
| 971 SHADOW_ARGS_TO_STACK 6 | 969 SHADOW_ARGS_TO_STACK 6 |
| 972 SAVE_XMM | 970 SAVE_XMM 7 |
| 973 GET_GOT rbx | 971 GET_GOT rbx |
| 974 push rsi | 972 push rsi |
| 975 push rdi | 973 push rdi |
| 976 ; end prolog | 974 ; end prolog |
| 977 | 975 |
| 978 ;const short *HFilter = bilinear_filters_mmx[xoffset] | 976 ;const short *HFilter = bilinear_filters_mmx[xoffset] |
| 979 ;const short *VFilter = bilinear_filters_mmx[yoffset] | 977 ;const short *VFilter = bilinear_filters_mmx[yoffset] |
| 980 | 978 |
| 981 lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] | 979 lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] |
| 982 movsxd rax, dword ptr arg(2) ;xoffset | 980 movsxd rax, dword ptr arg(2) ;xoffset |
| (...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1231 ; int yoffset, | 1229 ; int yoffset, |
| 1232 ; unsigned char *dst_ptr, | 1230 ; unsigned char *dst_ptr, |
| 1233 ; int dst_pitch | 1231 ; int dst_pitch |
| 1234 ;) | 1232 ;) |
| 1235 extern sym(vp8_bilinear_filters_mmx) | 1233 extern sym(vp8_bilinear_filters_mmx) |
| 1236 global sym(vp8_bilinear_predict8x8_sse2) | 1234 global sym(vp8_bilinear_predict8x8_sse2) |
| 1237 sym(vp8_bilinear_predict8x8_sse2): | 1235 sym(vp8_bilinear_predict8x8_sse2): |
| 1238 push rbp | 1236 push rbp |
| 1239 mov rbp, rsp | 1237 mov rbp, rsp |
| 1240 SHADOW_ARGS_TO_STACK 6 | 1238 SHADOW_ARGS_TO_STACK 6 |
| 1241 SAVE_XMM | 1239 SAVE_XMM 7 |
| 1242 GET_GOT rbx | 1240 GET_GOT rbx |
| 1243 push rsi | 1241 push rsi |
| 1244 push rdi | 1242 push rdi |
| 1245 ; end prolog | 1243 ; end prolog |
| 1246 | 1244 |
| 1247 ALIGN_STACK 16, rax | 1245 ALIGN_STACK 16, rax |
| 1248 sub rsp, 144 ; reserve 144 bytes | 1246 sub rsp, 144 ; reserve 144 bytes |
| 1249 | 1247 |
| 1250 ;const short *HFilter = bilinear_filters_mmx[xoffset] | 1248 ;const short *HFilter = bilinear_filters_mmx[xoffset] |
| 1251 ;const short *VFilter = bilinear_filters_mmx[yoffset] | 1249 ;const short *VFilter = bilinear_filters_mmx[yoffset] |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1365 RESTORE_XMM | 1363 RESTORE_XMM |
| 1366 UNSHADOW_ARGS | 1364 UNSHADOW_ARGS |
| 1367 pop rbp | 1365 pop rbp |
| 1368 ret | 1366 ret |
| 1369 | 1367 |
| 1370 | 1368 |
| 1371 SECTION_RODATA | 1369 SECTION_RODATA |
| 1372 align 16 | 1370 align 16 |
| 1373 rd: | 1371 rd: |
| 1374 times 8 dw 0x40 | 1372 times 8 dw 0x40 |
| OLD | NEW |