OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 14 matching lines...) Expand all Loading... |
25 ;void vp8_filter_block1d8_h6_sse2 | 25 ;void vp8_filter_block1d8_h6_sse2 |
26 ;( | 26 ;( |
27 ; unsigned char *src_ptr, | 27 ; unsigned char *src_ptr, |
28 ; unsigned short *output_ptr, | 28 ; unsigned short *output_ptr, |
29 ; unsigned int src_pixels_per_line, | 29 ; unsigned int src_pixels_per_line, |
30 ; unsigned int pixel_step, | 30 ; unsigned int pixel_step, |
31 ; unsigned int output_height, | 31 ; unsigned int output_height, |
32 ; unsigned int output_width, | 32 ; unsigned int output_width, |
33 ; short *vp8_filter | 33 ; short *vp8_filter |
34 ;) | 34 ;) |
35 global sym(vp8_filter_block1d8_h6_sse2) | 35 global sym(vp8_filter_block1d8_h6_sse2) PRIVATE |
36 sym(vp8_filter_block1d8_h6_sse2): | 36 sym(vp8_filter_block1d8_h6_sse2): |
37 push rbp | 37 push rbp |
38 mov rbp, rsp | 38 mov rbp, rsp |
39 SHADOW_ARGS_TO_STACK 7 | 39 SHADOW_ARGS_TO_STACK 7 |
40 SAVE_XMM | 40 SAVE_XMM |
41 GET_GOT rbx | 41 GET_GOT rbx |
42 push rsi | 42 push rsi |
43 push rdi | 43 push rdi |
44 ; end prolog | 44 ; end prolog |
45 | 45 |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 ; unsigned int output_height, | 145 ; unsigned int output_height, |
146 ; unsigned int output_width, | 146 ; unsigned int output_width, |
147 ; short *vp8_filter | 147 ; short *vp8_filter |
148 ;) | 148 ;) |
149 ;/************************************************************************************ | 149 ;/************************************************************************************ |
150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The | 150 ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The |
151 ; input pixel array has output_height rows. This routine assumes that output_height is an | 151 ; input pixel array has output_height rows. This routine assumes that output_height is an |
152 ; even number. This function handles 8 pixels in horizontal direction, calculating ONE | 152 ; even number. This function handles 8 pixels in horizontal direction, calculating ONE |
153 ; rows each iteration to take advantage of the 128 bits operations. | 153 ; rows each iteration to take advantage of the 128 bits operations. |
154 ;*************************************************************************************/ | 154 ;*************************************************************************************/ |
155 global sym(vp8_filter_block1d16_h6_sse2) | 155 global sym(vp8_filter_block1d16_h6_sse2) PRIVATE |
156 sym(vp8_filter_block1d16_h6_sse2): | 156 sym(vp8_filter_block1d16_h6_sse2): |
157 push rbp | 157 push rbp |
158 mov rbp, rsp | 158 mov rbp, rsp |
159 SHADOW_ARGS_TO_STACK 7 | 159 SHADOW_ARGS_TO_STACK 7 |
160 SAVE_XMM | 160 SAVE_XMM |
161 GET_GOT rbx | 161 GET_GOT rbx |
162 push rsi | 162 push rsi |
163 push rdi | 163 push rdi |
164 ; end prolog | 164 ; end prolog |
165 | 165 |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
321 ; unsigned int pixels_per_line, | 321 ; unsigned int pixels_per_line, |
322 ; unsigned int pixel_step, | 322 ; unsigned int pixel_step, |
323 ; unsigned int output_height, | 323 ; unsigned int output_height, |
324 ; unsigned int output_width, | 324 ; unsigned int output_width, |
325 ; short * vp8_filter | 325 ; short * vp8_filter |
326 ;) | 326 ;) |
327 ;/************************************************************************************ | 327 ;/************************************************************************************ |
328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The | 328 ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The |
329 ; input pixel array has output_height rows. | 329 ; input pixel array has output_height rows. |
330 ;*************************************************************************************/ | 330 ;*************************************************************************************/ |
331 global sym(vp8_filter_block1d8_v6_sse2) | 331 global sym(vp8_filter_block1d8_v6_sse2) PRIVATE |
332 sym(vp8_filter_block1d8_v6_sse2): | 332 sym(vp8_filter_block1d8_v6_sse2): |
333 push rbp | 333 push rbp |
334 mov rbp, rsp | 334 mov rbp, rsp |
335 SHADOW_ARGS_TO_STACK 8 | 335 SHADOW_ARGS_TO_STACK 8 |
336 SAVE_XMM | 336 SAVE_XMM |
337 GET_GOT rbx | 337 GET_GOT rbx |
338 push rsi | 338 push rsi |
339 push rdi | 339 push rdi |
340 ; end prolog | 340 ; end prolog |
341 | 341 |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
416 ; unsigned int pixels_per_line, | 416 ; unsigned int pixels_per_line, |
417 ; unsigned int pixel_step, | 417 ; unsigned int pixel_step, |
418 ; unsigned int output_height, | 418 ; unsigned int output_height, |
419 ; unsigned int output_width, | 419 ; unsigned int output_width, |
420 ; const short *vp8_filter | 420 ; const short *vp8_filter |
421 ;) | 421 ;) |
422 ;/************************************************************************************ | 422 ;/************************************************************************************ |
423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The | 423 ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The |
424 ; input pixel array has output_height rows. | 424 ; input pixel array has output_height rows. |
425 ;*************************************************************************************/ | 425 ;*************************************************************************************/ |
426 global sym(vp8_filter_block1d16_v6_sse2) | 426 global sym(vp8_filter_block1d16_v6_sse2) PRIVATE |
427 sym(vp8_filter_block1d16_v6_sse2): | 427 sym(vp8_filter_block1d16_v6_sse2): |
428 push rbp | 428 push rbp |
429 mov rbp, rsp | 429 mov rbp, rsp |
430 SHADOW_ARGS_TO_STACK 8 | 430 SHADOW_ARGS_TO_STACK 8 |
431 SAVE_XMM | 431 SAVE_XMM |
432 GET_GOT rbx | 432 GET_GOT rbx |
433 push rsi | 433 push rsi |
434 push rdi | 434 push rdi |
435 ; end prolog | 435 ; end prolog |
436 | 436 |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
526 ;void vp8_filter_block1d8_h6_only_sse2 | 526 ;void vp8_filter_block1d8_h6_only_sse2 |
527 ;( | 527 ;( |
528 ; unsigned char *src_ptr, | 528 ; unsigned char *src_ptr, |
529 ; unsigned int src_pixels_per_line, | 529 ; unsigned int src_pixels_per_line, |
530 ; unsigned char *output_ptr, | 530 ; unsigned char *output_ptr, |
531 ; int dst_ptich, | 531 ; int dst_ptich, |
532 ; unsigned int output_height, | 532 ; unsigned int output_height, |
533 ; const short *vp8_filter | 533 ; const short *vp8_filter |
534 ;) | 534 ;) |
535 ; First-pass filter only when yoffset==0 | 535 ; First-pass filter only when yoffset==0 |
536 global sym(vp8_filter_block1d8_h6_only_sse2) | 536 global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE |
537 sym(vp8_filter_block1d8_h6_only_sse2): | 537 sym(vp8_filter_block1d8_h6_only_sse2): |
538 push rbp | 538 push rbp |
539 mov rbp, rsp | 539 mov rbp, rsp |
540 SHADOW_ARGS_TO_STACK 6 | 540 SHADOW_ARGS_TO_STACK 6 |
541 SAVE_XMM | 541 SAVE_XMM |
542 GET_GOT rbx | 542 GET_GOT rbx |
543 push rsi | 543 push rsi |
544 push rdi | 544 push rdi |
545 ; end prolog | 545 ; end prolog |
546 | 546 |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
639 ;void vp8_filter_block1d16_h6_only_sse2 | 639 ;void vp8_filter_block1d16_h6_only_sse2 |
640 ;( | 640 ;( |
641 ; unsigned char *src_ptr, | 641 ; unsigned char *src_ptr, |
642 ; unsigned int src_pixels_per_line, | 642 ; unsigned int src_pixels_per_line, |
643 ; unsigned char *output_ptr, | 643 ; unsigned char *output_ptr, |
644 ; int dst_ptich, | 644 ; int dst_ptich, |
645 ; unsigned int output_height, | 645 ; unsigned int output_height, |
646 ; const short *vp8_filter | 646 ; const short *vp8_filter |
647 ;) | 647 ;) |
648 ; First-pass filter only when yoffset==0 | 648 ; First-pass filter only when yoffset==0 |
649 global sym(vp8_filter_block1d16_h6_only_sse2) | 649 global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE |
650 sym(vp8_filter_block1d16_h6_only_sse2): | 650 sym(vp8_filter_block1d16_h6_only_sse2): |
651 push rbp | 651 push rbp |
652 mov rbp, rsp | 652 mov rbp, rsp |
653 SHADOW_ARGS_TO_STACK 6 | 653 SHADOW_ARGS_TO_STACK 6 |
654 SAVE_XMM | 654 SAVE_XMM |
655 GET_GOT rbx | 655 GET_GOT rbx |
656 push rsi | 656 push rsi |
657 push rdi | 657 push rdi |
658 ; end prolog | 658 ; end prolog |
659 | 659 |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
804 ;void vp8_filter_block1d8_v6_only_sse2 | 804 ;void vp8_filter_block1d8_v6_only_sse2 |
805 ;( | 805 ;( |
806 ; unsigned char *src_ptr, | 806 ; unsigned char *src_ptr, |
807 ; unsigned int src_pixels_per_line, | 807 ; unsigned int src_pixels_per_line, |
808 ; unsigned char *output_ptr, | 808 ; unsigned char *output_ptr, |
809 ; int dst_ptich, | 809 ; int dst_ptich, |
810 ; unsigned int output_height, | 810 ; unsigned int output_height, |
811 ; const short *vp8_filter | 811 ; const short *vp8_filter |
812 ;) | 812 ;) |
813 ; Second-pass filter only when xoffset==0 | 813 ; Second-pass filter only when xoffset==0 |
814 global sym(vp8_filter_block1d8_v6_only_sse2) | 814 global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE |
815 sym(vp8_filter_block1d8_v6_only_sse2): | 815 sym(vp8_filter_block1d8_v6_only_sse2): |
816 push rbp | 816 push rbp |
817 mov rbp, rsp | 817 mov rbp, rsp |
818 SHADOW_ARGS_TO_STACK 6 | 818 SHADOW_ARGS_TO_STACK 6 |
819 SAVE_XMM | 819 SAVE_XMM |
820 GET_GOT rbx | 820 GET_GOT rbx |
821 push rsi | 821 push rsi |
822 push rdi | 822 push rdi |
823 ; end prolog | 823 ; end prolog |
824 | 824 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
896 | 896 |
897 | 897 |
898 ;void vp8_unpack_block1d16_h6_sse2 | 898 ;void vp8_unpack_block1d16_h6_sse2 |
899 ;( | 899 ;( |
900 ; unsigned char *src_ptr, | 900 ; unsigned char *src_ptr, |
901 ; unsigned short *output_ptr, | 901 ; unsigned short *output_ptr, |
902 ; unsigned int src_pixels_per_line, | 902 ; unsigned int src_pixels_per_line, |
903 ; unsigned int output_height, | 903 ; unsigned int output_height, |
904 ; unsigned int output_width | 904 ; unsigned int output_width |
905 ;) | 905 ;) |
906 global sym(vp8_unpack_block1d16_h6_sse2) | 906 global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE |
907 sym(vp8_unpack_block1d16_h6_sse2): | 907 sym(vp8_unpack_block1d16_h6_sse2): |
908 push rbp | 908 push rbp |
909 mov rbp, rsp | 909 mov rbp, rsp |
910 SHADOW_ARGS_TO_STACK 5 | 910 SHADOW_ARGS_TO_STACK 5 |
911 ;SAVE_XMM ;xmm6, xmm7 are not used here. | 911 ;SAVE_XMM ;xmm6, xmm7 are not used here. |
912 GET_GOT rbx | 912 GET_GOT rbx |
913 push rsi | 913 push rsi |
914 push rdi | 914 push rdi |
915 ; end prolog | 915 ; end prolog |
916 | 916 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
957 ;void vp8_bilinear_predict16x16_sse2 | 957 ;void vp8_bilinear_predict16x16_sse2 |
958 ;( | 958 ;( |
959 ; unsigned char *src_ptr, | 959 ; unsigned char *src_ptr, |
960 ; int src_pixels_per_line, | 960 ; int src_pixels_per_line, |
961 ; int xoffset, | 961 ; int xoffset, |
962 ; int yoffset, | 962 ; int yoffset, |
963 ; unsigned char *dst_ptr, | 963 ; unsigned char *dst_ptr, |
964 ; int dst_pitch | 964 ; int dst_pitch |
965 ;) | 965 ;) |
966 extern sym(vp8_bilinear_filters_mmx) | 966 extern sym(vp8_bilinear_filters_mmx) |
967 global sym(vp8_bilinear_predict16x16_sse2) | 967 global sym(vp8_bilinear_predict16x16_sse2) PRIVATE |
968 sym(vp8_bilinear_predict16x16_sse2): | 968 sym(vp8_bilinear_predict16x16_sse2): |
969 push rbp | 969 push rbp |
970 mov rbp, rsp | 970 mov rbp, rsp |
971 SHADOW_ARGS_TO_STACK 6 | 971 SHADOW_ARGS_TO_STACK 6 |
972 SAVE_XMM | 972 SAVE_XMM |
973 GET_GOT rbx | 973 GET_GOT rbx |
974 push rsi | 974 push rsi |
975 push rdi | 975 push rdi |
976 ; end prolog | 976 ; end prolog |
977 | 977 |
(...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1226 ;void vp8_bilinear_predict8x8_sse2 | 1226 ;void vp8_bilinear_predict8x8_sse2 |
1227 ;( | 1227 ;( |
1228 ; unsigned char *src_ptr, | 1228 ; unsigned char *src_ptr, |
1229 ; int src_pixels_per_line, | 1229 ; int src_pixels_per_line, |
1230 ; int xoffset, | 1230 ; int xoffset, |
1231 ; int yoffset, | 1231 ; int yoffset, |
1232 ; unsigned char *dst_ptr, | 1232 ; unsigned char *dst_ptr, |
1233 ; int dst_pitch | 1233 ; int dst_pitch |
1234 ;) | 1234 ;) |
1235 extern sym(vp8_bilinear_filters_mmx) | 1235 extern sym(vp8_bilinear_filters_mmx) |
1236 global sym(vp8_bilinear_predict8x8_sse2) | 1236 global sym(vp8_bilinear_predict8x8_sse2) PRIVATE |
1237 sym(vp8_bilinear_predict8x8_sse2): | 1237 sym(vp8_bilinear_predict8x8_sse2): |
1238 push rbp | 1238 push rbp |
1239 mov rbp, rsp | 1239 mov rbp, rsp |
1240 SHADOW_ARGS_TO_STACK 6 | 1240 SHADOW_ARGS_TO_STACK 6 |
1241 SAVE_XMM | 1241 SAVE_XMM |
1242 GET_GOT rbx | 1242 GET_GOT rbx |
1243 push rsi | 1243 push rsi |
1244 push rdi | 1244 push rdi |
1245 ; end prolog | 1245 ; end prolog |
1246 | 1246 |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1365 RESTORE_XMM | 1365 RESTORE_XMM |
1366 UNSHADOW_ARGS | 1366 UNSHADOW_ARGS |
1367 pop rbp | 1367 pop rbp |
1368 ret | 1368 ret |
1369 | 1369 |
1370 | 1370 |
1371 SECTION_RODATA | 1371 SECTION_RODATA |
1372 align 16 | 1372 align 16 |
1373 rd: | 1373 rd: |
1374 times 8 dw 0x40 | 1374 times 8 dw 0x40 |
OLD | NEW |