OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
11 | 11 |
12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" |
13 | 13 |
14 %define xmm_filter_shift 7 | 14 %define xmm_filter_shift 7 |
15 | 15 |
16 ;unsigned int vp8_get_mb_ss_sse2 | 16 ;unsigned int vp8_get_mb_ss_sse2 |
17 ;( | 17 ;( |
18 ; short *src_ptr | 18 ; short *src_ptr |
19 ;) | 19 ;) |
20 global sym(vp8_get_mb_ss_sse2) | 20 global sym(vp8_get_mb_ss_sse2) PRIVATE |
21 sym(vp8_get_mb_ss_sse2): | 21 sym(vp8_get_mb_ss_sse2): |
22 push rbp | 22 push rbp |
23 mov rbp, rsp | 23 mov rbp, rsp |
24 SHADOW_ARGS_TO_STACK 1 | 24 SHADOW_ARGS_TO_STACK 1 |
25 GET_GOT rbx | 25 GET_GOT rbx |
26 push rsi | 26 push rsi |
27 push rdi | 27 push rdi |
28 sub rsp, 16 | 28 sub rsp, 16 |
29 ; end prolog | 29 ; end prolog |
30 | 30 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 | 73 |
74 ;unsigned int vp8_get16x16var_sse2 | 74 ;unsigned int vp8_get16x16var_sse2 |
75 ;( | 75 ;( |
76 ; unsigned char * src_ptr, | 76 ; unsigned char * src_ptr, |
77 ; int source_stride, | 77 ; int source_stride, |
78 ; unsigned char * ref_ptr, | 78 ; unsigned char * ref_ptr, |
79 ; int recon_stride, | 79 ; int recon_stride, |
80 ; unsigned int * SSE, | 80 ; unsigned int * SSE, |
81 ; int * Sum | 81 ; int * Sum |
82 ;) | 82 ;) |
83 global sym(vp8_get16x16var_sse2) | 83 global sym(vp8_get16x16var_sse2) PRIVATE |
84 sym(vp8_get16x16var_sse2): | 84 sym(vp8_get16x16var_sse2): |
85 push rbp | 85 push rbp |
86 mov rbp, rsp | 86 mov rbp, rsp |
87 SHADOW_ARGS_TO_STACK 6 | 87 SHADOW_ARGS_TO_STACK 6 |
88 push rbx | 88 push rbx |
89 push rsi | 89 push rsi |
90 push rdi | 90 push rdi |
91 ; end prolog | 91 ; end prolog |
92 | 92 |
93 mov rsi, arg(0) ;[src_ptr] | 93 mov rsi, arg(0) ;[src_ptr] |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
211 ret | 211 ret |
212 | 212 |
213 | 213 |
214 ;unsigned int vp8_get16x16pred_error_sse2 | 214 ;unsigned int vp8_get16x16pred_error_sse2 |
215 ;( | 215 ;( |
216 ; unsigned char *src_ptr, | 216 ; unsigned char *src_ptr, |
217 ; int src_stride, | 217 ; int src_stride, |
218 ; unsigned char *ref_ptr, | 218 ; unsigned char *ref_ptr, |
219 ; int ref_stride | 219 ; int ref_stride |
220 ;) | 220 ;) |
221 global sym(vp8_get16x16pred_error_sse2) | 221 global sym(vp8_get16x16pred_error_sse2) PRIVATE |
222 sym(vp8_get16x16pred_error_sse2): | 222 sym(vp8_get16x16pred_error_sse2): |
223 push rbp | 223 push rbp |
224 mov rbp, rsp | 224 mov rbp, rsp |
225 SHADOW_ARGS_TO_STACK 4 | 225 SHADOW_ARGS_TO_STACK 4 |
226 GET_GOT rbx | 226 GET_GOT rbx |
227 push rsi | 227 push rsi |
228 push rdi | 228 push rdi |
229 sub rsp, 16 | 229 sub rsp, 16 |
230 ; end prolog | 230 ; end prolog |
231 | 231 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
329 | 329 |
330 ;unsigned int vp8_get8x8var_sse2 | 330 ;unsigned int vp8_get8x8var_sse2 |
331 ;( | 331 ;( |
332 ; unsigned char * src_ptr, | 332 ; unsigned char * src_ptr, |
333 ; int source_stride, | 333 ; int source_stride, |
334 ; unsigned char * ref_ptr, | 334 ; unsigned char * ref_ptr, |
335 ; int recon_stride, | 335 ; int recon_stride, |
336 ; unsigned int * SSE, | 336 ; unsigned int * SSE, |
337 ; int * Sum | 337 ; int * Sum |
338 ;) | 338 ;) |
339 global sym(vp8_get8x8var_sse2) | 339 global sym(vp8_get8x8var_sse2) PRIVATE |
340 sym(vp8_get8x8var_sse2): | 340 sym(vp8_get8x8var_sse2): |
341 push rbp | 341 push rbp |
342 mov rbp, rsp | 342 mov rbp, rsp |
343 SHADOW_ARGS_TO_STACK 6 | 343 SHADOW_ARGS_TO_STACK 6 |
344 GET_GOT rbx | 344 GET_GOT rbx |
345 push rsi | 345 push rsi |
346 push rdi | 346 push rdi |
347 sub rsp, 16 | 347 sub rsp, 16 |
348 ; end prolog | 348 ; end prolog |
349 | 349 |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
516 ; int ref_pixels_per_line, | 516 ; int ref_pixels_per_line, |
517 ; unsigned char *src_ptr, | 517 ; unsigned char *src_ptr, |
518 ; int src_pixels_per_line, | 518 ; int src_pixels_per_line, |
519 ; unsigned int Height, | 519 ; unsigned int Height, |
520 ; int xoffset, | 520 ; int xoffset, |
521 ; int yoffset, | 521 ; int yoffset, |
522 ; int *sum, | 522 ; int *sum, |
523 ; unsigned int *sumsquared | 523 ; unsigned int *sumsquared |
524 ; | 524 ; |
525 ;) | 525 ;) |
526 global sym(vp8_filter_block2d_bil_var_sse2) | 526 global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE |
527 sym(vp8_filter_block2d_bil_var_sse2): | 527 sym(vp8_filter_block2d_bil_var_sse2): |
528 push rbp | 528 push rbp |
529 mov rbp, rsp | 529 mov rbp, rsp |
530 SHADOW_ARGS_TO_STACK 9 | 530 SHADOW_ARGS_TO_STACK 9 |
531 SAVE_XMM | 531 SAVE_XMM |
532 GET_GOT rbx | 532 GET_GOT rbx |
533 push rsi | 533 push rsi |
534 push rdi | 534 push rdi |
535 push rbx | 535 push rbx |
536 ; end prolog | 536 ; end prolog |
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
793 ;void vp8_half_horiz_vert_variance8x_h_sse2 | 793 ;void vp8_half_horiz_vert_variance8x_h_sse2 |
794 ;( | 794 ;( |
795 ; unsigned char *ref_ptr, | 795 ; unsigned char *ref_ptr, |
796 ; int ref_pixels_per_line, | 796 ; int ref_pixels_per_line, |
797 ; unsigned char *src_ptr, | 797 ; unsigned char *src_ptr, |
798 ; int src_pixels_per_line, | 798 ; int src_pixels_per_line, |
799 ; unsigned int Height, | 799 ; unsigned int Height, |
800 ; int *sum, | 800 ; int *sum, |
801 ; unsigned int *sumsquared | 801 ; unsigned int *sumsquared |
802 ;) | 802 ;) |
803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) | 803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE |
804 sym(vp8_half_horiz_vert_variance8x_h_sse2): | 804 sym(vp8_half_horiz_vert_variance8x_h_sse2): |
805 push rbp | 805 push rbp |
806 mov rbp, rsp | 806 mov rbp, rsp |
807 SHADOW_ARGS_TO_STACK 7 | 807 SHADOW_ARGS_TO_STACK 7 |
808 GET_GOT rbx | 808 GET_GOT rbx |
809 push rsi | 809 push rsi |
810 push rdi | 810 push rdi |
811 ; end prolog | 811 ; end prolog |
812 | 812 |
813 %if ABI_IS_32BIT=0 | 813 %if ABI_IS_32BIT=0 |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
913 ;void vp8_half_horiz_vert_variance16x_h_sse2 | 913 ;void vp8_half_horiz_vert_variance16x_h_sse2 |
914 ;( | 914 ;( |
915 ; unsigned char *ref_ptr, | 915 ; unsigned char *ref_ptr, |
916 ; int ref_pixels_per_line, | 916 ; int ref_pixels_per_line, |
917 ; unsigned char *src_ptr, | 917 ; unsigned char *src_ptr, |
918 ; int src_pixels_per_line, | 918 ; int src_pixels_per_line, |
919 ; unsigned int Height, | 919 ; unsigned int Height, |
920 ; int *sum, | 920 ; int *sum, |
921 ; unsigned int *sumsquared | 921 ; unsigned int *sumsquared |
922 ;) | 922 ;) |
923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) | 923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE |
924 sym(vp8_half_horiz_vert_variance16x_h_sse2): | 924 sym(vp8_half_horiz_vert_variance16x_h_sse2): |
925 push rbp | 925 push rbp |
926 mov rbp, rsp | 926 mov rbp, rsp |
927 SHADOW_ARGS_TO_STACK 7 | 927 SHADOW_ARGS_TO_STACK 7 |
928 SAVE_XMM | 928 SAVE_XMM |
929 GET_GOT rbx | 929 GET_GOT rbx |
930 push rsi | 930 push rsi |
931 push rdi | 931 push rdi |
932 ; end prolog | 932 ; end prolog |
933 | 933 |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1029 ;void vp8_half_vert_variance8x_h_sse2 | 1029 ;void vp8_half_vert_variance8x_h_sse2 |
1030 ;( | 1030 ;( |
1031 ; unsigned char *ref_ptr, | 1031 ; unsigned char *ref_ptr, |
1032 ; int ref_pixels_per_line, | 1032 ; int ref_pixels_per_line, |
1033 ; unsigned char *src_ptr, | 1033 ; unsigned char *src_ptr, |
1034 ; int src_pixels_per_line, | 1034 ; int src_pixels_per_line, |
1035 ; unsigned int Height, | 1035 ; unsigned int Height, |
1036 ; int *sum, | 1036 ; int *sum, |
1037 ; unsigned int *sumsquared | 1037 ; unsigned int *sumsquared |
1038 ;) | 1038 ;) |
1039 global sym(vp8_half_vert_variance8x_h_sse2) | 1039 global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE |
1040 sym(vp8_half_vert_variance8x_h_sse2): | 1040 sym(vp8_half_vert_variance8x_h_sse2): |
1041 push rbp | 1041 push rbp |
1042 mov rbp, rsp | 1042 mov rbp, rsp |
1043 SHADOW_ARGS_TO_STACK 7 | 1043 SHADOW_ARGS_TO_STACK 7 |
1044 GET_GOT rbx | 1044 GET_GOT rbx |
1045 push rsi | 1045 push rsi |
1046 push rdi | 1046 push rdi |
1047 ; end prolog | 1047 ; end prolog |
1048 | 1048 |
1049 %if ABI_IS_32BIT=0 | 1049 %if ABI_IS_32BIT=0 |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1134 ;void vp8_half_vert_variance16x_h_sse2 | 1134 ;void vp8_half_vert_variance16x_h_sse2 |
1135 ;( | 1135 ;( |
1136 ; unsigned char *ref_ptr, | 1136 ; unsigned char *ref_ptr, |
1137 ; int ref_pixels_per_line, | 1137 ; int ref_pixels_per_line, |
1138 ; unsigned char *src_ptr, | 1138 ; unsigned char *src_ptr, |
1139 ; int src_pixels_per_line, | 1139 ; int src_pixels_per_line, |
1140 ; unsigned int Height, | 1140 ; unsigned int Height, |
1141 ; int *sum, | 1141 ; int *sum, |
1142 ; unsigned int *sumsquared | 1142 ; unsigned int *sumsquared |
1143 ;) | 1143 ;) |
1144 global sym(vp8_half_vert_variance16x_h_sse2) | 1144 global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE |
1145 sym(vp8_half_vert_variance16x_h_sse2): | 1145 sym(vp8_half_vert_variance16x_h_sse2): |
1146 push rbp | 1146 push rbp |
1147 mov rbp, rsp | 1147 mov rbp, rsp |
1148 SHADOW_ARGS_TO_STACK 7 | 1148 SHADOW_ARGS_TO_STACK 7 |
1149 SAVE_XMM | 1149 SAVE_XMM |
1150 GET_GOT rbx | 1150 GET_GOT rbx |
1151 push rsi | 1151 push rsi |
1152 push rdi | 1152 push rdi |
1153 ; end prolog | 1153 ; end prolog |
1154 | 1154 |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1242 ;void vp8_half_horiz_variance8x_h_sse2 | 1242 ;void vp8_half_horiz_variance8x_h_sse2 |
1243 ;( | 1243 ;( |
1244 ; unsigned char *ref_ptr, | 1244 ; unsigned char *ref_ptr, |
1245 ; int ref_pixels_per_line, | 1245 ; int ref_pixels_per_line, |
1246 ; unsigned char *src_ptr, | 1246 ; unsigned char *src_ptr, |
1247 ; int src_pixels_per_line, | 1247 ; int src_pixels_per_line, |
1248 ; unsigned int Height, | 1248 ; unsigned int Height, |
1249 ; int *sum, | 1249 ; int *sum, |
1250 ; unsigned int *sumsquared | 1250 ; unsigned int *sumsquared |
1251 ;) | 1251 ;) |
1252 global sym(vp8_half_horiz_variance8x_h_sse2) | 1252 global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE |
1253 sym(vp8_half_horiz_variance8x_h_sse2): | 1253 sym(vp8_half_horiz_variance8x_h_sse2): |
1254 push rbp | 1254 push rbp |
1255 mov rbp, rsp | 1255 mov rbp, rsp |
1256 SHADOW_ARGS_TO_STACK 7 | 1256 SHADOW_ARGS_TO_STACK 7 |
1257 GET_GOT rbx | 1257 GET_GOT rbx |
1258 push rsi | 1258 push rsi |
1259 push rdi | 1259 push rdi |
1260 ; end prolog | 1260 ; end prolog |
1261 | 1261 |
1262 %if ABI_IS_32BIT=0 | 1262 %if ABI_IS_32BIT=0 |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1345 ;void vp8_half_horiz_variance16x_h_sse2 | 1345 ;void vp8_half_horiz_variance16x_h_sse2 |
1346 ;( | 1346 ;( |
1347 ; unsigned char *ref_ptr, | 1347 ; unsigned char *ref_ptr, |
1348 ; int ref_pixels_per_line, | 1348 ; int ref_pixels_per_line, |
1349 ; unsigned char *src_ptr, | 1349 ; unsigned char *src_ptr, |
1350 ; int src_pixels_per_line, | 1350 ; int src_pixels_per_line, |
1351 ; unsigned int Height, | 1351 ; unsigned int Height, |
1352 ; int *sum, | 1352 ; int *sum, |
1353 ; unsigned int *sumsquared | 1353 ; unsigned int *sumsquared |
1354 ;) | 1354 ;) |
1355 global sym(vp8_half_horiz_variance16x_h_sse2) | 1355 global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE |
1356 sym(vp8_half_horiz_variance16x_h_sse2): | 1356 sym(vp8_half_horiz_variance16x_h_sse2): |
1357 push rbp | 1357 push rbp |
1358 mov rbp, rsp | 1358 mov rbp, rsp |
1359 SHADOW_ARGS_TO_STACK 7 | 1359 SHADOW_ARGS_TO_STACK 7 |
1360 SAVE_XMM | 1360 SAVE_XMM |
1361 GET_GOT rbx | 1361 GET_GOT rbx |
1362 push rsi | 1362 push rsi |
1363 push rdi | 1363 push rdi |
1364 ; end prolog | 1364 ; end prolog |
1365 | 1365 |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
; vp8_bilinear_filters_sse2: table of two-tap filter weights, one row per entry.
; Each row is 16 words: the first weight repeated 8 times, then the second
; weight repeated 8 times (32 bytes per row, 8 rows in total).
; The first weight falls from 128 to 16 in steps of 16 while the second rises
; from 0 to 112; the pair in every row sums to 128, which equals
; 1 << xmm_filter_shift (defined as 7 at the top of the file).
; NOTE(review): the code that indexes this table is not visible here --
; presumably a row is selected by the xoffset/yoffset arguments; confirm
; against the callers. The 16-byte alignment presumably allows aligned
; 128-bit loads of each half-row -- confirm.
1454 align 16 | 1454 align 16 |
1455 vp8_bilinear_filters_sse2: | 1455 vp8_bilinear_filters_sse2: |
1456 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 | 1456 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 |
1457 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 | 1457 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 |
1458 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 | 1458 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 |
1459 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 | 1459 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 |
1460 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 | 1460 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 |
1461 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 | 1461 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 |
1462 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 | 1462 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 |
1463 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 | 1463 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 |
OLD | NEW |