| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" |
| 13 | 13 |
| 14 %define xmm_filter_shift 7 | 14 %define xmm_filter_shift 7 |
| 15 | 15 |
| 16 ;unsigned int vp8_get_mb_ss_sse2 | 16 ;unsigned int vp8_get_mb_ss_sse2 |
| 17 ;( | 17 ;( |
| 18 ; short *src_ptr | 18 ; short *src_ptr |
| 19 ;) | 19 ;) |
| 20 global sym(vp8_get_mb_ss_sse2) | 20 global sym(vp8_get_mb_ss_sse2) PRIVATE |
| 21 sym(vp8_get_mb_ss_sse2): | 21 sym(vp8_get_mb_ss_sse2): |
| 22 push rbp | 22 push rbp |
| 23 mov rbp, rsp | 23 mov rbp, rsp |
| 24 SHADOW_ARGS_TO_STACK 1 | 24 SHADOW_ARGS_TO_STACK 1 |
| 25 GET_GOT rbx | 25 GET_GOT rbx |
| 26 push rsi | 26 push rsi |
| 27 push rdi | 27 push rdi |
| 28 sub rsp, 16 | 28 sub rsp, 16 |
| 29 ; end prolog | 29 ; end prolog |
| 30 | 30 |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 | 73 |
| 74 ;unsigned int vp8_get16x16var_sse2 | 74 ;unsigned int vp8_get16x16var_sse2 |
| 75 ;( | 75 ;( |
| 76 ; unsigned char * src_ptr, | 76 ; unsigned char * src_ptr, |
| 77 ; int source_stride, | 77 ; int source_stride, |
| 78 ; unsigned char * ref_ptr, | 78 ; unsigned char * ref_ptr, |
| 79 ; int recon_stride, | 79 ; int recon_stride, |
| 80 ; unsigned int * SSE, | 80 ; unsigned int * SSE, |
| 81 ; int * Sum | 81 ; int * Sum |
| 82 ;) | 82 ;) |
| 83 global sym(vp8_get16x16var_sse2) | 83 global sym(vp8_get16x16var_sse2) PRIVATE |
| 84 sym(vp8_get16x16var_sse2): | 84 sym(vp8_get16x16var_sse2): |
| 85 push rbp | 85 push rbp |
| 86 mov rbp, rsp | 86 mov rbp, rsp |
| 87 SHADOW_ARGS_TO_STACK 6 | 87 SHADOW_ARGS_TO_STACK 6 |
| 88 push rbx | 88 push rbx |
| 89 push rsi | 89 push rsi |
| 90 push rdi | 90 push rdi |
| 91 ; end prolog | 91 ; end prolog |
| 92 | 92 |
| 93 mov rsi, arg(0) ;[src_ptr] | 93 mov rsi, arg(0) ;[src_ptr] |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 211 ret | 211 ret |
| 212 | 212 |
| 213 | 213 |
| 214 ;unsigned int vp8_get16x16pred_error_sse2 | 214 ;unsigned int vp8_get16x16pred_error_sse2 |
| 215 ;( | 215 ;( |
| 216 ; unsigned char *src_ptr, | 216 ; unsigned char *src_ptr, |
| 217 ; int src_stride, | 217 ; int src_stride, |
| 218 ; unsigned char *ref_ptr, | 218 ; unsigned char *ref_ptr, |
| 219 ; int ref_stride | 219 ; int ref_stride |
| 220 ;) | 220 ;) |
| 221 global sym(vp8_get16x16pred_error_sse2) | 221 global sym(vp8_get16x16pred_error_sse2) PRIVATE |
| 222 sym(vp8_get16x16pred_error_sse2): | 222 sym(vp8_get16x16pred_error_sse2): |
| 223 push rbp | 223 push rbp |
| 224 mov rbp, rsp | 224 mov rbp, rsp |
| 225 SHADOW_ARGS_TO_STACK 4 | 225 SHADOW_ARGS_TO_STACK 4 |
| 226 GET_GOT rbx | 226 GET_GOT rbx |
| 227 push rsi | 227 push rsi |
| 228 push rdi | 228 push rdi |
| 229 sub rsp, 16 | 229 sub rsp, 16 |
| 230 ; end prolog | 230 ; end prolog |
| 231 | 231 |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 329 | 329 |
| 330 ;unsigned int vp8_get8x8var_sse2 | 330 ;unsigned int vp8_get8x8var_sse2 |
| 331 ;( | 331 ;( |
| 332 ; unsigned char * src_ptr, | 332 ; unsigned char * src_ptr, |
| 333 ; int source_stride, | 333 ; int source_stride, |
| 334 ; unsigned char * ref_ptr, | 334 ; unsigned char * ref_ptr, |
| 335 ; int recon_stride, | 335 ; int recon_stride, |
| 336 ; unsigned int * SSE, | 336 ; unsigned int * SSE, |
| 337 ; int * Sum | 337 ; int * Sum |
| 338 ;) | 338 ;) |
| 339 global sym(vp8_get8x8var_sse2) | 339 global sym(vp8_get8x8var_sse2) PRIVATE |
| 340 sym(vp8_get8x8var_sse2): | 340 sym(vp8_get8x8var_sse2): |
| 341 push rbp | 341 push rbp |
| 342 mov rbp, rsp | 342 mov rbp, rsp |
| 343 SHADOW_ARGS_TO_STACK 6 | 343 SHADOW_ARGS_TO_STACK 6 |
| 344 GET_GOT rbx | 344 GET_GOT rbx |
| 345 push rsi | 345 push rsi |
| 346 push rdi | 346 push rdi |
| 347 sub rsp, 16 | 347 sub rsp, 16 |
| 348 ; end prolog | 348 ; end prolog |
| 349 | 349 |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 516 ; int ref_pixels_per_line, | 516 ; int ref_pixels_per_line, |
| 517 ; unsigned char *src_ptr, | 517 ; unsigned char *src_ptr, |
| 518 ; int src_pixels_per_line, | 518 ; int src_pixels_per_line, |
| 519 ; unsigned int Height, | 519 ; unsigned int Height, |
| 520 ; int xoffset, | 520 ; int xoffset, |
| 521 ; int yoffset, | 521 ; int yoffset, |
| 522 ; int *sum, | 522 ; int *sum, |
| 523 ; unsigned int *sumsquared;; | 523 ; unsigned int *sumsquared;; |
| 524 ; | 524 ; |
| 525 ;) | 525 ;) |
| 526 global sym(vp8_filter_block2d_bil_var_sse2) | 526 global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE |
| 527 sym(vp8_filter_block2d_bil_var_sse2): | 527 sym(vp8_filter_block2d_bil_var_sse2): |
| 528 push rbp | 528 push rbp |
| 529 mov rbp, rsp | 529 mov rbp, rsp |
| 530 SHADOW_ARGS_TO_STACK 9 | 530 SHADOW_ARGS_TO_STACK 9 |
| 531 SAVE_XMM | 531 SAVE_XMM |
| 532 GET_GOT rbx | 532 GET_GOT rbx |
| 533 push rsi | 533 push rsi |
| 534 push rdi | 534 push rdi |
| 535 push rbx | 535 push rbx |
| 536 ; end prolog | 536 ; end prolog |
| (...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 793 ;void vp8_half_horiz_vert_variance8x_h_sse2 | 793 ;void vp8_half_horiz_vert_variance8x_h_sse2 |
| 794 ;( | 794 ;( |
| 795 ; unsigned char *ref_ptr, | 795 ; unsigned char *ref_ptr, |
| 796 ; int ref_pixels_per_line, | 796 ; int ref_pixels_per_line, |
| 797 ; unsigned char *src_ptr, | 797 ; unsigned char *src_ptr, |
| 798 ; int src_pixels_per_line, | 798 ; int src_pixels_per_line, |
| 799 ; unsigned int Height, | 799 ; unsigned int Height, |
| 800 ; int *sum, | 800 ; int *sum, |
| 801 ; unsigned int *sumsquared | 801 ; unsigned int *sumsquared |
| 802 ;) | 802 ;) |
| 803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) | 803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE |
| 804 sym(vp8_half_horiz_vert_variance8x_h_sse2): | 804 sym(vp8_half_horiz_vert_variance8x_h_sse2): |
| 805 push rbp | 805 push rbp |
| 806 mov rbp, rsp | 806 mov rbp, rsp |
| 807 SHADOW_ARGS_TO_STACK 7 | 807 SHADOW_ARGS_TO_STACK 7 |
| 808 GET_GOT rbx | 808 GET_GOT rbx |
| 809 push rsi | 809 push rsi |
| 810 push rdi | 810 push rdi |
| 811 ; end prolog | 811 ; end prolog |
| 812 | 812 |
| 813 %if ABI_IS_32BIT=0 | 813 %if ABI_IS_32BIT=0 |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 913 ;void vp8_half_horiz_vert_variance16x_h_sse2 | 913 ;void vp8_half_horiz_vert_variance16x_h_sse2 |
| 914 ;( | 914 ;( |
| 915 ; unsigned char *ref_ptr, | 915 ; unsigned char *ref_ptr, |
| 916 ; int ref_pixels_per_line, | 916 ; int ref_pixels_per_line, |
| 917 ; unsigned char *src_ptr, | 917 ; unsigned char *src_ptr, |
| 918 ; int src_pixels_per_line, | 918 ; int src_pixels_per_line, |
| 919 ; unsigned int Height, | 919 ; unsigned int Height, |
| 920 ; int *sum, | 920 ; int *sum, |
| 921 ; unsigned int *sumsquared | 921 ; unsigned int *sumsquared |
| 922 ;) | 922 ;) |
| 923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) | 923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE |
| 924 sym(vp8_half_horiz_vert_variance16x_h_sse2): | 924 sym(vp8_half_horiz_vert_variance16x_h_sse2): |
| 925 push rbp | 925 push rbp |
| 926 mov rbp, rsp | 926 mov rbp, rsp |
| 927 SHADOW_ARGS_TO_STACK 7 | 927 SHADOW_ARGS_TO_STACK 7 |
| 928 SAVE_XMM | 928 SAVE_XMM |
| 929 GET_GOT rbx | 929 GET_GOT rbx |
| 930 push rsi | 930 push rsi |
| 931 push rdi | 931 push rdi |
| 932 ; end prolog | 932 ; end prolog |
| 933 | 933 |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1029 ;void vp8_half_vert_variance8x_h_sse2 | 1029 ;void vp8_half_vert_variance8x_h_sse2 |
| 1030 ;( | 1030 ;( |
| 1031 ; unsigned char *ref_ptr, | 1031 ; unsigned char *ref_ptr, |
| 1032 ; int ref_pixels_per_line, | 1032 ; int ref_pixels_per_line, |
| 1033 ; unsigned char *src_ptr, | 1033 ; unsigned char *src_ptr, |
| 1034 ; int src_pixels_per_line, | 1034 ; int src_pixels_per_line, |
| 1035 ; unsigned int Height, | 1035 ; unsigned int Height, |
| 1036 ; int *sum, | 1036 ; int *sum, |
| 1037 ; unsigned int *sumsquared | 1037 ; unsigned int *sumsquared |
| 1038 ;) | 1038 ;) |
| 1039 global sym(vp8_half_vert_variance8x_h_sse2) | 1039 global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE |
| 1040 sym(vp8_half_vert_variance8x_h_sse2): | 1040 sym(vp8_half_vert_variance8x_h_sse2): |
| 1041 push rbp | 1041 push rbp |
| 1042 mov rbp, rsp | 1042 mov rbp, rsp |
| 1043 SHADOW_ARGS_TO_STACK 7 | 1043 SHADOW_ARGS_TO_STACK 7 |
| 1044 GET_GOT rbx | 1044 GET_GOT rbx |
| 1045 push rsi | 1045 push rsi |
| 1046 push rdi | 1046 push rdi |
| 1047 ; end prolog | 1047 ; end prolog |
| 1048 | 1048 |
| 1049 %if ABI_IS_32BIT=0 | 1049 %if ABI_IS_32BIT=0 |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1134 ;void vp8_half_vert_variance16x_h_sse2 | 1134 ;void vp8_half_vert_variance16x_h_sse2 |
| 1135 ;( | 1135 ;( |
| 1136 ; unsigned char *ref_ptr, | 1136 ; unsigned char *ref_ptr, |
| 1137 ; int ref_pixels_per_line, | 1137 ; int ref_pixels_per_line, |
| 1138 ; unsigned char *src_ptr, | 1138 ; unsigned char *src_ptr, |
| 1139 ; int src_pixels_per_line, | 1139 ; int src_pixels_per_line, |
| 1140 ; unsigned int Height, | 1140 ; unsigned int Height, |
| 1141 ; int *sum, | 1141 ; int *sum, |
| 1142 ; unsigned int *sumsquared | 1142 ; unsigned int *sumsquared |
| 1143 ;) | 1143 ;) |
| 1144 global sym(vp8_half_vert_variance16x_h_sse2) | 1144 global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE |
| 1145 sym(vp8_half_vert_variance16x_h_sse2): | 1145 sym(vp8_half_vert_variance16x_h_sse2): |
| 1146 push rbp | 1146 push rbp |
| 1147 mov rbp, rsp | 1147 mov rbp, rsp |
| 1148 SHADOW_ARGS_TO_STACK 7 | 1148 SHADOW_ARGS_TO_STACK 7 |
| 1149 SAVE_XMM | 1149 SAVE_XMM |
| 1150 GET_GOT rbx | 1150 GET_GOT rbx |
| 1151 push rsi | 1151 push rsi |
| 1152 push rdi | 1152 push rdi |
| 1153 ; end prolog | 1153 ; end prolog |
| 1154 | 1154 |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1242 ;void vp8_half_horiz_variance8x_h_sse2 | 1242 ;void vp8_half_horiz_variance8x_h_sse2 |
| 1243 ;( | 1243 ;( |
| 1244 ; unsigned char *ref_ptr, | 1244 ; unsigned char *ref_ptr, |
| 1245 ; int ref_pixels_per_line, | 1245 ; int ref_pixels_per_line, |
| 1246 ; unsigned char *src_ptr, | 1246 ; unsigned char *src_ptr, |
| 1247 ; int src_pixels_per_line, | 1247 ; int src_pixels_per_line, |
| 1248 ; unsigned int Height, | 1248 ; unsigned int Height, |
| 1249 ; int *sum, | 1249 ; int *sum, |
| 1250 ; unsigned int *sumsquared | 1250 ; unsigned int *sumsquared |
| 1251 ;) | 1251 ;) |
| 1252 global sym(vp8_half_horiz_variance8x_h_sse2) | 1252 global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE |
| 1253 sym(vp8_half_horiz_variance8x_h_sse2): | 1253 sym(vp8_half_horiz_variance8x_h_sse2): |
| 1254 push rbp | 1254 push rbp |
| 1255 mov rbp, rsp | 1255 mov rbp, rsp |
| 1256 SHADOW_ARGS_TO_STACK 7 | 1256 SHADOW_ARGS_TO_STACK 7 |
| 1257 GET_GOT rbx | 1257 GET_GOT rbx |
| 1258 push rsi | 1258 push rsi |
| 1259 push rdi | 1259 push rdi |
| 1260 ; end prolog | 1260 ; end prolog |
| 1261 | 1261 |
| 1262 %if ABI_IS_32BIT=0 | 1262 %if ABI_IS_32BIT=0 |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1345 ;void vp8_half_horiz_variance16x_h_sse2 | 1345 ;void vp8_half_horiz_variance16x_h_sse2 |
| 1346 ;( | 1346 ;( |
| 1347 ; unsigned char *ref_ptr, | 1347 ; unsigned char *ref_ptr, |
| 1348 ; int ref_pixels_per_line, | 1348 ; int ref_pixels_per_line, |
| 1349 ; unsigned char *src_ptr, | 1349 ; unsigned char *src_ptr, |
| 1350 ; int src_pixels_per_line, | 1350 ; int src_pixels_per_line, |
| 1351 ; unsigned int Height, | 1351 ; unsigned int Height, |
| 1352 ; int *sum, | 1352 ; int *sum, |
| 1353 ; unsigned int *sumsquared | 1353 ; unsigned int *sumsquared |
| 1354 ;) | 1354 ;) |
| 1355 global sym(vp8_half_horiz_variance16x_h_sse2) | 1355 global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE |
| 1356 sym(vp8_half_horiz_variance16x_h_sse2): | 1356 sym(vp8_half_horiz_variance16x_h_sse2): |
| 1357 push rbp | 1357 push rbp |
| 1358 mov rbp, rsp | 1358 mov rbp, rsp |
| 1359 SHADOW_ARGS_TO_STACK 7 | 1359 SHADOW_ARGS_TO_STACK 7 |
| 1360 SAVE_XMM | 1360 SAVE_XMM |
| 1361 GET_GOT rbx | 1361 GET_GOT rbx |
| 1362 push rsi | 1362 push rsi |
| 1363 push rdi | 1363 push rdi |
| 1364 ; end prolog | 1364 ; end prolog |
| 1365 | 1365 |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1454 align 16 | 1454 align 16 |
| 1455 vp8_bilinear_filters_sse2: | 1455 vp8_bilinear_filters_sse2: |
| 1456 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 | 1456 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 |
| 1457 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 | 1457 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 |
| 1458 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 | 1458 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 |
| 1459 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 | 1459 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 |
| 1460 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 | 1460 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 |
| 1461 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 | 1461 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 |
| 1462 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 | 1462 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 |
| 1463 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 | 1463 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 |
| OLD | NEW |