| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 78 ; unsigned char * ref_ptr, | 78 ; unsigned char * ref_ptr, |
| 79 ; int recon_stride, | 79 ; int recon_stride, |
| 80 ; unsigned int * SSE, | 80 ; unsigned int * SSE, |
| 81 ; int * Sum | 81 ; int * Sum |
| 82 ;) | 82 ;) |
| 83 global sym(vp8_get16x16var_sse2) | 83 global sym(vp8_get16x16var_sse2) |
| 84 sym(vp8_get16x16var_sse2): | 84 sym(vp8_get16x16var_sse2): |
| 85 push rbp | 85 push rbp |
| 86 mov rbp, rsp | 86 mov rbp, rsp |
| 87 SHADOW_ARGS_TO_STACK 6 | 87 SHADOW_ARGS_TO_STACK 6 |
| 88 SAVE_XMM 7 |
| 88 push rbx | 89 push rbx |
| 89 push rsi | 90 push rsi |
| 90 push rdi | 91 push rdi |
| 91 ; end prolog | 92 ; end prolog |
| 92 | 93 |
| 93 mov rsi, arg(0) ;[src_ptr] | 94 mov rsi, arg(0) ;[src_ptr] |
| 94 mov rdi, arg(2) ;[ref_ptr] | 95 mov rdi, arg(2) ;[ref_ptr] |
| 95 | 96 |
| 96 movsxd rax, DWORD PTR arg(1) ;[source_stride] | 97 movsxd rax, DWORD PTR arg(1) ;[source_stride] |
| 97 movsxd rdx, DWORD PTR arg(3) ;[recon_stride] | 98 movsxd rdx, DWORD PTR arg(3) ;[recon_stride] |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 199 mov rdi, arg(4) ;[SSE] | 200 mov rdi, arg(4) ;[SSE] |
| 200 | 201 |
| 201 movd DWORD PTR [rax], xmm7 | 202 movd DWORD PTR [rax], xmm7 |
| 202 movd DWORD PTR [rdi], xmm1 | 203 movd DWORD PTR [rdi], xmm1 |
| 203 | 204 |
| 204 | 205 |
| 205 ; begin epilog | 206 ; begin epilog |
| 206 pop rdi | 207 pop rdi |
| 207 pop rsi | 208 pop rsi |
| 208 pop rbx | 209 pop rbx |
| 209 UNSHADOW_ARGS | 210 RESTORE_XMM |
| 210 pop rbp | |
| 211 ret | |
| 212 | |
| 213 | |
| 214 ;unsigned int vp8_get16x16pred_error_sse2 | |
| 215 ;( | |
| 216 ; unsigned char *src_ptr, | |
| 217 ; int src_stride, | |
| 218 ; unsigned char *ref_ptr, | |
| 219 ; int ref_stride | |
| 220 ;) | |
| 221 global sym(vp8_get16x16pred_error_sse2) | |
| 222 sym(vp8_get16x16pred_error_sse2): | |
| 223 push rbp | |
| 224 mov rbp, rsp | |
| 225 SHADOW_ARGS_TO_STACK 4 | |
| 226 GET_GOT rbx | |
| 227 push rsi | |
| 228 push rdi | |
| 229 sub rsp, 16 | |
| 230 ; end prolog | |
| 231 | |
| 232 mov rsi, arg(0) ;[src_ptr] | |
| 233 mov rdi, arg(2) ;[ref_ptr] | |
| 234 | |
| 235 movsxd rax, DWORD PTR arg(1) ;[src_stride] | |
| 236 movsxd rdx, DWORD PTR arg(3) ;[ref_stride] | |
| 237 | |
| 238 pxor xmm0, xmm0 ; clear xmm0 for unpack | |
| 239 pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs | |
| 240 | |
| 241 pxor xmm6, xmm6 ; clear xmm6 for accumulating sse | |
| 242 mov rcx, 16 | |
| 243 | |
| 244 var16peloop: | |
| 245 movdqu xmm1, XMMWORD PTR [rsi] | |
| 246 movdqu xmm2, XMMWORD PTR [rdi] | |
| 247 | |
| 248 movdqa xmm3, xmm1 | |
| 249 movdqa xmm4, xmm2 | |
| 250 | |
| 251 punpcklbw xmm1, xmm0 | |
| 252 punpckhbw xmm3, xmm0 | |
| 253 | |
| 254 punpcklbw xmm2, xmm0 | |
| 255 punpckhbw xmm4, xmm0 | |
| 256 | |
| 257 psubw xmm1, xmm2 | |
| 258 psubw xmm3, xmm4 | |
| 259 | |
| 260 paddw xmm7, xmm1 | |
| 261 pmaddwd xmm1, xmm1 | |
| 262 | |
| 263 paddw xmm7, xmm3 | |
| 264 pmaddwd xmm3, xmm3 | |
| 265 | |
| 266 paddd xmm6, xmm1 | |
| 267 paddd xmm6, xmm3 | |
| 268 | |
| 269 add rsi, rax | |
| 270 add rdi, rdx | |
| 271 | |
| 272 sub rcx, 1 | |
| 273 jnz var16peloop | |
| 274 | |
| 275 | |
| 276 movdqa xmm1, xmm6 | |
| 277 pxor xmm6, xmm6 | |
| 278 | |
| 279 pxor xmm5, xmm5 | |
| 280 punpcklwd xmm6, xmm7 | |
| 281 | |
| 282 punpckhwd xmm5, xmm7 | |
| 283 psrad xmm5, 16 | |
| 284 | |
| 285 psrad xmm6, 16 | |
| 286 paddd xmm6, xmm5 | |
| 287 | |
| 288 movdqa xmm2, xmm1 | |
| 289 punpckldq xmm1, xmm0 | |
| 290 | |
| 291 punpckhdq xmm2, xmm0 | |
| 292 movdqa xmm7, xmm6 | |
| 293 | |
| 294 paddd xmm1, xmm2 | |
| 295 punpckldq xmm6, xmm0 | |
| 296 | |
| 297 punpckhdq xmm7, xmm0 | |
| 298 paddd xmm6, xmm7 | |
| 299 | |
| 300 movdqa xmm2, xmm1 | |
| 301 movdqa xmm7, xmm6 | |
| 302 | |
| 303 psrldq xmm1, 8 | |
| 304 psrldq xmm6, 8 | |
| 305 | |
| 306 paddd xmm7, xmm6 | |
| 307 paddd xmm1, xmm2 | |
| 308 | |
| 309 movd DWORD PTR [rsp], xmm7 ;Sum | |
| 310 movd DWORD PTR [rsp+4], xmm1 ;SSE | |
| 311 | |
| 312 ; return (SSE-((Sum*Sum)>>8)); | |
| 313 movsxd rdx, dword ptr [rsp] | |
| 314 imul rdx, rdx | |
| 315 sar rdx, 8 | |
| 316 movsxd rax, dword ptr [rsp + 4] | |
| 317 sub rax, rdx | |
| 318 | |
| 319 ; begin epilog | |
| 320 add rsp, 16 | |
| 321 pop rdi | |
| 322 pop rsi | |
| 323 RESTORE_GOT | |
| 324 UNSHADOW_ARGS | 211 UNSHADOW_ARGS |
| 325 pop rbp | 212 pop rbp |
| 326 ret | 213 ret |
| 327 | 214 |
| 328 | 215 |
| 329 | 216 |
| 217 |
| 330 ;unsigned int vp8_get8x8var_sse2 | 218 ;unsigned int vp8_get8x8var_sse2 |
| 331 ;( | 219 ;( |
| 332 ; unsigned char * src_ptr, | 220 ; unsigned char * src_ptr, |
| 333 ; int source_stride, | 221 ; int source_stride, |
| 334 ; unsigned char * ref_ptr, | 222 ; unsigned char * ref_ptr, |
| 335 ; int recon_stride, | 223 ; int recon_stride, |
| 336 ; unsigned int * SSE, | 224 ; unsigned int * SSE, |
| 337 ; int * Sum | 225 ; int * Sum |
| 338 ;) | 226 ;) |
| 339 global sym(vp8_get8x8var_sse2) | 227 global sym(vp8_get8x8var_sse2) |
| 340 sym(vp8_get8x8var_sse2): | 228 sym(vp8_get8x8var_sse2): |
| 341 push rbp | 229 push rbp |
| 342 mov rbp, rsp | 230 mov rbp, rsp |
| 343 SHADOW_ARGS_TO_STACK 6 | 231 SHADOW_ARGS_TO_STACK 6 |
| 232 SAVE_XMM 7 |
| 344 GET_GOT rbx | 233 GET_GOT rbx |
| 345 push rsi | 234 push rsi |
| 346 push rdi | 235 push rdi |
| 347 sub rsp, 16 | 236 sub rsp, 16 |
| 348 ; end prolog | 237 ; end prolog |
| 349 | 238 |
| 350 mov rsi, arg(0) ;[src_ptr] | 239 mov rsi, arg(0) ;[src_ptr] |
| 351 mov rdi, arg(2) ;[ref_ptr] | 240 mov rdi, arg(2) ;[ref_ptr] |
| 352 | 241 |
| 353 movsxd rax, DWORD PTR arg(1) ;[source_stride] | 242 movsxd rax, DWORD PTR arg(1) ;[source_stride] |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 499 movsx rcx, dx | 388 movsx rcx, dx |
| 500 | 389 |
| 501 mov dword ptr [rax], ecx | 390 mov dword ptr [rax], ecx |
| 502 movd DWORD PTR [rdi], xmm1 | 391 movd DWORD PTR [rdi], xmm1 |
| 503 | 392 |
| 504 ; begin epilog | 393 ; begin epilog |
| 505 add rsp, 16 | 394 add rsp, 16 |
| 506 pop rdi | 395 pop rdi |
| 507 pop rsi | 396 pop rsi |
| 508 RESTORE_GOT | 397 RESTORE_GOT |
| 398 RESTORE_XMM |
| 509 UNSHADOW_ARGS | 399 UNSHADOW_ARGS |
| 510 pop rbp | 400 pop rbp |
| 511 ret | 401 ret |
| 512 | 402 |
| 513 ;void vp8_filter_block2d_bil_var_sse2 | 403 ;void vp8_filter_block2d_bil_var_sse2 |
| 514 ;( | 404 ;( |
| 515 ; unsigned char *ref_ptr, | 405 ; unsigned char *ref_ptr, |
| 516 ; int ref_pixels_per_line, | 406 ; int ref_pixels_per_line, |
| 517 ; unsigned char *src_ptr, | 407 ; unsigned char *src_ptr, |
| 518 ; int src_pixels_per_line, | 408 ; int src_pixels_per_line, |
| 519 ; unsigned int Height, | 409 ; unsigned int Height, |
| 520 ; int xoffset, | 410 ; int xoffset, |
| 521 ; int yoffset, | 411 ; int yoffset, |
| 522 ; int *sum, | 412 ; int *sum, |
| 523 ; unsigned int *sumsquared;; | 413 ; unsigned int *sumsquared;; |
| 524 ; | 414 ; |
| 525 ;) | 415 ;) |
| 526 global sym(vp8_filter_block2d_bil_var_sse2) | 416 global sym(vp8_filter_block2d_bil_var_sse2) |
| 527 sym(vp8_filter_block2d_bil_var_sse2): | 417 sym(vp8_filter_block2d_bil_var_sse2): |
| 528 push rbp | 418 push rbp |
| 529 mov rbp, rsp | 419 mov rbp, rsp |
| 530 SHADOW_ARGS_TO_STACK 9 | 420 SHADOW_ARGS_TO_STACK 9 |
| 531 SAVE_XMM | 421 SAVE_XMM 7 |
| 532 GET_GOT rbx | 422 GET_GOT rbx |
| 533 push rsi | 423 push rsi |
| 534 push rdi | 424 push rdi |
| 535 push rbx | 425 push rbx |
| 536 ; end prolog | 426 ; end prolog |
| 537 | 427 |
| 538 pxor xmm6, xmm6 ; | 428 pxor xmm6, xmm6 ; |
| 539 pxor xmm7, xmm7 ; | 429 pxor xmm7, xmm7 ; |
| 540 | 430 |
| 541 lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding | 431 lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding |
| (...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 798 ; int src_pixels_per_line, | 688 ; int src_pixels_per_line, |
| 799 ; unsigned int Height, | 689 ; unsigned int Height, |
| 800 ; int *sum, | 690 ; int *sum, |
| 801 ; unsigned int *sumsquared | 691 ; unsigned int *sumsquared |
| 802 ;) | 692 ;) |
| 803 global sym(vp8_half_horiz_vert_variance8x_h_sse2) | 693 global sym(vp8_half_horiz_vert_variance8x_h_sse2) |
| 804 sym(vp8_half_horiz_vert_variance8x_h_sse2): | 694 sym(vp8_half_horiz_vert_variance8x_h_sse2): |
| 805 push rbp | 695 push rbp |
| 806 mov rbp, rsp | 696 mov rbp, rsp |
| 807 SHADOW_ARGS_TO_STACK 7 | 697 SHADOW_ARGS_TO_STACK 7 |
| 698 SAVE_XMM 7 |
| 808 GET_GOT rbx | 699 GET_GOT rbx |
| 809 push rsi | 700 push rsi |
| 810 push rdi | 701 push rdi |
| 811 ; end prolog | 702 ; end prolog |
| 812 | 703 |
| 813 %if ABI_IS_32BIT=0 | 704 %if ABI_IS_32BIT=0 |
| 814 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line | 705 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line |
| 815 movsxd r9, dword ptr arg(3) ;src_pixels_per_line | 706 movsxd r9, dword ptr arg(3) ;src_pixels_per_line |
| 816 %endif | 707 %endif |
| 817 | 708 |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 899 mov rdi, arg(6) ; sumsquared | 790 mov rdi, arg(6) ; sumsquared |
| 900 | 791 |
| 901 movd [rsi], mm2 ; | 792 movd [rsi], mm2 ; |
| 902 movd [rdi], mm4 ; | 793 movd [rdi], mm4 ; |
| 903 | 794 |
| 904 | 795 |
| 905 ; begin epilog | 796 ; begin epilog |
| 906 pop rdi | 797 pop rdi |
| 907 pop rsi | 798 pop rsi |
| 908 RESTORE_GOT | 799 RESTORE_GOT |
| 800 RESTORE_XMM |
| 909 UNSHADOW_ARGS | 801 UNSHADOW_ARGS |
| 910 pop rbp | 802 pop rbp |
| 911 ret | 803 ret |
| 912 | 804 |
| 913 ;void vp8_half_horiz_vert_variance16x_h_sse2 | 805 ;void vp8_half_horiz_vert_variance16x_h_sse2 |
| 914 ;( | 806 ;( |
| 915 ; unsigned char *ref_ptr, | 807 ; unsigned char *ref_ptr, |
| 916 ; int ref_pixels_per_line, | 808 ; int ref_pixels_per_line, |
| 917 ; unsigned char *src_ptr, | 809 ; unsigned char *src_ptr, |
| 918 ; int src_pixels_per_line, | 810 ; int src_pixels_per_line, |
| 919 ; unsigned int Height, | 811 ; unsigned int Height, |
| 920 ; int *sum, | 812 ; int *sum, |
| 921 ; unsigned int *sumsquared | 813 ; unsigned int *sumsquared |
| 922 ;) | 814 ;) |
| 923 global sym(vp8_half_horiz_vert_variance16x_h_sse2) | 815 global sym(vp8_half_horiz_vert_variance16x_h_sse2) |
| 924 sym(vp8_half_horiz_vert_variance16x_h_sse2): | 816 sym(vp8_half_horiz_vert_variance16x_h_sse2): |
| 925 push rbp | 817 push rbp |
| 926 mov rbp, rsp | 818 mov rbp, rsp |
| 927 SHADOW_ARGS_TO_STACK 7 | 819 SHADOW_ARGS_TO_STACK 7 |
| 928 SAVE_XMM | 820 SAVE_XMM 7 |
| 929 GET_GOT rbx | 821 GET_GOT rbx |
| 930 push rsi | 822 push rsi |
| 931 push rdi | 823 push rdi |
| 932 ; end prolog | 824 ; end prolog |
| 933 | 825 |
| 934 pxor xmm6, xmm6 ; error accumulator | 826 pxor xmm6, xmm6 ; error accumulator |
| 935 pxor xmm7, xmm7 ; sse eaccumulator | 827 pxor xmm7, xmm7 ; sse eaccumulator |
| 936 mov rsi, arg(0) ;ref_ptr ; | 828 mov rsi, arg(0) ;ref_ptr ; |
| 937 | 829 |
| 938 mov rdi, arg(2) ;src_ptr ; | 830 mov rdi, arg(2) ;src_ptr ; |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1034 ; int src_pixels_per_line, | 926 ; int src_pixels_per_line, |
| 1035 ; unsigned int Height, | 927 ; unsigned int Height, |
| 1036 ; int *sum, | 928 ; int *sum, |
| 1037 ; unsigned int *sumsquared | 929 ; unsigned int *sumsquared |
| 1038 ;) | 930 ;) |
| 1039 global sym(vp8_half_vert_variance8x_h_sse2) | 931 global sym(vp8_half_vert_variance8x_h_sse2) |
| 1040 sym(vp8_half_vert_variance8x_h_sse2): | 932 sym(vp8_half_vert_variance8x_h_sse2): |
| 1041 push rbp | 933 push rbp |
| 1042 mov rbp, rsp | 934 mov rbp, rsp |
| 1043 SHADOW_ARGS_TO_STACK 7 | 935 SHADOW_ARGS_TO_STACK 7 |
| 936 SAVE_XMM 7 |
| 1044 GET_GOT rbx | 937 GET_GOT rbx |
| 1045 push rsi | 938 push rsi |
| 1046 push rdi | 939 push rdi |
| 1047 ; end prolog | 940 ; end prolog |
| 1048 | 941 |
| 1049 %if ABI_IS_32BIT=0 | 942 %if ABI_IS_32BIT=0 |
| 1050 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line | 943 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line |
| 1051 movsxd r9, dword ptr arg(3) ;src_pixels_per_line | 944 movsxd r9, dword ptr arg(3) ;src_pixels_per_line |
| 1052 %endif | 945 %endif |
| 1053 | 946 |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1120 mov rdi, arg(6) ; sumsquared | 1013 mov rdi, arg(6) ; sumsquared |
| 1121 | 1014 |
| 1122 movd [rsi], mm2 ; | 1015 movd [rsi], mm2 ; |
| 1123 movd [rdi], mm4 ; | 1016 movd [rdi], mm4 ; |
| 1124 | 1017 |
| 1125 | 1018 |
| 1126 ; begin epilog | 1019 ; begin epilog |
| 1127 pop rdi | 1020 pop rdi |
| 1128 pop rsi | 1021 pop rsi |
| 1129 RESTORE_GOT | 1022 RESTORE_GOT |
| 1023 RESTORE_XMM |
| 1130 UNSHADOW_ARGS | 1024 UNSHADOW_ARGS |
| 1131 pop rbp | 1025 pop rbp |
| 1132 ret | 1026 ret |
| 1133 | 1027 |
| 1134 ;void vp8_half_vert_variance16x_h_sse2 | 1028 ;void vp8_half_vert_variance16x_h_sse2 |
| 1135 ;( | 1029 ;( |
| 1136 ; unsigned char *ref_ptr, | 1030 ; unsigned char *ref_ptr, |
| 1137 ; int ref_pixels_per_line, | 1031 ; int ref_pixels_per_line, |
| 1138 ; unsigned char *src_ptr, | 1032 ; unsigned char *src_ptr, |
| 1139 ; int src_pixels_per_line, | 1033 ; int src_pixels_per_line, |
| 1140 ; unsigned int Height, | 1034 ; unsigned int Height, |
| 1141 ; int *sum, | 1035 ; int *sum, |
| 1142 ; unsigned int *sumsquared | 1036 ; unsigned int *sumsquared |
| 1143 ;) | 1037 ;) |
| 1144 global sym(vp8_half_vert_variance16x_h_sse2) | 1038 global sym(vp8_half_vert_variance16x_h_sse2) |
| 1145 sym(vp8_half_vert_variance16x_h_sse2): | 1039 sym(vp8_half_vert_variance16x_h_sse2): |
| 1146 push rbp | 1040 push rbp |
| 1147 mov rbp, rsp | 1041 mov rbp, rsp |
| 1148 SHADOW_ARGS_TO_STACK 7 | 1042 SHADOW_ARGS_TO_STACK 7 |
| 1149 SAVE_XMM | 1043 SAVE_XMM 7 |
| 1150 GET_GOT rbx | 1044 GET_GOT rbx |
| 1151 push rsi | 1045 push rsi |
| 1152 push rdi | 1046 push rdi |
| 1153 ; end prolog | 1047 ; end prolog |
| 1154 | 1048 |
| 1155 pxor xmm6, xmm6 ; error accumulator | 1049 pxor xmm6, xmm6 ; error accumulator |
| 1156 pxor xmm7, xmm7 ; sse eaccumulator | 1050 pxor xmm7, xmm7 ; sse eaccumulator |
| 1157 mov rsi, arg(0) ;ref_ptr | 1051 mov rsi, arg(0) ;ref_ptr |
| 1158 | 1052 |
| 1159 mov rdi, arg(2) ;src_ptr | 1053 mov rdi, arg(2) ;src_ptr |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1247 ; int src_pixels_per_line, | 1141 ; int src_pixels_per_line, |
| 1248 ; unsigned int Height, | 1142 ; unsigned int Height, |
| 1249 ; int *sum, | 1143 ; int *sum, |
| 1250 ; unsigned int *sumsquared | 1144 ; unsigned int *sumsquared |
| 1251 ;) | 1145 ;) |
| 1252 global sym(vp8_half_horiz_variance8x_h_sse2) | 1146 global sym(vp8_half_horiz_variance8x_h_sse2) |
| 1253 sym(vp8_half_horiz_variance8x_h_sse2): | 1147 sym(vp8_half_horiz_variance8x_h_sse2): |
| 1254 push rbp | 1148 push rbp |
| 1255 mov rbp, rsp | 1149 mov rbp, rsp |
| 1256 SHADOW_ARGS_TO_STACK 7 | 1150 SHADOW_ARGS_TO_STACK 7 |
| 1151 SAVE_XMM 7 |
| 1257 GET_GOT rbx | 1152 GET_GOT rbx |
| 1258 push rsi | 1153 push rsi |
| 1259 push rdi | 1154 push rdi |
| 1260 ; end prolog | 1155 ; end prolog |
| 1261 | 1156 |
| 1262 %if ABI_IS_32BIT=0 | 1157 %if ABI_IS_32BIT=0 |
| 1263 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line | 1158 movsxd r8, dword ptr arg(1) ;ref_pixels_per_line |
| 1264 movsxd r9, dword ptr arg(3) ;src_pixels_per_line | 1159 movsxd r9, dword ptr arg(3) ;src_pixels_per_line |
| 1265 %endif | 1160 %endif |
| 1266 | 1161 |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1331 mov rdi, arg(6) ; sumsquared | 1226 mov rdi, arg(6) ; sumsquared |
| 1332 | 1227 |
| 1333 movd [rsi], mm2 ; | 1228 movd [rsi], mm2 ; |
| 1334 movd [rdi], mm4 ; | 1229 movd [rdi], mm4 ; |
| 1335 | 1230 |
| 1336 | 1231 |
| 1337 ; begin epilog | 1232 ; begin epilog |
| 1338 pop rdi | 1233 pop rdi |
| 1339 pop rsi | 1234 pop rsi |
| 1340 RESTORE_GOT | 1235 RESTORE_GOT |
| 1236 RESTORE_XMM |
| 1341 UNSHADOW_ARGS | 1237 UNSHADOW_ARGS |
| 1342 pop rbp | 1238 pop rbp |
| 1343 ret | 1239 ret |
| 1344 | 1240 |
| 1345 ;void vp8_half_horiz_variance16x_h_sse2 | 1241 ;void vp8_half_horiz_variance16x_h_sse2 |
| 1346 ;( | 1242 ;( |
| 1347 ; unsigned char *ref_ptr, | 1243 ; unsigned char *ref_ptr, |
| 1348 ; int ref_pixels_per_line, | 1244 ; int ref_pixels_per_line, |
| 1349 ; unsigned char *src_ptr, | 1245 ; unsigned char *src_ptr, |
| 1350 ; int src_pixels_per_line, | 1246 ; int src_pixels_per_line, |
| 1351 ; unsigned int Height, | 1247 ; unsigned int Height, |
| 1352 ; int *sum, | 1248 ; int *sum, |
| 1353 ; unsigned int *sumsquared | 1249 ; unsigned int *sumsquared |
| 1354 ;) | 1250 ;) |
| 1355 global sym(vp8_half_horiz_variance16x_h_sse2) | 1251 global sym(vp8_half_horiz_variance16x_h_sse2) |
| 1356 sym(vp8_half_horiz_variance16x_h_sse2): | 1252 sym(vp8_half_horiz_variance16x_h_sse2): |
| 1357 push rbp | 1253 push rbp |
| 1358 mov rbp, rsp | 1254 mov rbp, rsp |
| 1359 SHADOW_ARGS_TO_STACK 7 | 1255 SHADOW_ARGS_TO_STACK 7 |
| 1360 SAVE_XMM | 1256 SAVE_XMM 7 |
| 1361 GET_GOT rbx | 1257 GET_GOT rbx |
| 1362 push rsi | 1258 push rsi |
| 1363 push rdi | 1259 push rdi |
| 1364 ; end prolog | 1260 ; end prolog |
| 1365 | 1261 |
| 1366 pxor xmm6, xmm6 ; error accumulator | 1262 pxor xmm6, xmm6 ; error accumulator |
| 1367 pxor xmm7, xmm7 ; sse eaccumulator | 1263 pxor xmm7, xmm7 ; sse eaccumulator |
| 1368 mov rsi, arg(0) ;ref_ptr ; | 1264 mov rsi, arg(0) ;ref_ptr ; |
| 1369 | 1265 |
| 1370 mov rdi, arg(2) ;src_ptr ; | 1266 mov rdi, arg(2) ;src_ptr ; |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1454 align 16 | 1350 align 16 |
| 1455 vp8_bilinear_filters_sse2: | 1351 vp8_bilinear_filters_sse2: |
| 1456 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 | 1352 dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 |
| 1457 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 | 1353 dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 |
| 1458 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 | 1354 dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 |
| 1459 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 | 1355 dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 |
| 1460 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 | 1356 dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 |
| 1461 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 | 1357 dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 |
| 1462 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 | 1358 dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 |
| 1463 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 | 1359 dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 |
| OLD | NEW |