| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 21 matching lines...) |
| 32 ; unsigned char *output_ptr, | 32 ; unsigned char *output_ptr, |
| 33 ; unsigned int output_pitch, | 33 ; unsigned int output_pitch, |
| 34 ; unsigned int output_height, | 34 ; unsigned int output_height, |
| 35 ; unsigned int vp8_filter_index | 35 ; unsigned int vp8_filter_index |
| 36 ;) | 36 ;) |
| 37 global sym(vp8_filter_block1d8_h6_ssse3) | 37 global sym(vp8_filter_block1d8_h6_ssse3) |
| 38 sym(vp8_filter_block1d8_h6_ssse3): | 38 sym(vp8_filter_block1d8_h6_ssse3): |
| 39 push rbp | 39 push rbp |
| 40 mov rbp, rsp | 40 mov rbp, rsp |
| 41 SHADOW_ARGS_TO_STACK 6 | 41 SHADOW_ARGS_TO_STACK 6 |
| 42 SAVE_XMM 7 |
| 42 GET_GOT rbx | 43 GET_GOT rbx |
| 43 push rsi | 44 push rsi |
| 44 push rdi | 45 push rdi |
| 45 ; end prolog | 46 ; end prolog |
| 46 | 47 |
| 47 movsxd rdx, DWORD PTR arg(5) ;table index | 48 movsxd rdx, DWORD PTR arg(5) ;table index |
| 48 xor rsi, rsi | 49 xor rsi, rsi |
| 49 shl rdx, 4 | 50 shl rdx, 4 |
| 50 | 51 |
| 51 movdqa xmm7, [GLOBAL(rd)] | 52 movdqa xmm7, [GLOBAL(rd)] |
| (...skipping 48 matching lines...) |
| 100 | 101 |
| 101 packuswb xmm0, xmm0 | 102 packuswb xmm0, xmm0 |
| 102 | 103 |
| 103 movq MMWORD Ptr [rdi], xmm0 | 104 movq MMWORD Ptr [rdi], xmm0 |
| 104 jnz filter_block1d8_h6_rowloop_ssse3 | 105 jnz filter_block1d8_h6_rowloop_ssse3 |
| 105 | 106 |
| 106 ; begin epilog | 107 ; begin epilog |
| 107 pop rdi | 108 pop rdi |
| 108 pop rsi | 109 pop rsi |
| 109 RESTORE_GOT | 110 RESTORE_GOT |
| 111 RESTORE_XMM |
| 110 UNSHADOW_ARGS | 112 UNSHADOW_ARGS |
| 111 pop rbp | 113 pop rbp |
| 112 ret | 114 ret |
| 113 | 115 |
| 114 vp8_filter_block1d8_h4_ssse3: | 116 vp8_filter_block1d8_h4_ssse3: |
| 115 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 | 117 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 |
| 116 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 | 118 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 |
| 117 | 119 |
| 118 movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] | 120 movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] |
| 119 movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] | 121 movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] |
| (...skipping 35 matching lines...) |
| 155 packuswb xmm0, xmm0 | 157 packuswb xmm0, xmm0 |
| 156 | 158 |
| 157 movq MMWORD Ptr [rdi], xmm0 | 159 movq MMWORD Ptr [rdi], xmm0 |
| 158 | 160 |
| 159 jnz filter_block1d8_h4_rowloop_ssse3 | 161 jnz filter_block1d8_h4_rowloop_ssse3 |
| 160 | 162 |
| 161 ; begin epilog | 163 ; begin epilog |
| 162 pop rdi | 164 pop rdi |
| 163 pop rsi | 165 pop rsi |
| 164 RESTORE_GOT | 166 RESTORE_GOT |
| 167 RESTORE_XMM |
| 165 UNSHADOW_ARGS | 168 UNSHADOW_ARGS |
| 166 pop rbp | 169 pop rbp |
| 167 ret | 170 ret |
| 168 ;void vp8_filter_block1d16_h6_ssse3 | 171 ;void vp8_filter_block1d16_h6_ssse3 |
| 169 ;( | 172 ;( |
| 170 ; unsigned char *src_ptr, | 173 ; unsigned char *src_ptr, |
| 171 ; unsigned int src_pixels_per_line, | 174 ; unsigned int src_pixels_per_line, |
| 172 ; unsigned char *output_ptr, | 175 ; unsigned char *output_ptr, |
| 173 ; unsigned int output_pitch, | 176 ; unsigned int output_pitch, |
| 174 ; unsigned int output_height, | 177 ; unsigned int output_height, |
| 175 ; unsigned int vp8_filter_index | 178 ; unsigned int vp8_filter_index |
| 176 ;) | 179 ;) |
| 177 global sym(vp8_filter_block1d16_h6_ssse3) | 180 global sym(vp8_filter_block1d16_h6_ssse3) |
| 178 sym(vp8_filter_block1d16_h6_ssse3): | 181 sym(vp8_filter_block1d16_h6_ssse3): |
| 179 push rbp | 182 push rbp |
| 180 mov rbp, rsp | 183 mov rbp, rsp |
| 181 SHADOW_ARGS_TO_STACK 6 | 184 SHADOW_ARGS_TO_STACK 6 |
| 182 SAVE_XMM | 185 SAVE_XMM 7 |
| 183 GET_GOT rbx | 186 GET_GOT rbx |
| 184 push rsi | 187 push rsi |
| 185 push rdi | 188 push rdi |
| 186 ; end prolog | 189 ; end prolog |
| 187 | 190 |
| 188 movsxd rdx, DWORD PTR arg(5) ;table index | 191 movsxd rdx, DWORD PTR arg(5) ;table index |
| 189 xor rsi, rsi | 192 xor rsi, rsi |
| 190 shl rdx, 4 ; | 193 shl rdx, 4 ; |
| 191 | 194 |
| 192 lea rax, [GLOBAL(k0_k5)] | 195 lea rax, [GLOBAL(k0_k5)] |
| 193 add rax, rdx | 196 add rax, rdx |
| 194 | 197 |
| 195 mov rdi, arg(2) ;output_ptr | 198 mov rdi, arg(2) ;output_ptr |
| 196 | 199 |
| 197 ;; | |
| 198 ;; cmp esi, DWORD PTR [rax] | |
| 199 ;; je vp8_filter_block1d16_h4_ssse3 | |
| 200 | |
| 201 mov rsi, arg(0) ;src_ptr | 200 mov rsi, arg(0) ;src_ptr |
| 202 | 201 |
| 203 movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 | 202 movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 |
| 204 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 | 203 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 |
| 205 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 | 204 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 |
| 206 | 205 |
| 207 movsxd rax, dword ptr arg(1) ;src_pixels_per_line | 206 movsxd rax, dword ptr arg(1) ;src_pixels_per_line |
| 208 movsxd rcx, dword ptr arg(4) ;output_height | 207 movsxd rcx, dword ptr arg(4) ;output_height |
| 209 movsxd rdx, dword ptr arg(3) ;output_pitch | 208 movsxd rdx, dword ptr arg(3) ;output_pitch |
| 210 | 209 |
| (...skipping 53 matching lines...) |
| 264 movdqa XMMWORD Ptr [rdi], xmm0 | 263 movdqa XMMWORD Ptr [rdi], xmm0 |
| 265 | 264 |
| 266 lea rdi, [rdi + rdx] | 265 lea rdi, [rdi + rdx] |
| 267 dec rcx | 266 dec rcx |
| 268 jnz filter_block1d16_h6_rowloop_ssse3 | 267 jnz filter_block1d16_h6_rowloop_ssse3 |
| 269 | 268 |
| 270 ; begin epilog | 269 ; begin epilog |
| 271 pop rdi | 270 pop rdi |
| 272 pop rsi | 271 pop rsi |
| 273 RESTORE_GOT | 272 RESTORE_GOT |
| 274 UNSHADOW_ARGS | 273 RESTORE_XMM |
| 275 pop rbp | |
| 276 ret | |
| 277 | |
| 278 vp8_filter_block1d16_h4_ssse3: | |
| 279 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 | |
| 280 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 | |
| 281 | |
| 282 mov rsi, arg(0) ;src_ptr | |
| 283 movsxd rax, dword ptr arg(1) ;src_pixels_per_line | |
| 284 movsxd rcx, dword ptr arg(4) ;output_height | |
| 285 movsxd rdx, dword ptr arg(3) ;output_pitch | |
| 286 | |
| 287 filter_block1d16_h4_rowloop_ssse3: | |
| 288 movdqu xmm1, XMMWORD PTR [rsi - 2] | |
| 289 | |
| 290 movdqa xmm2, xmm1 | |
| 291 pshufb xmm1, [GLOBAL(shuf2b)] | |
| 292 pshufb xmm2, [GLOBAL(shuf3b)] | |
| 293 pmaddubsw xmm1, xmm5 | |
| 294 | |
| 295 movdqu xmm3, XMMWORD PTR [rsi + 6] | |
| 296 | |
| 297 pmaddubsw xmm2, xmm6 | |
| 298 movdqa xmm0, xmm3 | |
| 299 pshufb xmm3, [GLOBAL(shuf3b)] | |
| 300 pshufb xmm0, [GLOBAL(shuf2b)] | |
| 301 | |
| 302 paddsw xmm1, [GLOBAL(rd)] | |
| 303 paddsw xmm1, xmm2 | |
| 304 | |
| 305 pmaddubsw xmm0, xmm5 | |
| 306 pmaddubsw xmm3, xmm6 | |
| 307 | |
| 308 psraw xmm1, 7 | |
| 309 packuswb xmm1, xmm1 | |
| 310 lea rsi, [rsi + rax] | |
| 311 paddsw xmm3, xmm0 | |
| 312 paddsw xmm3, [GLOBAL(rd)] | |
| 313 psraw xmm3, 7 | |
| 314 packuswb xmm3, xmm3 | |
| 315 | |
| 316 punpcklqdq xmm1, xmm3 | |
| 317 | |
| 318 movdqa XMMWORD Ptr [rdi], xmm1 | |
| 319 | |
| 320 add rdi, rdx | |
| 321 dec rcx | |
| 322 jnz filter_block1d16_h4_rowloop_ssse3 | |
| 323 | |
| 324 | |
| 325 ; begin epilog | |
| 326 pop rdi | |
| 327 pop rsi | |
| 328 RESTORE_GOT | |
| 329 UNSHADOW_ARGS | 274 UNSHADOW_ARGS |
| 330 pop rbp | 275 pop rbp |
| 331 ret | 276 ret |
| 332 | 277 |
| 333 ;void vp8_filter_block1d4_h6_ssse3 | 278 ;void vp8_filter_block1d4_h6_ssse3 |
| 334 ;( | 279 ;( |
| 335 ; unsigned char *src_ptr, | 280 ; unsigned char *src_ptr, |
| 336 ; unsigned int src_pixels_per_line, | 281 ; unsigned int src_pixels_per_line, |
| 337 ; unsigned char *output_ptr, | 282 ; unsigned char *output_ptr, |
| 338 ; unsigned int output_pitch, | 283 ; unsigned int output_pitch, |
| 339 ; unsigned int output_height, | 284 ; unsigned int output_height, |
| 340 ; unsigned int vp8_filter_index | 285 ; unsigned int vp8_filter_index |
| 341 ;) | 286 ;) |
| 342 global sym(vp8_filter_block1d4_h6_ssse3) | 287 global sym(vp8_filter_block1d4_h6_ssse3) |
| 343 sym(vp8_filter_block1d4_h6_ssse3): | 288 sym(vp8_filter_block1d4_h6_ssse3): |
| 344 push rbp | 289 push rbp |
| 345 mov rbp, rsp | 290 mov rbp, rsp |
| 346 SHADOW_ARGS_TO_STACK 6 | 291 SHADOW_ARGS_TO_STACK 6 |
| 292 SAVE_XMM 7 |
| 347 GET_GOT rbx | 293 GET_GOT rbx |
| 348 push rsi | 294 push rsi |
| 349 push rdi | 295 push rdi |
| 350 ; end prolog | 296 ; end prolog |
| 351 | 297 |
| 352 movsxd rdx, DWORD PTR arg(5) ;table index | 298 movsxd rdx, DWORD PTR arg(5) ;table index |
| 353 xor rsi, rsi | 299 xor rsi, rsi |
| 354 shl rdx, 4 ; | 300 shl rdx, 4 ; |
| 355 | 301 |
| 356 lea rax, [GLOBAL(k0_k5)] | 302 lea rax, [GLOBAL(k0_k5)] |
| (...skipping 87 matching lines...) |
| 444 movd DWORD PTR [rdi], xmm1 | 390 movd DWORD PTR [rdi], xmm1 |
| 445 | 391 |
| 446 add rdi, rdx | 392 add rdi, rdx |
| 447 dec rcx | 393 dec rcx |
| 448 jnz filter_block1d4_h4_rowloop_ssse3 | 394 jnz filter_block1d4_h4_rowloop_ssse3 |
| 449 | 395 |
| 450 ; begin epilog | 396 ; begin epilog |
| 451 pop rdi | 397 pop rdi |
| 452 pop rsi | 398 pop rsi |
| 453 RESTORE_GOT | 399 RESTORE_GOT |
| 400 RESTORE_XMM |
| 454 UNSHADOW_ARGS | 401 UNSHADOW_ARGS |
| 455 pop rbp | 402 pop rbp |
| 456 ret | 403 ret |
| 457 | 404 |
| 458 | 405 |
| 459 | 406 |
| 460 ;void vp8_filter_block1d16_v6_ssse3 | 407 ;void vp8_filter_block1d16_v6_ssse3 |
| 461 ;( | 408 ;( |
| 462 ; unsigned char *src_ptr, | 409 ; unsigned char *src_ptr, |
| 463 ; unsigned int src_pitch, | 410 ; unsigned int src_pitch, |
| 464 ; unsigned char *output_ptr, | 411 ; unsigned char *output_ptr, |
| 465 ; unsigned int out_pitch, | 412 ; unsigned int out_pitch, |
| 466 ; unsigned int output_height, | 413 ; unsigned int output_height, |
| 467 ; unsigned int vp8_filter_index | 414 ; unsigned int vp8_filter_index |
| 468 ;) | 415 ;) |
| 469 global sym(vp8_filter_block1d16_v6_ssse3) | 416 global sym(vp8_filter_block1d16_v6_ssse3) |
| 470 sym(vp8_filter_block1d16_v6_ssse3): | 417 sym(vp8_filter_block1d16_v6_ssse3): |
| 471 push rbp | 418 push rbp |
| 472 mov rbp, rsp | 419 mov rbp, rsp |
| 473 SHADOW_ARGS_TO_STACK 6 | 420 SHADOW_ARGS_TO_STACK 6 |
| 421 SAVE_XMM 7 |
| 474 GET_GOT rbx | 422 GET_GOT rbx |
| 475 push rsi | 423 push rsi |
| 476 push rdi | 424 push rdi |
| 477 ; end prolog | 425 ; end prolog |
| 478 | 426 |
| 479 movsxd rdx, DWORD PTR arg(5) ;table index | 427 movsxd rdx, DWORD PTR arg(5) ;table index |
| 480 xor rsi, rsi | 428 xor rsi, rsi |
| 481 shl rdx, 4 ; | 429 shl rdx, 4 ; |
| 482 | 430 |
| 483 lea rax, [GLOBAL(k0_k5)] | 431 lea rax, [GLOBAL(k0_k5)] |
| (...skipping 75 matching lines...) |
| 559 %else | 507 %else |
| 560 add rdi, r8 | 508 add rdi, r8 |
| 561 %endif | 509 %endif |
| 562 dec rcx | 510 dec rcx |
| 563 jnz vp8_filter_block1d16_v6_ssse3_loop | 511 jnz vp8_filter_block1d16_v6_ssse3_loop |
| 564 | 512 |
| 565 ; begin epilog | 513 ; begin epilog |
| 566 pop rdi | 514 pop rdi |
| 567 pop rsi | 515 pop rsi |
| 568 RESTORE_GOT | 516 RESTORE_GOT |
| 517 RESTORE_XMM |
| 569 UNSHADOW_ARGS | 518 UNSHADOW_ARGS |
| 570 pop rbp | 519 pop rbp |
| 571 ret | 520 ret |
| 572 | 521 |
| 573 vp8_filter_block1d16_v4_ssse3: | 522 vp8_filter_block1d16_v4_ssse3: |
| 574 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 | 523 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 |
| 575 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 | 524 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 |
| 576 | 525 |
| 577 mov rsi, arg(0) ;src_ptr | 526 mov rsi, arg(0) ;src_ptr |
| 578 movsxd rdx, DWORD PTR arg(1) ;pixels_per_line | 527 movsxd rdx, DWORD PTR arg(1) ;pixels_per_line |
| (...skipping 52 matching lines...) |
| 631 %else | 580 %else |
| 632 add rdi, r8 | 581 add rdi, r8 |
| 633 %endif | 582 %endif |
| 634 dec rcx | 583 dec rcx |
| 635 jnz vp8_filter_block1d16_v4_ssse3_loop | 584 jnz vp8_filter_block1d16_v4_ssse3_loop |
| 636 | 585 |
| 637 ; begin epilog | 586 ; begin epilog |
| 638 pop rdi | 587 pop rdi |
| 639 pop rsi | 588 pop rsi |
| 640 RESTORE_GOT | 589 RESTORE_GOT |
| 590 RESTORE_XMM |
| 641 UNSHADOW_ARGS | 591 UNSHADOW_ARGS |
| 642 pop rbp | 592 pop rbp |
| 643 ret | 593 ret |
| 644 | 594 |
| 645 ;void vp8_filter_block1d8_v6_ssse3 | 595 ;void vp8_filter_block1d8_v6_ssse3 |
| 646 ;( | 596 ;( |
| 647 ; unsigned char *src_ptr, | 597 ; unsigned char *src_ptr, |
| 648 ; unsigned int src_pitch, | 598 ; unsigned int src_pitch, |
| 649 ; unsigned char *output_ptr, | 599 ; unsigned char *output_ptr, |
| 650 ; unsigned int out_pitch, | 600 ; unsigned int out_pitch, |
| 651 ; unsigned int output_height, | 601 ; unsigned int output_height, |
| 652 ; unsigned int vp8_filter_index | 602 ; unsigned int vp8_filter_index |
| 653 ;) | 603 ;) |
| 654 global sym(vp8_filter_block1d8_v6_ssse3) | 604 global sym(vp8_filter_block1d8_v6_ssse3) |
| 655 sym(vp8_filter_block1d8_v6_ssse3): | 605 sym(vp8_filter_block1d8_v6_ssse3): |
| 656 push rbp | 606 push rbp |
| 657 mov rbp, rsp | 607 mov rbp, rsp |
| 658 SHADOW_ARGS_TO_STACK 6 | 608 SHADOW_ARGS_TO_STACK 6 |
| 609 SAVE_XMM 7 |
| 659 GET_GOT rbx | 610 GET_GOT rbx |
| 660 push rsi | 611 push rsi |
| 661 push rdi | 612 push rdi |
| 662 ; end prolog | 613 ; end prolog |
| 663 | 614 |
| 664 movsxd rdx, DWORD PTR arg(5) ;table index | 615 movsxd rdx, DWORD PTR arg(5) ;table index |
| 665 xor rsi, rsi | 616 xor rsi, rsi |
| 666 shl rdx, 4 ; | 617 shl rdx, 4 ; |
| 667 | 618 |
| 668 lea rax, [GLOBAL(k0_k5)] | 619 lea rax, [GLOBAL(k0_k5)] |
| (...skipping 52 matching lines...) |
| 721 %else | 672 %else |
| 722 add rdi, r8 | 673 add rdi, r8 |
| 723 %endif | 674 %endif |
| 724 dec rcx | 675 dec rcx |
| 725 jnz vp8_filter_block1d8_v6_ssse3_loop | 676 jnz vp8_filter_block1d8_v6_ssse3_loop |
| 726 | 677 |
| 727 ; begin epilog | 678 ; begin epilog |
| 728 pop rdi | 679 pop rdi |
| 729 pop rsi | 680 pop rsi |
| 730 RESTORE_GOT | 681 RESTORE_GOT |
| 682 RESTORE_XMM |
| 731 UNSHADOW_ARGS | 683 UNSHADOW_ARGS |
| 732 pop rbp | 684 pop rbp |
| 733 ret | 685 ret |
| 734 | 686 |
| 735 vp8_filter_block1d8_v4_ssse3: | 687 vp8_filter_block1d8_v4_ssse3: |
| 736 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 | 688 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 |
| 737 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 | 689 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 |
| 738 movdqa xmm5, [GLOBAL(rd)] | 690 movdqa xmm5, [GLOBAL(rd)] |
| 739 | 691 |
| 740 mov rsi, arg(0) ;src_ptr | 692 mov rsi, arg(0) ;src_ptr |
| (...skipping 28 matching lines...) |
| 769 %else | 721 %else |
| 770 add rdi, r8 | 722 add rdi, r8 |
| 771 %endif | 723 %endif |
| 772 dec rcx | 724 dec rcx |
| 773 jnz vp8_filter_block1d8_v4_ssse3_loop | 725 jnz vp8_filter_block1d8_v4_ssse3_loop |
| 774 | 726 |
| 775 ; begin epilog | 727 ; begin epilog |
| 776 pop rdi | 728 pop rdi |
| 777 pop rsi | 729 pop rsi |
| 778 RESTORE_GOT | 730 RESTORE_GOT |
| 731 RESTORE_XMM |
| 779 UNSHADOW_ARGS | 732 UNSHADOW_ARGS |
| 780 pop rbp | 733 pop rbp |
| 781 ret | 734 ret |
| 782 ;void vp8_filter_block1d4_v6_ssse3 | 735 ;void vp8_filter_block1d4_v6_ssse3 |
| 783 ;( | 736 ;( |
| 784 ; unsigned char *src_ptr, | 737 ; unsigned char *src_ptr, |
| 785 ; unsigned int src_pitch, | 738 ; unsigned int src_pitch, |
| 786 ; unsigned char *output_ptr, | 739 ; unsigned char *output_ptr, |
| 787 ; unsigned int out_pitch, | 740 ; unsigned int out_pitch, |
| 788 ; unsigned int output_height, | 741 ; unsigned int output_height, |
| (...skipping 136 matching lines...) |
| 925 ; int xoffset, | 878 ; int xoffset, |
| 926 ; int yoffset, | 879 ; int yoffset, |
| 927 ; unsigned char *dst_ptr, | 880 ; unsigned char *dst_ptr, |
| 928 ; int dst_pitch | 881 ; int dst_pitch |
| 929 ;) | 882 ;) |
| 930 global sym(vp8_bilinear_predict16x16_ssse3) | 883 global sym(vp8_bilinear_predict16x16_ssse3) |
| 931 sym(vp8_bilinear_predict16x16_ssse3): | 884 sym(vp8_bilinear_predict16x16_ssse3): |
| 932 push rbp | 885 push rbp |
| 933 mov rbp, rsp | 886 mov rbp, rsp |
| 934 SHADOW_ARGS_TO_STACK 6 | 887 SHADOW_ARGS_TO_STACK 6 |
| 935 SAVE_XMM | 888 SAVE_XMM 7 |
| 936 GET_GOT rbx | 889 GET_GOT rbx |
| 937 push rsi | 890 push rsi |
| 938 push rdi | 891 push rdi |
| 939 ; end prolog | 892 ; end prolog |
| 940 | 893 |
| 941 lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] | 894 lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] |
| 942 movsxd rax, dword ptr arg(2) ; xoffset | 895 movsxd rax, dword ptr arg(2) ; xoffset |
| 943 | 896 |
| 944 cmp rax, 0 ; skip first_pass filter if xoffset=0 | 897 cmp rax, 0 ; skip first_pass filter if xoffset=0 |
| 945 je b16x16_sp_only | 898 je b16x16_sp_only |
| (...skipping 242 matching lines...) |
| 1188 ; int xoffset, | 1141 ; int xoffset, |
| 1189 ; int yoffset, | 1142 ; int yoffset, |
| 1190 ; unsigned char *dst_ptr, | 1143 ; unsigned char *dst_ptr, |
| 1191 ; int dst_pitch | 1144 ; int dst_pitch |
| 1192 ;) | 1145 ;) |
| 1193 global sym(vp8_bilinear_predict8x8_ssse3) | 1146 global sym(vp8_bilinear_predict8x8_ssse3) |
| 1194 sym(vp8_bilinear_predict8x8_ssse3): | 1147 sym(vp8_bilinear_predict8x8_ssse3): |
| 1195 push rbp | 1148 push rbp |
| 1196 mov rbp, rsp | 1149 mov rbp, rsp |
| 1197 SHADOW_ARGS_TO_STACK 6 | 1150 SHADOW_ARGS_TO_STACK 6 |
| 1198 SAVE_XMM | 1151 SAVE_XMM 7 |
| 1199 GET_GOT rbx | 1152 GET_GOT rbx |
| 1200 push rsi | 1153 push rsi |
| 1201 push rdi | 1154 push rdi |
| 1202 ; end prolog | 1155 ; end prolog |
| 1203 | 1156 |
| 1204 ALIGN_STACK 16, rax | 1157 ALIGN_STACK 16, rax |
| 1205 sub rsp, 144 ; reserve 144 bytes | 1158 sub rsp, 144 ; reserve 144 bytes |
| 1206 | 1159 |
| 1207 lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] | 1160 lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] |
| 1208 | 1161 |
| (...skipping 336 matching lines...) |
| 1545 vp8_bilinear_filters_ssse3: | 1498 vp8_bilinear_filters_ssse3: |
| 1546 times 8 db 128, 0 | 1499 times 8 db 128, 0 |
| 1547 times 8 db 112, 16 | 1500 times 8 db 112, 16 |
| 1548 times 8 db 96, 32 | 1501 times 8 db 96, 32 |
| 1549 times 8 db 80, 48 | 1502 times 8 db 80, 48 |
| 1550 times 8 db 64, 64 | 1503 times 8 db 64, 64 |
| 1551 times 8 db 48, 80 | 1504 times 8 db 48, 80 |
| 1552 times 8 db 32, 96 | 1505 times 8 db 32, 96 |
| 1553 times 8 db 16, 112 | 1506 times 8 db 16, 112 |
| 1554 | 1507 |
| OLD | NEW |