Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/x86/vp8dsp.asm

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ;****************************************************************************** 1 ;******************************************************************************
2 ;* VP8 MMXEXT optimizations 2 ;* VP8 MMXEXT optimizations
3 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> 3 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
4 ;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com> 4 ;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
5 ;* 5 ;*
6 ;* This file is part of FFmpeg. 6 ;* This file is part of FFmpeg.
7 ;* 7 ;*
8 ;* FFmpeg is free software; you can redistribute it and/or 8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public 9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either 10 ;* License as published by the Free Software Foundation; either
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 paddsw m0, m1 204 paddsw m0, m1
205 paddsw m0, m2 205 paddsw m0, m2
206 paddsw m0, [pw_64] 206 paddsw m0, [pw_64]
207 psraw m0, 7 207 psraw m0, 7
208 packuswb m0, m0 208 packuswb m0, m0
209 movh [r0], m0 ; store 209 movh [r0], m0 ; store
210 210
211 ; go to next line 211 ; go to next line
212 add r0, r1 212 add r0, r1
213 add r2, r3 213 add r2, r3
214 dec r4 ; next row 214 dec r4d ; next row
215 jg .nextrow 215 jg .nextrow
216 REP_RET 216 REP_RET
217 217
218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3
219 shl r5d, 4 219 shl r5d, 4
220 mova m2, [pw_64] 220 mova m2, [pw_64]
221 mova m3, [filter_h2_shuf] 221 mova m3, [filter_h2_shuf]
222 mova m4, [filter_h4_shuf] 222 mova m4, [filter_h4_shuf]
223 %ifdef PIC 223 %ifdef PIC
224 lea r11, [fourtap_filter_hb_m] 224 lea r11, [fourtap_filter_hb_m]
(...skipping 10 matching lines...) Expand all
235 pmaddubsw m1, m6 235 pmaddubsw m1, m6
236 paddsw m0, m2 236 paddsw m0, m2
237 paddsw m0, m1 237 paddsw m0, m1
238 psraw m0, 7 238 psraw m0, 7
239 packuswb m0, m0 239 packuswb m0, m0
240 movh [r0], m0 ; store 240 movh [r0], m0 ; store
241 241
242 ; go to next line 242 ; go to next line
243 add r0, r1 243 add r0, r1
244 add r2, r3 244 add r2, r3
245 dec r4 ; next row 245 dec r4d ; next row
246 jg .nextrow 246 jg .nextrow
247 REP_RET 247 REP_RET
248 248
249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2
250 shl r6d, 4 250 shl r6d, 4
251 %ifdef PIC 251 %ifdef PIC
252 lea r11, [fourtap_filter_hb_m] 252 lea r11, [fourtap_filter_hb_m]
253 %endif 253 %endif
254 mova m5, [fourtap_filter_hb+r6-16] 254 mova m5, [fourtap_filter_hb+r6-16]
255 mova m6, [fourtap_filter_hb+r6] 255 mova m6, [fourtap_filter_hb+r6]
(...skipping 18 matching lines...) Expand all
274 paddsw m4, m2 274 paddsw m4, m2
275 mova m2, m3 275 mova m2, m3
276 paddsw m4, m7 276 paddsw m4, m7
277 psraw m4, 7 277 psraw m4, 7
278 packuswb m4, m4 278 packuswb m4, m4
279 movh [r0], m4 279 movh [r0], m4
280 280
281 ; go to next line 281 ; go to next line
282 add r0, r1 282 add r0, r1
283 add r2, r3 283 add r2, r3
284 dec r4 ; next row 284 dec r4d ; next row
285 jg .nextrow 285 jg .nextrow
286 REP_RET 286 REP_RET
287 287
288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2
289 lea r6d, [r6*3] 289 lea r6d, [r6*3]
290 %ifdef PIC 290 %ifdef PIC
291 lea r11, [sixtap_filter_hb_m] 291 lea r11, [sixtap_filter_hb_m]
292 %endif 292 %endif
293 lea r6, [sixtap_filter_hb+r6*8] 293 lea r6, [sixtap_filter_hb+r6*8]
294 294
(...skipping 26 matching lines...) Expand all
321 mova m2, m3 321 mova m2, m3
322 psraw m6, 7 322 psraw m6, 7
323 mova m3, m4 323 mova m3, m4
324 packuswb m6, m6 324 packuswb m6, m6
325 mova m4, m5 325 mova m4, m5
326 movh [r0], m6 326 movh [r0], m6
327 327
328 ; go to next line 328 ; go to next line
329 add r0, r1 329 add r0, r1
330 add r2, r3 330 add r2, r3
331 dec r4 ; next row 331 dec r4d ; next row
332 jg .nextrow 332 jg .nextrow
333 REP_RET 333 REP_RET
334 %endmacro 334 %endmacro
335 335
336 INIT_MMX 336 INIT_MMX
337 FILTER_SSSE3 4, 0, 0 337 FILTER_SSSE3 4, 0, 0
338 INIT_XMM 338 INIT_XMM
339 FILTER_SSSE3 8, 8, 7 339 FILTER_SSSE3 8, 8, 7
340 340
341 ; 4x4 block, H-only 4-tap filter 341 ; 4x4 block, H-only 4-tap filter
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
374 ; merge two sets of 2 pixels into one set of 4, round/clip/store 374 ; merge two sets of 2 pixels into one set of 4, round/clip/store
375 packssdw mm3, mm0 ; merge dword->word (4px) 375 packssdw mm3, mm0 ; merge dword->word (4px)
376 paddsw mm3, mm7 ; rounding 376 paddsw mm3, mm7 ; rounding
377 psraw mm3, 7 377 psraw mm3, 7
378 packuswb mm3, mm6 ; clip and word->bytes 378 packuswb mm3, mm6 ; clip and word->bytes
379 movd [r0], mm3 ; store 379 movd [r0], mm3 ; store
380 380
381 ; go to next line 381 ; go to next line
382 add r0, r1 382 add r0, r1
383 add r2, r3 383 add r2, r3
384 dec r4 ; next row 384 dec r4d ; next row
385 jg .nextrow 385 jg .nextrow
386 REP_RET 386 REP_RET
387 387
388 ; 4x4 block, H-only 6-tap filter 388 ; 4x4 block, H-only 6-tap filter
389 cglobal put_vp8_epel4_h6_mmxext, 6, 6 389 cglobal put_vp8_epel4_h6_mmxext, 6, 6
390 lea r5d, [r5*3] 390 lea r5d, [r5*3]
391 %ifdef PIC 391 %ifdef PIC
392 lea r11, [sixtap_filter_hw_m] 392 lea r11, [sixtap_filter_hw_m]
393 %endif 393 %endif
394 movq mm4, [sixtap_filter_hw+r5*8-48] ; set up 6tap filter in words 394 movq mm4, [sixtap_filter_hw+r5*8-48] ; set up 6tap filter in words
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
431 ; merge two sets of 2 pixels into one set of 4, round/clip/store 431 ; merge two sets of 2 pixels into one set of 4, round/clip/store
432 packssdw mm1, mm0 ; merge dword->word (4px) 432 packssdw mm1, mm0 ; merge dword->word (4px)
433 paddsw mm1, mm7 ; rounding 433 paddsw mm1, mm7 ; rounding
434 psraw mm1, 7 434 psraw mm1, 7
435 packuswb mm1, mm3 ; clip and word->bytes 435 packuswb mm1, mm3 ; clip and word->bytes
436 movd [r0], mm1 ; store 436 movd [r0], mm1 ; store
437 437
438 ; go to next line 438 ; go to next line
439 add r0, r1 439 add r0, r1
440 add r2, r3 440 add r2, r3
441 dec r4 ; next row 441 dec r4d ; next row
442 jg .nextrow 442 jg .nextrow
443 REP_RET 443 REP_RET
444 444
445 INIT_XMM 445 INIT_XMM
446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
447 shl r5d, 5 447 shl r5d, 5
448 %ifdef PIC 448 %ifdef PIC
449 lea r11, [fourtap_filter_v_m] 449 lea r11, [fourtap_filter_v_m]
450 %endif 450 %endif
451 lea r5, [fourtap_filter_v+r5-32] 451 lea r5, [fourtap_filter_v+r5-32]
(...skipping 27 matching lines...) Expand all
479 paddsw m2, m3 479 paddsw m2, m3
480 paddsw m0, m2 480 paddsw m0, m2
481 paddsw m0, m4 481 paddsw m0, m4
482 psraw m0, 7 482 psraw m0, 7
483 packuswb m0, m7 483 packuswb m0, m7
484 movh [r0], m0 ; store 484 movh [r0], m0 ; store
485 485
486 ; go to next line 486 ; go to next line
487 add r0, r1 487 add r0, r1
488 add r2, r3 488 add r2, r3
489 dec r4 ; next row 489 dec r4d ; next row
490 jg .nextrow 490 jg .nextrow
491 REP_RET 491 REP_RET
492 492
493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
494 lea r5d, [r5*3] 494 lea r5d, [r5*3]
495 shl r5d, 4 495 shl r5d, 4
496 %ifdef PIC 496 %ifdef PIC
497 lea r11, [sixtap_filter_v_m] 497 lea r11, [sixtap_filter_v_m]
498 %endif 498 %endif
499 lea r5, [sixtap_filter_v+r5-96] 499 lea r5, [sixtap_filter_v+r5-96]
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
541 paddsw m0, m3 541 paddsw m0, m3
542 paddsw m0, m1 542 paddsw m0, m1
543 paddsw m0, m6 543 paddsw m0, m6
544 psraw m0, 7 544 psraw m0, 7
545 packuswb m0, m7 545 packuswb m0, m7
546 movh [r0], m0 ; store 546 movh [r0], m0 ; store
547 547
548 ; go to next line 548 ; go to next line
549 add r0, r1 549 add r0, r1
550 add r2, r3 550 add r2, r3
551 dec r4 ; next row 551 dec r4d ; next row
552 jg .nextrow 552 jg .nextrow
553 REP_RET 553 REP_RET
554 554
555 %macro FILTER_V 3 555 %macro FILTER_V 3
556 ; 4x4 block, V-only 4-tap filter 556 ; 4x4 block, V-only 4-tap filter
557 cglobal put_vp8_epel%2_v4_%1, 7, 7, %3 557 cglobal put_vp8_epel%2_v4_%1, 7, 7, %3
558 shl r6d, 5 558 shl r6d, 5
559 %ifdef PIC 559 %ifdef PIC
560 lea r11, [fourtap_filter_v_m] 560 lea r11, [fourtap_filter_v_m]
561 %endif 561 %endif
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
594 594
595 ; round/clip/store 595 ; round/clip/store
596 paddsw m4, m6 596 paddsw m4, m6
597 psraw m4, 7 597 psraw m4, 7
598 packuswb m4, m7 598 packuswb m4, m7
599 movh [r0], m4 599 movh [r0], m4
600 600
601 ; go to next line 601 ; go to next line
602 add r0, r1 602 add r0, r1
603 add r2, r3 603 add r2, r3
604 dec r4 ; next row 604 dec r4d ; next row
605 jg .nextrow 605 jg .nextrow
606 REP_RET 606 REP_RET
607 607
608 608
609 ; 4x4 block, V-only 6-tap filter 609 ; 4x4 block, V-only 6-tap filter
610 cglobal put_vp8_epel%2_v6_%1, 7, 7, %3 610 cglobal put_vp8_epel%2_v6_%1, 7, 7, %3
611 shl r6d, 4 611 shl r6d, 4
612 lea r6, [r6*3] 612 lea r6, [r6*3]
613 %ifdef PIC 613 %ifdef PIC
614 lea r11, [sixtap_filter_v_m] 614 lea r11, [sixtap_filter_v_m]
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
659 659
660 ; round/clip/store 660 ; round/clip/store
661 paddsw m6, [pw_64] 661 paddsw m6, [pw_64]
662 psraw m6, 7 662 psraw m6, 7
663 packuswb m6, m7 663 packuswb m6, m7
664 movh [r0], m6 664 movh [r0], m6
665 665
666 ; go to next line 666 ; go to next line
667 add r0, r1 667 add r0, r1
668 add r2, r3 668 add r2, r3
669 dec r4 ; next row 669 dec r4d ; next row
670 jg .nextrow 670 jg .nextrow
671 REP_RET 671 REP_RET
672 %endmacro 672 %endmacro
673 673
674 INIT_MMX 674 INIT_MMX
675 FILTER_V mmxext, 4, 0 675 FILTER_V mmxext, 4, 0
676 INIT_XMM 676 INIT_XMM
677 FILTER_V sse2, 8, 8 677 FILTER_V sse2, 8, 8
678 678
679 %macro FILTER_BILINEAR 3 679 %macro FILTER_BILINEAR 3
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
711 movh [r0+r1*0], m0 711 movh [r0+r1*0], m0
712 movh [r0+r1*1], m2 712 movh [r0+r1*1], m2
713 %else 713 %else
714 packuswb m0, m2 714 packuswb m0, m2
715 movh [r0+r1*0], m0 715 movh [r0+r1*0], m0
716 movhps [r0+r1*1], m0 716 movhps [r0+r1*1], m0
717 %endif 717 %endif
718 718
719 lea r0, [r0+r1*2] 719 lea r0, [r0+r1*2]
720 lea r2, [r2+r3*2] 720 lea r2, [r2+r3*2]
721 sub r4, 2 721 sub r4d, 2
722 jg .nextrow 722 jg .nextrow
723 REP_RET 723 REP_RET
724 724
725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
726 mov r6d, 8*16 726 mov r6d, 8*16
727 shl r5d, 4 727 shl r5d, 4
728 sub r6d, r5d 728 sub r6d, r5d
729 %ifdef PIC 729 %ifdef PIC
730 lea r11, [bilinear_filter_vw_m] 730 lea r11, [bilinear_filter_vw_m]
731 %endif 731 %endif
(...skipping 25 matching lines...) Expand all
757 movh [r0+r1*0], m0 757 movh [r0+r1*0], m0
758 movh [r0+r1*1], m2 758 movh [r0+r1*1], m2
759 %else 759 %else
760 packuswb m0, m2 760 packuswb m0, m2
761 movh [r0+r1*0], m0 761 movh [r0+r1*0], m0
762 movhps [r0+r1*1], m0 762 movhps [r0+r1*1], m0
763 %endif 763 %endif
764 764
765 lea r0, [r0+r1*2] 765 lea r0, [r0+r1*2]
766 lea r2, [r2+r3*2] 766 lea r2, [r2+r3*2]
767 sub r4, 2 767 sub r4d, 2
768 jg .nextrow 768 jg .nextrow
769 REP_RET 769 REP_RET
770 %endmacro 770 %endmacro
771 771
772 INIT_MMX 772 INIT_MMX
773 FILTER_BILINEAR mmxext, 4, 0 773 FILTER_BILINEAR mmxext, 4, 0
774 INIT_XMM 774 INIT_XMM
775 FILTER_BILINEAR sse2, 8, 7 775 FILTER_BILINEAR sse2, 8, 7
776 776
777 %macro FILTER_BILINEAR_SSSE3 1 777 %macro FILTER_BILINEAR_SSSE3 1
(...skipping 22 matching lines...) Expand all
800 movh [r0+r1*0], m0 800 movh [r0+r1*0], m0
801 movh [r0+r1*1], m1 801 movh [r0+r1*1], m1
802 %else 802 %else
803 packuswb m0, m1 803 packuswb m0, m1
804 movh [r0+r1*0], m0 804 movh [r0+r1*0], m0
805 movhps [r0+r1*1], m0 805 movhps [r0+r1*1], m0
806 %endif 806 %endif
807 807
808 lea r0, [r0+r1*2] 808 lea r0, [r0+r1*2]
809 lea r2, [r2+r3*2] 809 lea r2, [r2+r3*2]
810 sub r4, 2 810 sub r4d, 2
811 jg .nextrow 811 jg .nextrow
812 REP_RET 812 REP_RET
813 813
814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7 814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7
815 shl r5d, 4 815 shl r5d, 4
816 %ifdef PIC 816 %ifdef PIC
817 lea r11, [bilinear_filter_vb_m] 817 lea r11, [bilinear_filter_vb_m]
818 %endif 818 %endif
819 pxor m4, m4 819 pxor m4, m4
820 mova m2, [filter_h2_shuf] 820 mova m2, [filter_h2_shuf]
(...skipping 15 matching lines...) Expand all
836 movh [r0+r1*0], m0 836 movh [r0+r1*0], m0
837 movh [r0+r1*1], m1 837 movh [r0+r1*1], m1
838 %else 838 %else
839 packuswb m0, m1 839 packuswb m0, m1
840 movh [r0+r1*0], m0 840 movh [r0+r1*0], m0
841 movhps [r0+r1*1], m0 841 movhps [r0+r1*1], m0
842 %endif 842 %endif
843 843
844 lea r0, [r0+r1*2] 844 lea r0, [r0+r1*2]
845 lea r2, [r2+r3*2] 845 lea r2, [r2+r3*2]
846 sub r4, 2 846 sub r4d, 2
847 jg .nextrow 847 jg .nextrow
848 REP_RET 848 REP_RET
849 %endmacro 849 %endmacro
850 850
851 INIT_MMX 851 INIT_MMX
852 FILTER_BILINEAR_SSSE3 4 852 FILTER_BILINEAR_SSSE3 4
853 INIT_XMM 853 INIT_XMM
854 FILTER_BILINEAR_SSSE3 8 854 FILTER_BILINEAR_SSSE3 8
855 855
856 cglobal put_vp8_pixels8_mmx, 5,5 856 cglobal put_vp8_pixels8_mmx, 5,5
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after
1335 %if %10 == 16 1335 %if %10 == 16
1336 movd [%6+%9*4], m%3 1336 movd [%6+%9*4], m%3
1337 %endif 1337 %endif
1338 movd [%7+%9], m%4 1338 movd [%7+%9], m%4
1339 1339
1340 ; write dwords 2 1340 ; write dwords 2
1341 psrldq m%1, 4 1341 psrldq m%1, 4
1342 psrldq m%2, 4 1342 psrldq m%2, 4
1343 %if %10 == 8 1343 %if %10 == 8
1344 movd [%5+%8*2], m%1 1344 movd [%5+%8*2], m%1
1345 movd %5, m%3 1345 movd %5d, m%3
1346 %endif 1346 %endif
1347 psrldq m%3, 4 1347 psrldq m%3, 4
1348 psrldq m%4, 4 1348 psrldq m%4, 4
1349 %if %10 == 16 1349 %if %10 == 16
1350 movd [%5+%8*2], m%1 1350 movd [%5+%8*2], m%1
1351 %endif 1351 %endif
1352 movd [%6+%9], m%2 1352 movd [%6+%9], m%2
1353 movd [%7+%8*2], m%3 1353 movd [%7+%8*2], m%3
1354 movd [%7+%9*2], m%4 1354 movd [%7+%9*2], m%4
1355 add %7, %9 1355 add %7, %9
(...skipping 16 matching lines...) Expand all
1372 1372
1373 ; write 4 or 8 words in the mmx/xmm registers as 8 lines 1373 ; write 4 or 8 words in the mmx/xmm registers as 8 lines
1374 ; 1 and 2 are the registers to write, this can be the same (for SSE2) 1374 ; 1 and 2 are the registers to write, this can be the same (for SSE2)
1375 ; for pre-SSE4: 1375 ; for pre-SSE4:
1376 ; 3 is a general-purpose register that we will clobber 1376 ; 3 is a general-purpose register that we will clobber
1377 ; for SSE4: 1377 ; for SSE4:
1378 ; 3 is a pointer to the destination's 5th line 1378 ; 3 is a pointer to the destination's 5th line
1379 ; 4 is a pointer to the destination's 4th line 1379 ; 4 is a pointer to the destination's 4th line
1380 ; 5/6 is -stride and +stride 1380 ; 5/6 is -stride and +stride
1381 %macro WRITE_2x4W 6 1381 %macro WRITE_2x4W 6
1382 movd %3, %1 1382 movd %3d, %1
1383 punpckhdq %1, %1 1383 punpckhdq %1, %1
1384 mov [%4+%5*4], %3w 1384 mov [%4+%5*4], %3w
1385 shr %3, 16 1385 shr %3, 16
1386 add %4, %6 1386 add %4, %6
1387 mov [%4+%5*4], %3w 1387 mov [%4+%5*4], %3w
1388 1388
1389 movd %3, %1 1389 movd %3d, %1
1390 add %4, %5 1390 add %4, %5
1391 mov [%4+%5*2], %3w 1391 mov [%4+%5*2], %3w
1392 shr %3, 16 1392 shr %3, 16
1393 mov [%4+%5 ], %3w 1393 mov [%4+%5 ], %3w
1394 1394
1395 movd %3, %2 1395 movd %3d, %2
1396 punpckhdq %2, %2 1396 punpckhdq %2, %2
1397 mov [%4 ], %3w 1397 mov [%4 ], %3w
1398 shr %3, 16 1398 shr %3, 16
1399 mov [%4+%6 ], %3w 1399 mov [%4+%6 ], %3w
1400 1400
1401 movd %3, %2 1401 movd %3d, %2
1402 add %4, %6 1402 add %4, %6
1403 mov [%4+%6 ], %3w 1403 mov [%4+%6 ], %3w
1404 shr %3, 16 1404 shr %3, 16
1405 mov [%4+%6*2], %3w 1405 mov [%4+%6*2], %3w
1406 add %4, %5 1406 add %4, %5
1407 %endmacro 1407 %endmacro
1408 1408
1409 %macro WRITE_8W_SSE2 5 1409 %macro WRITE_8W_SSE2 5
1410 movd %2, %1 1410 movd %2d, %1
1411 psrldq %1, 4 1411 psrldq %1, 4
1412 mov [%3+%4*4], %2w 1412 mov [%3+%4*4], %2w
1413 shr %2, 16 1413 shr %2, 16
1414 add %3, %5 1414 add %3, %5
1415 mov [%3+%4*4], %2w 1415 mov [%3+%4*4], %2w
1416 1416
1417 movd %2, %1 1417 movd %2d, %1
1418 psrldq %1, 4 1418 psrldq %1, 4
1419 add %3, %4 1419 add %3, %4
1420 mov [%3+%4*2], %2w 1420 mov [%3+%4*2], %2w
1421 shr %2, 16 1421 shr %2, 16
1422 mov [%3+%4 ], %2w 1422 mov [%3+%4 ], %2w
1423 1423
1424 movd %2, %1 1424 movd %2d, %1
1425 psrldq %1, 4 1425 psrldq %1, 4
1426 mov [%3 ], %2w 1426 mov [%3 ], %2w
1427 shr %2, 16 1427 shr %2, 16
1428 mov [%3+%5 ], %2w 1428 mov [%3+%5 ], %2w
1429 1429
1430 movd %2, %1 1430 movd %2d, %1
1431 add %3, %5 1431 add %3, %5
1432 mov [%3+%5 ], %2w 1432 mov [%3+%5 ], %2w
1433 shr %2, 16 1433 shr %2, 16
1434 mov [%3+%5*2], %2w 1434 mov [%3+%5*2], %2w
1435 %endmacro 1435 %endmacro
1436 1436
1437 %macro WRITE_8W_SSE4 5 1437 %macro WRITE_8W_SSE4 5
1438 pextrw [%3+%4*4], %1, 0 1438 pextrw [%3+%4*4], %1, 0
1439 pextrw [%2+%4*4], %1, 1 1439 pextrw [%2+%4*4], %1, 1
1440 pextrw [%3+%4*2], %1, 2 1440 pextrw [%3+%4*2], %1, 2
1441 pextrw [%3+%4 ], %1, 3 1441 pextrw [%3+%4 ], %1, 3
1442 pextrw [%3 ], %1, 4 1442 pextrw [%3 ], %1, 4
1443 pextrw [%2 ], %1, 5 1443 pextrw [%2 ], %1, 5
1444 pextrw [%2+%5 ], %1, 6 1444 pextrw [%2+%5 ], %1, 6
1445 pextrw [%2+%5*2], %1, 7 1445 pextrw [%2+%5*2], %1, 7
1446 %endmacro 1446 %endmacro
1447 1447
1448 %macro SPLATB_REG_MMX 2-3 1448 %macro SPLATB_REG_MMX 2-3
1449 movd %1, %2 1449 movd %1, %2d
1450 punpcklbw %1, %1 1450 punpcklbw %1, %1
1451 punpcklwd %1, %1 1451 punpcklwd %1, %1
1452 punpckldq %1, %1 1452 punpckldq %1, %1
1453 %endmacro 1453 %endmacro
1454 1454
1455 %macro SPLATB_REG_MMXEXT 2-3 1455 %macro SPLATB_REG_MMXEXT 2-3
1456 movd %1, %2 1456 movd %1, %2d
1457 punpcklbw %1, %1 1457 punpcklbw %1, %1
1458 pshufw %1, %1, 0x0 1458 pshufw %1, %1, 0x0
1459 %endmacro 1459 %endmacro
1460 1460
1461 %macro SPLATB_REG_SSE2 2-3 1461 %macro SPLATB_REG_SSE2 2-3
1462 movd %1, %2 1462 movd %1, %2d
1463 punpcklbw %1, %1 1463 punpcklbw %1, %1
1464 pshuflw %1, %1, 0x0 1464 pshuflw %1, %1, 0x0
1465 punpcklqdq %1, %1 1465 punpcklqdq %1, %1
1466 %endmacro 1466 %endmacro
1467 1467
1468 %macro SPLATB_REG_SSSE3 3 1468 %macro SPLATB_REG_SSSE3 3
1469 movd %1, %2 1469 movd %1, %2d
1470 pshufb %1, %3 1470 pshufb %1, %3
1471 %endmacro 1471 %endmacro
1472 1472
1473 %macro SIMPLE_LOOPFILTER 3 1473 %macro SIMPLE_LOOPFILTER 4
1474 cglobal vp8_%2_loop_filter_simple_%1, 3, %3 1474 cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4
1475 %if mmsize == 8 ; mmx/mmxext 1475 %if mmsize == 8 ; mmx/mmxext
1476 mov r3, 2 1476 mov r3, 2
1477 %endif 1477 %endif
1478 %ifnidn %1, sse2 1478 %ifnidn %1, sse2
1479 %if mmsize == 16 1479 %if mmsize == 16
1480 pxor m0, m0 1480 pxor m0, m0
1481 %endif 1481 %endif
1482 %endif 1482 %endif
1483 SPLATB_REG m7, r2, m0 ; splat "flim" into register 1483 SPLATB_REG m7, r2, m0 ; splat "flim" into register
1484 1484
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
1605 dec r3 1605 dec r3
1606 jg .next8px 1606 jg .next8px
1607 REP_RET 1607 REP_RET
1608 %else ; sse2 1608 %else ; sse2
1609 RET 1609 RET
1610 %endif 1610 %endif
1611 %endmacro 1611 %endmacro
1612 1612
1613 INIT_MMX 1613 INIT_MMX
1614 %define SPLATB_REG SPLATB_REG_MMX 1614 %define SPLATB_REG SPLATB_REG_MMX
1615 SIMPLE_LOOPFILTER mmx, v, 4 1615 SIMPLE_LOOPFILTER mmx, v, 4, 0
1616 SIMPLE_LOOPFILTER mmx, h, 5 1616 SIMPLE_LOOPFILTER mmx, h, 5, 0
1617 %define SPLATB_REG SPLATB_REG_MMXEXT 1617 %define SPLATB_REG SPLATB_REG_MMXEXT
1618 SIMPLE_LOOPFILTER mmxext, v, 4 1618 SIMPLE_LOOPFILTER mmxext, v, 4, 0
1619 SIMPLE_LOOPFILTER mmxext, h, 5 1619 SIMPLE_LOOPFILTER mmxext, h, 5, 0
1620 INIT_XMM 1620 INIT_XMM
1621 %define SPLATB_REG SPLATB_REG_SSE2 1621 %define SPLATB_REG SPLATB_REG_SSE2
1622 %define WRITE_8W WRITE_8W_SSE2 1622 %define WRITE_8W WRITE_8W_SSE2
1623 SIMPLE_LOOPFILTER sse2, v, 3 1623 SIMPLE_LOOPFILTER sse2, v, 3, 8
1624 SIMPLE_LOOPFILTER sse2, h, 5 1624 SIMPLE_LOOPFILTER sse2, h, 5, 8
1625 %define SPLATB_REG SPLATB_REG_SSSE3 1625 %define SPLATB_REG SPLATB_REG_SSSE3
1626 SIMPLE_LOOPFILTER ssse3, v, 3 1626 SIMPLE_LOOPFILTER ssse3, v, 3, 8
1627 SIMPLE_LOOPFILTER ssse3, h, 5 1627 SIMPLE_LOOPFILTER ssse3, h, 5, 8
1628 %define WRITE_8W WRITE_8W_SSE4 1628 %define WRITE_8W WRITE_8W_SSE4
1629 SIMPLE_LOOPFILTER sse4, h, 5 1629 SIMPLE_LOOPFILTER sse4, h, 5, 8
1630 1630
1631 ;----------------------------------------------------------------------------- 1631 ;-----------------------------------------------------------------------------
1632 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride, 1632 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
1633 ; int flimE, int flimI, int hev_thr); 1633 ; int flimE, int flimI, int hev_thr);
1634 ;----------------------------------------------------------------------------- 1634 ;-----------------------------------------------------------------------------
1635 1635
1636 %macro INNER_LOOPFILTER 5 1636 %macro INNER_LOOPFILTER 5
1637 %if %4 == 8 ; chroma 1637 %if %4 == 8 ; chroma
1638 cglobal vp8_%2_loop_filter8uv_inner_%1, 6, %3, %5 1638 cglobal vp8_%2_loop_filter8uv_inner_%1, 6, %3, %5
1639 %define dst8_reg r1 1639 %define dst8_reg r1
(...skipping 1209 matching lines...) Expand 10 before | Expand all | Expand 10 after
2849 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15 2849 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
2850 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15 2850 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15
2851 2851
2852 %define WRITE_8W WRITE_8W_SSE4 2852 %define WRITE_8W WRITE_8W_SSE4
2853 %ifdef m8 2853 %ifdef m8
2854 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15 2854 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
2855 %else 2855 %else
2856 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15 2856 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
2857 %endif 2857 %endif
2858 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15 2858 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15
OLDNEW
« no previous file with comments | « source/patched-ffmpeg-mt/libavcodec/x86/vp56dsp_init.c ('k') | source/patched-ffmpeg-mt/libavcodec/x86/vp8dsp-init.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698