source/patched-ffmpeg-mt/libavcodec/x86/vp8dsp.asm - Issue 3384002: ffmpeg source update for sep 09

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/x86/vp8dsp.asm

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: Created 10 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 ;******************************************************************************	1 ;******************************************************************************

2 ;* VP8 MMXEXT optimizations	2 ;* VP8 MMXEXT optimizations

3 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>	3 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>

4 ;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>	4 ;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>

5 ;*	5 ;*

6 ;* This file is part of FFmpeg.	6 ;* This file is part of FFmpeg.

7 ;*	7 ;*

8 ;* FFmpeg is free software; you can redistribute it and/or	8 ;* FFmpeg is free software; you can redistribute it and/or

9 ;* modify it under the terms of the GNU Lesser General Public	9 ;* modify it under the terms of the GNU Lesser General Public

10 ;* License as published by the Free Software Foundation; either	10 ;* License as published by the Free Software Foundation; either

(...skipping 193 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
204 paddsw m0, m1	204 paddsw m0, m1

205 paddsw m0, m2	205 paddsw m0, m2

206 paddsw m0, [pw_64]	206 paddsw m0, [pw_64]

207 psraw m0, 7	207 psraw m0, 7

208 packuswb m0, m0	208 packuswb m0, m0

209 movh [r0], m0 ; store	209 movh [r0], m0 ; store

210	210

211 ; go to next line	211 ; go to next line

212 add r0, r1	212 add r0, r1

213 add r2, r3	213 add r2, r3

214 dec r4 ; next row	214 dec r4d ; next row

215 jg .nextrow	215 jg .nextrow

216 REP_RET	216 REP_RET

217	217

218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3	218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3

219 shl r5d, 4	219 shl r5d, 4

220 mova m2, [pw_64]	220 mova m2, [pw_64]

221 mova m3, [filter_h2_shuf]	221 mova m3, [filter_h2_shuf]

222 mova m4, [filter_h4_shuf]	222 mova m4, [filter_h4_shuf]

223 %ifdef PIC	223 %ifdef PIC

224 lea r11, [fourtap_filter_hb_m]	224 lea r11, [fourtap_filter_hb_m]

(...skipping 10 matching lines...) Expand all Loading...
235 pmaddubsw m1, m6	235 pmaddubsw m1, m6

236 paddsw m0, m2	236 paddsw m0, m2

237 paddsw m0, m1	237 paddsw m0, m1

238 psraw m0, 7	238 psraw m0, 7

239 packuswb m0, m0	239 packuswb m0, m0

240 movh [r0], m0 ; store	240 movh [r0], m0 ; store

241	241

242 ; go to next line	242 ; go to next line

243 add r0, r1	243 add r0, r1

244 add r2, r3	244 add r2, r3

245 dec r4 ; next row	245 dec r4d ; next row

246 jg .nextrow	246 jg .nextrow

247 REP_RET	247 REP_RET

248	248

249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2	249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2

250 shl r6d, 4	250 shl r6d, 4

251 %ifdef PIC	251 %ifdef PIC

252 lea r11, [fourtap_filter_hb_m]	252 lea r11, [fourtap_filter_hb_m]

253 %endif	253 %endif

254 mova m5, [fourtap_filter_hb+r6-16]	254 mova m5, [fourtap_filter_hb+r6-16]

255 mova m6, [fourtap_filter_hb+r6]	255 mova m6, [fourtap_filter_hb+r6]

(...skipping 18 matching lines...) Expand all Loading...
274 paddsw m4, m2	274 paddsw m4, m2

275 mova m2, m3	275 mova m2, m3

276 paddsw m4, m7	276 paddsw m4, m7

277 psraw m4, 7	277 psraw m4, 7

278 packuswb m4, m4	278 packuswb m4, m4

279 movh [r0], m4	279 movh [r0], m4

280	280

281 ; go to next line	281 ; go to next line

282 add r0, r1	282 add r0, r1

283 add r2, r3	283 add r2, r3

284 dec r4 ; next row	284 dec r4d ; next row

285 jg .nextrow	285 jg .nextrow

286 REP_RET	286 REP_RET

287	287

288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2	288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2

289 lea r6d, [r6*3]	289 lea r6d, [r6*3]

290 %ifdef PIC	290 %ifdef PIC

291 lea r11, [sixtap_filter_hb_m]	291 lea r11, [sixtap_filter_hb_m]

292 %endif	292 %endif

293 lea r6, [sixtap_filter_hb+r6*8]	293 lea r6, [sixtap_filter_hb+r6*8]

294	294

(...skipping 26 matching lines...) Expand all Loading...
321 mova m2, m3	321 mova m2, m3

322 psraw m6, 7	322 psraw m6, 7

323 mova m3, m4	323 mova m3, m4

324 packuswb m6, m6	324 packuswb m6, m6

325 mova m4, m5	325 mova m4, m5

326 movh [r0], m6	326 movh [r0], m6

327	327

328 ; go to next line	328 ; go to next line

329 add r0, r1	329 add r0, r1

330 add r2, r3	330 add r2, r3

331 dec r4 ; next row	331 dec r4d ; next row

332 jg .nextrow	332 jg .nextrow

333 REP_RET	333 REP_RET

334 %endmacro	334 %endmacro

335	335

336 INIT_MMX	336 INIT_MMX

337 FILTER_SSSE3 4, 0, 0	337 FILTER_SSSE3 4, 0, 0

338 INIT_XMM	338 INIT_XMM

339 FILTER_SSSE3 8, 8, 7	339 FILTER_SSSE3 8, 8, 7

340	340

341 ; 4x4 block, H-only 4-tap filter	341 ; 4x4 block, H-only 4-tap filter

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
374 ; merge two sets of 2 pixels into one set of 4, round/clip/store	374 ; merge two sets of 2 pixels into one set of 4, round/clip/store

375 packssdw mm3, mm0 ; merge dword->word (4px)	375 packssdw mm3, mm0 ; merge dword->word (4px)

376 paddsw mm3, mm7 ; rounding	376 paddsw mm3, mm7 ; rounding

377 psraw mm3, 7	377 psraw mm3, 7

378 packuswb mm3, mm6 ; clip and word->bytes	378 packuswb mm3, mm6 ; clip and word->bytes

379 movd [r0], mm3 ; store	379 movd [r0], mm3 ; store

380	380

381 ; go to next line	381 ; go to next line

382 add r0, r1	382 add r0, r1

383 add r2, r3	383 add r2, r3

384 dec r4 ; next row	384 dec r4d ; next row

385 jg .nextrow	385 jg .nextrow

386 REP_RET	386 REP_RET

387	387

388 ; 4x4 block, H-only 6-tap filter	388 ; 4x4 block, H-only 6-tap filter

389 cglobal put_vp8_epel4_h6_mmxext, 6, 6	389 cglobal put_vp8_epel4_h6_mmxext, 6, 6

390 lea r5d, [r5*3]	390 lea r5d, [r5*3]

391 %ifdef PIC	391 %ifdef PIC

392 lea r11, [sixtap_filter_hw_m]	392 lea r11, [sixtap_filter_hw_m]

393 %endif	393 %endif

394 movq mm4, [sixtap_filter_hw+r5*8-48] ; set up 6tap filter in words	394 movq mm4, [sixtap_filter_hw+r5*8-48] ; set up 6tap filter in words

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
431 ; merge two sets of 2 pixels into one set of 4, round/clip/store	431 ; merge two sets of 2 pixels into one set of 4, round/clip/store

432 packssdw mm1, mm0 ; merge dword->word (4px)	432 packssdw mm1, mm0 ; merge dword->word (4px)

433 paddsw mm1, mm7 ; rounding	433 paddsw mm1, mm7 ; rounding

434 psraw mm1, 7	434 psraw mm1, 7

435 packuswb mm1, mm3 ; clip and word->bytes	435 packuswb mm1, mm3 ; clip and word->bytes

436 movd [r0], mm1 ; store	436 movd [r0], mm1 ; store

437	437

438 ; go to next line	438 ; go to next line

439 add r0, r1	439 add r0, r1

440 add r2, r3	440 add r2, r3

441 dec r4 ; next row	441 dec r4d ; next row

442 jg .nextrow	442 jg .nextrow

443 REP_RET	443 REP_RET

444	444

445 INIT_XMM	445 INIT_XMM

446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10	446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10

447 shl r5d, 5	447 shl r5d, 5

448 %ifdef PIC	448 %ifdef PIC

449 lea r11, [fourtap_filter_v_m]	449 lea r11, [fourtap_filter_v_m]

450 %endif	450 %endif

451 lea r5, [fourtap_filter_v+r5-32]	451 lea r5, [fourtap_filter_v+r5-32]

(...skipping 27 matching lines...) Expand all Loading...
479 paddsw m2, m3	479 paddsw m2, m3

480 paddsw m0, m2	480 paddsw m0, m2

481 paddsw m0, m4	481 paddsw m0, m4

482 psraw m0, 7	482 psraw m0, 7

483 packuswb m0, m7	483 packuswb m0, m7

484 movh [r0], m0 ; store	484 movh [r0], m0 ; store

485	485

486 ; go to next line	486 ; go to next line

487 add r0, r1	487 add r0, r1

488 add r2, r3	488 add r2, r3

489 dec r4 ; next row	489 dec r4d ; next row

490 jg .nextrow	490 jg .nextrow

491 REP_RET	491 REP_RET

492	492

493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14	493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14

494 lea r5d, [r5*3]	494 lea r5d, [r5*3]

495 shl r5d, 4	495 shl r5d, 4

496 %ifdef PIC	496 %ifdef PIC

497 lea r11, [sixtap_filter_v_m]	497 lea r11, [sixtap_filter_v_m]

498 %endif	498 %endif

499 lea r5, [sixtap_filter_v+r5-96]	499 lea r5, [sixtap_filter_v+r5-96]

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
541 paddsw m0, m3	541 paddsw m0, m3

542 paddsw m0, m1	542 paddsw m0, m1

543 paddsw m0, m6	543 paddsw m0, m6

544 psraw m0, 7	544 psraw m0, 7

545 packuswb m0, m7	545 packuswb m0, m7

546 movh [r0], m0 ; store	546 movh [r0], m0 ; store

547	547

548 ; go to next line	548 ; go to next line

549 add r0, r1	549 add r0, r1

550 add r2, r3	550 add r2, r3

551 dec r4 ; next row	551 dec r4d ; next row

552 jg .nextrow	552 jg .nextrow

553 REP_RET	553 REP_RET

554	554

555 %macro FILTER_V 3	555 %macro FILTER_V 3

556 ; 4x4 block, V-only 4-tap filter	556 ; 4x4 block, V-only 4-tap filter

557 cglobal put_vp8_epel%2_v4_%1, 7, 7, %3	557 cglobal put_vp8_epel%2_v4_%1, 7, 7, %3

558 shl r6d, 5	558 shl r6d, 5

559 %ifdef PIC	559 %ifdef PIC

560 lea r11, [fourtap_filter_v_m]	560 lea r11, [fourtap_filter_v_m]

561 %endif	561 %endif

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
594	594

595 ; round/clip/store	595 ; round/clip/store

596 paddsw m4, m6	596 paddsw m4, m6

597 psraw m4, 7	597 psraw m4, 7

598 packuswb m4, m7	598 packuswb m4, m7

599 movh [r0], m4	599 movh [r0], m4

600	600

601 ; go to next line	601 ; go to next line

602 add r0, r1	602 add r0, r1

603 add r2, r3	603 add r2, r3

604 dec r4 ; next row	604 dec r4d ; next row

605 jg .nextrow	605 jg .nextrow

606 REP_RET	606 REP_RET

607	607

608	608

609 ; 4x4 block, V-only 6-tap filter	609 ; 4x4 block, V-only 6-tap filter

610 cglobal put_vp8_epel%2_v6_%1, 7, 7, %3	610 cglobal put_vp8_epel%2_v6_%1, 7, 7, %3

611 shl r6d, 4	611 shl r6d, 4

612 lea r6, [r6*3]	612 lea r6, [r6*3]

613 %ifdef PIC	613 %ifdef PIC

614 lea r11, [sixtap_filter_v_m]	614 lea r11, [sixtap_filter_v_m]

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
659	659

660 ; round/clip/store	660 ; round/clip/store

661 paddsw m6, [pw_64]	661 paddsw m6, [pw_64]

662 psraw m6, 7	662 psraw m6, 7

663 packuswb m6, m7	663 packuswb m6, m7

664 movh [r0], m6	664 movh [r0], m6

665	665

666 ; go to next line	666 ; go to next line

667 add r0, r1	667 add r0, r1

668 add r2, r3	668 add r2, r3

669 dec r4 ; next row	669 dec r4d ; next row

670 jg .nextrow	670 jg .nextrow

671 REP_RET	671 REP_RET

672 %endmacro	672 %endmacro

673	673

674 INIT_MMX	674 INIT_MMX

675 FILTER_V mmxext, 4, 0	675 FILTER_V mmxext, 4, 0

676 INIT_XMM	676 INIT_XMM

677 FILTER_V sse2, 8, 8	677 FILTER_V sse2, 8, 8

678	678

679 %macro FILTER_BILINEAR 3	679 %macro FILTER_BILINEAR 3

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
711 movh [r0+r1*0], m0	711 movh [r0+r1*0], m0

712 movh [r0+r1*1], m2	712 movh [r0+r1*1], m2

713 %else	713 %else

714 packuswb m0, m2	714 packuswb m0, m2

715 movh [r0+r1*0], m0	715 movh [r0+r1*0], m0

716 movhps [r0+r1*1], m0	716 movhps [r0+r1*1], m0

717 %endif	717 %endif

718	718

719 lea r0, [r0+r1*2]	719 lea r0, [r0+r1*2]

720 lea r2, [r2+r3*2]	720 lea r2, [r2+r3*2]

721 sub r4, 2	721 sub r4d, 2

722 jg .nextrow	722 jg .nextrow

723 REP_RET	723 REP_RET

724	724

725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3	725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3

726 mov r6d, 8*16	726 mov r6d, 8*16

727 shl r5d, 4	727 shl r5d, 4

728 sub r6d, r5d	728 sub r6d, r5d

729 %ifdef PIC	729 %ifdef PIC

730 lea r11, [bilinear_filter_vw_m]	730 lea r11, [bilinear_filter_vw_m]

731 %endif	731 %endif

(...skipping 25 matching lines...) Expand all Loading...
757 movh [r0+r1*0], m0	757 movh [r0+r1*0], m0

758 movh [r0+r1*1], m2	758 movh [r0+r1*1], m2

759 %else	759 %else

760 packuswb m0, m2	760 packuswb m0, m2

761 movh [r0+r1*0], m0	761 movh [r0+r1*0], m0

762 movhps [r0+r1*1], m0	762 movhps [r0+r1*1], m0

763 %endif	763 %endif

764	764

765 lea r0, [r0+r1*2]	765 lea r0, [r0+r1*2]

766 lea r2, [r2+r3*2]	766 lea r2, [r2+r3*2]

767 sub r4, 2	767 sub r4d, 2

768 jg .nextrow	768 jg .nextrow

769 REP_RET	769 REP_RET

770 %endmacro	770 %endmacro

771	771

772 INIT_MMX	772 INIT_MMX

773 FILTER_BILINEAR mmxext, 4, 0	773 FILTER_BILINEAR mmxext, 4, 0

774 INIT_XMM	774 INIT_XMM

775 FILTER_BILINEAR sse2, 8, 7	775 FILTER_BILINEAR sse2, 8, 7

776	776

777 %macro FILTER_BILINEAR_SSSE3 1	777 %macro FILTER_BILINEAR_SSSE3 1

(...skipping 22 matching lines...) Expand all Loading...
800 movh [r0+r1*0], m0	800 movh [r0+r1*0], m0

801 movh [r0+r1*1], m1	801 movh [r0+r1*1], m1

802 %else	802 %else

803 packuswb m0, m1	803 packuswb m0, m1

804 movh [r0+r1*0], m0	804 movh [r0+r1*0], m0

805 movhps [r0+r1*1], m0	805 movhps [r0+r1*1], m0

806 %endif	806 %endif

807	807

808 lea r0, [r0+r1*2]	808 lea r0, [r0+r1*2]

809 lea r2, [r2+r3*2]	809 lea r2, [r2+r3*2]

810 sub r4, 2	810 sub r4d, 2

811 jg .nextrow	811 jg .nextrow

812 REP_RET	812 REP_RET

813	813

814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7	814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7

815 shl r5d, 4	815 shl r5d, 4

816 %ifdef PIC	816 %ifdef PIC

817 lea r11, [bilinear_filter_vb_m]	817 lea r11, [bilinear_filter_vb_m]

818 %endif	818 %endif

819 pxor m4, m4	819 pxor m4, m4

820 mova m2, [filter_h2_shuf]	820 mova m2, [filter_h2_shuf]

(...skipping 15 matching lines...) Expand all Loading...
836 movh [r0+r1*0], m0	836 movh [r0+r1*0], m0

837 movh [r0+r1*1], m1	837 movh [r0+r1*1], m1

838 %else	838 %else

839 packuswb m0, m1	839 packuswb m0, m1

840 movh [r0+r1*0], m0	840 movh [r0+r1*0], m0

841 movhps [r0+r1*1], m0	841 movhps [r0+r1*1], m0

842 %endif	842 %endif

843	843

844 lea r0, [r0+r1*2]	844 lea r0, [r0+r1*2]

845 lea r2, [r2+r3*2]	845 lea r2, [r2+r3*2]

846 sub r4, 2	846 sub r4d, 2

847 jg .nextrow	847 jg .nextrow

848 REP_RET	848 REP_RET

849 %endmacro	849 %endmacro

850	850

851 INIT_MMX	851 INIT_MMX

852 FILTER_BILINEAR_SSSE3 4	852 FILTER_BILINEAR_SSSE3 4

853 INIT_XMM	853 INIT_XMM

854 FILTER_BILINEAR_SSSE3 8	854 FILTER_BILINEAR_SSSE3 8

855	855

856 cglobal put_vp8_pixels8_mmx, 5,5	856 cglobal put_vp8_pixels8_mmx, 5,5

(...skipping 478 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1335 %if %10 == 16	1335 %if %10 == 16

1336 movd [%6+%9*4], m%3	1336 movd [%6+%9*4], m%3

1337 %endif	1337 %endif

1338 movd [%7+%9], m%4	1338 movd [%7+%9], m%4

1339	1339

1340 ; write dwords 2	1340 ; write dwords 2

1341 psrldq m%1, 4	1341 psrldq m%1, 4

1342 psrldq m%2, 4	1342 psrldq m%2, 4

1343 %if %10 == 8	1343 %if %10 == 8

1344 movd [%5+%8*2], m%1	1344 movd [%5+%8*2], m%1

1345 movd %5, m%3	1345 movd %5d, m%3

1346 %endif	1346 %endif

1347 psrldq m%3, 4	1347 psrldq m%3, 4

1348 psrldq m%4, 4	1348 psrldq m%4, 4

1349 %if %10 == 16	1349 %if %10 == 16

1350 movd [%5+%8*2], m%1	1350 movd [%5+%8*2], m%1

1351 %endif	1351 %endif

1352 movd [%6+%9], m%2	1352 movd [%6+%9], m%2

1353 movd [%7+%8*2], m%3	1353 movd [%7+%8*2], m%3

1354 movd [%7+%9*2], m%4	1354 movd [%7+%9*2], m%4

1355 add %7, %9	1355 add %7, %9

(...skipping 16 matching lines...) Expand all Loading...
1372	1372

1373 ; write 4 or 8 words in the mmx/xmm registers as 8 lines	1373 ; write 4 or 8 words in the mmx/xmm registers as 8 lines

1374 ; 1 and 2 are the registers to write; these can be the same (for SSE2)	1374 ; 1 and 2 are the registers to write; these can be the same (for SSE2)

1375 ; for pre-SSE4:	1375 ; for pre-SSE4:

1376 ; 3 is a general-purpose register that we will clobber	1376 ; 3 is a general-purpose register that we will clobber

1377 ; for SSE4:	1377 ; for SSE4:

1378 ; 3 is a pointer to the destination's 5th line	1378 ; 3 is a pointer to the destination's 5th line

1379 ; 4 is a pointer to the destination's 4th line	1379 ; 4 is a pointer to the destination's 4th line

1380 ; 5/6 is -stride and +stride	1380 ; 5/6 is -stride and +stride

1381 %macro WRITE_2x4W 6	1381 %macro WRITE_2x4W 6

1382 movd %3, %1	1382 movd %3d, %1

1383 punpckhdq %1, %1	1383 punpckhdq %1, %1

1384 mov [%4+%5*4], %3w	1384 mov [%4+%5*4], %3w

1385 shr %3, 16	1385 shr %3, 16

1386 add %4, %6	1386 add %4, %6

1387 mov [%4+%5*4], %3w	1387 mov [%4+%5*4], %3w

1388	1388

1389 movd %3, %1	1389 movd %3d, %1

1390 add %4, %5	1390 add %4, %5

1391 mov [%4+%5*2], %3w	1391 mov [%4+%5*2], %3w

1392 shr %3, 16	1392 shr %3, 16

1393 mov [%4+%5 ], %3w	1393 mov [%4+%5 ], %3w

1394	1394

1395 movd %3, %2	1395 movd %3d, %2

1396 punpckhdq %2, %2	1396 punpckhdq %2, %2

1397 mov [%4 ], %3w	1397 mov [%4 ], %3w

1398 shr %3, 16	1398 shr %3, 16

1399 mov [%4+%6 ], %3w	1399 mov [%4+%6 ], %3w

1400	1400

1401 movd %3, %2	1401 movd %3d, %2

1402 add %4, %6	1402 add %4, %6

1403 mov [%4+%6 ], %3w	1403 mov [%4+%6 ], %3w

1404 shr %3, 16	1404 shr %3, 16

1405 mov [%4+%6*2], %3w	1405 mov [%4+%6*2], %3w

1406 add %4, %5	1406 add %4, %5

1407 %endmacro	1407 %endmacro

1408	1408

1409 %macro WRITE_8W_SSE2 5	1409 %macro WRITE_8W_SSE2 5

1410 movd %2, %1	1410 movd %2d, %1

1411 psrldq %1, 4	1411 psrldq %1, 4

1412 mov [%3+%4*4], %2w	1412 mov [%3+%4*4], %2w

1413 shr %2, 16	1413 shr %2, 16

1414 add %3, %5	1414 add %3, %5

1415 mov [%3+%4*4], %2w	1415 mov [%3+%4*4], %2w

1416	1416

1417 movd %2, %1	1417 movd %2d, %1

1418 psrldq %1, 4	1418 psrldq %1, 4

1419 add %3, %4	1419 add %3, %4

1420 mov [%3+%4*2], %2w	1420 mov [%3+%4*2], %2w

1421 shr %2, 16	1421 shr %2, 16

1422 mov [%3+%4 ], %2w	1422 mov [%3+%4 ], %2w

1423	1423

1424 movd %2, %1	1424 movd %2d, %1

1425 psrldq %1, 4	1425 psrldq %1, 4

1426 mov [%3 ], %2w	1426 mov [%3 ], %2w

1427 shr %2, 16	1427 shr %2, 16

1428 mov [%3+%5 ], %2w	1428 mov [%3+%5 ], %2w

1429	1429

1430 movd %2, %1	1430 movd %2d, %1

1431 add %3, %5	1431 add %3, %5

1432 mov [%3+%5 ], %2w	1432 mov [%3+%5 ], %2w

1433 shr %2, 16	1433 shr %2, 16

1434 mov [%3+%5*2], %2w	1434 mov [%3+%5*2], %2w

1435 %endmacro	1435 %endmacro

1436	1436

1437 %macro WRITE_8W_SSE4 5	1437 %macro WRITE_8W_SSE4 5

1438 pextrw [%3+%4*4], %1, 0	1438 pextrw [%3+%4*4], %1, 0

1439 pextrw [%2+%4*4], %1, 1	1439 pextrw [%2+%4*4], %1, 1

1440 pextrw [%3+%4*2], %1, 2	1440 pextrw [%3+%4*2], %1, 2

1441 pextrw [%3+%4 ], %1, 3	1441 pextrw [%3+%4 ], %1, 3

1442 pextrw [%3 ], %1, 4	1442 pextrw [%3 ], %1, 4

1443 pextrw [%2 ], %1, 5	1443 pextrw [%2 ], %1, 5

1444 pextrw [%2+%5 ], %1, 6	1444 pextrw [%2+%5 ], %1, 6

1445 pextrw [%2+%5*2], %1, 7	1445 pextrw [%2+%5*2], %1, 7

1446 %endmacro	1446 %endmacro

1447	1447

1448 %macro SPLATB_REG_MMX 2-3	1448 %macro SPLATB_REG_MMX 2-3

1449 movd %1, %2	1449 movd %1, %2d

1450 punpcklbw %1, %1	1450 punpcklbw %1, %1

1451 punpcklwd %1, %1	1451 punpcklwd %1, %1

1452 punpckldq %1, %1	1452 punpckldq %1, %1

1453 %endmacro	1453 %endmacro

1454	1454

1455 %macro SPLATB_REG_MMXEXT 2-3	1455 %macro SPLATB_REG_MMXEXT 2-3

1456 movd %1, %2	1456 movd %1, %2d

1457 punpcklbw %1, %1	1457 punpcklbw %1, %1

1458 pshufw %1, %1, 0x0	1458 pshufw %1, %1, 0x0

1459 %endmacro	1459 %endmacro

1460	1460

1461 %macro SPLATB_REG_SSE2 2-3	1461 %macro SPLATB_REG_SSE2 2-3

1462 movd %1, %2	1462 movd %1, %2d

1463 punpcklbw %1, %1	1463 punpcklbw %1, %1

1464 pshuflw %1, %1, 0x0	1464 pshuflw %1, %1, 0x0

1465 punpcklqdq %1, %1	1465 punpcklqdq %1, %1

1466 %endmacro	1466 %endmacro

1467	1467

1468 %macro SPLATB_REG_SSSE3 3	1468 %macro SPLATB_REG_SSSE3 3

1469 movd %1, %2	1469 movd %1, %2d

1470 pshufb %1, %3	1470 pshufb %1, %3

1471 %endmacro	1471 %endmacro

1472	1472

1473 %macro SIMPLE_LOOPFILTER 3	1473 %macro SIMPLE_LOOPFILTER 4

1474 cglobal vp8_%2_loop_filter_simple_%1, 3, %3	1474 cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4

1475 %if mmsize == 8 ; mmx/mmxext	1475 %if mmsize == 8 ; mmx/mmxext

1476 mov r3, 2	1476 mov r3, 2

1477 %endif	1477 %endif

1478 %ifnidn %1, sse2	1478 %ifnidn %1, sse2

1479 %if mmsize == 16	1479 %if mmsize == 16

1480 pxor m0, m0	1480 pxor m0, m0

1481 %endif	1481 %endif

1482 %endif	1482 %endif

1483 SPLATB_REG m7, r2, m0 ; splat "flim" into register	1483 SPLATB_REG m7, r2, m0 ; splat "flim" into register

1484	1484

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1605 dec r3	1605 dec r3

1606 jg .next8px	1606 jg .next8px

1607 REP_RET	1607 REP_RET

1608 %else ; sse2	1608 %else ; sse2

1609 RET	1609 RET

1610 %endif	1610 %endif

1611 %endmacro	1611 %endmacro

1612	1612

1613 INIT_MMX	1613 INIT_MMX

1614 %define SPLATB_REG SPLATB_REG_MMX	1614 %define SPLATB_REG SPLATB_REG_MMX

1615 SIMPLE_LOOPFILTER mmx, v, 4	1615 SIMPLE_LOOPFILTER mmx, v, 4, 0

1616 SIMPLE_LOOPFILTER mmx, h, 5	1616 SIMPLE_LOOPFILTER mmx, h, 5, 0

1617 %define SPLATB_REG SPLATB_REG_MMXEXT	1617 %define SPLATB_REG SPLATB_REG_MMXEXT

1618 SIMPLE_LOOPFILTER mmxext, v, 4	1618 SIMPLE_LOOPFILTER mmxext, v, 4, 0

1619 SIMPLE_LOOPFILTER mmxext, h, 5	1619 SIMPLE_LOOPFILTER mmxext, h, 5, 0

1620 INIT_XMM	1620 INIT_XMM

1621 %define SPLATB_REG SPLATB_REG_SSE2	1621 %define SPLATB_REG SPLATB_REG_SSE2

1622 %define WRITE_8W WRITE_8W_SSE2	1622 %define WRITE_8W WRITE_8W_SSE2

1623 SIMPLE_LOOPFILTER sse2, v, 3	1623 SIMPLE_LOOPFILTER sse2, v, 3, 8

1624 SIMPLE_LOOPFILTER sse2, h, 5	1624 SIMPLE_LOOPFILTER sse2, h, 5, 8

1625 %define SPLATB_REG SPLATB_REG_SSSE3	1625 %define SPLATB_REG SPLATB_REG_SSSE3

1626 SIMPLE_LOOPFILTER ssse3, v, 3	1626 SIMPLE_LOOPFILTER ssse3, v, 3, 8

1627 SIMPLE_LOOPFILTER ssse3, h, 5	1627 SIMPLE_LOOPFILTER ssse3, h, 5, 8

1628 %define WRITE_8W WRITE_8W_SSE4	1628 %define WRITE_8W WRITE_8W_SSE4

1629 SIMPLE_LOOPFILTER sse4, h, 5	1629 SIMPLE_LOOPFILTER sse4, h, 5, 8

1630	1630

1631 ;-----------------------------------------------------------------------------	1631 ;-----------------------------------------------------------------------------

1632 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,	1632 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,

1633 ; int flimE, int flimI, int hev_thr);	1633 ; int flimE, int flimI, int hev_thr);

1634 ;-----------------------------------------------------------------------------	1634 ;-----------------------------------------------------------------------------

1635	1635

1636 %macro INNER_LOOPFILTER 5	1636 %macro INNER_LOOPFILTER 5

1637 %if %4 == 8 ; chroma	1637 %if %4 == 8 ; chroma

1638 cglobal vp8_%2_loop_filter8uv_inner_%1, 6, %3, %5	1638 cglobal vp8_%2_loop_filter8uv_inner_%1, 6, %3, %5

1639 %define dst8_reg r1	1639 %define dst8_reg r1

(...skipping 1209 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2849 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15	2849 MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15

2850 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15	2850 MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15

2851	2851

2852 %define WRITE_8W WRITE_8W_SSE4	2852 %define WRITE_8W WRITE_8W_SSE4

2853 %ifdef m8	2853 %ifdef m8

2854 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15	2854 MBEDGE_LOOPFILTER sse4, h, 5, 16, 15

2855 %else	2855 %else

2856 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15	2856 MBEDGE_LOOPFILTER sse4, h, 6, 16, 15

2857 %endif	2857 %endif

2858 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15	2858 MBEDGE_LOOPFILTER sse4, h, 6, 8, 15

OLD	NEW

« no previous file with comments | « source/patched-ffmpeg-mt/libavcodec/x86/vp56dsp_init.c ('k') | source/patched-ffmpeg-mt/libavcodec/x86/vp8dsp-init.c » ('j') | no next file with comments »