OLD | NEW |
1 /* | 1 /* |
2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder. | 2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder. |
3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de> | 3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de> |
4 * | 4 * |
5 * MMX-optimized DSP functions, based on H.264 optimizations by | 5 * MMX-optimized DSP functions, based on H.264 optimizations by |
6 * Michael Niedermayer and Loren Merritt | 6 * Michael Niedermayer and Loren Merritt |
7 * | 7 * |
8 * This file is part of FFmpeg. | 8 * This file is part of FFmpeg. |
9 * | 9 * |
10 * FFmpeg is free software; you can redistribute it and/or | 10 * FFmpeg is free software; you can redistribute it and/or |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
106 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */ | 106 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */ |
107 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */ | 107 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */ |
108 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */ | 108 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */ |
109 :: "r"(block), "m"(bias) | 109 :: "r"(block), "m"(bias) |
110 ); | 110 ); |
111 } | 111 } |
112 | 112 |
113 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | 113 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) |
114 { | 114 { |
115 int i; | 115 int i; |
116 DECLARE_ALIGNED_8(int16_t, b2)[64]; | 116 DECLARE_ALIGNED(8, int16_t, b2)[64]; |
117 | 117 |
118 for(i=0; i<2; i++){ | 118 for(i=0; i<2; i++){ |
119 DECLARE_ALIGNED_8(uint64_t, tmp); | 119 DECLARE_ALIGNED(8, uint64_t, tmp); |
120 | 120 |
121 cavs_idct8_1d(block+4*i, ff_pw_4); | 121 cavs_idct8_1d(block+4*i, ff_pw_4); |
122 | 122 |
123 __asm__ volatile( | 123 __asm__ volatile( |
124 "psraw $3, %%mm7 \n\t" | 124 "psraw $3, %%mm7 \n\t" |
125 "psraw $3, %%mm6 \n\t" | 125 "psraw $3, %%mm6 \n\t" |
126 "psraw $3, %%mm5 \n\t" | 126 "psraw $3, %%mm5 \n\t" |
127 "psraw $3, %%mm4 \n\t" | 127 "psraw $3, %%mm4 \n\t" |
128 "psraw $3, %%mm3 \n\t" | 128 "psraw $3, %%mm3 \n\t" |
129 "psraw $3, %%mm2 \n\t" | 129 "psraw $3, %%mm2 \n\t" |
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
430 | 430 |
431 CAVS_MC(put_, 8, 3dnow) | 431 CAVS_MC(put_, 8, 3dnow) |
432 CAVS_MC(put_, 16,3dnow) | 432 CAVS_MC(put_, 16,3dnow) |
433 CAVS_MC(avg_, 8, 3dnow) | 433 CAVS_MC(avg_, 8, 3dnow) |
434 CAVS_MC(avg_, 16,3dnow) | 434 CAVS_MC(avg_, 16,3dnow) |
435 CAVS_MC(put_, 8, mmx2) | 435 CAVS_MC(put_, 8, mmx2) |
436 CAVS_MC(put_, 16,mmx2) | 436 CAVS_MC(put_, 16,mmx2) |
437 CAVS_MC(avg_, 8, mmx2) | 437 CAVS_MC(avg_, 8, mmx2) |
438 CAVS_MC(avg_, 16,mmx2) | 438 CAVS_MC(avg_, 16,mmx2) |
439 | 439 |
440 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
441 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
442 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
443 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); | |
444 | |
445 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) { | 440 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) { |
446 #define dspfunc(PFX, IDX, NUM) \ | 441 #define dspfunc(PFX, IDX, NUM) \ |
447 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ | 442 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ |
448 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \ | 443 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \ |
449 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \ | 444 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \ |
450 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \ | 445 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \ |
451 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \ | 446 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \ |
452 | 447 |
453 dspfunc(put_cavs_qpel, 0, 16); | 448 dspfunc(put_cavs_qpel, 0, 16); |
454 dspfunc(put_cavs_qpel, 1, 8); | 449 dspfunc(put_cavs_qpel, 1, 8); |
(...skipping 11 matching lines...) Expand all Loading... |
466 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \ | 461 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \ |
467 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \ | 462 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \ |
468 | 463 |
469 dspfunc(put_cavs_qpel, 0, 16); | 464 dspfunc(put_cavs_qpel, 0, 16); |
470 dspfunc(put_cavs_qpel, 1, 8); | 465 dspfunc(put_cavs_qpel, 1, 8); |
471 dspfunc(avg_cavs_qpel, 0, 16); | 466 dspfunc(avg_cavs_qpel, 0, 16); |
472 dspfunc(avg_cavs_qpel, 1, 8); | 467 dspfunc(avg_cavs_qpel, 1, 8); |
473 #undef dspfunc | 468 #undef dspfunc |
474 c->cavs_idct8_add = cavs_idct8_add_mmx; | 469 c->cavs_idct8_add = cavs_idct8_add_mmx; |
475 } | 470 } |
OLD | NEW |