OLD | NEW |
1 /* | 1 /* |
2 * DSP utils | 2 * DSP utils |
3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 * | 5 * |
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 * | 7 * |
8 * This file is part of FFmpeg. | 8 * This file is part of FFmpeg. |
9 * | 9 * |
10 * FFmpeg is free software; you can redistribute it and/or | 10 * FFmpeg is free software; you can redistribute it and/or |
(...skipping 18 matching lines...) Expand all Loading... |
29 | 29 |
30 #include "avcodec.h" | 30 #include "avcodec.h" |
31 #include "dsputil.h" | 31 #include "dsputil.h" |
32 #include "simple_idct.h" | 32 #include "simple_idct.h" |
33 #include "faandct.h" | 33 #include "faandct.h" |
34 #include "faanidct.h" | 34 #include "faanidct.h" |
35 #include "mathops.h" | 35 #include "mathops.h" |
36 #include "snow.h" | 36 #include "snow.h" |
37 #include "mpegvideo.h" | 37 #include "mpegvideo.h" |
38 #include "config.h" | 38 #include "config.h" |
39 | 39 #include "lpc.h" |
40 /* snow.c */ | 40 #include "ac3dec.h" |
41 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, in
t decomposition_count); | 41 #include "vorbis.h" |
42 | 42 #include "png.h" |
43 /* vorbis.c */ | |
44 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); | |
45 | |
46 /* ac3dec.c */ | |
47 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int
in_ch, int len); | |
48 | |
49 /* lpc.c */ | |
50 void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *auto
c); | |
51 | |
52 /* pngdec.c */ | |
53 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w
, int bpp); | |
54 | |
55 /* eaidct.c */ | |
56 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | |
57 | 43 |
58 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | 44 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
59 uint32_t ff_squareTbl[512] = {0, }; | 45 uint32_t ff_squareTbl[512] = {0, }; |
60 | 46 |
61 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native a
rithmetic size | 47 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native a
rithmetic size |
62 #define pb_7f (~0UL/255 * 0x7f) | 48 #define pb_7f (~0UL/255 * 0x7f) |
63 #define pb_80 (~0UL/255 * 0x80) | 49 #define pb_80 (~0UL/255 * 0x80) |
64 | 50 |
65 const uint8_t ff_zigzag_direct[64] = { | 51 const uint8_t ff_zigzag_direct[64] = { |
66 0, 1, 8, 16, 9, 2, 3, 10, | 52 0, 1, 8, 16, 9, 2, 3, 10, |
(...skipping 13 matching lines...) Expand all Loading... |
80 17, 25, 32, 40, 48, 56, 33, 41, | 66 17, 25, 32, 40, 48, 56, 33, 41, |
81 18, 26, 3, 11, 4, 12, 19, 27, | 67 18, 26, 3, 11, 4, 12, 19, 27, |
82 34, 42, 49, 57, 50, 58, 35, 43, | 68 34, 42, 49, 57, 50, 58, 35, 43, |
83 20, 28, 5, 13, 6, 14, 21, 29, | 69 20, 28, 5, 13, 6, 14, 21, 29, |
84 36, 44, 51, 59, 52, 60, 37, 45, | 70 36, 44, 51, 59, 52, 60, 37, 45, |
85 22, 30, 7, 15, 23, 31, 38, 46, | 71 22, 30, 7, 15, 23, 31, 38, 46, |
86 53, 61, 54, 62, 39, 47, 55, 63, | 72 53, 61, 54, 62, 39, 47, 55, 63, |
87 }; | 73 }; |
88 | 74 |
89 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | 75 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
90 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64]; | 76 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64]; |
91 | 77 |
92 const uint8_t ff_alternate_horizontal_scan[64] = { | 78 const uint8_t ff_alternate_horizontal_scan[64] = { |
93 0, 1, 2, 3, 8, 9, 16, 17, | 79 0, 1, 2, 3, 8, 9, 16, 17, |
94 10, 11, 4, 5, 6, 7, 15, 14, | 80 10, 11, 4, 5, 6, 7, 15, 14, |
95 13, 12, 19, 18, 24, 25, 32, 33, | 81 13, 12, 19, 18, 24, 25, 32, 33, |
96 26, 27, 20, 21, 22, 23, 28, 29, | 82 26, 27, 20, 21, 22, 23, 28, 29, |
97 30, 31, 34, 35, 40, 41, 48, 49, | 83 30, 31, 34, 35, 40, 41, 48, 49, |
98 42, 43, 36, 37, 38, 39, 44, 45, | 84 42, 43, 36, 37, 38, 39, 44, 45, |
99 46, 47, 50, 51, 56, 57, 58, 59, | 85 46, 47, 50, 51, 56, 57, 58, 59, |
100 52, 53, 54, 55, 60, 61, 62, 63, | 86 52, 53, 54, 55, 60, 61, 62, 63, |
(...skipping 553 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
654 *pixels = 255; | 640 *pixels = 255; |
655 else | 641 else |
656 *pixels = (uint8_t)(*block + 128); | 642 *pixels = (uint8_t)(*block + 128); |
657 block++; | 643 block++; |
658 pixels++; | 644 pixels++; |
659 } | 645 } |
660 pixels += (line_size - 8); | 646 pixels += (line_size - 8); |
661 } | 647 } |
662 } | 648 } |
663 | 649 |
| 650 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixe
ls, |
| 651 int line_size) |
| 652 { |
| 653 int i; |
| 654 |
| 655 /* read the pixels */ |
| 656 for(i=0;i<8;i++) { |
| 657 pixels[0] = block[0]; |
| 658 pixels[1] = block[1]; |
| 659 pixels[2] = block[2]; |
| 660 pixels[3] = block[3]; |
| 661 pixels[4] = block[4]; |
| 662 pixels[5] = block[5]; |
| 663 pixels[6] = block[6]; |
| 664 pixels[7] = block[7]; |
| 665 |
| 666 pixels += line_size; |
| 667 block += 8; |
| 668 } |
| 669 } |
| 670 |
664 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | 671 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
665 int line_size) | 672 int line_size) |
666 { | 673 { |
667 int i; | 674 int i; |
668 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 675 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
669 | 676 |
670 /* read the pixels */ | 677 /* read the pixels */ |
671 for(i=0;i<8;i++) { | 678 for(i=0;i<8;i++) { |
672 pixels[0] = cm[pixels[0] + block[0]]; | 679 pixels[0] = cm[pixels[0] + block[0]]; |
673 pixels[1] = cm[pixels[1] + block[1]]; | 680 pixels[1] = cm[pixels[1] + block[1]]; |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
745 } | 752 } |
746 | 753 |
747 static int sum_abs_dctelem_c(DCTELEM *block) | 754 static int sum_abs_dctelem_c(DCTELEM *block) |
748 { | 755 { |
749 int sum=0, i; | 756 int sum=0, i; |
750 for(i=0; i<64; i++) | 757 for(i=0; i<64; i++) |
751 sum+= FFABS(block[i]); | 758 sum+= FFABS(block[i]); |
752 return sum; | 759 return sum; |
753 } | 760 } |
754 | 761 |
/**
 * Fill a 16-byte-wide block with a constant value.
 *
 * @param block     first byte of the top row
 * @param value     byte stored into every position of the block
 * @param line_size offset in bytes from one row to the next
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;

    for (; h > 0; h--) {
        memset(row, value, 16);
        row += line_size;
    }
}
| 771 |
/**
 * Fill an 8-byte-wide block with a constant value.
 *
 * @param block     first byte of the top row
 * @param value     byte stored into every position of the block
 * @param line_size offset in bytes from one row to the next
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;

    for (; h > 0; h--) {
        memset(row, value, 8);
        row += line_size;
    }
}
| 781 |
/**
 * Upscale an 8x8 block to 16x16 by pixel replication.
 *
 * Every source pixel is copied into a 2x2 square of the destination:
 * source row j fills destination rows 2*j and 2*j+1, and source column i
 * fills destination columns 2*i and 2*i+1.
 *
 * The destination is written one byte at a time. This produces the same
 * bytes as storing (pixel * 0x0101) as a 16-bit word, but needs no
 * uint8_t* -> uint16_t* pointer conversion, does not depend on host byte
 * order and places no alignment requirement on dst.
 *
 * @param src      64 source pixels, 8 per row, rows stored contiguously
 * @param dst      top-left byte of the 16x16 destination area
 * @param linesize offset in bytes between successive destination rows
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;

    for (j = 0; j < 8; j++) {
        /* the two destination rows produced by source row j */
        uint8_t *row0 = dst;
        uint8_t *row1 = dst + linesize;

        for (i = 0; i < 8; i++) {
            const uint8_t v = src[i];

            row0[2 * i] = row0[2 * i + 1] = v;
            row1[2 * i] = row1[2 * i + 1] = v;
        }
        src += 8;
        dst += 2 * linesize;
    }
}
| 797 |
755 #if 0 | 798 #if 0 |
756 | 799 |
757 #define PIXOP2(OPNAME, OP) \ | 800 #define PIXOP2(OPNAME, OP) \ |
758 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
ze, int h)\ | 801 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
ze, int h)\ |
759 {\ | 802 {\ |
760 int i;\ | 803 int i;\ |
761 for(i=0; i<h; i++){\ | 804 for(i=0; i<h; i++){\ |
762 OP(*((uint64_t*)block), AV_RN64(pixels));\ | 805 OP(*((uint64_t*)block), AV_RN64(pixels));\ |
763 pixels+=line_size;\ | 806 pixels+=line_size;\ |
764 block +=line_size;\ | 807 block +=line_size;\ |
(...skipping 1974 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2739 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | 2782 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; |
2740 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | 2783 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; |
2741 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | 2784 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; |
2742 dst+=dstStride; | 2785 dst+=dstStride; |
2743 src+=srcStride; | 2786 src+=srcStride; |
2744 } | 2787 } |
2745 } | 2788 } |
2746 | 2789 |
#if CONFIG_CAVS_DECODER
/*
 * AVS specific.
 *
 * The mc00 entry points simply forward to the generic put/avg pixel
 * helpers, passing a fixed row count equal to the block size (8 or 16).
 */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}

void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}

void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}

void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
2764 | 2805 |
2765 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); | |
2766 | |
#if CONFIG_VC1_DECODER
/*
 * VC-1 specific.
 *
 * The mc00 entry points forward to the generic 8-wide put/avg pixel
 * helpers with a row count of 8. The rnd argument is accepted but not
 * used by these two functions.
 */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    put_pixels8_c(dst, src, stride, 8);
}

void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
2778 | 2815 |
2779 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); | |
2780 | |
2781 /* H264 specific */ | 2816 /* H264 specific */ |
2782 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); | 2817 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); |
2783 | 2818 |
2784 #if CONFIG_RV30_DECODER | |
2785 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); | |
2786 #endif /* CONFIG_RV30_DECODER */ | |
2787 | |
#if CONFIG_RV40_DECODER
/*
 * RV40: the mc33 entry points forward to the generic xy2 put/avg pixel
 * helpers, passing a row count equal to the block size (16 or 8).
 */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
2804 | 2833 |
2805 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
srcStride, int w){ | 2834 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
srcStride, int w){ |
2806 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 2835 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2807 int i; | 2836 int i; |
2808 | 2837 |
2809 for(i=0; i<w; i++){ | 2838 for(i=0; i<w; i++){ |
2810 const int src_1= src[ -srcStride]; | 2839 const int src_1= src[ -srcStride]; |
2811 const int src0 = src[0 ]; | 2840 const int src0 = src[0 ]; |
2812 const int src1 = src[ srcStride]; | 2841 const int src1 = src[ srcStride]; |
(...skipping 973 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3786 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); | 3815 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); |
3787 } | 3816 } |
3788 | 3817 |
3789 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean | 3818 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean |
3790 | 3819 |
3791 return sum; | 3820 return sum; |
3792 } | 3821 } |
3793 | 3822 |
3794 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ | 3823 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ |
3795 MpegEncContext * const s= (MpegEncContext *)c; | 3824 MpegEncContext * const s= (MpegEncContext *)c; |
3796 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3825 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3797 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3798 | 3826 |
3799 assert(h==8); | 3827 assert(h==8); |
3800 | 3828 |
3801 s->dsp.diff_pixels(temp, src1, src2, stride); | 3829 s->dsp.diff_pixels(temp, src1, src2, stride); |
3802 s->dsp.fdct(temp); | 3830 s->dsp.fdct(temp); |
3803 return s->dsp.sum_abs_dctelem(temp); | 3831 return s->dsp.sum_abs_dctelem(temp); |
3804 } | 3832 } |
3805 | 3833 |
3806 #if CONFIG_GPL | 3834 #if CONFIG_GPL |
3807 #define DCT8_1D {\ | 3835 #define DCT8_1D {\ |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3851 for( i = 0; i < 8; i++ ) | 3879 for( i = 0; i < 8; i++ ) |
3852 DCT8_1D | 3880 DCT8_1D |
3853 #undef SRC | 3881 #undef SRC |
3854 #undef DST | 3882 #undef DST |
3855 return sum; | 3883 return sum; |
3856 } | 3884 } |
3857 #endif | 3885 #endif |
3858 | 3886 |
3859 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ | 3887 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ |
3860 MpegEncContext * const s= (MpegEncContext *)c; | 3888 MpegEncContext * const s= (MpegEncContext *)c; |
3861 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3889 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3862 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3863 int sum=0, i; | 3890 int sum=0, i; |
3864 | 3891 |
3865 assert(h==8); | 3892 assert(h==8); |
3866 | 3893 |
3867 s->dsp.diff_pixels(temp, src1, src2, stride); | 3894 s->dsp.diff_pixels(temp, src1, src2, stride); |
3868 s->dsp.fdct(temp); | 3895 s->dsp.fdct(temp); |
3869 | 3896 |
3870 for(i=0; i<64; i++) | 3897 for(i=0; i<64; i++) |
3871 sum= FFMAX(sum, FFABS(temp[i])); | 3898 sum= FFMAX(sum, FFABS(temp[i])); |
3872 | 3899 |
3873 return sum; | 3900 return sum; |
3874 } | 3901 } |
3875 | 3902 |
3876 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
rc2, int stride, int h){ | 3903 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
rc2, int stride, int h){ |
3877 MpegEncContext * const s= (MpegEncContext *)c; | 3904 MpegEncContext * const s= (MpegEncContext *)c; |
3878 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8]; | 3905 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); |
3879 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 3906 DCTELEM * const bak = temp+64; |
3880 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | |
3881 int sum=0, i; | 3907 int sum=0, i; |
3882 | 3908 |
3883 assert(h==8); | 3909 assert(h==8); |
3884 s->mb_intra=0; | 3910 s->mb_intra=0; |
3885 | 3911 |
3886 s->dsp.diff_pixels(temp, src1, src2, stride); | 3912 s->dsp.diff_pixels(temp, src1, src2, stride); |
3887 | 3913 |
3888 memcpy(bak, temp, 64*sizeof(DCTELEM)); | 3914 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
3889 | 3915 |
3890 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s
->qscale, &i); | 3916 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s
->qscale, &i); |
3891 s->dct_unquantize_inter(s, temp, 0, s->qscale); | 3917 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
3892 ff_simple_idct(temp); //FIXME | 3918 ff_simple_idct(temp); //FIXME |
3893 | 3919 |
3894 for(i=0; i<64; i++) | 3920 for(i=0; i<64; i++) |
3895 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | 3921 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); |
3896 | 3922 |
3897 return sum; | 3923 return sum; |
3898 } | 3924 } |
3899 | 3925 |
3900 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
stride, int h){ | 3926 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
stride, int h){ |
3901 MpegEncContext * const s= (MpegEncContext *)c; | 3927 MpegEncContext * const s= (MpegEncContext *)c; |
3902 const uint8_t *scantable= s->intra_scantable.permutated; | 3928 const uint8_t *scantable= s->intra_scantable.permutated; |
3903 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3929 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3904 DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8]; | 3930 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); |
3905 DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8]; | 3931 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); |
3906 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3907 uint8_t * const lsrc1 = (uint8_t*)aligned_src1; | |
3908 uint8_t * const lsrc2 = (uint8_t*)aligned_src2; | |
3909 int i, last, run, bits, level, distortion, start_i; | 3932 int i, last, run, bits, level, distortion, start_i; |
3910 const int esc_length= s->ac_esc_length; | 3933 const int esc_length= s->ac_esc_length; |
3911 uint8_t * length; | 3934 uint8_t * length; |
3912 uint8_t * last_length; | 3935 uint8_t * last_length; |
3913 | 3936 |
3914 assert(h==8); | 3937 assert(h==8); |
3915 | 3938 |
3916 copy_block8(lsrc1, src1, 8, stride, 8); | 3939 copy_block8(lsrc1, src1, 8, stride, 8); |
3917 copy_block8(lsrc2, src2, 8, stride, 8); | 3940 copy_block8(lsrc2, src2, 8, stride, 8); |
3918 | 3941 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3972 s->dsp.idct_add(lsrc2, 8, temp); | 3995 s->dsp.idct_add(lsrc2, 8, temp); |
3973 | 3996 |
3974 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | 3997 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); |
3975 | 3998 |
3976 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | 3999 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); |
3977 } | 4000 } |
3978 | 4001 |
3979 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
t stride, int h){ | 4002 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
t stride, int h){ |
3980 MpegEncContext * const s= (MpegEncContext *)c; | 4003 MpegEncContext * const s= (MpegEncContext *)c; |
3981 const uint8_t *scantable= s->intra_scantable.permutated; | 4004 const uint8_t *scantable= s->intra_scantable.permutated; |
3982 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 4005 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3983 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3984 int i, last, run, bits, level, start_i; | 4006 int i, last, run, bits, level, start_i; |
3985 const int esc_length= s->ac_esc_length; | 4007 const int esc_length= s->ac_esc_length; |
3986 uint8_t * length; | 4008 uint8_t * length; |
3987 uint8_t * last_length; | 4009 uint8_t * last_length; |
3988 | 4010 |
3989 assert(h==8); | 4011 assert(h==8); |
3990 | 4012 |
3991 s->dsp.diff_pixels(temp, src1, src2, stride); | 4013 s->dsp.diff_pixels(temp, src1, src2, stride); |
3992 | 4014 |
3993 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXM
E*/, s->qscale, &i); | 4015 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXM
E*/, s->qscale, &i); |
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4478 | 4500 |
4479 for(i=0;i<512;i++) { | 4501 for(i=0;i<512;i++) { |
4480 ff_squareTbl[i] = (i - 256) * (i - 256); | 4502 ff_squareTbl[i] = (i - 256) * (i - 256); |
4481 } | 4503 } |
4482 | 4504 |
4483 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | 4505 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
4484 } | 4506 } |
4485 | 4507 |
4486 int ff_check_alignment(void){ | 4508 int ff_check_alignment(void){ |
4487 static int did_fail=0; | 4509 static int did_fail=0; |
4488 DECLARE_ALIGNED_16(int, aligned); | 4510 DECLARE_ALIGNED(16, int, aligned); |
4489 | 4511 |
4490 if((intptr_t)&aligned & 15){ | 4512 if((intptr_t)&aligned & 15){ |
4491 if(!did_fail){ | 4513 if(!did_fail){ |
4492 #if HAVE_MMX || HAVE_ALTIVEC | 4514 #if HAVE_MMX || HAVE_ALTIVEC |
4493 av_log(NULL, AV_LOG_ERROR, | 4515 av_log(NULL, AV_LOG_ERROR, |
4494 "Compiler did not align stack variables. Libavcodec has been mis
compiled\n" | 4516 "Compiler did not align stack variables. Libavcodec has been mis
compiled\n" |
4495 "and may be very slow or crash. This is not a bug in libavcodec,
\n" | 4517 "and may be very slow or crash. This is not a bug in libavcodec,
\n" |
4496 "but in the compiler. You may try recompiling using gcc >= 4.2.\
n" | 4518 "but in the compiler. You may try recompiling using gcc >= 4.2.\
n" |
4497 "Do not report crashes to FFmpeg developers.\n"); | 4519 "Do not report crashes to FFmpeg developers.\n"); |
4498 #endif | 4520 #endif |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4562 c->idct = ff_wmv2_idct_c; | 4584 c->idct = ff_wmv2_idct_c; |
4563 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4585 c->idct_permutation_type= FF_NO_IDCT_PERM; |
4564 }else if(avctx->idct_algo==FF_IDCT_FAAN){ | 4586 }else if(avctx->idct_algo==FF_IDCT_FAAN){ |
4565 c->idct_put= ff_faanidct_put; | 4587 c->idct_put= ff_faanidct_put; |
4566 c->idct_add= ff_faanidct_add; | 4588 c->idct_add= ff_faanidct_add; |
4567 c->idct = ff_faanidct; | 4589 c->idct = ff_faanidct; |
4568 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4590 c->idct_permutation_type= FF_NO_IDCT_PERM; |
4569 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { | 4591 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { |
4570 c->idct_put= ff_ea_idct_put_c; | 4592 c->idct_put= ff_ea_idct_put_c; |
4571 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4593 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4594 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { |
| 4595 c->idct = ff_bink_idct_c; |
| 4596 c->idct_add = ff_bink_idct_add_c; |
| 4597 c->idct_put = ff_bink_idct_put_c; |
| 4598 c->idct_permutation_type = FF_NO_IDCT_PERM; |
4572 }else{ //accurate/default | 4599 }else{ //accurate/default |
4573 c->idct_put= ff_simple_idct_put; | 4600 c->idct_put= ff_simple_idct_put; |
4574 c->idct_add= ff_simple_idct_add; | 4601 c->idct_add= ff_simple_idct_add; |
4575 c->idct = ff_simple_idct; | 4602 c->idct = ff_simple_idct; |
4576 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4603 c->idct_permutation_type= FF_NO_IDCT_PERM; |
4577 } | 4604 } |
4578 } | 4605 } |
4579 | 4606 |
4580 if (CONFIG_H264_DECODER) { | 4607 if (CONFIG_H264_DECODER) { |
4581 c->h264_idct_add= ff_h264_idct_add_c; | 4608 c->h264_idct_add= ff_h264_idct_add_c; |
4582 c->h264_idct8_add= ff_h264_idct8_add_c; | 4609 c->h264_idct8_add= ff_h264_idct8_add_c; |
4583 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; | 4610 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; |
4584 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; | 4611 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; |
4585 c->h264_idct_add16 = ff_h264_idct_add16_c; | 4612 c->h264_idct_add16 = ff_h264_idct_add16_c; |
4586 c->h264_idct8_add4 = ff_h264_idct8_add4_c; | 4613 c->h264_idct8_add4 = ff_h264_idct8_add4_c; |
4587 c->h264_idct_add8 = ff_h264_idct_add8_c; | 4614 c->h264_idct_add8 = ff_h264_idct_add8_c; |
4588 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; | 4615 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; |
4589 } | 4616 } |
4590 | 4617 |
4591 c->get_pixels = get_pixels_c; | 4618 c->get_pixels = get_pixels_c; |
4592 c->diff_pixels = diff_pixels_c; | 4619 c->diff_pixels = diff_pixels_c; |
4593 c->put_pixels_clamped = put_pixels_clamped_c; | 4620 c->put_pixels_clamped = put_pixels_clamped_c; |
4594 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | 4621 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
| 4622 c->put_pixels_nonclamped = put_pixels_nonclamped_c; |
4595 c->add_pixels_clamped = add_pixels_clamped_c; | 4623 c->add_pixels_clamped = add_pixels_clamped_c; |
4596 c->add_pixels8 = add_pixels8_c; | 4624 c->add_pixels8 = add_pixels8_c; |
4597 c->add_pixels4 = add_pixels4_c; | 4625 c->add_pixels4 = add_pixels4_c; |
4598 c->sum_abs_dctelem = sum_abs_dctelem_c; | 4626 c->sum_abs_dctelem = sum_abs_dctelem_c; |
4599 c->gmc1 = gmc1_c; | 4627 c->gmc1 = gmc1_c; |
4600 c->gmc = ff_gmc_c; | 4628 c->gmc = ff_gmc_c; |
4601 c->clear_block = clear_block_c; | 4629 c->clear_block = clear_block_c; |
4602 c->clear_blocks = clear_blocks_c; | 4630 c->clear_blocks = clear_blocks_c; |
4603 c->pix_sum = pix_sum_c; | 4631 c->pix_sum = pix_sum_c; |
4604 c->pix_norm1 = pix_norm1_c; | 4632 c->pix_norm1 = pix_norm1_c; |
4605 | 4633 |
| 4634 c->fill_block_tab[0] = fill_block16_c; |
| 4635 c->fill_block_tab[1] = fill_block8_c; |
| 4636 c->scale_block = scale_block_c; |
| 4637 |
4606 /* TODO [0] 16 [1] 8 */ | 4638 /* TODO [0] 16 [1] 8 */ |
4607 c->pix_abs[0][0] = pix_abs16_c; | 4639 c->pix_abs[0][0] = pix_abs16_c; |
4608 c->pix_abs[0][1] = pix_abs16_x2_c; | 4640 c->pix_abs[0][1] = pix_abs16_x2_c; |
4609 c->pix_abs[0][2] = pix_abs16_y2_c; | 4641 c->pix_abs[0][2] = pix_abs16_y2_c; |
4610 c->pix_abs[0][3] = pix_abs16_xy2_c; | 4642 c->pix_abs[0][3] = pix_abs16_xy2_c; |
4611 c->pix_abs[1][0] = pix_abs8_c; | 4643 c->pix_abs[1][0] = pix_abs8_c; |
4612 c->pix_abs[1][1] = pix_abs8_x2_c; | 4644 c->pix_abs[1][1] = pix_abs8_x2_c; |
4613 c->pix_abs[1][2] = pix_abs8_y2_c; | 4645 c->pix_abs[1][2] = pix_abs8_y2_c; |
4614 c->pix_abs[1][3] = pix_abs8_xy2_c; | 4646 c->pix_abs[1][3] = pix_abs8_xy2_c; |
4615 | 4647 |
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4923 break; | 4955 break; |
4924 case FF_SSE2_IDCT_PERM: | 4956 case FF_SSE2_IDCT_PERM: |
4925 for(i=0; i<64; i++) | 4957 for(i=0; i<64; i++) |
4926 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; | 4958 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; |
4927 break; | 4959 break; |
4928 default: | 4960 default: |
4929 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"
); | 4961 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"
); |
4930 } | 4962 } |
4931 } | 4963 } |
4932 | 4964 |
OLD | NEW |