Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(216)

Side by Side Diff: patched-ffmpeg-mt/libavcodec/dsputil.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * DSP utils 2 * DSP utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * 5 *
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7 * 7 *
8 * This file is part of FFmpeg. 8 * This file is part of FFmpeg.
9 * 9 *
10 * FFmpeg is free software; you can redistribute it and/or 10 * FFmpeg is free software; you can redistribute it and/or
(...skipping 18 matching lines...) Expand all
29 29
30 #include "avcodec.h" 30 #include "avcodec.h"
31 #include "dsputil.h" 31 #include "dsputil.h"
32 #include "simple_idct.h" 32 #include "simple_idct.h"
33 #include "faandct.h" 33 #include "faandct.h"
34 #include "faanidct.h" 34 #include "faanidct.h"
35 #include "mathops.h" 35 #include "mathops.h"
36 #include "snow.h" 36 #include "snow.h"
37 #include "mpegvideo.h" 37 #include "mpegvideo.h"
38 #include "config.h" 38 #include "config.h"
39 39 #include "lpc.h"
40 /* snow.c */ 40 #include "ac3dec.h"
41 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); 41 #include "vorbis.h"
42 42 #include "png.h"
43 /* vorbis.c */
44 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
45
46 /* ac3dec.c */
47 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
48
49 /* lpc.c */
50 void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
51
52 /* pngdec.c */
53 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
54
55 /* eaidct.c */
56 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
57 43
58 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; 44 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
59 uint32_t ff_squareTbl[512] = {0, }; 45 uint32_t ff_squareTbl[512] = {0, };
60 46
61 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size 47 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
62 #define pb_7f (~0UL/255 * 0x7f) 48 #define pb_7f (~0UL/255 * 0x7f)
63 #define pb_80 (~0UL/255 * 0x80) 49 #define pb_80 (~0UL/255 * 0x80)
64 50
65 const uint8_t ff_zigzag_direct[64] = { 51 const uint8_t ff_zigzag_direct[64] = {
66 0, 1, 8, 16, 9, 2, 3, 10, 52 0, 1, 8, 16, 9, 2, 3, 10,
(...skipping 13 matching lines...) Expand all
80 17, 25, 32, 40, 48, 56, 33, 41, 66 17, 25, 32, 40, 48, 56, 33, 41,
81 18, 26, 3, 11, 4, 12, 19, 27, 67 18, 26, 3, 11, 4, 12, 19, 27,
82 34, 42, 49, 57, 50, 58, 35, 43, 68 34, 42, 49, 57, 50, 58, 35, 43,
83 20, 28, 5, 13, 6, 14, 21, 29, 69 20, 28, 5, 13, 6, 14, 21, 29,
84 36, 44, 51, 59, 52, 60, 37, 45, 70 36, 44, 51, 59, 52, 60, 37, 45,
85 22, 30, 7, 15, 23, 31, 38, 46, 71 22, 30, 7, 15, 23, 31, 38, 46,
86 53, 61, 54, 62, 39, 47, 55, 63, 72 53, 61, 54, 62, 39, 47, 55, 63,
87 }; 73 };
88 74
89 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ 75 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
90 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64]; 76 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
91 77
92 const uint8_t ff_alternate_horizontal_scan[64] = { 78 const uint8_t ff_alternate_horizontal_scan[64] = {
93 0, 1, 2, 3, 8, 9, 16, 17, 79 0, 1, 2, 3, 8, 9, 16, 17,
94 10, 11, 4, 5, 6, 7, 15, 14, 80 10, 11, 4, 5, 6, 7, 15, 14,
95 13, 12, 19, 18, 24, 25, 32, 33, 81 13, 12, 19, 18, 24, 25, 32, 33,
96 26, 27, 20, 21, 22, 23, 28, 29, 82 26, 27, 20, 21, 22, 23, 28, 29,
97 30, 31, 34, 35, 40, 41, 48, 49, 83 30, 31, 34, 35, 40, 41, 48, 49,
98 42, 43, 36, 37, 38, 39, 44, 45, 84 42, 43, 36, 37, 38, 39, 44, 45,
99 46, 47, 50, 51, 56, 57, 58, 59, 85 46, 47, 50, 51, 56, 57, 58, 59,
100 52, 53, 54, 55, 60, 61, 62, 63, 86 52, 53, 54, 55, 60, 61, 62, 63,
(...skipping 553 matching lines...) Expand 10 before | Expand all | Expand 10 after
654 *pixels = 255; 640 *pixels = 255;
655 else 641 else
656 *pixels = (uint8_t)(*block + 128); 642 *pixels = (uint8_t)(*block + 128);
657 block++; 643 block++;
658 pixels++; 644 pixels++;
659 } 645 }
660 pixels += (line_size - 8); 646 pixels += (line_size - 8);
661 } 647 }
662 } 648 }
663 649
650 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
651 int line_size)
652 {
653 int i;
654
655 /* read the pixels */
656 for(i=0;i<8;i++) {
657 pixels[0] = block[0];
658 pixels[1] = block[1];
659 pixels[2] = block[2];
660 pixels[3] = block[3];
661 pixels[4] = block[4];
662 pixels[5] = block[5];
663 pixels[6] = block[6];
664 pixels[7] = block[7];
665
666 pixels += line_size;
667 block += 8;
668 }
669 }
670
664 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 671 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
665 int line_size) 672 int line_size)
666 { 673 {
667 int i; 674 int i;
668 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 675 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
669 676
670 /* read the pixels */ 677 /* read the pixels */
671 for(i=0;i<8;i++) { 678 for(i=0;i<8;i++) {
672 pixels[0] = cm[pixels[0] + block[0]]; 679 pixels[0] = cm[pixels[0] + block[0]];
673 pixels[1] = cm[pixels[1] + block[1]]; 680 pixels[1] = cm[pixels[1] + block[1]];
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
745 } 752 }
746 753
747 static int sum_abs_dctelem_c(DCTELEM *block) 754 static int sum_abs_dctelem_c(DCTELEM *block)
748 { 755 {
749 int sum=0, i; 756 int sum=0, i;
750 for(i=0; i<64; i++) 757 for(i=0; i<64; i++)
751 sum+= FFABS(block[i]); 758 sum+= FFABS(block[i]);
752 return sum; 759 return sum;
753 } 760 }
754 761
762 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
763 {
764 int i;
765
766 for (i = 0; i < h; i++) {
767 memset(block, value, 16);
768 block += line_size;
769 }
770 }
771
772 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
773 {
774 int i;
775
776 for (i = 0; i < h; i++) {
777 memset(block, value, 8);
778 block += line_size;
779 }
780 }
781
782 static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
783 {
784 int i, j;
785 uint16_t *dst1 = dst;
786 uint16_t *dst2 = dst + linesize;
787
788 for (j = 0; j < 8; j++) {
789 for (i = 0; i < 8; i++) {
790 dst1[i] = dst2[i] = src[i] * 0x0101;
791 }
792 src += 8;
793 dst1 += linesize;
794 dst2 += linesize;
795 }
796 }
797
755 #if 0 798 #if 0
756 799
757 #define PIXOP2(OPNAME, OP) \ 800 #define PIXOP2(OPNAME, OP) \
758 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 801 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
759 {\ 802 {\
760 int i;\ 803 int i;\
761 for(i=0; i<h; i++){\ 804 for(i=0; i<h; i++){\
762 OP(*((uint64_t*)block), AV_RN64(pixels));\ 805 OP(*((uint64_t*)block), AV_RN64(pixels));\
763 pixels+=line_size;\ 806 pixels+=line_size;\
764 block +=line_size;\ 807 block +=line_size;\
(...skipping 1974 matching lines...) Expand 10 before | Expand all | Expand 10 after
2739 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; 2782 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2740 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; 2783 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2741 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; 2784 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2742 dst+=dstStride; 2785 dst+=dstStride;
2743 src+=srcStride; 2786 src+=srcStride;
2744 } 2787 }
2745 } 2788 }
2746 2789
2747 #if CONFIG_CAVS_DECODER 2790 #if CONFIG_CAVS_DECODER
2748 /* AVS specific */ 2791 /* AVS specific */
2749 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
2750
2751 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2792 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2752 put_pixels8_c(dst, src, stride, 8); 2793 put_pixels8_c(dst, src, stride, 8);
2753 } 2794 }
2754 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2795 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2755 avg_pixels8_c(dst, src, stride, 8); 2796 avg_pixels8_c(dst, src, stride, 8);
2756 } 2797 }
2757 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2798 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2758 put_pixels16_c(dst, src, stride, 16); 2799 put_pixels16_c(dst, src, stride, 16);
2759 } 2800 }
2760 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { 2801 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
2761 avg_pixels16_c(dst, src, stride, 16); 2802 avg_pixels16_c(dst, src, stride, 16);
2762 } 2803 }
2763 #endif /* CONFIG_CAVS_DECODER */ 2804 #endif /* CONFIG_CAVS_DECODER */
2764 2805
2765 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
2766
2767 #if CONFIG_VC1_DECODER 2806 #if CONFIG_VC1_DECODER
2768 /* VC-1 specific */ 2807 /* VC-1 specific */
2769 void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); 2808 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
2770
2771 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
2772 put_pixels8_c(dst, src, stride, 8); 2809 put_pixels8_c(dst, src, stride, 8);
2773 } 2810 }
2774 void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { 2811 void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
2775 avg_pixels8_c(dst, src, stride, 8); 2812 avg_pixels8_c(dst, src, stride, 8);
2776 } 2813 }
2777 #endif /* CONFIG_VC1_DECODER */ 2814 #endif /* CONFIG_VC1_DECODER */
2778 2815
2779 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
2780
2781 /* H264 specific */ 2816 /* H264 specific */
2782 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); 2817 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
2783 2818
2784 #if CONFIG_RV30_DECODER
2785 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
2786 #endif /* CONFIG_RV30_DECODER */
2787
2788 #if CONFIG_RV40_DECODER 2819 #if CONFIG_RV40_DECODER
2789 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2820 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2790 put_pixels16_xy2_c(dst, src, stride, 16); 2821 put_pixels16_xy2_c(dst, src, stride, 16);
2791 } 2822 }
2792 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2823 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2793 avg_pixels16_xy2_c(dst, src, stride, 16); 2824 avg_pixels16_xy2_c(dst, src, stride, 16);
2794 } 2825 }
2795 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2826 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2796 put_pixels8_xy2_c(dst, src, stride, 8); 2827 put_pixels8_xy2_c(dst, src, stride, 8);
2797 } 2828 }
2798 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 2829 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2799 avg_pixels8_xy2_c(dst, src, stride, 8); 2830 avg_pixels8_xy2_c(dst, src, stride, 8);
2800 } 2831 }
2801
2802 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
2803 #endif /* CONFIG_RV40_DECODER */ 2832 #endif /* CONFIG_RV40_DECODER */
2804 2833
2805 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ 2834 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2806 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 2835 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2807 int i; 2836 int i;
2808 2837
2809 for(i=0; i<w; i++){ 2838 for(i=0; i<w; i++){
2810 const int src_1= src[ -srcStride]; 2839 const int src_1= src[ -srcStride];
2811 const int src0 = src[0 ]; 2840 const int src0 = src[0 ];
2812 const int src1 = src[ srcStride]; 2841 const int src1 = src[ srcStride];
(...skipping 973 matching lines...) Expand 10 before | Expand all | Expand 10 after
3786 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); 3815 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3787 } 3816 }
3788 3817
3789 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean 3818 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
3790 3819
3791 return sum; 3820 return sum;
3792 } 3821 }
3793 3822
3794 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3823 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3795 MpegEncContext * const s= (MpegEncContext *)c; 3824 MpegEncContext * const s= (MpegEncContext *)c;
3796 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; 3825 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3797 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3798 3826
3799 assert(h==8); 3827 assert(h==8);
3800 3828
3801 s->dsp.diff_pixels(temp, src1, src2, stride); 3829 s->dsp.diff_pixels(temp, src1, src2, stride);
3802 s->dsp.fdct(temp); 3830 s->dsp.fdct(temp);
3803 return s->dsp.sum_abs_dctelem(temp); 3831 return s->dsp.sum_abs_dctelem(temp);
3804 } 3832 }
3805 3833
3806 #if CONFIG_GPL 3834 #if CONFIG_GPL
3807 #define DCT8_1D {\ 3835 #define DCT8_1D {\
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
3851 for( i = 0; i < 8; i++ ) 3879 for( i = 0; i < 8; i++ )
3852 DCT8_1D 3880 DCT8_1D
3853 #undef SRC 3881 #undef SRC
3854 #undef DST 3882 #undef DST
3855 return sum; 3883 return sum;
3856 } 3884 }
3857 #endif 3885 #endif
3858 3886
3859 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3887 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3860 MpegEncContext * const s= (MpegEncContext *)c; 3888 MpegEncContext * const s= (MpegEncContext *)c;
3861 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; 3889 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3862 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3863 int sum=0, i; 3890 int sum=0, i;
3864 3891
3865 assert(h==8); 3892 assert(h==8);
3866 3893
3867 s->dsp.diff_pixels(temp, src1, src2, stride); 3894 s->dsp.diff_pixels(temp, src1, src2, stride);
3868 s->dsp.fdct(temp); 3895 s->dsp.fdct(temp);
3869 3896
3870 for(i=0; i<64; i++) 3897 for(i=0; i<64; i++)
3871 sum= FFMAX(sum, FFABS(temp[i])); 3898 sum= FFMAX(sum, FFABS(temp[i]));
3872 3899
3873 return sum; 3900 return sum;
3874 } 3901 }
3875 3902
3876 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3903 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3877 MpegEncContext * const s= (MpegEncContext *)c; 3904 MpegEncContext * const s= (MpegEncContext *)c;
3878 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8]; 3905 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
3879 DCTELEM * const temp= (DCTELEM*)aligned_temp; 3906 DCTELEM * const bak = temp+64;
3880 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
3881 int sum=0, i; 3907 int sum=0, i;
3882 3908
3883 assert(h==8); 3909 assert(h==8);
3884 s->mb_intra=0; 3910 s->mb_intra=0;
3885 3911
3886 s->dsp.diff_pixels(temp, src1, src2, stride); 3912 s->dsp.diff_pixels(temp, src1, src2, stride);
3887 3913
3888 memcpy(bak, temp, 64*sizeof(DCTELEM)); 3914 memcpy(bak, temp, 64*sizeof(DCTELEM));
3889 3915
3890 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 3916 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3891 s->dct_unquantize_inter(s, temp, 0, s->qscale); 3917 s->dct_unquantize_inter(s, temp, 0, s->qscale);
3892 ff_simple_idct(temp); //FIXME 3918 ff_simple_idct(temp); //FIXME
3893 3919
3894 for(i=0; i<64; i++) 3920 for(i=0; i<64; i++)
3895 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); 3921 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3896 3922
3897 return sum; 3923 return sum;
3898 } 3924 }
3899 3925
3900 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 3926 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3901 MpegEncContext * const s= (MpegEncContext *)c; 3927 MpegEncContext * const s= (MpegEncContext *)c;
3902 const uint8_t *scantable= s->intra_scantable.permutated; 3928 const uint8_t *scantable= s->intra_scantable.permutated;
3903 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; 3929 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3904 DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8]; 3930 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
3905 DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8]; 3931 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
3906 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3907 uint8_t * const lsrc1 = (uint8_t*)aligned_src1;
3908 uint8_t * const lsrc2 = (uint8_t*)aligned_src2;
3909 int i, last, run, bits, level, distortion, start_i; 3932 int i, last, run, bits, level, distortion, start_i;
3910 const int esc_length= s->ac_esc_length; 3933 const int esc_length= s->ac_esc_length;
3911 uint8_t * length; 3934 uint8_t * length;
3912 uint8_t * last_length; 3935 uint8_t * last_length;
3913 3936
3914 assert(h==8); 3937 assert(h==8);
3915 3938
3916 copy_block8(lsrc1, src1, 8, stride, 8); 3939 copy_block8(lsrc1, src1, 8, stride, 8);
3917 copy_block8(lsrc2, src2, 8, stride, 8); 3940 copy_block8(lsrc2, src2, 8, stride, 8);
3918 3941
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
3972 s->dsp.idct_add(lsrc2, 8, temp); 3995 s->dsp.idct_add(lsrc2, 8, temp);
3973 3996
3974 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); 3997 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
3975 3998
3976 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); 3999 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3977 } 4000 }
3978 4001
3979 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 4002 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3980 MpegEncContext * const s= (MpegEncContext *)c; 4003 MpegEncContext * const s= (MpegEncContext *)c;
3981 const uint8_t *scantable= s->intra_scantable.permutated; 4004 const uint8_t *scantable= s->intra_scantable.permutated;
3982 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; 4005 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
3983 DCTELEM * const temp= (DCTELEM*)aligned_temp;
3984 int i, last, run, bits, level, start_i; 4006 int i, last, run, bits, level, start_i;
3985 const int esc_length= s->ac_esc_length; 4007 const int esc_length= s->ac_esc_length;
3986 uint8_t * length; 4008 uint8_t * length;
3987 uint8_t * last_length; 4009 uint8_t * last_length;
3988 4010
3989 assert(h==8); 4011 assert(h==8);
3990 4012
3991 s->dsp.diff_pixels(temp, src1, src2, stride); 4013 s->dsp.diff_pixels(temp, src1, src2, stride);
3992 4014
3993 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 4015 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after
4478 4500
4479 for(i=0;i<512;i++) { 4501 for(i=0;i<512;i++) {
4480 ff_squareTbl[i] = (i - 256) * (i - 256); 4502 ff_squareTbl[i] = (i - 256) * (i - 256);
4481 } 4503 }
4482 4504
4483 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; 4505 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
4484 } 4506 }
4485 4507
4486 int ff_check_alignment(void){ 4508 int ff_check_alignment(void){
4487 static int did_fail=0; 4509 static int did_fail=0;
4488 DECLARE_ALIGNED_16(int, aligned); 4510 DECLARE_ALIGNED(16, int, aligned);
4489 4511
4490 if((intptr_t)&aligned & 15){ 4512 if((intptr_t)&aligned & 15){
4491 if(!did_fail){ 4513 if(!did_fail){
4492 #if HAVE_MMX || HAVE_ALTIVEC 4514 #if HAVE_MMX || HAVE_ALTIVEC
4493 av_log(NULL, AV_LOG_ERROR, 4515 av_log(NULL, AV_LOG_ERROR,
4494 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" 4516 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
4495 "and may be very slow or crash. This is not a bug in libavcodec,\n" 4517 "and may be very slow or crash. This is not a bug in libavcodec,\n"
4496 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" 4518 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
4497 "Do not report crashes to FFmpeg developers.\n"); 4519 "Do not report crashes to FFmpeg developers.\n");
4498 #endif 4520 #endif
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
4562 c->idct = ff_wmv2_idct_c; 4584 c->idct = ff_wmv2_idct_c;
4563 c->idct_permutation_type= FF_NO_IDCT_PERM; 4585 c->idct_permutation_type= FF_NO_IDCT_PERM;
4564 }else if(avctx->idct_algo==FF_IDCT_FAAN){ 4586 }else if(avctx->idct_algo==FF_IDCT_FAAN){
4565 c->idct_put= ff_faanidct_put; 4587 c->idct_put= ff_faanidct_put;
4566 c->idct_add= ff_faanidct_add; 4588 c->idct_add= ff_faanidct_add;
4567 c->idct = ff_faanidct; 4589 c->idct = ff_faanidct;
4568 c->idct_permutation_type= FF_NO_IDCT_PERM; 4590 c->idct_permutation_type= FF_NO_IDCT_PERM;
4569 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { 4591 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
4570 c->idct_put= ff_ea_idct_put_c; 4592 c->idct_put= ff_ea_idct_put_c;
4571 c->idct_permutation_type= FF_NO_IDCT_PERM; 4593 c->idct_permutation_type= FF_NO_IDCT_PERM;
4594 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
4595 c->idct = ff_bink_idct_c;
4596 c->idct_add = ff_bink_idct_add_c;
4597 c->idct_put = ff_bink_idct_put_c;
4598 c->idct_permutation_type = FF_NO_IDCT_PERM;
4572 }else{ //accurate/default 4599 }else{ //accurate/default
4573 c->idct_put= ff_simple_idct_put; 4600 c->idct_put= ff_simple_idct_put;
4574 c->idct_add= ff_simple_idct_add; 4601 c->idct_add= ff_simple_idct_add;
4575 c->idct = ff_simple_idct; 4602 c->idct = ff_simple_idct;
4576 c->idct_permutation_type= FF_NO_IDCT_PERM; 4603 c->idct_permutation_type= FF_NO_IDCT_PERM;
4577 } 4604 }
4578 } 4605 }
4579 4606
4580 if (CONFIG_H264_DECODER) { 4607 if (CONFIG_H264_DECODER) {
4581 c->h264_idct_add= ff_h264_idct_add_c; 4608 c->h264_idct_add= ff_h264_idct_add_c;
4582 c->h264_idct8_add= ff_h264_idct8_add_c; 4609 c->h264_idct8_add= ff_h264_idct8_add_c;
4583 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; 4610 c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
4584 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; 4611 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
4585 c->h264_idct_add16 = ff_h264_idct_add16_c; 4612 c->h264_idct_add16 = ff_h264_idct_add16_c;
4586 c->h264_idct8_add4 = ff_h264_idct8_add4_c; 4613 c->h264_idct8_add4 = ff_h264_idct8_add4_c;
4587 c->h264_idct_add8 = ff_h264_idct_add8_c; 4614 c->h264_idct_add8 = ff_h264_idct_add8_c;
4588 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; 4615 c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
4589 } 4616 }
4590 4617
4591 c->get_pixels = get_pixels_c; 4618 c->get_pixels = get_pixels_c;
4592 c->diff_pixels = diff_pixels_c; 4619 c->diff_pixels = diff_pixels_c;
4593 c->put_pixels_clamped = put_pixels_clamped_c; 4620 c->put_pixels_clamped = put_pixels_clamped_c;
4594 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; 4621 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
4622 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
4595 c->add_pixels_clamped = add_pixels_clamped_c; 4623 c->add_pixels_clamped = add_pixels_clamped_c;
4596 c->add_pixels8 = add_pixels8_c; 4624 c->add_pixels8 = add_pixels8_c;
4597 c->add_pixels4 = add_pixels4_c; 4625 c->add_pixels4 = add_pixels4_c;
4598 c->sum_abs_dctelem = sum_abs_dctelem_c; 4626 c->sum_abs_dctelem = sum_abs_dctelem_c;
4599 c->gmc1 = gmc1_c; 4627 c->gmc1 = gmc1_c;
4600 c->gmc = ff_gmc_c; 4628 c->gmc = ff_gmc_c;
4601 c->clear_block = clear_block_c; 4629 c->clear_block = clear_block_c;
4602 c->clear_blocks = clear_blocks_c; 4630 c->clear_blocks = clear_blocks_c;
4603 c->pix_sum = pix_sum_c; 4631 c->pix_sum = pix_sum_c;
4604 c->pix_norm1 = pix_norm1_c; 4632 c->pix_norm1 = pix_norm1_c;
4605 4633
4634 c->fill_block_tab[0] = fill_block16_c;
4635 c->fill_block_tab[1] = fill_block8_c;
4636 c->scale_block = scale_block_c;
4637
4606 /* TODO [0] 16 [1] 8 */ 4638 /* TODO [0] 16 [1] 8 */
4607 c->pix_abs[0][0] = pix_abs16_c; 4639 c->pix_abs[0][0] = pix_abs16_c;
4608 c->pix_abs[0][1] = pix_abs16_x2_c; 4640 c->pix_abs[0][1] = pix_abs16_x2_c;
4609 c->pix_abs[0][2] = pix_abs16_y2_c; 4641 c->pix_abs[0][2] = pix_abs16_y2_c;
4610 c->pix_abs[0][3] = pix_abs16_xy2_c; 4642 c->pix_abs[0][3] = pix_abs16_xy2_c;
4611 c->pix_abs[1][0] = pix_abs8_c; 4643 c->pix_abs[1][0] = pix_abs8_c;
4612 c->pix_abs[1][1] = pix_abs8_x2_c; 4644 c->pix_abs[1][1] = pix_abs8_x2_c;
4613 c->pix_abs[1][2] = pix_abs8_y2_c; 4645 c->pix_abs[1][2] = pix_abs8_y2_c;
4614 c->pix_abs[1][3] = pix_abs8_xy2_c; 4646 c->pix_abs[1][3] = pix_abs8_xy2_c;
4615 4647
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after
4923 break; 4955 break;
4924 case FF_SSE2_IDCT_PERM: 4956 case FF_SSE2_IDCT_PERM:
4925 for(i=0; i<64; i++) 4957 for(i=0; i<64; i++)
4926 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; 4958 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
4927 break; 4959 break;
4928 default: 4960 default:
4929 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); 4961 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
4930 } 4962 }
4931 } 4963 }
4932 4964
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698