| OLD | NEW |
| 1 /* | 1 /* |
| 2 * DSP utils | 2 * DSP utils |
| 3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
| 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 5 * | 5 * |
| 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
| 7 * | 7 * |
| 8 * This file is part of FFmpeg. | 8 * This file is part of FFmpeg. |
| 9 * | 9 * |
| 10 * FFmpeg is free software; you can redistribute it and/or | 10 * FFmpeg is free software; you can redistribute it and/or |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 | 29 |
| 30 #include "avcodec.h" | 30 #include "avcodec.h" |
| 31 #include "dsputil.h" | 31 #include "dsputil.h" |
| 32 #include "simple_idct.h" | 32 #include "simple_idct.h" |
| 33 #include "faandct.h" | 33 #include "faandct.h" |
| 34 #include "faanidct.h" | 34 #include "faanidct.h" |
| 35 #include "mathops.h" | 35 #include "mathops.h" |
| 36 #include "snow.h" | 36 #include "snow.h" |
| 37 #include "mpegvideo.h" | 37 #include "mpegvideo.h" |
| 38 #include "config.h" | 38 #include "config.h" |
| 39 | 39 #include "lpc.h" |
| 40 /* snow.c */ | 40 #include "ac3dec.h" |
| 41 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, in
t decomposition_count); | 41 #include "vorbis.h" |
| 42 | 42 #include "png.h" |
| 43 /* vorbis.c */ | |
| 44 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); | |
| 45 | |
| 46 /* ac3dec.c */ | |
| 47 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int
in_ch, int len); | |
| 48 | |
| 49 /* lpc.c */ | |
| 50 void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *auto
c); | |
| 51 | |
| 52 /* pngdec.c */ | |
| 53 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w
, int bpp); | |
| 54 | |
| 55 /* eaidct.c */ | |
| 56 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | |
| 57 | 43 |
| 58 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | 44 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
| 59 uint32_t ff_squareTbl[512] = {0, }; | 45 uint32_t ff_squareTbl[512] = {0, }; |
| 60 | 46 |
| 61 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native a
rithmetic size | 47 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native a
rithmetic size |
| 62 #define pb_7f (~0UL/255 * 0x7f) | 48 #define pb_7f (~0UL/255 * 0x7f) |
| 63 #define pb_80 (~0UL/255 * 0x80) | 49 #define pb_80 (~0UL/255 * 0x80) |
| 64 | 50 |
| 65 const uint8_t ff_zigzag_direct[64] = { | 51 const uint8_t ff_zigzag_direct[64] = { |
| 66 0, 1, 8, 16, 9, 2, 3, 10, | 52 0, 1, 8, 16, 9, 2, 3, 10, |
| (...skipping 13 matching lines...) Expand all Loading... |
| 80 17, 25, 32, 40, 48, 56, 33, 41, | 66 17, 25, 32, 40, 48, 56, 33, 41, |
| 81 18, 26, 3, 11, 4, 12, 19, 27, | 67 18, 26, 3, 11, 4, 12, 19, 27, |
| 82 34, 42, 49, 57, 50, 58, 35, 43, | 68 34, 42, 49, 57, 50, 58, 35, 43, |
| 83 20, 28, 5, 13, 6, 14, 21, 29, | 69 20, 28, 5, 13, 6, 14, 21, 29, |
| 84 36, 44, 51, 59, 52, 60, 37, 45, | 70 36, 44, 51, 59, 52, 60, 37, 45, |
| 85 22, 30, 7, 15, 23, 31, 38, 46, | 71 22, 30, 7, 15, 23, 31, 38, 46, |
| 86 53, 61, 54, 62, 39, 47, 55, 63, | 72 53, 61, 54, 62, 39, 47, 55, 63, |
| 87 }; | 73 }; |
| 88 | 74 |
| 89 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | 75 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
| 90 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64]; | 76 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64]; |
| 91 | 77 |
| 92 const uint8_t ff_alternate_horizontal_scan[64] = { | 78 const uint8_t ff_alternate_horizontal_scan[64] = { |
| 93 0, 1, 2, 3, 8, 9, 16, 17, | 79 0, 1, 2, 3, 8, 9, 16, 17, |
| 94 10, 11, 4, 5, 6, 7, 15, 14, | 80 10, 11, 4, 5, 6, 7, 15, 14, |
| 95 13, 12, 19, 18, 24, 25, 32, 33, | 81 13, 12, 19, 18, 24, 25, 32, 33, |
| 96 26, 27, 20, 21, 22, 23, 28, 29, | 82 26, 27, 20, 21, 22, 23, 28, 29, |
| 97 30, 31, 34, 35, 40, 41, 48, 49, | 83 30, 31, 34, 35, 40, 41, 48, 49, |
| 98 42, 43, 36, 37, 38, 39, 44, 45, | 84 42, 43, 36, 37, 38, 39, 44, 45, |
| 99 46, 47, 50, 51, 56, 57, 58, 59, | 85 46, 47, 50, 51, 56, 57, 58, 59, |
| 100 52, 53, 54, 55, 60, 61, 62, 63, | 86 52, 53, 54, 55, 60, 61, 62, 63, |
| (...skipping 553 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 654 *pixels = 255; | 640 *pixels = 255; |
| 655 else | 641 else |
| 656 *pixels = (uint8_t)(*block + 128); | 642 *pixels = (uint8_t)(*block + 128); |
| 657 block++; | 643 block++; |
| 658 pixels++; | 644 pixels++; |
| 659 } | 645 } |
| 660 pixels += (line_size - 8); | 646 pixels += (line_size - 8); |
| 661 } | 647 } |
| 662 } | 648 } |
| 663 | 649 |
| 650 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixe
ls, |
| 651 int line_size) |
| 652 { |
| 653 int i; |
| 654 |
| 655 /* read the pixels */ |
| 656 for(i=0;i<8;i++) { |
| 657 pixels[0] = block[0]; |
| 658 pixels[1] = block[1]; |
| 659 pixels[2] = block[2]; |
| 660 pixels[3] = block[3]; |
| 661 pixels[4] = block[4]; |
| 662 pixels[5] = block[5]; |
| 663 pixels[6] = block[6]; |
| 664 pixels[7] = block[7]; |
| 665 |
| 666 pixels += line_size; |
| 667 block += 8; |
| 668 } |
| 669 } |
| 670 |
| 664 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | 671 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
| 665 int line_size) | 672 int line_size) |
| 666 { | 673 { |
| 667 int i; | 674 int i; |
| 668 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 675 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 669 | 676 |
| 670 /* read the pixels */ | 677 /* read the pixels */ |
| 671 for(i=0;i<8;i++) { | 678 for(i=0;i<8;i++) { |
| 672 pixels[0] = cm[pixels[0] + block[0]]; | 679 pixels[0] = cm[pixels[0] + block[0]]; |
| 673 pixels[1] = cm[pixels[1] + block[1]]; | 680 pixels[1] = cm[pixels[1] + block[1]]; |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 745 } | 752 } |
| 746 | 753 |
| 747 static int sum_abs_dctelem_c(DCTELEM *block) | 754 static int sum_abs_dctelem_c(DCTELEM *block) |
| 748 { | 755 { |
| 749 int sum=0, i; | 756 int sum=0, i; |
| 750 for(i=0; i<64; i++) | 757 for(i=0; i<64; i++) |
| 751 sum+= FFABS(block[i]); | 758 sum+= FFABS(block[i]); |
| 752 return sum; | 759 return sum; |
| 753 } | 760 } |
| 754 | 761 |
/**
 * Fill a block that is 16 bytes wide with a constant byte.
 *
 * @param block     first byte of the first row
 * @param value     byte stored into every position of the block
 * @param line_size offset added to reach the next row
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
| 771 |
/**
 * Fill a block that is 8 bytes wide with a constant byte.
 *
 * @param block     first byte of the first row
 * @param value     byte stored into every position of the block
 * @param line_size offset added to reach the next row
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
| 781 |
/**
 * Upscale an 8x8 block to 16x16 by duplicating every source byte into a
 * 2x2 square of the destination.
 *
 * Source row j is written to destination rows 2*j and 2*j+1, and source
 * column i to destination columns 2*i and 2*i+1.
 *
 * The destination is written byte by byte. The earlier form initialised
 * uint16_t pointers directly from a uint8_t pointer (an incompatible pointer
 * conversion without a cast) and stored src[i] * 0x0101 through them, which
 * also aliased the byte buffer as uint16_t and required 2-byte alignment.
 * Byte stores give the same output, since both halves of src[i] * 0x0101
 * equal src[i].
 *
 * @param src      64 source bytes, 8 rows of 8
 * @param dst      destination; 16 bytes are written on each of 16 rows
 * @param linesize offset in bytes between consecutive destination rows
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;

    for (j = 0; j < 8; j++) {
        uint8_t *row0 = dst;
        uint8_t *row1 = dst + linesize;

        for (i = 0; i < 8; i++) {
            const uint8_t v = src[i];

            row0[2 * i]     = v;
            row0[2 * i + 1] = v;
            row1[2 * i]     = v;
            row1[2 * i + 1] = v;
        }

        src += 8;
        dst += 2 * linesize; /* each source row covers two destination rows */
    }
}
| 797 |
| 755 #if 0 | 798 #if 0 |
| 756 | 799 |
| 757 #define PIXOP2(OPNAME, OP) \ | 800 #define PIXOP2(OPNAME, OP) \ |
| 758 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
ze, int h)\ | 801 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_si
ze, int h)\ |
| 759 {\ | 802 {\ |
| 760 int i;\ | 803 int i;\ |
| 761 for(i=0; i<h; i++){\ | 804 for(i=0; i<h; i++){\ |
| 762 OP(*((uint64_t*)block), AV_RN64(pixels));\ | 805 OP(*((uint64_t*)block), AV_RN64(pixels));\ |
| 763 pixels+=line_size;\ | 806 pixels+=line_size;\ |
| 764 block +=line_size;\ | 807 block +=line_size;\ |
| (...skipping 1974 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2739 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | 2782 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; |
| 2740 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | 2783 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; |
| 2741 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | 2784 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; |
| 2742 dst+=dstStride; | 2785 dst+=dstStride; |
| 2743 src+=srcStride; | 2786 src+=srcStride; |
| 2744 } | 2787 } |
| 2745 } | 2788 } |
| 2746 | 2789 |
| 2747 #if CONFIG_CAVS_DECODER | 2790 #if CONFIG_CAVS_DECODER |
| 2748 /* AVS specific */ | 2791 /* AVS specific */ |
| 2749 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); | |
| 2750 | |
/** CAVS qpel8 mc00, put variant: forwards to put_pixels8_c() with a height argument of 8. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}

/** CAVS qpel8 mc00, avg variant: forwards to avg_pixels8_c() with a height argument of 8. */
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}

/** CAVS qpel16 mc00, put variant: forwards to put_pixels16_c() with a height argument of 16. */
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}

/** CAVS qpel16 mc00, avg variant: forwards to avg_pixels16_c() with a height argument of 16. */
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
| 2763 #endif /* CONFIG_CAVS_DECODER */ | 2804 #endif /* CONFIG_CAVS_DECODER */ |
| 2764 | 2805 |
| 2765 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); | |
| 2766 | |
| 2767 #if CONFIG_VC1_DECODER | 2806 #if CONFIG_VC1_DECODER |
| 2768 /* VC-1 specific */ | 2807 /* VC-1 specific */ |
/**
 * VC-1 mspel mc00, put variant: forwards to put_pixels8_c() with a height
 * argument of 8. The rnd argument is accepted but not used.
 */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    put_pixels8_c(dst, src, stride, 8);
}

/**
 * VC-1 mspel mc00, avg variant: forwards to avg_pixels8_c() with a height
 * argument of 8. The rnd argument is accepted but not used.
 */
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd)
{
    avg_pixels8_c(dst, src, stride, 8);
}
| 2777 #endif /* CONFIG_VC1_DECODER */ | 2814 #endif /* CONFIG_VC1_DECODER */ |
| 2778 | 2815 |
| 2779 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); | |
| 2780 | |
| 2781 /* H264 specific */ | 2816 /* H264 specific */ |
| 2782 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); | 2817 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); |
| 2783 | 2818 |
| 2784 #if CONFIG_RV30_DECODER | |
| 2785 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); | |
| 2786 #endif /* CONFIG_RV30_DECODER */ | |
| 2787 | |
| 2788 #if CONFIG_RV40_DECODER | 2819 #if CONFIG_RV40_DECODER |
/** RV40 qpel16 mc33, put variant: forwards to put_pixels16_xy2_c() with a height argument of 16. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

/** RV40 qpel16 mc33, avg variant: forwards to avg_pixels16_xy2_c() with a height argument of 16. */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

/** RV40 qpel8 mc33, put variant: forwards to put_pixels8_xy2_c() with a height argument of 8. */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

/** RV40 qpel8 mc33, avg variant: forwards to avg_pixels8_xy2_c() with a height argument of 8. */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
| 2801 | |
| 2802 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); | |
| 2803 #endif /* CONFIG_RV40_DECODER */ | 2832 #endif /* CONFIG_RV40_DECODER */ |
| 2804 | 2833 |
| 2805 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
srcStride, int w){ | 2834 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
srcStride, int w){ |
| 2806 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 2835 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
| 2807 int i; | 2836 int i; |
| 2808 | 2837 |
| 2809 for(i=0; i<w; i++){ | 2838 for(i=0; i<w; i++){ |
| 2810 const int src_1= src[ -srcStride]; | 2839 const int src_1= src[ -srcStride]; |
| 2811 const int src0 = src[0 ]; | 2840 const int src0 = src[0 ]; |
| 2812 const int src1 = src[ srcStride]; | 2841 const int src1 = src[ srcStride]; |
| (...skipping 973 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3786 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); | 3815 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); |
| 3787 } | 3816 } |
| 3788 | 3817 |
| 3789 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean | 3818 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean |
| 3790 | 3819 |
| 3791 return sum; | 3820 return sum; |
| 3792 } | 3821 } |
| 3793 | 3822 |
| 3794 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ | 3823 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ |
| 3795 MpegEncContext * const s= (MpegEncContext *)c; | 3824 MpegEncContext * const s= (MpegEncContext *)c; |
| 3796 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3825 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 3797 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
| 3798 | 3826 |
| 3799 assert(h==8); | 3827 assert(h==8); |
| 3800 | 3828 |
| 3801 s->dsp.diff_pixels(temp, src1, src2, stride); | 3829 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 3802 s->dsp.fdct(temp); | 3830 s->dsp.fdct(temp); |
| 3803 return s->dsp.sum_abs_dctelem(temp); | 3831 return s->dsp.sum_abs_dctelem(temp); |
| 3804 } | 3832 } |
| 3805 | 3833 |
| 3806 #if CONFIG_GPL | 3834 #if CONFIG_GPL |
| 3807 #define DCT8_1D {\ | 3835 #define DCT8_1D {\ |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3851 for( i = 0; i < 8; i++ ) | 3879 for( i = 0; i < 8; i++ ) |
| 3852 DCT8_1D | 3880 DCT8_1D |
| 3853 #undef SRC | 3881 #undef SRC |
| 3854 #undef DST | 3882 #undef DST |
| 3855 return sum; | 3883 return sum; |
| 3856 } | 3884 } |
| 3857 #endif | 3885 #endif |
| 3858 | 3886 |
| 3859 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ | 3887 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
, int stride, int h){ |
| 3860 MpegEncContext * const s= (MpegEncContext *)c; | 3888 MpegEncContext * const s= (MpegEncContext *)c; |
| 3861 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3889 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 3862 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
| 3863 int sum=0, i; | 3890 int sum=0, i; |
| 3864 | 3891 |
| 3865 assert(h==8); | 3892 assert(h==8); |
| 3866 | 3893 |
| 3867 s->dsp.diff_pixels(temp, src1, src2, stride); | 3894 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 3868 s->dsp.fdct(temp); | 3895 s->dsp.fdct(temp); |
| 3869 | 3896 |
| 3870 for(i=0; i<64; i++) | 3897 for(i=0; i<64; i++) |
| 3871 sum= FFMAX(sum, FFABS(temp[i])); | 3898 sum= FFMAX(sum, FFABS(temp[i])); |
| 3872 | 3899 |
| 3873 return sum; | 3900 return sum; |
| 3874 } | 3901 } |
| 3875 | 3902 |
| 3876 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
rc2, int stride, int h){ | 3903 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
rc2, int stride, int h){ |
| 3877 MpegEncContext * const s= (MpegEncContext *)c; | 3904 MpegEncContext * const s= (MpegEncContext *)c; |
| 3878 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8]; | 3905 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); |
| 3879 DCTELEM * const temp= (DCTELEM*)aligned_temp; | 3906 DCTELEM * const bak = temp+64; |
| 3880 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | |
| 3881 int sum=0, i; | 3907 int sum=0, i; |
| 3882 | 3908 |
| 3883 assert(h==8); | 3909 assert(h==8); |
| 3884 s->mb_intra=0; | 3910 s->mb_intra=0; |
| 3885 | 3911 |
| 3886 s->dsp.diff_pixels(temp, src1, src2, stride); | 3912 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 3887 | 3913 |
| 3888 memcpy(bak, temp, 64*sizeof(DCTELEM)); | 3914 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
| 3889 | 3915 |
| 3890 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s
->qscale, &i); | 3916 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s
->qscale, &i); |
| 3891 s->dct_unquantize_inter(s, temp, 0, s->qscale); | 3917 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
| 3892 ff_simple_idct(temp); //FIXME | 3918 ff_simple_idct(temp); //FIXME |
| 3893 | 3919 |
| 3894 for(i=0; i<64; i++) | 3920 for(i=0; i<64; i++) |
| 3895 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | 3921 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); |
| 3896 | 3922 |
| 3897 return sum; | 3923 return sum; |
| 3898 } | 3924 } |
| 3899 | 3925 |
| 3900 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
stride, int h){ | 3926 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
stride, int h){ |
| 3901 MpegEncContext * const s= (MpegEncContext *)c; | 3927 MpegEncContext * const s= (MpegEncContext *)c; |
| 3902 const uint8_t *scantable= s->intra_scantable.permutated; | 3928 const uint8_t *scantable= s->intra_scantable.permutated; |
| 3903 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 3929 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 3904 DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8]; | 3930 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); |
| 3905 DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8]; | 3931 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); |
| 3906 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
| 3907 uint8_t * const lsrc1 = (uint8_t*)aligned_src1; | |
| 3908 uint8_t * const lsrc2 = (uint8_t*)aligned_src2; | |
| 3909 int i, last, run, bits, level, distortion, start_i; | 3932 int i, last, run, bits, level, distortion, start_i; |
| 3910 const int esc_length= s->ac_esc_length; | 3933 const int esc_length= s->ac_esc_length; |
| 3911 uint8_t * length; | 3934 uint8_t * length; |
| 3912 uint8_t * last_length; | 3935 uint8_t * last_length; |
| 3913 | 3936 |
| 3914 assert(h==8); | 3937 assert(h==8); |
| 3915 | 3938 |
| 3916 copy_block8(lsrc1, src1, 8, stride, 8); | 3939 copy_block8(lsrc1, src1, 8, stride, 8); |
| 3917 copy_block8(lsrc2, src2, 8, stride, 8); | 3940 copy_block8(lsrc2, src2, 8, stride, 8); |
| 3918 | 3941 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3972 s->dsp.idct_add(lsrc2, 8, temp); | 3995 s->dsp.idct_add(lsrc2, 8, temp); |
| 3973 | 3996 |
| 3974 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | 3997 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); |
| 3975 | 3998 |
| 3976 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | 3999 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); |
| 3977 } | 4000 } |
| 3978 | 4001 |
| 3979 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
t stride, int h){ | 4002 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
t stride, int h){ |
| 3980 MpegEncContext * const s= (MpegEncContext *)c; | 4003 MpegEncContext * const s= (MpegEncContext *)c; |
| 3981 const uint8_t *scantable= s->intra_scantable.permutated; | 4004 const uint8_t *scantable= s->intra_scantable.permutated; |
| 3982 DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | 4005 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
| 3983 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
| 3984 int i, last, run, bits, level, start_i; | 4006 int i, last, run, bits, level, start_i; |
| 3985 const int esc_length= s->ac_esc_length; | 4007 const int esc_length= s->ac_esc_length; |
| 3986 uint8_t * length; | 4008 uint8_t * length; |
| 3987 uint8_t * last_length; | 4009 uint8_t * last_length; |
| 3988 | 4010 |
| 3989 assert(h==8); | 4011 assert(h==8); |
| 3990 | 4012 |
| 3991 s->dsp.diff_pixels(temp, src1, src2, stride); | 4013 s->dsp.diff_pixels(temp, src1, src2, stride); |
| 3992 | 4014 |
| 3993 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXM
E*/, s->qscale, &i); | 4015 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXM
E*/, s->qscale, &i); |
| (...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4478 | 4500 |
| 4479 for(i=0;i<512;i++) { | 4501 for(i=0;i<512;i++) { |
| 4480 ff_squareTbl[i] = (i - 256) * (i - 256); | 4502 ff_squareTbl[i] = (i - 256) * (i - 256); |
| 4481 } | 4503 } |
| 4482 | 4504 |
| 4483 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | 4505 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
| 4484 } | 4506 } |
| 4485 | 4507 |
| 4486 int ff_check_alignment(void){ | 4508 int ff_check_alignment(void){ |
| 4487 static int did_fail=0; | 4509 static int did_fail=0; |
| 4488 DECLARE_ALIGNED_16(int, aligned); | 4510 DECLARE_ALIGNED(16, int, aligned); |
| 4489 | 4511 |
| 4490 if((intptr_t)&aligned & 15){ | 4512 if((intptr_t)&aligned & 15){ |
| 4491 if(!did_fail){ | 4513 if(!did_fail){ |
| 4492 #if HAVE_MMX || HAVE_ALTIVEC | 4514 #if HAVE_MMX || HAVE_ALTIVEC |
| 4493 av_log(NULL, AV_LOG_ERROR, | 4515 av_log(NULL, AV_LOG_ERROR, |
| 4494 "Compiler did not align stack variables. Libavcodec has been mis
compiled\n" | 4516 "Compiler did not align stack variables. Libavcodec has been mis
compiled\n" |
| 4495 "and may be very slow or crash. This is not a bug in libavcodec,
\n" | 4517 "and may be very slow or crash. This is not a bug in libavcodec,
\n" |
| 4496 "but in the compiler. You may try recompiling using gcc >= 4.2.\
n" | 4518 "but in the compiler. You may try recompiling using gcc >= 4.2.\
n" |
| 4497 "Do not report crashes to FFmpeg developers.\n"); | 4519 "Do not report crashes to FFmpeg developers.\n"); |
| 4498 #endif | 4520 #endif |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4562 c->idct = ff_wmv2_idct_c; | 4584 c->idct = ff_wmv2_idct_c; |
| 4563 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4585 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4564 }else if(avctx->idct_algo==FF_IDCT_FAAN){ | 4586 }else if(avctx->idct_algo==FF_IDCT_FAAN){ |
| 4565 c->idct_put= ff_faanidct_put; | 4587 c->idct_put= ff_faanidct_put; |
| 4566 c->idct_add= ff_faanidct_add; | 4588 c->idct_add= ff_faanidct_add; |
| 4567 c->idct = ff_faanidct; | 4589 c->idct = ff_faanidct; |
| 4568 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4590 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4569 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { | 4591 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { |
| 4570 c->idct_put= ff_ea_idct_put_c; | 4592 c->idct_put= ff_ea_idct_put_c; |
| 4571 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4593 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4594 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { |
| 4595 c->idct = ff_bink_idct_c; |
| 4596 c->idct_add = ff_bink_idct_add_c; |
| 4597 c->idct_put = ff_bink_idct_put_c; |
| 4598 c->idct_permutation_type = FF_NO_IDCT_PERM; |
| 4572 }else{ //accurate/default | 4599 }else{ //accurate/default |
| 4573 c->idct_put= ff_simple_idct_put; | 4600 c->idct_put= ff_simple_idct_put; |
| 4574 c->idct_add= ff_simple_idct_add; | 4601 c->idct_add= ff_simple_idct_add; |
| 4575 c->idct = ff_simple_idct; | 4602 c->idct = ff_simple_idct; |
| 4576 c->idct_permutation_type= FF_NO_IDCT_PERM; | 4603 c->idct_permutation_type= FF_NO_IDCT_PERM; |
| 4577 } | 4604 } |
| 4578 } | 4605 } |
| 4579 | 4606 |
| 4580 if (CONFIG_H264_DECODER) { | 4607 if (CONFIG_H264_DECODER) { |
| 4581 c->h264_idct_add= ff_h264_idct_add_c; | 4608 c->h264_idct_add= ff_h264_idct_add_c; |
| 4582 c->h264_idct8_add= ff_h264_idct8_add_c; | 4609 c->h264_idct8_add= ff_h264_idct8_add_c; |
| 4583 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; | 4610 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; |
| 4584 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; | 4611 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; |
| 4585 c->h264_idct_add16 = ff_h264_idct_add16_c; | 4612 c->h264_idct_add16 = ff_h264_idct_add16_c; |
| 4586 c->h264_idct8_add4 = ff_h264_idct8_add4_c; | 4613 c->h264_idct8_add4 = ff_h264_idct8_add4_c; |
| 4587 c->h264_idct_add8 = ff_h264_idct_add8_c; | 4614 c->h264_idct_add8 = ff_h264_idct_add8_c; |
| 4588 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; | 4615 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; |
| 4589 } | 4616 } |
| 4590 | 4617 |
| 4591 c->get_pixels = get_pixels_c; | 4618 c->get_pixels = get_pixels_c; |
| 4592 c->diff_pixels = diff_pixels_c; | 4619 c->diff_pixels = diff_pixels_c; |
| 4593 c->put_pixels_clamped = put_pixels_clamped_c; | 4620 c->put_pixels_clamped = put_pixels_clamped_c; |
| 4594 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | 4621 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
| 4622 c->put_pixels_nonclamped = put_pixels_nonclamped_c; |
| 4595 c->add_pixels_clamped = add_pixels_clamped_c; | 4623 c->add_pixels_clamped = add_pixels_clamped_c; |
| 4596 c->add_pixels8 = add_pixels8_c; | 4624 c->add_pixels8 = add_pixels8_c; |
| 4597 c->add_pixels4 = add_pixels4_c; | 4625 c->add_pixels4 = add_pixels4_c; |
| 4598 c->sum_abs_dctelem = sum_abs_dctelem_c; | 4626 c->sum_abs_dctelem = sum_abs_dctelem_c; |
| 4599 c->gmc1 = gmc1_c; | 4627 c->gmc1 = gmc1_c; |
| 4600 c->gmc = ff_gmc_c; | 4628 c->gmc = ff_gmc_c; |
| 4601 c->clear_block = clear_block_c; | 4629 c->clear_block = clear_block_c; |
| 4602 c->clear_blocks = clear_blocks_c; | 4630 c->clear_blocks = clear_blocks_c; |
| 4603 c->pix_sum = pix_sum_c; | 4631 c->pix_sum = pix_sum_c; |
| 4604 c->pix_norm1 = pix_norm1_c; | 4632 c->pix_norm1 = pix_norm1_c; |
| 4605 | 4633 |
| 4634 c->fill_block_tab[0] = fill_block16_c; |
| 4635 c->fill_block_tab[1] = fill_block8_c; |
| 4636 c->scale_block = scale_block_c; |
| 4637 |
| 4606 /* TODO [0] 16 [1] 8 */ | 4638 /* TODO [0] 16 [1] 8 */ |
| 4607 c->pix_abs[0][0] = pix_abs16_c; | 4639 c->pix_abs[0][0] = pix_abs16_c; |
| 4608 c->pix_abs[0][1] = pix_abs16_x2_c; | 4640 c->pix_abs[0][1] = pix_abs16_x2_c; |
| 4609 c->pix_abs[0][2] = pix_abs16_y2_c; | 4641 c->pix_abs[0][2] = pix_abs16_y2_c; |
| 4610 c->pix_abs[0][3] = pix_abs16_xy2_c; | 4642 c->pix_abs[0][3] = pix_abs16_xy2_c; |
| 4611 c->pix_abs[1][0] = pix_abs8_c; | 4643 c->pix_abs[1][0] = pix_abs8_c; |
| 4612 c->pix_abs[1][1] = pix_abs8_x2_c; | 4644 c->pix_abs[1][1] = pix_abs8_x2_c; |
| 4613 c->pix_abs[1][2] = pix_abs8_y2_c; | 4645 c->pix_abs[1][2] = pix_abs8_y2_c; |
| 4614 c->pix_abs[1][3] = pix_abs8_xy2_c; | 4646 c->pix_abs[1][3] = pix_abs8_xy2_c; |
| 4615 | 4647 |
| (...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4923 break; | 4955 break; |
| 4924 case FF_SSE2_IDCT_PERM: | 4956 case FF_SSE2_IDCT_PERM: |
| 4925 for(i=0; i<64; i++) | 4957 for(i=0; i<64; i++) |
| 4926 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; | 4958 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; |
| 4927 break; | 4959 break; |
| 4928 default: | 4960 default: |
| 4929 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"
); | 4961 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"
); |
| 4930 } | 4962 } |
| 4931 } | 4963 } |
| 4932 | 4964 |
| OLD | NEW |