| OLD | NEW |
| 1 /* | 1 /* |
| 2 * VC-1 and WMV3 - DSP functions MMX-optimized | 2 * VC-1 and WMV3 - DSP functions MMX-optimized |
| 3 * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr> | 3 * Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr> |
| 4 * | 4 * |
| 5 * Permission is hereby granted, free of charge, to any person | 5 * Permission is hereby granted, free of charge, to any person |
| 6 * obtaining a copy of this software and associated documentation | 6 * obtaining a copy of this software and associated documentation |
| 7 * files (the "Software"), to deal in the Software without | 7 * files (the "Software"), to deal in the Software without |
| 8 * restriction, including without limitation the rights to use, | 8 * restriction, including without limitation the rights to use, |
| 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell | 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 * copies of the Software, and to permit persons to whom the | 10 * copies of the Software, and to permit persons to whom the |
| 11 * Software is furnished to do so, subject to the following | 11 * Software is furnished to do so, subject to the following |
| 12 * conditions: | 12 * conditions: |
| 13 * | 13 * |
| 14 * The above copyright notice and this permission notice shall be | 14 * The above copyright notice and this permission notice shall be |
| 15 * included in all copies or substantial portions of the Software. | 15 * included in all copies or substantial portions of the Software. |
| 16 * | 16 * |
| 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 24 * OTHER DEALINGS IN THE SOFTWARE. | 24 * OTHER DEALINGS IN THE SOFTWARE. |
| 25 */ | 25 */ |
| 26 | 26 |
| 27 #include "libavutil/cpu.h" |
| 27 #include "libavutil/x86_cpu.h" | 28 #include "libavutil/x86_cpu.h" |
| 28 #include "libavcodec/dsputil.h" | 29 #include "libavcodec/dsputil.h" |
| 29 #include "dsputil_mmx.h" | 30 #include "dsputil_mmx.h" |
| 30 | 31 |
| 31 #define OP_PUT(S,D) | 32 #define OP_PUT(S,D) |
| 32 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" | 33 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" |
| 33 | 34 |
| 34 /** Add rounder from mm7 to mm3 and pack result at destination */ | 35 /** Add rounder from mm7 to mm3 and pack result at destination */ |
| 35 #define NORMALIZE_MMX(SHIFT) \ | 36 #define NORMALIZE_MMX(SHIFT) \ |
| 36 "paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \ | 37 "paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \ |
| (...skipping 670 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 707 void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq); | 708 void ff_vc1_h_loop_filter8_sse4(uint8_t *src, int stride, int pq); |
| 708 | 709 |
| 709 static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) | 710 static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) |
| 710 { | 711 { |
| 711 ff_vc1_h_loop_filter8_sse4(src, stride, pq); | 712 ff_vc1_h_loop_filter8_sse4(src, stride, pq); |
| 712 ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); | 713 ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); |
| 713 } | 714 } |
| 714 #endif | 715 #endif |
| 715 | 716 |
| 716 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { | 717 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { |
| 717 mm_flags = mm_support(); | 718 int mm_flags = av_get_cpu_flags(); |
| 718 | 719 |
| 719 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; | 720 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; |
| 720 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; | 721 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; |
| 721 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx; | 722 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx; |
| 722 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx; | 723 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx; |
| 723 | 724 |
| 724 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx; | 725 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx; |
| 725 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx; | 726 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx; |
| 726 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx; | 727 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx; |
| 727 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx; | 728 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx; |
| 728 | 729 |
| 729 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx; | 730 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx; |
| 730 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx; | 731 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx; |
| 731 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx; | 732 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx; |
| 732 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx; | 733 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx; |
| 733 | 734 |
| 734 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx; | 735 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx; |
| 735 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx; | 736 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx; |
| 736 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx; | 737 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx; |
| 737 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx; | 738 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx; |
| 738 | 739 |
| 739 if (mm_flags & FF_MM_MMX2){ | 740 if (mm_flags & AV_CPU_FLAG_MMX2){ |
| 740 dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; | 741 dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; |
| 741 dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; | 742 dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; |
| 742 dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; | 743 dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; |
| 743 dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2; | 744 dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2; |
| 744 | 745 |
| 745 dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2; | 746 dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2; |
| 746 dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2; | 747 dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2; |
| 747 dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2; | 748 dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2; |
| 748 dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2; | 749 dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2; |
| 749 | 750 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 765 | 766 |
| 766 #define ASSIGN_LF(EXT) \ | 767 #define ASSIGN_LF(EXT) \ |
| 767 dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ | 768 dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ |
| 768 dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT; \ | 769 dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT; \ |
| 769 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \ | 770 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \ |
| 770 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \ | 771 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \ |
| 771 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \ | 772 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \ |
| 772 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT | 773 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT |
| 773 | 774 |
| 774 #if HAVE_YASM | 775 #if HAVE_YASM |
| 775 if (mm_flags & FF_MM_MMX) { | 776 if (mm_flags & AV_CPU_FLAG_MMX) { |
| 776 ASSIGN_LF(mmx); | 777 ASSIGN_LF(mmx); |
| 777 } | 778 } |
| 778 return; | 779 return; |
| 779 if (mm_flags & FF_MM_MMX2) { | 780 if (mm_flags & AV_CPU_FLAG_MMX2) { |
| 780 ASSIGN_LF(mmx2); | 781 ASSIGN_LF(mmx2); |
| 781 } | 782 } |
| 782 if (mm_flags & FF_MM_SSE2) { | 783 if (mm_flags & AV_CPU_FLAG_SSE2) { |
| 783 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2; | 784 dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2; |
| 784 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2; | 785 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2; |
| 785 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2; | 786 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2; |
| 786 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2; | 787 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2; |
| 787 } | 788 } |
| 788 if (mm_flags & FF_MM_SSSE3) { | 789 if (mm_flags & AV_CPU_FLAG_SSSE3) { |
| 789 ASSIGN_LF(ssse3); | 790 ASSIGN_LF(ssse3); |
| 790 } | 791 } |
| 791 if (mm_flags & FF_MM_SSE4) { | 792 if (mm_flags & AV_CPU_FLAG_SSE4) { |
| 792 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4; | 793 dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4; |
| 793 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4; | 794 dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4; |
| 794 } | 795 } |
| 795 #endif | 796 #endif |
| 796 } | 797 } |
| OLD | NEW |