| OLD | NEW |
| 1 /* | 1 /* |
| 2 * FFT/MDCT transform with Extended 3DNow! optimizations | 2 * FFT/MDCT transform with Extended 3DNow! optimizations |
| 3 * Copyright (c) 2006-2008 Zuxy MENG Jie, Loren Merritt | 3 * Copyright (c) 2006-2008 Zuxy MENG Jie, Loren Merritt |
| 4 * | 4 * |
| 5 * This file is part of FFmpeg. | 5 * This file is part of FFmpeg. |
| 6 * | 6 * |
| 7 * FFmpeg is free software; you can redistribute it and/or | 7 * FFmpeg is free software; you can redistribute it and/or |
| 8 * modify it under the terms of the GNU Lesser General Public | 8 * modify it under the terms of the GNU Lesser General Public |
| 9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
| 10 * version 2.1 of the License, or (at your option) any later version. | 10 * version 2.1 of the License, or (at your option) any later version. |
| (...skipping 38 matching lines...) | (...skipping 38 matching lines...) |
| 49 ff_fft_dispatch_interleave_3dn2(z, s->nbits); | 49 ff_fft_dispatch_interleave_3dn2(z, s->nbits); |
| 50 __asm__ volatile("femms"); | 50 __asm__ volatile("femms"); |
| 51 if(n <= 8) | 51 if(n <= 8) |
| 52 for(i=0; i<n; i+=2) | 52 for(i=0; i<n; i+=2) |
| 53 FFSWAP(FFTSample, z[i].im, z[i+1].re); | 53 FFSWAP(FFTSample, z[i].im, z[i+1].re); |
| 54 } | 54 } |
| 55 | 55 |
| 56 void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) | 56 void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) |
| 57 { | 57 { |
| 58 x86_reg j, k; | 58 x86_reg j, k; |
| 59 long n = 1 << s->mdct_bits; | 59 long n = s->mdct_size; |
| 60 long n2 = n >> 1; | 60 long n2 = n >> 1; |
| 61 long n4 = n >> 2; | 61 long n4 = n >> 2; |
| 62 long n8 = n >> 3; | 62 long n8 = n >> 3; |
| 63 const uint16_t *revtab = s->revtab; | 63 const uint16_t *revtab = s->revtab; |
| 64 const FFTSample *tcos = s->tcos; | 64 const FFTSample *tcos = s->tcos; |
| 65 const FFTSample *tsin = s->tsin; | 65 const FFTSample *tsin = s->tsin; |
| 66 const FFTSample *in1, *in2; | 66 const FFTSample *in1, *in2; |
| 67 FFTComplex *z = (FFTComplex *)output; | 67 FFTComplex *z = (FFTComplex *)output; |
| 68 | 68 |
| 69 /* pre rotation */ | 69 /* pre rotation */ |
| (...skipping 70 matching lines...) | (...skipping 70 matching lines...) |
| 140 :"+r"(j), "+r"(k) | 140 :"+r"(j), "+r"(k) |
| 141 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) | 141 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) |
| 142 :"memory" | 142 :"memory" |
| 143 ); | 143 ); |
| 144 __asm__ volatile("femms"); | 144 __asm__ volatile("femms"); |
| 145 } | 145 } |
| 146 | 146 |
| 147 void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) | 147 void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) |
| 148 { | 148 { |
| 149 x86_reg j, k; | 149 x86_reg j, k; |
| 150 long n = 1 << s->mdct_bits; | 150 long n = s->mdct_size; |
| 151 long n4 = n >> 2; | 151 long n4 = n >> 2; |
| 152 | 152 |
| 153 ff_imdct_half_3dn2(s, output+n4, input); | 153 ff_imdct_half_3dn2(s, output+n4, input); |
| 154 | 154 |
| 155 j = -n; | 155 j = -n; |
| 156 k = n-8; | 156 k = n-8; |
| 157 __asm__ volatile( | 157 __asm__ volatile( |
| 158 "movq %4, %%mm7 \n" | 158 "movq %4, %%mm7 \n" |
| 159 "1: \n" | 159 "1: \n" |
| 160 PSWAPD((%2,%1), %%mm0) | 160 PSWAPD((%2,%1), %%mm0) |
| 161 PSWAPD((%3,%0), %%mm1) | 161 PSWAPD((%3,%0), %%mm1) |
| 162 "pxor %%mm7, %%mm0 \n" | 162 "pxor %%mm7, %%mm0 \n" |
| 163 "movq %%mm1, (%3,%1) \n" | 163 "movq %%mm1, (%3,%1) \n" |
| 164 "movq %%mm0, (%2,%0) \n" | 164 "movq %%mm0, (%2,%0) \n" |
| 165 "sub $8, %1 \n" | 165 "sub $8, %1 \n" |
| 166 "add $8, %0 \n" | 166 "add $8, %0 \n" |
| 167 "jl 1b \n" | 167 "jl 1b \n" |
| 168 :"+r"(j), "+r"(k) | 168 :"+r"(j), "+r"(k) |
| 169 :"r"(output+n4), "r"(output+n4*3), | 169 :"r"(output+n4), "r"(output+n4*3), |
| 170 "m"(*m1m1) | 170 "m"(*m1m1) |
| 171 ); | 171 ); |
| 172 __asm__ volatile("femms"); | 172 __asm__ volatile("femms"); |
| 173 } | 173 } |
| 174 | 174 |
| OLD | NEW |