| OLD | NEW |
| 1 /*Copyright (c) 2003-2004, Mark Borgerding | 1 /*Copyright (c) 2003-2004, Mark Borgerding |
| 2 Lots of modifications by Jean-Marc Valin | 2 Lots of modifications by Jean-Marc Valin |
| 3 Copyright (c) 2005-2007, Xiph.Org Foundation | 3 Copyright (c) 2005-2007, Xiph.Org Foundation |
| 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO | 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO |
| 5 | 5 |
| 6 All rights reserved. | 6 All rights reserved. |
| 7 | 7 |
| 8 Redistribution and use in source and binary forms, with or without | 8 Redistribution and use in source and binary forms, with or without |
| 9 modification, are permitted provided that the following conditions are met: | 9 modification, are permitted provided that the following conditions are met: |
| 10 | 10 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 40 #include "os_support.h" | 40 #include "os_support.h" |
| 41 #include "mathops.h" | 41 #include "mathops.h" |
| 42 #include "stack_alloc.h" | 42 #include "stack_alloc.h" |
| 43 | 43 |
| 44 /* The guts header contains all the multiplication and addition macros that are
defined for | 44 /* The guts header contains all the multiplication and addition macros that are
defined for |
| 45 complex numbers. It also delares the kf_ internal functions. | 45 complex numbers. It also delares the kf_ internal functions. |
| 46 */ | 46 */ |
| 47 | 47 |
| 48 static void kf_bfly2( | 48 static void kf_bfly2( |
| 49 kiss_fft_cpx * Fout, | 49 kiss_fft_cpx * Fout, |
| 50 const size_t fstride, | |
| 51 const kiss_fft_state *st, | |
| 52 int m, | 50 int m, |
| 53 int N, | 51 int N |
| 54 int mm | |
| 55 ) | 52 ) |
| 56 { | 53 { |
| 57 kiss_fft_cpx * Fout2; | 54 kiss_fft_cpx * Fout2; |
| 58 const kiss_twiddle_cpx * tw1; | 55 int i; |
| 59 int i,j; | 56 (void)m; |
| 60 kiss_fft_cpx * Fout_beg = Fout; | 57 #ifdef CUSTOM_MODES |
| 61 for (i=0;i<N;i++) | 58 if (m==1) |
| 62 { | 59 { |
| 63 Fout = Fout_beg + i*mm; | 60 celt_assert(m==1); |
| 64 Fout2 = Fout + m; | 61 for (i=0;i<N;i++) |
| 65 tw1 = st->twiddles; | |
| 66 for(j=0;j<m;j++) | |
| 67 { | 62 { |
| 68 kiss_fft_cpx t; | 63 kiss_fft_cpx t; |
| 69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); | 64 Fout2 = Fout + 1; |
| 70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); | 65 t = *Fout2; |
| 71 C_MUL (t, *Fout2 , *tw1); | |
| 72 tw1 += fstride; | |
| 73 C_SUB( *Fout2 , *Fout , t ); | 66 C_SUB( *Fout2 , *Fout , t ); |
| 74 C_ADDTO( *Fout , t ); | 67 C_ADDTO( *Fout , t ); |
| 75 ++Fout2; | 68 Fout += 2; |
| 76 ++Fout; | 69 } |
| 70 } else |
| 71 #endif |
| 72 { |
| 73 opus_val16 tw; |
| 74 tw = QCONST16(0.7071067812f, 15); |
| 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
| 76 celt_assert(m==4); |
| 77 for (i=0;i<N;i++) |
| 78 { |
| 79 kiss_fft_cpx t; |
| 80 Fout2 = Fout + 4; |
| 81 t = Fout2[0]; |
| 82 C_SUB( Fout2[0] , Fout[0] , t ); |
| 83 C_ADDTO( Fout[0] , t ); |
| 84 |
| 85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); |
| 86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); |
| 87 C_SUB( Fout2[1] , Fout[1] , t ); |
| 88 C_ADDTO( Fout[1] , t ); |
| 89 |
| 90 t.r = Fout2[2].i; |
| 91 t.i = -Fout2[2].r; |
| 92 C_SUB( Fout2[2] , Fout[2] , t ); |
| 93 C_ADDTO( Fout[2] , t ); |
| 94 |
| 95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); |
| 96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); |
| 97 C_SUB( Fout2[3] , Fout[3] , t ); |
| 98 C_ADDTO( Fout[3] , t ); |
| 99 Fout += 8; |
| 77 } | 100 } |
| 78 } | 101 } |
| 79 } | 102 } |
| 80 | |
| 81 static void ki_bfly2( | |
| 82 kiss_fft_cpx * Fout, | |
| 83 const size_t fstride, | |
| 84 const kiss_fft_state *st, | |
| 85 int m, | |
| 86 int N, | |
| 87 int mm | |
| 88 ) | |
| 89 { | |
| 90 kiss_fft_cpx * Fout2; | |
| 91 const kiss_twiddle_cpx * tw1; | |
| 92 kiss_fft_cpx t; | |
| 93 int i,j; | |
| 94 kiss_fft_cpx * Fout_beg = Fout; | |
| 95 for (i=0;i<N;i++) | |
| 96 { | |
| 97 Fout = Fout_beg + i*mm; | |
| 98 Fout2 = Fout + m; | |
| 99 tw1 = st->twiddles; | |
| 100 for(j=0;j<m;j++) | |
| 101 { | |
| 102 C_MULC (t, *Fout2 , *tw1); | |
| 103 tw1 += fstride; | |
| 104 C_SUB( *Fout2 , *Fout , t ); | |
| 105 C_ADDTO( *Fout , t ); | |
| 106 ++Fout2; | |
| 107 ++Fout; | |
| 108 } | |
| 109 } | |
| 110 } | |
| 111 | 103 |
| 112 static void kf_bfly4( | 104 static void kf_bfly4( |
| 113 kiss_fft_cpx * Fout, | 105 kiss_fft_cpx * Fout, |
| 114 const size_t fstride, | 106 const size_t fstride, |
| 115 const kiss_fft_state *st, | 107 const kiss_fft_state *st, |
| 116 int m, | 108 int m, |
| 117 int N, | 109 int N, |
| 118 int mm | 110 int mm |
| 119 ) | 111 ) |
| 120 { | 112 { |
| 121 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 113 int i; |
| 122 kiss_fft_cpx scratch[6]; | |
| 123 const size_t m2=2*m; | |
| 124 const size_t m3=3*m; | |
| 125 int i, j; | |
| 126 | 114 |
| 127 kiss_fft_cpx * Fout_beg = Fout; | 115 if (m==1) |
| 128 for (i=0;i<N;i++) | |
| 129 { | 116 { |
| 130 Fout = Fout_beg + i*mm; | 117 /* Degenerate case where all the twiddles are 1. */ |
| 131 tw3 = tw2 = tw1 = st->twiddles; | 118 for (i=0;i<N;i++) |
| 132 for (j=0;j<m;j++) | |
| 133 { | 119 { |
| 134 C_MUL4(scratch[0],Fout[m] , *tw1 ); | 120 kiss_fft_cpx scratch0, scratch1; |
| 135 C_MUL4(scratch[1],Fout[m2] , *tw2 ); | |
| 136 C_MUL4(scratch[2],Fout[m3] , *tw3 ); | |
| 137 | 121 |
| 138 Fout->r = PSHR32(Fout->r, 2); | 122 C_SUB( scratch0 , *Fout, Fout[2] ); |
| 139 Fout->i = PSHR32(Fout->i, 2); | 123 C_ADDTO(*Fout, Fout[2]); |
| 140 C_SUB( scratch[5] , *Fout, scratch[1] ); | 124 C_ADD( scratch1 , Fout[1] , Fout[3] ); |
| 141 C_ADDTO(*Fout, scratch[1]); | 125 C_SUB( Fout[2], *Fout, scratch1 ); |
| 142 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 126 C_ADDTO( *Fout , scratch1 ); |
| 143 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 127 C_SUB( scratch1 , Fout[1] , Fout[3] ); |
| 144 C_SUB( Fout[m2], *Fout, scratch[3] ); | |
| 145 tw1 += fstride; | |
| 146 tw2 += fstride*2; | |
| 147 tw3 += fstride*3; | |
| 148 C_ADDTO( *Fout , scratch[3] ); | |
| 149 | 128 |
| 150 Fout[m].r = scratch[5].r + scratch[4].i; | 129 Fout[1].r = scratch0.r + scratch1.i; |
| 151 Fout[m].i = scratch[5].i - scratch[4].r; | 130 Fout[1].i = scratch0.i - scratch1.r; |
| 152 Fout[m3].r = scratch[5].r - scratch[4].i; | 131 Fout[3].r = scratch0.r - scratch1.i; |
| 153 Fout[m3].i = scratch[5].i + scratch[4].r; | 132 Fout[3].i = scratch0.i + scratch1.r; |
| 154 ++Fout; | 133 Fout+=4; |
| 134 } |
| 135 } else { |
| 136 int j; |
| 137 kiss_fft_cpx scratch[6]; |
| 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; |
| 139 const int m2=2*m; |
| 140 const int m3=3*m; |
| 141 kiss_fft_cpx * Fout_beg = Fout; |
| 142 for (i=0;i<N;i++) |
| 143 { |
| 144 Fout = Fout_beg + i*mm; |
| 145 tw3 = tw2 = tw1 = st->twiddles; |
| 146 /* m is guaranteed to be a multiple of 4. */ |
| 147 for (j=0;j<m;j++) |
| 148 { |
| 149 C_MUL(scratch[0],Fout[m] , *tw1 ); |
| 150 C_MUL(scratch[1],Fout[m2] , *tw2 ); |
| 151 C_MUL(scratch[2],Fout[m3] , *tw3 ); |
| 152 |
| 153 C_SUB( scratch[5] , *Fout, scratch[1] ); |
| 154 C_ADDTO(*Fout, scratch[1]); |
| 155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
| 156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
| 157 C_SUB( Fout[m2], *Fout, scratch[3] ); |
| 158 tw1 += fstride; |
| 159 tw2 += fstride*2; |
| 160 tw3 += fstride*3; |
| 161 C_ADDTO( *Fout , scratch[3] ); |
| 162 |
| 163 Fout[m].r = scratch[5].r + scratch[4].i; |
| 164 Fout[m].i = scratch[5].i - scratch[4].r; |
| 165 Fout[m3].r = scratch[5].r - scratch[4].i; |
| 166 Fout[m3].i = scratch[5].i + scratch[4].r; |
| 167 ++Fout; |
| 168 } |
| 155 } | 169 } |
| 156 } | 170 } |
| 157 } | 171 } |
| 158 | 172 |
| 159 static void ki_bfly4( | |
| 160 kiss_fft_cpx * Fout, | |
| 161 const size_t fstride, | |
| 162 const kiss_fft_state *st, | |
| 163 int m, | |
| 164 int N, | |
| 165 int mm | |
| 166 ) | |
| 167 { | |
| 168 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | |
| 169 kiss_fft_cpx scratch[6]; | |
| 170 const size_t m2=2*m; | |
| 171 const size_t m3=3*m; | |
| 172 int i, j; | |
| 173 | |
| 174 kiss_fft_cpx * Fout_beg = Fout; | |
| 175 for (i=0;i<N;i++) | |
| 176 { | |
| 177 Fout = Fout_beg + i*mm; | |
| 178 tw3 = tw2 = tw1 = st->twiddles; | |
| 179 for (j=0;j<m;j++) | |
| 180 { | |
| 181 C_MULC(scratch[0],Fout[m] , *tw1 ); | |
| 182 C_MULC(scratch[1],Fout[m2] , *tw2 ); | |
| 183 C_MULC(scratch[2],Fout[m3] , *tw3 ); | |
| 184 | |
| 185 C_SUB( scratch[5] , *Fout, scratch[1] ); | |
| 186 C_ADDTO(*Fout, scratch[1]); | |
| 187 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | |
| 188 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | |
| 189 C_SUB( Fout[m2], *Fout, scratch[3] ); | |
| 190 tw1 += fstride; | |
| 191 tw2 += fstride*2; | |
| 192 tw3 += fstride*3; | |
| 193 C_ADDTO( *Fout , scratch[3] ); | |
| 194 | |
| 195 Fout[m].r = scratch[5].r - scratch[4].i; | |
| 196 Fout[m].i = scratch[5].i + scratch[4].r; | |
| 197 Fout[m3].r = scratch[5].r + scratch[4].i; | |
| 198 Fout[m3].i = scratch[5].i - scratch[4].r; | |
| 199 ++Fout; | |
| 200 } | |
| 201 } | |
| 202 } | |
| 203 | 173 |
| 204 #ifndef RADIX_TWO_ONLY | 174 #ifndef RADIX_TWO_ONLY |
| 205 | 175 |
| 206 static void kf_bfly3( | 176 static void kf_bfly3( |
| 207 kiss_fft_cpx * Fout, | 177 kiss_fft_cpx * Fout, |
| 208 const size_t fstride, | 178 const size_t fstride, |
| 209 const kiss_fft_state *st, | 179 const kiss_fft_state *st, |
| 210 int m, | 180 int m, |
| 211 int N, | 181 int N, |
| 212 int mm | 182 int mm |
| 213 ) | 183 ) |
| 214 { | 184 { |
| 215 int i; | 185 int i; |
| 216 size_t k; | 186 size_t k; |
| 217 const size_t m2 = 2*m; | 187 const size_t m2 = 2*m; |
| 218 const kiss_twiddle_cpx *tw1,*tw2; | 188 const kiss_twiddle_cpx *tw1,*tw2; |
| 219 kiss_fft_cpx scratch[5]; | 189 kiss_fft_cpx scratch[5]; |
| 220 kiss_twiddle_cpx epi3; | 190 kiss_twiddle_cpx epi3; |
| 221 | 191 |
| 222 kiss_fft_cpx * Fout_beg = Fout; | 192 kiss_fft_cpx * Fout_beg = Fout; |
| 193 #ifdef FIXED_POINT |
| 194 epi3.r = -16384; |
| 195 epi3.i = -28378; |
| 196 #else |
| 223 epi3 = st->twiddles[fstride*m]; | 197 epi3 = st->twiddles[fstride*m]; |
| 198 #endif |
| 224 for (i=0;i<N;i++) | 199 for (i=0;i<N;i++) |
| 225 { | 200 { |
| 226 Fout = Fout_beg + i*mm; | 201 Fout = Fout_beg + i*mm; |
| 227 tw1=tw2=st->twiddles; | 202 tw1=tw2=st->twiddles; |
| 203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
| 228 k=m; | 204 k=m; |
| 229 do { | 205 do { |
| 230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); | |
| 231 | 206 |
| 232 C_MUL(scratch[1],Fout[m] , *tw1); | 207 C_MUL(scratch[1],Fout[m] , *tw1); |
| 233 C_MUL(scratch[2],Fout[m2] , *tw2); | 208 C_MUL(scratch[2],Fout[m2] , *tw2); |
| 234 | 209 |
| 235 C_ADD(scratch[3],scratch[1],scratch[2]); | 210 C_ADD(scratch[3],scratch[1],scratch[2]); |
| 236 C_SUB(scratch[0],scratch[1],scratch[2]); | 211 C_SUB(scratch[0],scratch[1],scratch[2]); |
| 237 tw1 += fstride; | 212 tw1 += fstride; |
| 238 tw2 += fstride*2; | 213 tw2 += fstride*2; |
| 239 | 214 |
| 240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | 215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); |
| 241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | 216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); |
| 242 | 217 |
| 243 C_MULBYSCALAR( scratch[0] , epi3.i ); | 218 C_MULBYSCALAR( scratch[0] , epi3.i ); |
| 244 | 219 |
| 245 C_ADDTO(*Fout,scratch[3]); | 220 C_ADDTO(*Fout,scratch[3]); |
| 246 | 221 |
| 247 Fout[m2].r = Fout[m].r + scratch[0].i; | 222 Fout[m2].r = Fout[m].r + scratch[0].i; |
| 248 Fout[m2].i = Fout[m].i - scratch[0].r; | 223 Fout[m2].i = Fout[m].i - scratch[0].r; |
| 249 | 224 |
| 250 Fout[m].r -= scratch[0].i; | 225 Fout[m].r -= scratch[0].i; |
| 251 Fout[m].i += scratch[0].r; | 226 Fout[m].i += scratch[0].r; |
| 252 | 227 |
| 253 ++Fout; | 228 ++Fout; |
| 254 } while(--k); | 229 } while(--k); |
| 255 } | 230 } |
| 256 } | 231 } |
| 257 | 232 |
| 258 static void ki_bfly3( | |
| 259 kiss_fft_cpx * Fout, | |
| 260 const size_t fstride, | |
| 261 const kiss_fft_state *st, | |
| 262 int m, | |
| 263 int N, | |
| 264 int mm | |
| 265 ) | |
| 266 { | |
| 267 int i, k; | |
| 268 const size_t m2 = 2*m; | |
| 269 const kiss_twiddle_cpx *tw1,*tw2; | |
| 270 kiss_fft_cpx scratch[5]; | |
| 271 kiss_twiddle_cpx epi3; | |
| 272 | 233 |
| 273 kiss_fft_cpx * Fout_beg = Fout; | 234 #ifndef OVERRIDE_kf_bfly5 |
| 274 epi3 = st->twiddles[fstride*m]; | |
| 275 for (i=0;i<N;i++) | |
| 276 { | |
| 277 Fout = Fout_beg + i*mm; | |
| 278 tw1=tw2=st->twiddles; | |
| 279 k=m; | |
| 280 do{ | |
| 281 | |
| 282 C_MULC(scratch[1],Fout[m] , *tw1); | |
| 283 C_MULC(scratch[2],Fout[m2] , *tw2); | |
| 284 | |
| 285 C_ADD(scratch[3],scratch[1],scratch[2]); | |
| 286 C_SUB(scratch[0],scratch[1],scratch[2]); | |
| 287 tw1 += fstride; | |
| 288 tw2 += fstride*2; | |
| 289 | |
| 290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | |
| 291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | |
| 292 | |
| 293 C_MULBYSCALAR( scratch[0] , -epi3.i ); | |
| 294 | |
| 295 C_ADDTO(*Fout,scratch[3]); | |
| 296 | |
| 297 Fout[m2].r = Fout[m].r + scratch[0].i; | |
| 298 Fout[m2].i = Fout[m].i - scratch[0].r; | |
| 299 | |
| 300 Fout[m].r -= scratch[0].i; | |
| 301 Fout[m].i += scratch[0].r; | |
| 302 | |
| 303 ++Fout; | |
| 304 }while(--k); | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 static void kf_bfly5( | 235 static void kf_bfly5( |
| 309 kiss_fft_cpx * Fout, | 236 kiss_fft_cpx * Fout, |
| 310 const size_t fstride, | 237 const size_t fstride, |
| 311 const kiss_fft_state *st, | 238 const kiss_fft_state *st, |
| 312 int m, | 239 int m, |
| 313 int N, | 240 int N, |
| 314 int mm | 241 int mm |
| 315 ) | 242 ) |
| 316 { | 243 { |
| 317 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
| 318 int i, u; | 245 int i, u; |
| 319 kiss_fft_cpx scratch[13]; | 246 kiss_fft_cpx scratch[13]; |
| 320 const kiss_twiddle_cpx * twiddles = st->twiddles; | |
| 321 const kiss_twiddle_cpx *tw; | 247 const kiss_twiddle_cpx *tw; |
| 322 kiss_twiddle_cpx ya,yb; | 248 kiss_twiddle_cpx ya,yb; |
| 323 kiss_fft_cpx * Fout_beg = Fout; | 249 kiss_fft_cpx * Fout_beg = Fout; |
| 324 | 250 |
| 325 ya = twiddles[fstride*m]; | 251 #ifdef FIXED_POINT |
| 326 yb = twiddles[fstride*2*m]; | 252 ya.r = 10126; |
| 253 ya.i = -31164; |
| 254 yb.r = -26510; |
| 255 yb.i = -19261; |
| 256 #else |
| 257 ya = st->twiddles[fstride*m]; |
| 258 yb = st->twiddles[fstride*2*m]; |
| 259 #endif |
| 327 tw=st->twiddles; | 260 tw=st->twiddles; |
| 328 | 261 |
| 329 for (i=0;i<N;i++) | 262 for (i=0;i<N;i++) |
| 330 { | 263 { |
| 331 Fout = Fout_beg + i*mm; | 264 Fout = Fout_beg + i*mm; |
| 332 Fout0=Fout; | 265 Fout0=Fout; |
| 333 Fout1=Fout0+m; | 266 Fout1=Fout0+m; |
| 334 Fout2=Fout0+2*m; | 267 Fout2=Fout0+2*m; |
| 335 Fout3=Fout0+3*m; | 268 Fout3=Fout0+3*m; |
| 336 Fout4=Fout0+4*m; | 269 Fout4=Fout0+4*m; |
| 337 | 270 |
| 271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
| 338 for ( u=0; u<m; ++u ) { | 272 for ( u=0; u<m; ++u ) { |
| 339 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV
( *Fout3,5); C_FIXDIV( *Fout4,5); | |
| 340 scratch[0] = *Fout0; | 273 scratch[0] = *Fout0; |
| 341 | 274 |
| 342 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
| 343 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
| 344 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
| 345 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
| 346 | 279 |
| 347 C_ADD( scratch[7],scratch[1],scratch[4]); | 280 C_ADD( scratch[7],scratch[1],scratch[4]); |
| 348 C_SUB( scratch[10],scratch[1],scratch[4]); | 281 C_SUB( scratch[10],scratch[1],scratch[4]); |
| 349 C_ADD( scratch[8],scratch[2],scratch[3]); | 282 C_ADD( scratch[8],scratch[2],scratch[3]); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); | 299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); |
| 367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); | 300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); |
| 368 | 301 |
| 369 C_ADD(*Fout2,scratch[11],scratch[12]); | 302 C_ADD(*Fout2,scratch[11],scratch[12]); |
| 370 C_SUB(*Fout3,scratch[11],scratch[12]); | 303 C_SUB(*Fout3,scratch[11],scratch[12]); |
| 371 | 304 |
| 372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
| 373 } | 306 } |
| 374 } | 307 } |
| 375 } | 308 } |
| 309 #endif /* OVERRIDE_kf_bfly5 */ |
| 376 | 310 |
| 377 static void ki_bfly5( | |
| 378 kiss_fft_cpx * Fout, | |
| 379 const size_t fstride, | |
| 380 const kiss_fft_state *st, | |
| 381 int m, | |
| 382 int N, | |
| 383 int mm | |
| 384 ) | |
| 385 { | |
| 386 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | |
| 387 int i, u; | |
| 388 kiss_fft_cpx scratch[13]; | |
| 389 const kiss_twiddle_cpx * twiddles = st->twiddles; | |
| 390 const kiss_twiddle_cpx *tw; | |
| 391 kiss_twiddle_cpx ya,yb; | |
| 392 kiss_fft_cpx * Fout_beg = Fout; | |
| 393 | |
| 394 ya = twiddles[fstride*m]; | |
| 395 yb = twiddles[fstride*2*m]; | |
| 396 tw=st->twiddles; | |
| 397 | |
| 398 for (i=0;i<N;i++) | |
| 399 { | |
| 400 Fout = Fout_beg + i*mm; | |
| 401 Fout0=Fout; | |
| 402 Fout1=Fout0+m; | |
| 403 Fout2=Fout0+2*m; | |
| 404 Fout3=Fout0+3*m; | |
| 405 Fout4=Fout0+4*m; | |
| 406 | |
| 407 for ( u=0; u<m; ++u ) { | |
| 408 scratch[0] = *Fout0; | |
| 409 | |
| 410 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); | |
| 411 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); | |
| 412 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); | |
| 413 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); | |
| 414 | |
| 415 C_ADD( scratch[7],scratch[1],scratch[4]); | |
| 416 C_SUB( scratch[10],scratch[1],scratch[4]); | |
| 417 C_ADD( scratch[8],scratch[2],scratch[3]); | |
| 418 C_SUB( scratch[9],scratch[2],scratch[3]); | |
| 419 | |
| 420 Fout0->r += scratch[7].r + scratch[8].r; | |
| 421 Fout0->i += scratch[7].i + scratch[8].i; | |
| 422 | |
| 423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[
8].r,yb.r); | |
| 424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[
8].i,yb.r); | |
| 425 | |
| 426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); | |
| 427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); | |
| 428 | |
| 429 C_SUB(*Fout1,scratch[5],scratch[6]); | |
| 430 C_ADD(*Fout4,scratch[5],scratch[6]); | |
| 431 | |
| 432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch
[8].r,ya.r); | |
| 433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch
[8].i,ya.r); | |
| 434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); | |
| 435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); | |
| 436 | |
| 437 C_ADD(*Fout2,scratch[11],scratch[12]); | |
| 438 C_SUB(*Fout3,scratch[11],scratch[12]); | |
| 439 | |
| 440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | |
| 441 } | |
| 442 } | |
| 443 } | |
| 444 | 311 |
| 445 #endif | 312 #endif |
| 446 | 313 |
| 447 | 314 |
| 448 #ifdef CUSTOM_MODES | 315 #ifdef CUSTOM_MODES |
| 449 | 316 |
| 450 static | 317 static |
| 451 void compute_bitrev_table( | 318 void compute_bitrev_table( |
| 452 int Fout, | 319 int Fout, |
| 453 opus_int16 *f, | 320 opus_int16 *f, |
| (...skipping 27 matching lines...) Expand all Loading... |
| 481 } | 348 } |
| 482 | 349 |
| 483 /* facbuf is populated by p1,m1,p2,m2, ... | 350 /* facbuf is populated by p1,m1,p2,m2, ... |
| 484 where | 351 where |
| 485 p[i] * m[i] = m[i-1] | 352 p[i] * m[i] = m[i-1] |
| 486 m0 = n */ | 353 m0 = n */ |
| 487 static | 354 static |
| 488 int kf_factor(int n,opus_int16 * facbuf) | 355 int kf_factor(int n,opus_int16 * facbuf) |
| 489 { | 356 { |
| 490 int p=4; | 357 int p=4; |
| 358 int i; |
| 359 int stages=0; |
| 360 int nbak = n; |
| 491 | 361 |
| 492 /*factor out powers of 4, powers of 2, then any remaining primes */ | 362 /*factor out powers of 4, powers of 2, then any remaining primes */ |
| 493 do { | 363 do { |
| 494 while (n % p) { | 364 while (n % p) { |
| 495 switch (p) { | 365 switch (p) { |
| 496 case 4: p = 2; break; | 366 case 4: p = 2; break; |
| 497 case 2: p = 3; break; | 367 case 2: p = 3; break; |
| 498 default: p += 2; break; | 368 default: p += 2; break; |
| 499 } | 369 } |
| 500 if (p>32000 || (opus_int32)p*(opus_int32)p > n) | 370 if (p>32000 || (opus_int32)p*(opus_int32)p > n) |
| 501 p = n; /* no more factors, skip to end */ | 371 p = n; /* no more factors, skip to end */ |
| 502 } | 372 } |
| 503 n /= p; | 373 n /= p; |
| 504 #ifdef RADIX_TWO_ONLY | 374 #ifdef RADIX_TWO_ONLY |
| 505 if (p!=2 && p != 4) | 375 if (p!=2 && p != 4) |
| 506 #else | 376 #else |
| 507 if (p>5) | 377 if (p>5) |
| 508 #endif | 378 #endif |
| 509 { | 379 { |
| 510 return 0; | 380 return 0; |
| 511 } | 381 } |
| 512 *facbuf++ = p; | 382 facbuf[2*stages] = p; |
| 513 *facbuf++ = n; | 383 if (p==2 && stages > 1) |
| 384 { |
| 385 facbuf[2*stages] = 4; |
| 386 facbuf[2] = 2; |
| 387 } |
| 388 stages++; |
| 514 } while (n > 1); | 389 } while (n > 1); |
| 390 n = nbak; |
| 391 /* Reverse the order to get the radix 4 at the end, so we can use the |
| 392 fast degenerate case. It turns out that reversing the order also |
| 393 improves the noise behaviour. */ |
| 394 for (i=0;i<stages/2;i++) |
| 395 { |
| 396 int tmp; |
| 397 tmp = facbuf[2*i]; |
| 398 facbuf[2*i] = facbuf[2*(stages-i-1)]; |
| 399 facbuf[2*(stages-i-1)] = tmp; |
| 400 } |
| 401 for (i=0;i<stages;i++) |
| 402 { |
| 403 n /= facbuf[2*i]; |
| 404 facbuf[2*i+1] = n; |
| 405 } |
| 515 return 1; | 406 return 1; |
| 516 } | 407 } |
| 517 | 408 |
| 518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) | 409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) |
| 519 { | 410 { |
| 520 int i; | 411 int i; |
| 521 #ifdef FIXED_POINT | 412 #ifdef FIXED_POINT |
| 522 for (i=0;i<nfft;++i) { | 413 for (i=0;i<nfft;++i) { |
| 523 opus_val32 phase = -i; | 414 opus_val32 phase = -i; |
| 524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); | 415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 548 }else{ | 439 }else{ |
| 549 if (mem != NULL && *lenmem >= memneeded) | 440 if (mem != NULL && *lenmem >= memneeded) |
| 550 st = (kiss_fft_state*)mem; | 441 st = (kiss_fft_state*)mem; |
| 551 *lenmem = memneeded; | 442 *lenmem = memneeded; |
| 552 } | 443 } |
| 553 if (st) { | 444 if (st) { |
| 554 opus_int16 *bitrev; | 445 opus_int16 *bitrev; |
| 555 kiss_twiddle_cpx *twiddles; | 446 kiss_twiddle_cpx *twiddles; |
| 556 | 447 |
| 557 st->nfft=nfft; | 448 st->nfft=nfft; |
| 558 #ifndef FIXED_POINT | 449 #ifdef FIXED_POINT |
| 450 st->scale_shift = celt_ilog2(st->nfft); |
| 451 if (st->nfft == 1<<st->scale_shift) |
| 452 st->scale = Q15ONE; |
| 453 else |
| 454 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); |
| 455 #else |
| 559 st->scale = 1.f/nfft; | 456 st->scale = 1.f/nfft; |
| 560 #endif | 457 #endif |
| 561 if (base != NULL) | 458 if (base != NULL) |
| 562 { | 459 { |
| 563 st->twiddles = base->twiddles; | 460 st->twiddles = base->twiddles; |
| 564 st->shift = 0; | 461 st->shift = 0; |
| 565 while (nfft<<st->shift != base->nfft && st->shift < 32) | 462 while (st->shift < 32 && nfft<<st->shift != base->nfft) |
| 566 st->shift++; | 463 st->shift++; |
| 567 if (st->shift>=32) | 464 if (st->shift>=32) |
| 568 goto fail; | 465 goto fail; |
| 569 } else { | 466 } else { |
| 570 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(k
iss_twiddle_cpx)*nfft); | 467 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(k
iss_twiddle_cpx)*nfft); |
| 571 compute_twiddles(twiddles, nfft); | 468 compute_twiddles(twiddles, nfft); |
| 572 st->shift = -1; | 469 st->shift = -1; |
| 573 } | 470 } |
| 574 if (!kf_factor(nfft,st->factors)) | 471 if (!kf_factor(nfft,st->factors)) |
| 575 { | 472 { |
| (...skipping 23 matching lines...) Expand all Loading... |
| 599 { | 496 { |
| 600 opus_free((opus_int16*)cfg->bitrev); | 497 opus_free((opus_int16*)cfg->bitrev); |
| 601 if (cfg->shift < 0) | 498 if (cfg->shift < 0) |
| 602 opus_free((kiss_twiddle_cpx*)cfg->twiddles); | 499 opus_free((kiss_twiddle_cpx*)cfg->twiddles); |
| 603 opus_free((kiss_fft_state*)cfg); | 500 opus_free((kiss_fft_state*)cfg); |
| 604 } | 501 } |
| 605 } | 502 } |
| 606 | 503 |
| 607 #endif /* CUSTOM_MODES */ | 504 #endif /* CUSTOM_MODES */ |
| 608 | 505 |
| 609 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
t) | 506 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) |
| 610 { | 507 { |
| 611 int m2, m; | 508 int m2, m; |
| 612 int p; | 509 int p; |
| 613 int L; | 510 int L; |
| 614 int fstride[MAXFACTORS]; | 511 int fstride[MAXFACTORS]; |
| 615 int i; | 512 int i; |
| 616 int shift; | 513 int shift; |
| 617 | 514 |
| 618 /* st->shift can be -1 */ | 515 /* st->shift can be -1 */ |
| 619 shift = st->shift>0 ? st->shift : 0; | 516 shift = st->shift>0 ? st->shift : 0; |
| 620 | 517 |
| 621 celt_assert2 (fin != fout, "In-place FFT not supported"); | |
| 622 /* Bit-reverse the input */ | |
| 623 for (i=0;i<st->nfft;i++) | |
| 624 { | |
| 625 fout[st->bitrev[i]] = fin[i]; | |
| 626 #ifndef FIXED_POINT | |
| 627 fout[st->bitrev[i]].r *= st->scale; | |
| 628 fout[st->bitrev[i]].i *= st->scale; | |
| 629 #endif | |
| 630 } | |
| 631 | |
| 632 fstride[0] = 1; | 518 fstride[0] = 1; |
| 633 L=0; | 519 L=0; |
| 634 do { | 520 do { |
| 635 p = st->factors[2*L]; | 521 p = st->factors[2*L]; |
| 636 m = st->factors[2*L+1]; | 522 m = st->factors[2*L+1]; |
| 637 fstride[L+1] = fstride[L]*p; | 523 fstride[L+1] = fstride[L]*p; |
| 638 L++; | 524 L++; |
| 639 } while(m!=1); | 525 } while(m!=1); |
| 640 m = st->factors[2*L-1]; | 526 m = st->factors[2*L-1]; |
| 641 for (i=L-1;i>=0;i--) | 527 for (i=L-1;i>=0;i--) |
| 642 { | 528 { |
| 643 if (i!=0) | 529 if (i!=0) |
| 644 m2 = st->factors[2*i-1]; | 530 m2 = st->factors[2*i-1]; |
| 645 else | 531 else |
| 646 m2 = 1; | 532 m2 = 1; |
| 647 switch (st->factors[2*i]) | 533 switch (st->factors[2*i]) |
| 648 { | 534 { |
| 649 case 2: | 535 case 2: |
| 650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 536 kf_bfly2(fout, m, fstride[i]); |
| 651 break; | 537 break; |
| 652 case 4: | 538 case 4: |
| 653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
| 654 break; | 540 break; |
| 655 #ifndef RADIX_TWO_ONLY | 541 #ifndef RADIX_TWO_ONLY |
| 656 case 3: | 542 case 3: |
| 657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 543 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
| 658 break; | 544 break; |
| 659 case 5: | 545 case 5: |
| 660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 546 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
| 661 break; | 547 break; |
| 662 #endif | 548 #endif |
| 663 } | 549 } |
| 664 m = m2; | 550 m = m2; |
| 665 } | 551 } |
| 666 } | 552 } |
| 667 | 553 |
| 554 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
t) |
| 555 { |
| 556 int i; |
| 557 opus_val16 scale; |
| 558 #ifdef FIXED_POINT |
| 559 /* Allows us to scale with MULT16_32_Q16(), which is faster than |
| 560 MULT16_32_Q15() on ARM. */ |
| 561 int scale_shift = st->scale_shift-1; |
| 562 #endif |
| 563 scale = st->scale; |
| 564 |
| 565 celt_assert2 (fin != fout, "In-place FFT not supported"); |
| 566 /* Bit-reverse the input */ |
| 567 for (i=0;i<st->nfft;i++) |
| 568 { |
| 569 kiss_fft_cpx x = fin[i]; |
| 570 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); |
| 571 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); |
| 572 } |
| 573 opus_fft_impl(st, fout); |
| 574 } |
| 575 |
| 576 |
| 577 #ifdef TEST_UNIT_DFT_C |
| 668 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo
ut) | 578 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo
ut) |
| 669 { | 579 { |
| 670 int m2, m; | |
| 671 int p; | |
| 672 int L; | |
| 673 int fstride[MAXFACTORS]; | |
| 674 int i; | 580 int i; |
| 675 int shift; | |
| 676 | |
| 677 /* st->shift can be -1 */ | |
| 678 shift = st->shift>0 ? st->shift : 0; | |
| 679 celt_assert2 (fin != fout, "In-place FFT not supported"); | 581 celt_assert2 (fin != fout, "In-place FFT not supported"); |
| 680 /* Bit-reverse the input */ | 582 /* Bit-reverse the input */ |
| 681 for (i=0;i<st->nfft;i++) | 583 for (i=0;i<st->nfft;i++) |
| 682 fout[st->bitrev[i]] = fin[i]; | 584 fout[st->bitrev[i]] = fin[i]; |
| 683 | 585 for (i=0;i<st->nfft;i++) |
| 684 fstride[0] = 1; | 586 fout[i].i = -fout[i].i; |
| 685 L=0; | 587 opus_fft_impl(st, fout); |
| 686 do { | 588 for (i=0;i<st->nfft;i++) |
| 687 p = st->factors[2*L]; | 589 fout[i].i = -fout[i].i; |
| 688 m = st->factors[2*L+1]; | 590 } |
| 689 fstride[L+1] = fstride[L]*p; | |
| 690 L++; | |
| 691 } while(m!=1); | |
| 692 m = st->factors[2*L-1]; | |
| 693 for (i=L-1;i>=0;i--) | |
| 694 { | |
| 695 if (i!=0) | |
| 696 m2 = st->factors[2*i-1]; | |
| 697 else | |
| 698 m2 = 1; | |
| 699 switch (st->factors[2*i]) | |
| 700 { | |
| 701 case 2: | |
| 702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
| 703 break; | |
| 704 case 4: | |
| 705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
| 706 break; | |
| 707 #ifndef RADIX_TWO_ONLY | |
| 708 case 3: | |
| 709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
| 710 break; | |
| 711 case 5: | |
| 712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
| 713 break; | |
| 714 #endif | 591 #endif |
| 715 } | |
| 716 m = m2; | |
| 717 } | |
| 718 } | |
| 719 | |
| OLD | NEW |