OLD | NEW |
---|---|
(Empty) | |
1 @// | |
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
3 @// | |
4 @// Use of this source code is governed by a BSD-style license | |
5 @// that can be found in the LICENSE file in the root of the source | |
6 @// tree. An additional intellectual property rights grant can be found | |
7 @// in the file PATENTS. All contributing project authors may | |
8 @// be found in the AUTHORS file in the root of the source tree. | |
9 @// | |
10 @// This file was originally licensed as follows. It has been | |
11 @// relicensed with permission from the copyright holders. | |
12 | |
13 @// | |
14 @// | |
15 @// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s | |
16 @// OpenMAX DL: v1.0.2 | |
17 @// Last Modified Revision: 7766 | |
18 @// Last Modified Date: Thu, 27 Sep 2007 | |
19 @// | |
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. | |
21 @// | |
22 @// | |
23 @// | |
24 @// Description: | |
25 @// Compute a first stage Radix 8 FFT stage for a N point complex signal | |
26 @// | |
27 @// | |
28 | |
29 | |
30 @// Include standard headers | |
31 | |
32 #include "dl/api/armCOMM_s.h" | |
33 #include "dl/api/omxtypes_s.h" | |
34 | |
35 | |
36 @// Import symbols required from other files | |
37 @// (For example tables) | |
38 | |
39 | |
40 @// Set debugging level | |
41 @//DEBUG_ON SETL {TRUE} | |
42 | |
43 | |
44 | |
45 @// Guarding implementation by the processor name | |
46 | |
47 | |
48 | |
49 | |
50 @// Guarding implementation by the processor name | |
51 | |
52 | |
@// Core (integer) register aliases used by the FFTSTAGE macro below.
@// Several names deliberately share one physical register; each such
@// pair is called out next to its definition.
53 @//Input Registers | |
54 | |
@// pSrc: read pointer for the input samples (post-incremented by the loads)
55 #define pSrc r0 | |
@// pDst: write pointer for the stage output (post-incremented by the stores)
56 #define pDst r2 | |
@// pTwiddle: not referenced by the FFTSTAGE macro in this file
57 #define pTwiddle r1 | |
@// subFFTNum: divided by 8 by the macro and written back
58 #define subFFTNum r6 | |
@// subFFTSize: set to 8 by the macro
59 #define subFFTSize r7 | |
60 @// dest buffer for the next stage (not pSrc for first stage) | |
@// The macro copies pPingPongBuf into pDst on exit.
61 #define pPingPongBuf r5 | |
62 | |
63 | |
64 @//Output Registers | |
65 | |
66 | |
67 @//Local Scratch Registers | |
68 | |
69 #define grpSize r3 | |
70 @// Reuse grpSize as setCount | |
@// (grpSize and setCount are the same register, r3)
71 #define setCount r3 | |
72 #define pointStep r4 | |
@// outPointStep shares r4 with pointStep; it is not referenced by the
@// FFTSTAGE macro in this file.
73 #define outPointStep r4 | |
@// setStep: 16 - 7*pointStep, rewinds pSrc to the next set after data[7]
74 #define setStep r8 | |
@// step1: 2*pointStep, the stride between consecutive even (or odd) outputs
75 #define step1 r9 | |
@// step2: 7*pointStep
76 #define step2 r10 | |
@// t0: holds ONEBYSQRT2 before it is moved into a NEON lane
77 #define t0 r11 | |
78 | |
79 | |
80 @// Neon Registers | |
@//
@// Register banks, as allocated by the definitions below:
@//   X (loaded input data[0..7])   : D0-D15  (Q0-Q7)
@//   U (first butterfly stage)     : D16-D31 (Q8-Q15)
@//   V (second butterfly stage)    : D16-D31, overlapping U
@//   Y (third stage / output)      : D16-D31, overlapping U and V
@//   T (scratch / 1/sqrt(2) lane)  : D0-D1,  overlapping dXr7/dXi7
@// Because U, V and Y alias the same physical registers, the instruction
@// order inside FFTSTAGE matters: a value must be consumed before the
@// alias that shares its register is written.
81 | |
82 #define dXr0 D14.S16 | |
83 #define dXi0 D15.S16 | |
84 #define dXr1 D2.S16 | |
85 #define dXi1 D3.S16 | |
86 #define dXr2 D4.S16 | |
87 #define dXi2 D5.S16 | |
88 #define dXr3 D6.S16 | |
89 #define dXi3 D7.S16 | |
90 #define dXr4 D8.S16 | |
91 #define dXi4 D9.S16 | |
92 #define dXr5 D10.S16 | |
93 #define dXi5 D11.S16 | |
94 #define dXr6 D12.S16 | |
95 #define dXi6 D13.S16 | |
@// X7 lives in D0/D1, the same registers as dT0/dT1 defined further down.
96 #define dXr7 D0.S16 | |
97 #define dXi7 D1.S16 | |
98 #define qX0 Q7.S16 | |
99 #define qX1 Q1.S16 | |
100 #define qX2 Q2.S16 | |
101 #define qX3 Q3.S16 | |
102 #define qX4 Q4.S16 | |
103 #define qX5 Q5.S16 | |
104 #define qX6 Q6.S16 | |
105 #define qX7 Q0.S16 | |
106 | |
107 #define dUr0 D16.S16 | |
108 #define dUi0 D17.S16 | |
109 #define dUr2 D18.S16 | |
110 #define dUi2 D19.S16 | |
111 #define dUr4 D20.S16 | |
112 #define dUi4 D21.S16 | |
113 #define dUr6 D22.S16 | |
114 #define dUi6 D23.S16 | |
115 #define dUr1 D24.S16 | |
116 #define dUi1 D25.S16 | |
117 #define dUr3 D26.S16 | |
118 #define dUi3 D27.S16 | |
119 #define dUr5 D28.S16 | |
120 #define dUi5 D29.S16 | |
121 @// NOTE: dUr7/dUi7 are D30/D31; they do not share registers with dXr7/dXi7 (D0/D1) | |
122 #define dUr7 D30.S16 | |
123 #define dUi7 D31.S16 | |
124 #define qU0 Q8.S16 | |
125 #define qU1 Q12.S16 | |
126 #define qU2 Q9.S16 | |
127 #define qU3 Q13.S16 | |
128 #define qU4 Q10.S16 | |
129 #define qU5 Q14.S16 | |
130 #define qU6 Q11.S16 | |
131 #define qU7 Q15.S16 | |
132 | |
133 | |
134 | |
@// V bank: even-indexed V values sit where the odd-indexed U values are
@// (D24-D31) and odd-indexed V values where the even-indexed U values are
@// (D16-D23).
135 #define dVr0 D24.S16 | |
136 #define dVi0 D25.S16 | |
137 #define dVr2 D26.S16 | |
138 #define dVi2 D27.S16 | |
139 #define dVr4 D28.S16 | |
140 #define dVi4 D29.S16 | |
141 #define dVr6 D30.S16 | |
142 #define dVi6 D31.S16 | |
143 #define dVr1 D16.S16 | |
144 #define dVi1 D17.S16 | |
145 #define dVr3 D18.S16 | |
146 #define dVi3 D19.S16 | |
147 #define dVr5 D20.S16 | |
148 #define dVi5 D21.S16 | |
149 @// dVr7 shares D22 with dUr6 (dUi7 is D31, so it is not the register reused here) | |
150 #define dVr7 D22.S16 | |
151 @// dVi7 shares D23 with dUi6 (dUr7 is D30, so it is not the register reused here) | |
152 #define dVi7 D23.S16 | |
153 #define qV0 Q12.S16 | |
154 #define qV1 Q8.S16 | |
155 #define qV2 Q13.S16 | |
156 #define qV3 Q9.S16 | |
157 #define qV4 Q14.S16 | |
158 #define qV5 Q10.S16 | |
159 #define qV6 Q15.S16 | |
160 #define qV7 Q11.S16 | |
161 | |
162 | |
163 | |
@// Y bank: uses exactly the same D/Q assignment as the U bank.
164 #define dYr0 D16.S16 | |
165 #define dYi0 D17.S16 | |
166 #define dYr2 D18.S16 | |
167 #define dYi2 D19.S16 | |
168 #define dYr4 D20.S16 | |
169 #define dYi4 D21.S16 | |
170 #define dYr6 D22.S16 | |
171 #define dYi6 D23.S16 | |
172 #define dYr1 D24.S16 | |
173 #define dYi1 D25.S16 | |
174 #define dYr3 D26.S16 | |
175 #define dYi3 D27.S16 | |
176 #define dYr5 D28.S16 | |
177 #define dYi5 D29.S16 | |
178 @// NOTE: dYr7/dYi7 are D30/D31 (same as dUr7/dUi7); they do not share registers with dYr4/dYi4 (D20/D21) | |
179 #define dYr7 D30.S16 | |
180 #define dYi7 D31.S16 | |
181 #define qY0 Q8.S16 | |
182 #define qY1 Q12.S16 | |
183 #define qY2 Q9.S16 | |
184 #define qY3 Q13.S16 | |
185 #define qY4 Q10.S16 | |
186 #define qY5 Q14.S16 | |
187 #define qY6 Q11.S16 | |
188 #define qY7 Q15.S16 | |
189 | |
190 | |
@// Scratch registers. D0/D1 are also dXr7/dXi7, so dT0/dT1 may only be
@// written once data[7] has been consumed, and data[7] may only be reloaded
@// after the last read of dT0/dT1.
191 #define dT0 D0.S16 | |
192 #define dT1 D1.S16 | |
193 | |
194 | |
195 @// Define constants | |
@// ONEBYSQRT2 is 1/sqrt(2) in Q15 fixed point:
@// 0x5A82 = 23170, and 23170 / 32768 is approximately 0.7071.
196 .set ONEBYSQRT2, 0x00005A82 @// Q15 format | |
197 | |
198 | |
@//
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of a first-stage radix-8 butterfly pass over complex
@// 16-bit samples.
@//
@// Macro arguments:
@//   scaled  - "TRUE" selects halving add/sub (VHADD/VHSUB) for every
@//             butterfly; anything else selects plain VADD/VSUB.
@//   inverse - "TRUE" selects the inverse-transform ordering of the
@//             odd/even outputs; anything else selects the forward one.
@//   name    - suffix appended to the loop label so that the macro can be
@//             instantiated several times in one file.
@//
@// Registers on entry (see the #defines above):
@//   pSrc         - input samples, read with 128-bit-aligned VLD2
@//   pDst         - output buffer, written with 128-bit-aligned VST2
@//   subFFTNum    - divided by 8 to obtain the number of sets
@//   pPingPongBuf - value copied into pDst on exit
@//
@// Registers on exit:
@//   subFFTSize = 8, subFFTNum = (entry subFFTNum) / 8,
@//   pSrc = pDst - pointStep (the stage output, when the set count is a
@//   multiple of 4), pDst = pPingPongBuf.
@//
@// Core registers written: r0, r2, r3, r4, r6-r11.  All of Q0-Q15 are used.
@//
@// NOTE(review): the loop handles 4 sets per iteration and is entered
@// unconditionally, so it assumes subFFTNum/8 is a positive multiple of 4.
@// Confirm that callers guarantee this.
@//
        .MACRO FFTSTAGE scaled, inverse, name

        @// Update subFFTSize and subFFTNum
        MOV     subFFTSize,#8                          @// subFFTSize = 8 after this radix-8 stage
        LDR     t0,=ONEBYSQRT2                         @// t0 = 1/sqrt(2) in Q15 format

        @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#3
        MOV     subFFTNum,grpSize

        @// pointStep is the byte distance between data[k] and data[k+1]:
        @// grpSize complex samples of 4 bytes each.
        @// Note: outPointStep = pointStep for the first stage.
        MOV     pointStep,grpSize,LSL #2

        @// Strides derived from pointStep
        MOV     step1,grpSize,LSL #3                   @// step1   = 2*pointStep
        MOV     step2,pointStep,LSL #3
        SUB     step2,step2,pointStep                  @// step2   = 7*pointStep
        RSB     setStep,step2,#16                      @// setStep = -7*pointStep + 16

        @// grp = 0 is a special case since all the twiddle factors are 1.
        @// Loop on the sets: 4 sets at a time.

grpZeroSetLoop\name:

        @// Load data[0..7] for the current 4 sets.
        @//
        @// These loads used to be interleaved with the arithmetic of the
        @// *previous* iteration (software pipelining), which meant the last
        @// iteration still loaded a full set that does not exist; its data[7]
        @// load was flagged in review as a 16-byte out-of-bounds read.
        @// Loading at the top of the loop reads only the sets that are
        @// actually processed.  X0-X7 are not overwritten anywhere else in
        @// the loop apart from dT0/dT1 (= dXr7/dXi7), which are written only
        @// after X7 has been consumed.
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep      @// data[0]
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep      @// data[1]
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep      @// data[2]
        VLD2    {dXr3,dXi3},[pSrc :128],pointStep      @// data[3]
        VLD2    {dXr4,dXi4},[pSrc :128],pointStep      @// data[4]
        VLD2    {dXr5,dXi5},[pSrc :128],pointStep      @// data[5]
        VLD2    {dXr6,dXi6},[pSrc :128],pointStep      @// data[6]
        VLD2    {dXr7,dXi7},[pSrc :128],setStep        @// data[7]; pSrc -> data[0] of the next 4 sets

        @// Decrement the set loop counter.  The flags set here are consumed
        @// by the BGT at the bottom; no instruction in between updates them.
        SUBS    setCount,setCount,#4


        .ifeqs  "\scaled", "TRUE"

        @// ------------------------------------------------------------
        @// Scaled variant: every butterfly halves its result
        @// ------------------------------------------------------------

        @// first stage of 8 point FFT (even half)
        VHADD   qU0,qX0,qX4
        VHADD   qU2,qX1,qX5
        VHADD   qU4,qX2,qX6
        VHADD   qU6,qX3,qX7

        @// second stage of 8 point FFT (even half)
        VHADD   qV0,qU0,qU4
        VHSUB   qV2,qU0,qU4
        VHADD   qV4,qU2,qU6
        VHSUB   qV6,qU2,qU6

        @// third stage of 8 point FFT (even half)
        VHADD   qY0,qV0,qV4
        VHSUB   qY4,qV0,qV4
        VST2    {dYr0,dYi0},[pDst :128],step1          @// store y0

        .ifeqs  "\inverse", "TRUE"

        VHSUB   dYr2,dVr2,dVi6
        VHADD   dYi2,dVi2,dVr6

        VHADD   dYr6,dVr2,dVi6
        VST2    {dYr2,dYi2},[pDst :128],step1          @// store y2
        VHSUB   dYi6,dVi2,dVr6

        VHSUB   qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1          @// store y4

        VHSUB   qU3,qX1,qX5
        VHSUB   qU5,qX2,qX6
        VST2    {dYr6,dYi6},[pDst :128],step1          @// store y6

        .ELSE

        VHADD   dYr6,dVr2,dVi6
        VHSUB   dYi6,dVi2,dVr6

        VHSUB   dYr2,dVr2,dVi6
        VST2    {dYr6,dYi6},[pDst :128],step1          @// store y2 (held in the Y6 registers)
        VHADD   dYi2,dVi2,dVr6

        VHSUB   qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1          @// store y4
        VHSUB   qU3,qX1,qX5
        VHSUB   qU5,qX2,qX6
        VST2    {dYr2,dYi2},[pDst :128],step1          @// store y6 (held in the Y2 registers)

        .ENDIF

        @// first stage of 8 point FFT (odd half, last term)
        VHSUB   qU7,qX3,qX7
        VMOV    dT0[0],t0                              @// X7 is dead now: D0 lane 0 = 1/sqrt(2)

        @// second stage of 8 point FFT (odd half)
        VHSUB   dVr1,dUr1,dUi5
        VHADD   dVi1,dUi1,dUr5
        VHADD   dVr3,dUr1,dUi5
        VHSUB   dVi3,dUi1,dUr5

        VHSUB   dVr5,dUr3,dUi7
        VHADD   dVi5,dUi3,dUr7
        VHADD   dVr7,dUr3,dUi7
        VHSUB   dVi7,dUi3,dUr7

        @// third stage of 8 point FFT (odd half)

        .ifeqs  "\inverse", "TRUE"

        @// calculate a*v5
        VQRDMULH dT1,dVr5,dT0[0]                       @// dT1 (D1) is free scratch here
        VQRDMULH dVi5,dVi5,dT0[0]

        VSUB    dVr5,dT1,dVi5                          @// a * V5
        VADD    dVi5,dT1,dVi5

        @// calculate b*v7
        VQRDMULH dT1,dVr7,dT0[0]
        VQRDMULH dVi7,dVi7,dT0[0]

        VHADD   qY1,qV1,qV5
        VHSUB   qY5,qV1,qV5

        VADD    dVr7,dT1,dVi7                          @// b * V7
        VSUB    dVi7,dVi7,dT1
        SUB     pDst, pDst, step2                      @// set pDst to y1

        VHSUB   dYr3,dVr3,dVr7
        VHSUB   dYi3,dVi3,dVi7
        VST2    {dYr1,dYi1},[pDst :128],step1          @// store y1
        VHADD   dYr7,dVr3,dVr7
        VHADD   dYi7,dVi3,dVi7

        VST2    {dYr3,dYi3},[pDst :128],step1          @// store y3
        VST2    {dYr5,dYi5},[pDst :128],step1          @// store y5
        VST2    {dYr7,dYi7},[pDst :128]!               @// store y7; writeback advances pDst by 16

        .ELSE

        @// calculate b*v7
        VQRDMULH dT1,dVr7,dT0[0]
        VQRDMULH dVi7,dVi7,dT0[0]

        VADD    dVr7,dT1,dVi7                          @// b * V7
        VSUB    dVi7,dVi7,dT1

        @// calculate a*v5
        VQRDMULH dT1,dVr5,dT0[0]                       @// dT1 (D1) is free scratch here
        VQRDMULH dVi5,dVi5,dT0[0]

        VHADD   dYr7,dVr3,dVr7
        VHADD   dYi7,dVi3,dVi7
        SUB     pDst, pDst, step2                      @// set pDst to y1

        VSUB    dVr5,dT1,dVi5                          @// a * V5
        VADD    dVi5,dT1,dVi5

        VHSUB   qY5,qV1,qV5

        VHSUB   dYr3,dVr3,dVr7
        VST2    {dYr7,dYi7},[pDst :128],step1          @// store y1 (held in the Y7 registers)
        VHSUB   dYi3,dVi3,dVi7
        VHADD   qY1,qV1,qV5

        VST2    {dYr5,dYi5},[pDst :128],step1          @// store y3 (held in the Y5 registers)
        VST2    {dYr3,dYi3},[pDst :128],step1          @// store y5 (held in the Y3 registers)
        VST2    {dYr1,dYi1},[pDst :128]!               @// store y7 (held in the Y1 registers); pDst += 16

        .ENDIF

        .ELSE

        @// ------------------------------------------------------------
        @// Unscaled variant: plain add/sub butterflies
        @// ------------------------------------------------------------

        @// first stage of 8 point FFT (even half)
        VADD    qU0,qX0,qX4
        VADD    qU2,qX1,qX5
        VADD    qU4,qX2,qX6
        VADD    qU6,qX3,qX7

        @// second stage of 8 point FFT (even half)
        VADD    qV0,qU0,qU4
        VSUB    qV2,qU0,qU4
        VADD    qV4,qU2,qU6
        VSUB    qV6,qU2,qU6

        @// third stage of 8 point FFT (even half)
        VADD    qY0,qV0,qV4
        VSUB    qY4,qV0,qV4
        VST2    {dYr0,dYi0},[pDst :128],step1          @// store y0

        .ifeqs  "\inverse", "TRUE"

        VSUB    dYr2,dVr2,dVi6
        VADD    dYi2,dVi2,dVr6

        VADD    dYr6,dVr2,dVi6
        VST2    {dYr2,dYi2},[pDst :128],step1          @// store y2
        VSUB    dYi6,dVi2,dVr6

        VSUB    qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1          @// store y4

        VSUB    qU3,qX1,qX5
        VSUB    qU5,qX2,qX6
        VST2    {dYr6,dYi6},[pDst :128],step1          @// store y6

        .ELSE

        VADD    dYr6,dVr2,dVi6
        VSUB    dYi6,dVi2,dVr6

        VSUB    dYr2,dVr2,dVi6
        VST2    {dYr6,dYi6},[pDst :128],step1          @// store y2 (held in the Y6 registers)
        VADD    dYi2,dVi2,dVr6

        VSUB    qU1,qX0,qX4
        VST2    {dYr4,dYi4},[pDst :128],step1          @// store y4
        VSUB    qU3,qX1,qX5
        VSUB    qU5,qX2,qX6
        VST2    {dYr2,dYi2},[pDst :128],step1          @// store y6 (held in the Y2 registers)

        .ENDIF

        @// first stage of 8 point FFT (odd half, last term)
        VSUB    qU7,qX3,qX7
        VMOV    dT0[0],t0                              @// X7 is dead now: D0 lane 0 = 1/sqrt(2)

        @// second stage of 8 point FFT (odd half)
        VSUB    dVr1,dUr1,dUi5
        VADD    dVi1,dUi1,dUr5
        VADD    dVr3,dUr1,dUi5
        VSUB    dVi3,dUi1,dUr5

        VSUB    dVr5,dUr3,dUi7
        VADD    dVi5,dUi3,dUr7
        VADD    dVr7,dUr3,dUi7
        VSUB    dVi7,dUi3,dUr7

        @// third stage of 8 point FFT (odd half)

        .ifeqs  "\inverse", "TRUE"

        @// calculate a*v5
        VQRDMULH dT1,dVr5,dT0[0]                       @// dT1 (D1) is free scratch here
        VQRDMULH dVi5,dVi5,dT0[0]

        VSUB    dVr5,dT1,dVi5                          @// a * V5
        VADD    dVi5,dT1,dVi5

        @// calculate b*v7
        VQRDMULH dT1,dVr7,dT0[0]
        VQRDMULH dVi7,dVi7,dT0[0]

        VADD    qY1,qV1,qV5
        VSUB    qY5,qV1,qV5

        VADD    dVr7,dT1,dVi7                          @// b * V7
        VSUB    dVi7,dVi7,dT1
        SUB     pDst, pDst, step2                      @// set pDst to y1

        VSUB    dYr3,dVr3,dVr7
        VSUB    dYi3,dVi3,dVi7
        VST2    {dYr1,dYi1},[pDst :128],step1          @// store y1
        VADD    dYr7,dVr3,dVr7
        VADD    dYi7,dVi3,dVi7

        VST2    {dYr3,dYi3},[pDst :128],step1          @// store y3
        VST2    {dYr5,dYi5},[pDst :128],step1          @// store y5
        VST2    {dYr7,dYi7},[pDst :128]!               @// store y7; writeback advances pDst by 16

        .ELSE

        @// calculate b*v7
        VQRDMULH dT1,dVr7,dT0[0]
        VQRDMULH dVi7,dVi7,dT0[0]

        VADD    dVr7,dT1,dVi7                          @// b * V7
        VSUB    dVi7,dVi7,dT1

        @// calculate a*v5
        VQRDMULH dT1,dVr5,dT0[0]                       @// dT1 (D1) is free scratch here
        VQRDMULH dVi5,dVi5,dT0[0]

        VADD    dYr7,dVr3,dVr7
        VADD    dYi7,dVi3,dVi7
        SUB     pDst, pDst, step2                      @// set pDst to y1

        VSUB    dVr5,dT1,dVi5                          @// a * V5
        VADD    dVi5,dT1,dVi5

        VSUB    qY5,qV1,qV5

        VSUB    dYr3,dVr3,dVr7
        VST2    {dYr7,dYi7},[pDst :128],step1          @// store y1 (held in the Y7 registers)
        VSUB    dYi3,dVi3,dVi7
        VADD    qY1,qV1,qV5

        VST2    {dYr5,dYi5},[pDst :128],step1          @// store y3 (held in the Y5 registers)
        VST2    {dYr3,dYi3},[pDst :128],step1          @// store y5 (held in the Y3 registers)
        VST2    {dYr1,dYi1},[pDst :128]!               @// store y7 (held in the Y1 registers); pDst += 16

        .ENDIF

        .ENDIF

        SUB     pDst, pDst, step2                      @// pDst -> y0 of the next 4 sets
        BGT     grpZeroSetLoop\name                    @// uses the flags from SUBS above


        @// Reset pSrc to the stage output for the next stage.  pDst advanced
        @// by 16 bytes per iteration, i.e. by pointStep bytes in total when
        @// the set count is a multiple of 4, so this rewinds to the start.
        SUB     pSrc,pDst,pointStep
        MOV     pDst,pPingPongBuf

        .endm
591 | |
592 | |
593 @// Allocate stack memory required by the function | |
594 | |
595 | |
@// Forward transform, unscaled: FFTSTAGE with scaled="FALSE",
@// inverse="FALSE"; the loop label gets the suffix FWD.
@// NOTE(review): FFTSTAGE writes r0, r2, r3, r4 and r6-r11 and uses Q0-Q15.
@// M_START/M_END come from armCOMM_s.h (not visible here) - confirm which
@// registers the ",r4" argument causes them to preserve.
596 M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4 | |
597 FFTSTAGE "FALSE","FALSE",FWD | |
598 M_END | |
599 | |
600 | |
@// Inverse transform, unscaled: FFTSTAGE with scaled="FALSE",
@// inverse="TRUE"; the loop label gets the suffix INV.
@// NOTE(review): FFTSTAGE writes r0, r2, r3, r4 and r6-r11 and uses Q0-Q15.
@// M_START/M_END come from armCOMM_s.h (not visible here) - confirm which
@// registers the ",r4" argument causes them to preserve.
601 M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4 | |
602 FFTSTAGE "FALSE","TRUE",INV | |
603 M_END | |
604 | |
605 | |
@// Forward transform, scaled (halving butterflies): FFTSTAGE with
@// scaled="TRUE", inverse="FALSE"; the loop label gets the suffix FWDSFS.
@// NOTE(review): FFTSTAGE writes r0, r2, r3, r4 and r6-r11 and uses Q0-Q15.
@// M_START/M_END come from armCOMM_s.h (not visible here) - confirm which
@// registers the ",r4" argument causes them to preserve.
606 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4 | |
607 FFTSTAGE "TRUE","FALSE",FWDSFS | |
608 M_END | |
609 | |
610 | |
@// Inverse transform, scaled (halving butterflies): FFTSTAGE with
@// scaled="TRUE", inverse="TRUE"; the loop label gets the suffix INVSFS.
@// NOTE(review): FFTSTAGE writes r0, r2, r3, r4 and r6-r11 and uses Q0-Q15.
@// M_START/M_END come from armCOMM_s.h (not visible here) - confirm which
@// registers the ",r4" argument causes them to preserve.
611 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4 | |
612 FFTSTAGE "TRUE","TRUE",INVSFS | |
613 M_END | |
614 | |
615 | |
616 | |
617 | |
618 | |
619 .END | |
OLD | NEW |