OLD | NEW |
---|---|
(Empty) | |
1 @// | |
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
3 @// | |
4 @// Use of this source code is governed by a BSD-style license | |
5 @// that can be found in the LICENSE file in the root of the source | |
6 @// tree. An additional intellectual property rights grant can be found | |
7 @// in the file PATENTS. All contributing project authors may | |
8 @// be found in the AUTHORS file in the root of the source tree. | |
9 @// | |
10 @// This file was originally licensed as follows. It has been | |
11 @// relicensed with permission from the copyright holders. | |
12 | |
13 @// | |
14 @// | |
15 @// File Name: armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s | |
16 @// OpenMAX DL: v1.0.2 | |
17 @// Last Modified Revision: 7761 | |
18 @// Last Modified Date: Wed, 26 Sep 2007 | |
19 @// | |
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. | |
21 @// | |
22 @// | |
23 @// | |
24 @// Description: | |
25 @// Compute a first stage Radix 4 FFT stage for a N point complex signal | |
26 @// | |
27 @// | |
28 | |
29 | |
30 @// Include standard headers | |
31 | |
32 #include "dl/api/armCOMM_s.h" | |
33 #include "dl/api/omxtypes_s.h" | |
34 | |
35 @// Import symbols required from other files | |
36 @// (For example tables) | |
37 | |
38 | |
39 | |
40 | |
41 @// Set debugging level | |
42 @//DEBUG_ON SETL {TRUE} | |
43 | |
44 | |
45 | |
46 @// Guarding implementation by the processor name | |
47 | |
48 | |
49 | |
50 @// Guarding implementation by the processor name | |
51 | |
52 | |
53 @//Input Registers | |
54 | |
@// Register aliases for values that are live on entry to FFTSTAGE. | |
@// Comments are kept on their own lines: a trailing @// on a #define would | |
@// leave a stray @ in the expansion once cpp strips the // part. | |
@// pSrc: base register of every VLD2 in FFTSTAGE | |
55 #define pSrc r0 | |
@// pDst: base register of every VST2 in FFTSTAGE | |
56 #define pDst r2 | |
@// pTwiddle: not referenced by the FFTSTAGE body in this file | |
57 #define pTwiddle r1 | |
@// pPingPongBuf: copied into pDst at the end of FFTSTAGE | |
58 #define pPingPongBuf r5 | |
@// subFFTNum: read on entry, then overwritten with subFFTNum/4 | |
59 #define subFFTNum r6 | |
@// subFFTSize: written with 4 by FFTSTAGE | |
60 #define subFFTSize r7 | |
61 | |
62 | |
63 @//Output Registers | |
64 | |
65 | |
66 @//Local Scratch Registers | |
67 | |
@// Scratch registers written by FFTSTAGE. Two pairs share a register: | |
@// grpSize/setCount (r3) and pointStep/outPointStep (r4). | |
68 #define grpSize r3 | |
69 @// Reuse grpSize as setCount | |
70 #define setCount r3 | |
71 #define pointStep r4 | |
72 #define outPointStep r4 | |
@// setStep ends up as 16 - 3*pointStep; step1 is grpSize*8; step3 is -pointStep | |
73 #define setStep r8 | |
74 #define step1 r9 | |
75 #define step3 r10 | |
76 | |
77 @// Neon Registers | |
78 | |
@// NEON aliases, all typed .S16. X = loaded inputs (D0-D7), Y = first | |
@// butterfly stage (D8-D15), Z = outputs (D16-D23). | |
@// Each qXn/qYn/qZn names the Q register that overlays the matching | |
@// real/imaginary D pair (Qn = D2n:D2n+1), e.g. qX0 = Q0 = {dXr0,dXi0}. | |
@// NOTE(review): the dY aliases use D8-D15, which AAPCS treats as | |
@// callee-saved - confirm the callers of these routines preserve them. | |
79 #define dXr0 D0.S16 | |
80 #define dXi0 D1.S16 | |
81 #define dXr1 D2.S16 | |
82 #define dXi1 D3.S16 | |
83 #define dXr2 D4.S16 | |
84 #define dXi2 D5.S16 | |
85 #define dXr3 D6.S16 | |
86 #define dXi3 D7.S16 | |
87 #define dYr0 D8.S16 | |
88 #define dYi0 D9.S16 | |
89 #define dYr1 D10.S16 | |
90 #define dYi1 D11.S16 | |
91 #define dYr2 D12.S16 | |
92 #define dYi2 D13.S16 | |
93 #define dYr3 D14.S16 | |
94 #define dYi3 D15.S16 | |
95 #define dZr0 D16.S16 | |
96 #define dZi0 D17.S16 | |
97 #define dZr1 D18.S16 | |
98 #define dZi1 D19.S16 | |
99 #define dZr2 D20.S16 | |
100 #define dZi2 D21.S16 | |
101 #define dZr3 D22.S16 | |
102 #define dZi3 D23.S16 | |
103 #define qY0 Q4.S16 | |
104 #define qY2 Q6.S16 | |
105 #define qX0 Q0.S16 | |
106 #define qX2 Q2.S16 | |
107 | |
108 #define qY1 Q5.S16 | |
109 #define qY3 Q7.S16 | |
110 #define qX1 Q1.S16 | |
111 #define qX3 Q3.S16 | |
@// Only qZ0 and qZ1 have Q aliases; dZr2..dZi3 are always used as D registers | |
112 #define qZ0 Q8.S16 | |
113 #define qZ1 Q9.S16 | |
114 | |
115 | |
@// FFTSTAGE scaled, inverse, name | |
@// First-stage radix-4 butterflies with no twiddle multiplies. Each loop pass | |
@// handles four sets: one 16-byte VLD2 (four complex S16 points) per input leg. | |
@// scaled = "TRUE" selects the halving VHADD/VHSUB forms, otherwise VADD/VSUB. | |
@// inverse = "TRUE" selects the inverse-transform output arrangement. | |
@// name is appended to the loop label so each expansion has a unique label. | |
@// In: pSrc, pDst, pPingPongBuf, subFFTNum. | |
@// Out: pSrc = pDst - pointStep, pDst = pPingPongBuf, | |
@// subFFTNum = subFFTNum/4, subFFTSize = 4. | |
@// Clobbers: r3, r4, r8-r10, D0-D23 and the condition flags. | |
116 .MACRO FFTSTAGE scaled, inverse, name | |
117 | |
118 @// Define stack arguments | |
119 | |
@// pointStep = subFFTNum as passed in; it is the post-increment applied | |
@// between the four input legs data[0]..data[3] | |
120 MOV pointStep,subFFTNum | |
121 @// Update pSubFFTSize and pSubFFTNum regs | |
122 | |
123 | |
124 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] | |
125 @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount) | |
126 LSR grpSize,subFFTNum,#2 | |
127 MOV subFFTNum,grpSize | |
128 | |
129 | |
130 @// pT0+1 increments pT0 by 4 bytes | |
131 @// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes | |
132 @// Note: outPointStep = pointStep for firststage | |
133 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1] | |
134 | |
135 | |
136 @// Calculate the step of input data for the next set | |
137 @//MOV setStep,pointStep,LSL #1 | |
138 MOV setStep,grpSize,LSL #3 | |
139 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2] | |
@// step1 = grpSize*8, i.e. 2*pointStep when subFFTNum is a multiple of 4 | |
140 MOV step1,setStep | |
141 ADD setStep,setStep,pointStep @// setStep = 3*pointStep | |
142 RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16 | |
143 | |
144 | |
@// Post-incrementing by setStep rewinds pSrc to data[0] + 16 bytes | |
145 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] | |
146 MOV subFFTSize,#4 @// subFFTSize = 4 on exit from this first stage | |
147 | |
148 | |
149 .ifeqs "\scaled", "TRUE" | |
150 VHADD qY0,qX0,qX2 @// u0 | |
151 .ELSE | |
152 VADD qY0,qX0,qX2 @// u0 | |
153 .ENDIF | |
@// step3 = -pointStep, used to step back from data[2] to data[1] | |
154 RSB step3,pointStep,#0 | |
155 | |
156 @// grp = 0 a special case since all the twiddle factors are 1 | |
157 @// Loop on the sets: 4 sets at a time | |
158 | |
@// The loop is software pipelined: each pass finishes the butterflies for | |
@// the inputs already held in qX0-qX3 (u0 was formed before entry or at | |
@// the end of the previous pass) while loading the inputs of the next pass. | |
@// pSrc visits data[0], data[2], data[1], data[3] using the post-increments | |
@// step1, step3, step1, setStep. | |
159 grpZeroSetLoop\name: | |
160 | |
161 | |
162 .ifeqs "\scaled", "TRUE" | |
163 | |
164 @// finish first stage of 4 point FFT | |
165 | |
166 VHSUB qY2,qX0,qX2 @// u1 | |
167 SUBS setCount,setCount,#4 @// decrement the set loop counter (flags consumed by the BGT at the loop end) | |
168 | |
169 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0] | |
170 VHADD qY1,qX1,qX3 @// u2 | |
171 VLD2 {dXr2,dXi2},[pSrc :128],step3 | |
172 VHSUB qY3,qX1,qX3 @// u3 | |
173 | |
174 | |
175 | |
176 @// finish second stage of 4 point FFT | |
177 | |
178 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1] | |
179 VHADD qZ0,qY0,qY1 @// y0 | |
180 | |
@// NOTE(review): on the final pass this data[3] prefetch appears to read | |
@// 16 bytes past the end of the source buffer (flagged in review) - | |
@// confirm the buffer is padded or guard the load. | |
181 VLD2 {dXr3,dXi3},[pSrc :128],setStep | |
aedla
2013/06/21 12:56:34
This seems to read 16 bytes OOB at the last iterat
| |
182 | |
183 | |
184 .ifeqs "\inverse", "TRUE" | |
185 | |
186 VHSUB dZr3,dYr2,dYi3 @// y3 | |
187 VHADD dZi3,dYi2,dYr3 | |
188 VST2 {dZr0,dZi0},[pDst :128],outPointStep | |
189 | |
190 VHSUB qZ1,qY0,qY1 @// y2 | |
191 VST2 {dZr3,dZi3},[pDst :128],outPointStep | |
192 | |
193 VHADD dZr2,dYr2,dYi3 @// y1 | |
194 VST2 {dZr1,dZi1},[pDst :128],outPointStep | |
195 VHSUB dZi2,dYi2,dYr3 | |
196 | |
197 VHADD qY0,qX0,qX2 @// u0 (next loop) | |
198 VST2 {dZr2,dZi2},[pDst :128],setStep | |
199 | |
200 | |
201 .ELSE | |
202 | |
203 VHADD dZr2,dYr2,dYi3 @// y1 | |
204 VHSUB dZi2,dYi2,dYr3 | |
205 | |
206 VST2 {dZr0,dZi0},[pDst :128],outPointStep | |
207 VHSUB qZ1,qY0,qY1 @// y2 | |
208 | |
209 VST2 {dZr2,dZi2},[pDst :128],outPointStep | |
210 VHSUB dZr3,dYr2,dYi3 @// y3 | |
211 VHADD dZi3,dYi2,dYr3 | |
212 VST2 {dZr1,dZi1},[pDst :128],outPointStep | |
213 VHADD qY0,qX0,qX2 @// u0 (next loop) | |
214 VST2 {dZr3,dZi3},[pDst :128],setStep | |
215 | |
216 .ENDIF | |
217 | |
218 | |
219 .ELSE | |
220 | |
221 @// finish first stage of 4 point FFT | |
222 | |
223 VSUB qY2,qX0,qX2 @// u1 | |
224 SUBS setCount,setCount,#4 @// decrement the set loop counter (flags consumed by the BGT at the loop end) | |
225 | |
226 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0] | |
227 VADD qY1,qX1,qX3 @// u2 | |
228 VLD2 {dXr2,dXi2},[pSrc :128],step3 | |
229 VSUB qY3,qX1,qX3 @// u3 | |
230 | |
231 | |
232 | |
233 @// finish second stage of 4 point FFT | |
234 | |
235 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1] | |
236 VADD qZ0,qY0,qY1 @// y0 | |
237 | |
@// NOTE(review): on the final pass this data[3] prefetch appears to read | |
@// 16 bytes past the end of the source buffer (flagged in review) - | |
@// confirm the buffer is padded or guard the load. | |
238 VLD2 {dXr3,dXi3},[pSrc :128],setStep | |
aedla
2013/06/21 12:56:34
Same here.
| |
239 | |
240 | |
241 .ifeqs "\inverse", "TRUE" | |
242 | |
243 VSUB dZr3,dYr2,dYi3 @// y3 | |
244 VADD dZi3,dYi2,dYr3 | |
245 VST2 {dZr0,dZi0},[pDst :128],outPointStep | |
246 | |
247 VSUB qZ1,qY0,qY1 @// y2 | |
248 VST2 {dZr3,dZi3},[pDst :128],outPointStep | |
249 | |
250 VADD dZr2,dYr2,dYi3 @// y1 | |
251 VST2 {dZr1,dZi1},[pDst :128],outPointStep | |
252 VSUB dZi2,dYi2,dYr3 | |
253 | |
254 VADD qY0,qX0,qX2 @// u0 (next loop) | |
255 VST2 {dZr2,dZi2},[pDst :128],setStep | |
256 | |
257 | |
258 .ELSE | |
259 | |
260 VADD dZr2,dYr2,dYi3 @// y1 | |
261 VSUB dZi2,dYi2,dYr3 | |
262 | |
263 VST2 {dZr0,dZi0},[pDst :128],outPointStep | |
264 VSUB qZ1,qY0,qY1 @// y2 | |
265 | |
266 VST2 {dZr2,dZi2},[pDst :128],outPointStep | |
267 VSUB dZr3,dYr2,dYi3 @// y3 | |
268 VADD dZi3,dYi2,dYr3 | |
269 VST2 {dZr1,dZi1},[pDst :128],outPointStep | |
270 VADD qY0,qX0,qX2 @// u0 (next loop) | |
271 VST2 {dZr3,dZi3},[pDst :128],setStep | |
272 | |
273 .ENDIF | |
274 | |
275 | |
276 .ENDIF | |
277 | |
@// Loop while setCount, as decremented by the SUBS above, is still > 0 | |
278 BGT grpZeroSetLoop\name | |
279 | |
280 | |
281 @// reset pSrc to pDst for the next stage | |
282 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep | |
283 MOV pDst,pPingPongBuf | |
284 | |
285 | |
286 .endm | |
287 | |
288 | |
289 | |
@// Forward, unscaled variant: expands FFTSTAGE with scaled="FALSE", inverse="FALSE". | |
@// M_START/M_END come from the included headers and are not shown here. | |
290 M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4 | |
291 FFTSTAGE "FALSE","FALSE",FWD | |
292 M_END | |
293 | |
294 | |
295 | |
@// Inverse, unscaled variant: expands FFTSTAGE with scaled="FALSE", inverse="TRUE". | |
@// M_START/M_END come from the included headers and are not shown here. | |
296 M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4 | |
297 FFTSTAGE "FALSE","TRUE",INV | |
298 M_END | |
299 | |
300 | |
@// Forward, scaled variant: expands FFTSTAGE with scaled="TRUE" (halving | |
@// VHADD/VHSUB butterflies), inverse="FALSE". | |
@// M_START/M_END come from the included headers and are not shown here. | |
301 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4 | |
302 FFTSTAGE "TRUE","FALSE",FWDSFS | |
303 M_END | |
304 | |
305 | |
@// Inverse, scaled variant: expands FFTSTAGE with scaled="TRUE" (halving | |
@// VHADD/VHSUB butterflies), inverse="TRUE". | |
@// M_START/M_END come from the included headers and are not shown here. | |
306 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4 | |
307 FFTSTAGE "TRUE","TRUE",INVSFS | |
308 M_END | |
309 | |
310 | |
311 | |
312 | |
313 | |
314 .END | |
OLD | NEW |