Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
11 @// to support float instead of SC32.
12 @//
13
14 @//
15 @// Description:
16 @// Compute an inverse FFT of a complex signal in CCS format, producing a real F32 signal
17 @//
18 @//
19
20
21 @// Include standard headers
22
23 #include "dl/api/armCOMM_s.h"
24 #include "dl/api/omxtypes_s.h"
25
26
27 @// Import symbols required from other files
28 @// (For example tables)
29
30 .extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
31 .extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
32 .extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
33 .extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
34 .extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
35 .extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
36
37
38 @// Set debugging level
39 @//DEBUG_ON SETL {TRUE}
40
41
42
43 @// Guarding implementation by the processor name
44
45
46
47 @// Guarding implementation by the processor name
48
49 @// Import symbols required from other files
50 @// (For example tables)
51 .extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
52 .extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
53
54
55 @//Input Registers
56
57 #define pSrc r0
58 #define pDst r1
59 #define pFFTSpec r2
60 #define scale r3
61
62
63 @// Output registers
64 #define result r0
65
66 @//Local Scratch Registers
67
68 #define argTwiddle r1
69 #define argDst r2
70 #define argScale r4
71 #define tmpOrder r4
72 #define pTwiddle r4
73 #define pOut r5
74 #define subFFTSize r7
75 #define subFFTNum r6
76 #define N r6
77 #define order r14
78 #define diff r9
79 @// Total num of radix stages required to comple the FFT
80 #define count r8
81 #define x0r r4
82 #define x0i r5
83 #define diffMinusOne r2
84 #define round r3
85
86 #define pOut1 r2
87 #define size r7
88 #define step r8
89 #define step1 r9
90 #define twStep r10
91 #define pTwiddleTmp r11
92 #define argTwiddle1 r12
93 #define zero r14
94
95 @// Neon registers
96
97 #define dX0 D0.F32
98 #define dShift D1.F32
99 #define dX1 D1.F32
100 #define dY0 D2.F32
101 #define dY1 D3.F32
102 #define dX0r D0.F32
103 #define dX0i D1.F32
104 #define dX1r D2.F32
105 #define dX1i D3.F32
106 #define dW0r D4.F32
107 #define dW0i D5.F32
108 #define dW1r D6.F32
109 #define dW1i D7.F32
110 #define dT0 D8.F32
111 #define dT1 D9.F32
112 #define dT2 D10.F32
113 #define dT3 D11.F32
114 #define qT0 d12.F32
115 #define qT1 d14.F32
116 #define qT2 d16.F32
117 #define qT3 d18.F32
118 #define dY0r D4.F32
119 #define dY0i D5.F32
120 #define dY1r D6.F32
121 #define dY1i D7.F32
122 #define dzero D20.F32
123
124 #define dY2 D4.F32
125 #define dY3 D5.F32
126 #define dW0 D6.F32
127 #define dW1 D7.F32
128 #define dW0Tmp D10.F32
129 #define dW1Neg D11.F32
130
131 #define sN S0.S32
132 #define fN S1.F32
133 @// one must be the same as dScale[0]!
134 #define dScale D2.F32
135 #define one S4.F32
136
137
138 @// Allocate stack memory required by the function
139 M_ALLOC4 complexFFTSize, 4
140
141 @// Write function header
142 M_START omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15
143
144 @ Structure offsets for the FFTSpec
145 .set ARMsFFTSpec_N, 0
146 .set ARMsFFTSpec_pBitRev, 4
147 .set ARMsFFTSpec_pTwiddle, 8
148 .set ARMsFFTSpec_pBuf, 12
149
150 @// Define stack arguments
151
152 @// Read the size from structure and take log
153 LDR N, [pFFTSpec, #ARMsFFTSpec_N]
154
155 @// Read other structure parameters
156 LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
157 LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
158
159 @// N=1 Treat seperately
160 CMP N,#1
161 BGT sizeGreaterThanOne
162 VLD1 dX0[0],[pSrc]
163 VST1 dX0[0],[pDst]
164
165 B End
166
167 sizeGreaterThanOne:
168
169 @// Call the preTwiddle Radix2 stage before doing the compledIFFT
170
171
172 BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
173
174
175 complexIFFT:
176
177 ASR N,N,#1 @// N/2 point complex IFFT
178 M_STR N, complexFFTSize @ Save N for scaling later
179 ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1
180
181 CLZ order,N @// N = 2^order
182 RSB order,order,#31
183 MOV subFFTSize,#1
184 @//MOV subFFTNum,N
185
186 CMP order,#3
187 BGT orderGreaterthan3 @// order > 3
188
189 CMP order,#1
190 BGE orderGreaterthan0 @// order > 0
191
192 VLD1 dX0,[pSrc]
193 VST1 dX0,[pDst]
194 MOV pSrc,pDst
195 BLT FFTEnd
196
197 orderGreaterthan0:
198 @// set the buffers appropriately for various orders
199 CMP order,#2
200 MOVNE argDst,pDst
201 MOVEQ argDst,pOut
202 @// Pass the first stage destination in RN5
203 MOVEQ pOut,pDst
204 MOV argTwiddle,pTwiddle
205
206 BGE orderGreaterthan1
207 BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
208 B FFTEnd
209
210 orderGreaterthan1:
211 MOV tmpOrder,order @// tmpOrder = RN 4
212 BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
213 CMP tmpOrder,#2
214 BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
215 BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
216 B FFTEnd
217
218
219 orderGreaterthan3:
220 specialScaleCase:
221
222 @// Set input args to fft stages
223 TST order, #2
224 MOVNE argDst,pDst
225 MOVEQ argDst,pOut
226 @// Pass the first stage destination in RN5
227 MOVEQ pOut,pDst
228 MOV argTwiddle,pTwiddle
229
230 @//check for even or odd order
231 @// NOTE: The following combination of BL's would work fine even though
232 @// the first BL would corrupt the flags. This is because the end of
233 @// the "grpZeroSetLoop" loop inside
234 @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
235 @// to EQ
236
237 TST order,#0x00000001
238 BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
239 BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
240
241 CMP subFFTNum,#4
242 BLT FFTEnd
243
244
245 unscaledRadix4Loop:
246 BEQ lastStageUnscaledRadix4
247 BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
248 CMP subFFTNum,#4
249 B unscaledRadix4Loop
250
251 lastStageUnscaledRadix4:
252 BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
253 B FFTEnd
254
255 FFTEnd: @// Does only the scaling
256 @ Scale inverse FFT result by 1/N
257
258 M_LDR N, complexFFTSize
259 VMOV sN,N
260 VCVT fN, sN @ fn = fftSize, as a float
261 VMOV one, 1.0
262 VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
263
264
265 @// N = subFFTSize ; dataptr = pDst
266 scaleFFTData:
267 VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
268 SUBS subFFTSize,subFFTSize,#1
269 VMUL dX0, dX0, dScale[0]
270 VST1 {dX0},[pSrc]!
271
272 BGT scaleFFTData
273
274 End:
275 @// Set return value
276 MOV result, #OMX_Sts_NoErr
277
278 @// Write function tail
279 M_END
280
281
282
283 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698