Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12 @//
13
14 @//
15 @// File Name: armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7767
18 @// Last Modified Date: Thu, 27 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute the first radix-4 FFT stage for an N-point complex signal
26 @//
27
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35 @// Import symbols required from other files
36 @// (For example tables)
37
38
39
40
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50 @// Guarding implementation by the processor name
51
52
53 @// Input Registers (core registers FFTSTAGE expects to be set up on entry)
54 @// pTwiddle is defined but never referenced in this file; subFFTSize is only written (set to 4)
55 #define pSrc r0
56 #define pDst r2
57 #define pTwiddle r1
58 #define pPingPongBuf r5
59 #define subFFTNum r6
60 #define subFFTSize r7
61 @// On exit FFTSTAGE leaves subFFTNum = subFFTNum/4, pSrc = (final pDst) - pointStep, pDst = pPingPongBuf
62
63 @// Output Registers (no separate names are defined; FFTSTAGE updates the input registers in place)
64
65
66 @// Local Scratch Registers
67 @// Two pairs alias a single register each: grpSize/setCount (r3) and pointStep/outPointStep (r4)
68 #define grpSize r3
69 @// Reuse grpSize as setCount
70 #define setCount r3
71 #define pointStep r4
72 #define outPointStep r4
73 #define setStep r8
74 #define step1 r9
75 #define step3 r10
76
77 @// Neon Registers: each qN name overlays the dNr/dNi pair of the same index (Q0 = D0:D1, ...), all typed S32
78 @// X (Q0-Q3) = values loaded by VLD2, Y (Q4-Q7) = first butterfly stage, Z (Q8-Q11) = results written by VST2
79 #define dXr0 D0.S32
80 #define dXi0 D1.S32
81 #define dXr1 D2.S32
82 #define dXi1 D3.S32
83 #define dXr2 D4.S32
84 #define dXi2 D5.S32
85 #define dXr3 D6.S32
86 #define dXi3 D7.S32
87 #define dYr0 D8.S32
88 #define dYi0 D9.S32
89 #define dYr1 D10.S32
90 #define dYi1 D11.S32
91 #define dYr2 D12.S32
92 #define dYi2 D13.S32
93 #define dYr3 D14.S32
94 #define dYi3 D15.S32
95 #define qX0 Q0.S32
96 #define qX1 Q1.S32
97 #define qX2 Q2.S32
98 #define qX3 Q3.S32
99 #define qY0 Q4.S32
100 #define qY1 Q5.S32
101 #define qY2 Q6.S32
102 #define qY3 Q7.S32
103 #define dZr0 D16.S32
104 #define dZi0 D17.S32
105 #define dZr1 D18.S32
106 #define dZi1 D19.S32
107 #define dZr2 D20.S32
108 #define dZi2 D21.S32
109 #define dZr3 D22.S32
110 #define dZi3 D23.S32
111 #define qZ0 Q8.S32
112 #define qZ1 Q9.S32
113 #define qZ2 Q10.S32
114 #define qZ3 Q11.S32
115
116
117 .MACRO FFTSTAGE scaled, inverse, name
118 @// First-stage radix-4 butterfly loop; scaled and inverse are compared with TRUE, name is appended to the loop label
119 @// No stack arguments are referenced in this macro; every input arrives in a register
120 @// scaled = TRUE selects halving VHADD/VHSUB; inverse = TRUE swaps which of Z2/Z3 goes to the 2nd and 4th store
121 @// Legacy note: pT0+1 increments pT0 by 8 bytes (pT0 itself is not a name used in this file)
122 @// pointStep below is 2*subFFTNum and is applied as a byte post-increment between data[0..3]
123 @// Note: outPointStep = pointStep for the first stage (both names are r4)
124
125 MOV pointStep,subFFTNum,LSL #1
126
127
128 @// Update subFFTSize and subFFTNum regs (interleaved with the loads of the first set)
129 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
130 MOV subFFTSize,#4 @// subFFTSize = 4 after the first stage
131
132 @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
133 LSR grpSize,subFFTNum,#2
134 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
135 MOV subFFTNum,grpSize
136
137
138 @// Calculate the step of input data for the next set
139 @// setStep starts as 16*grpSize (= 2*pointStep when subFFTNum is a multiple of 4); replaces: MOV setStep,pointStep,LSL #1
140 MOV setStep,grpSize,LSL #4
141 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
142 ADD setStep,setStep,pointStep @// setStep = 3*pointStep
143 RSB setStep,setStep,#16 @// setStep = -3*pointStep+16
144
145 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
146 MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
147
148 .ifeqs "\scaled", "TRUE"
149 VHADD qY0,qX0,qX2
150 .else
151 VADD qY0,qX0,qX2
152 .endif
153
154 RSB step3,pointStep,#0 @// step3 = -pointStep
155
156 @// grp = 0 a special case since all the twiddle factors are 1
157 @// Loop on the sets : 2 sets at a time
158
159 grpZeroSetLoop\name :
160 @// Each pass finishes the set loaded earlier and loads the next one in the order
161 @// data[0], data[2], data[1], data[3] (post-increments step1, step3, step1, setStep)
162 @// NOTE(review): the last pass still issues these four loads - confirm that reading past the final set is safe
163 @// Decrement setcount
164 SUBS setCount,setCount,#2 @// decrement the set loop counter
165
166 .ifeqs "\scaled", "TRUE"
167 @// Scaled variant: VHADD/VHSUB halve each result, so the two butterfly stages divide by 4 overall
168 @// finish first stage of 4 point FFT
169
170 VHSUB qY2,qX0,qX2
171
172 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
173 VHADD qY1,qX1,qX3
174 VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
175 VHSUB qY3,qX1,qX3
176
177
178 @// finish second stage of 4 point FFT
179
180 .ifeqs "\inverse", "TRUE"
181 @// Inverse: results are stored in the order Z0, Z3, Z1, Z2
182 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
183 VHADD qZ0,qY0,qY1
184
185 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
186 VHSUB dZr3,dYr2,dYi3
187
188 VST2 {dZr0,dZi0},[pDst :128],outPointStep
189 VHADD dZi3,dYi2,dYr3
190
191 VHSUB qZ1,qY0,qY1
192 VST2 {dZr3,dZi3},[pDst :128],outPointStep
193
194 VHADD dZr2,dYr2,dYi3
195 VST2 {dZr1,dZi1},[pDst :128],outPointStep
196 VHSUB dZi2,dYi2,dYr3
197
198 VHADD qY0,qX0,qX2 @// u0 for next iteration
199 VST2 {dZr2,dZi2},[pDst :128],setStep
200
201
202 .else
203 @// Forward: results are stored in the order Z0, Z2, Z1, Z3
204 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
205 VHADD qZ0,qY0,qY1
206
207 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
208 VHADD dZr2,dYr2,dYi3
209
210 VST2 {dZr0,dZi0},[pDst :128],outPointStep
211 VHSUB dZi2,dYi2,dYr3
212
213 VHSUB qZ1,qY0,qY1
214 VST2 {dZr2,dZi2},[pDst :128],outPointStep
215
216 VHSUB dZr3,dYr2,dYi3
217 VST2 {dZr1,dZi1},[pDst :128],outPointStep
218 VHADD dZi3,dYi2,dYr3
219
220 VHADD qY0,qX0,qX2 @// u0 for next iteration
221 VST2 {dZr3,dZi3},[pDst :128],setStep
222
223 .endif
224
225
226
227 .else
228 @// Unscaled variant: plain VADD/VSUB, no halving
229 @// finish first stage of 4 point FFT
230
231
232 VSUB qY2,qX0,qX2
233
234 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
235 VADD qY1,qX1,qX3
236 VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
237 VSUB qY3,qX1,qX3
238
239
240 @// finish second stage of 4 point FFT
241
242 .ifeqs "\inverse", "TRUE"
243 @// Inverse: results are stored in the order Z0, Z3, Z1, Z2
244 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
245 VADD qZ0,qY0,qY1
246
247 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
248 VSUB dZr3,dYr2,dYi3
249
250 VST2 {dZr0,dZi0},[pDst :128],outPointStep
251 VADD dZi3,dYi2,dYr3
252
253 VSUB qZ1,qY0,qY1
254 VST2 {dZr3,dZi3},[pDst :128],outPointStep
255
256 VADD dZr2,dYr2,dYi3
257 VST2 {dZr1,dZi1},[pDst :128],outPointStep
258 VSUB dZi2,dYi2,dYr3
259
260 VADD qY0,qX0,qX2 @// u0 for next iteration
261 VST2 {dZr2,dZi2},[pDst :128],setStep
262
263
264 .else
265 @// Forward: results are stored in the order Z0, Z2, Z1, Z3
266 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
267 VADD qZ0,qY0,qY1
268
269 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
270 VADD dZr2,dYr2,dYi3
271
272 VST2 {dZr0,dZi0},[pDst :128],outPointStep
273 VSUB dZi2,dYi2,dYr3
274
275 VSUB qZ1,qY0,qY1
276 VST2 {dZr2,dZi2},[pDst :128],outPointStep
277
278 VSUB dZr3,dYr2,dYi3
279 VST2 {dZr1,dZi1},[pDst :128],outPointStep
280 VADD dZi3,dYi2,dYr3
281
282 VADD qY0,qX0,qX2 @// u0 for next iteration
283 VST2 {dZr3,dZi3},[pDst :128],setStep
284
285 .endif
286
287 .endif
288 @// Flags are still those set by the SUBS at the loop top: repeat while setCount > 0
289 BGT grpZeroSetLoop\name
290
291 @// reset pSrc to pDst for the next stage
292 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
293 MOV pDst,pPingPongBuf
294
295
296 .endm
297
298 @// Forward first stage, unscaled: expands FFTSTAGE with scaled=FALSE, inverse=FALSE and label suffix fwd
299 @// NOTE(review): FFTSTAGE writes r8-r10 and Q4-Q7 (D8-D15); M_START is not defined in this file, so confirm what passing r4 preserves
300 M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
301 FFTSTAGE "FALSE","FALSE",fwd
302 M_END
303
304 @// Inverse first stage, unscaled: expands FFTSTAGE with scaled=FALSE, inverse=TRUE and label suffix inv
305 @// NOTE(review): FFTSTAGE writes r8-r10 and Q4-Q7 (D8-D15); M_START is not defined in this file, so confirm what passing r4 preserves
306 M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
307 FFTSTAGE "FALSE","TRUE",inv
308 M_END
309 @// Forward first stage, scaled (Sfs): expands FFTSTAGE with scaled=TRUE, inverse=FALSE and label suffix fwdsfs
310 @// NOTE(review): FFTSTAGE writes r8-r10 and Q4-Q7 (D8-D15); M_START is not defined in this file, so confirm what passing r4 preserves
311 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
312 FFTSTAGE "TRUE","FALSE",fwdsfs
313 M_END
314 @// Inverse first stage, scaled (Sfs): expands FFTSTAGE with scaled=TRUE, inverse=TRUE and label suffix invsfs
315 @// NOTE(review): FFTSTAGE writes r8-r10 and Q4-Q7 (D8-D15); M_START is not defined in this file, so confirm what passing r4 preserves
316 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
317 FFTSTAGE "TRUE","TRUE",invsfs
318 M_END
319
320 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698