Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name: armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7761
18 @// Last Modified Date: Wed, 26 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute the first (radix-4) stage of the FFT for an N-point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35 @// Import symbols required from other files
36 @// (For example tables)
37
38
39
40
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50 @// Guarding implementation by the processor name
51
52
@// Core registers that carry the stage inputs, listed by register number.
@// (Keep comments off the #define lines so they never become part of the
@// replacement text.)

#define pSrc            r0
#define pTwiddle        r1
#define pDst            r2
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7
61
62
63 @//Output Registers
64
65
@// Core registers used as scratch inside the stage.

@// r3 is shared: grpSize and setCount are two names for the same register
#define grpSize         r3
#define setCount        r3

@// r4 is shared: pointStep and outPointStep are two names for the same register
#define pointStep       r4
#define outPointStep    r4

@// Post-index strides for the pipelined loads and stores
#define setStep         r8
#define step1           r9
#define step3           r10
76
@// NEON register aliases, all typed as signed 16-bit lanes.
@// Each Q alias is listed next to the real/imaginary D pair it overlays.

@// X: butterfly inputs data[0..3]
#define dXr0 D0.S16
#define dXi0 D1.S16
#define qX0  Q0.S16

#define dXr1 D2.S16
#define dXi1 D3.S16
#define qX1  Q1.S16

#define dXr2 D4.S16
#define dXi2 D5.S16
#define qX2  Q2.S16

#define dXr3 D6.S16
#define dXi3 D7.S16
#define qX3  Q3.S16

@// Y: intermediate values u0..u3
#define dYr0 D8.S16
#define dYi0 D9.S16
#define qY0  Q4.S16

#define dYr1 D10.S16
#define dYi1 D11.S16
#define qY1  Q5.S16

#define dYr2 D12.S16
#define dYi2 D13.S16
#define qY2  Q6.S16

#define dYr3 D14.S16
#define dYi3 D15.S16
#define qY3  Q7.S16

@// Z: butterfly outputs (only the first two pairs have a Q alias)
#define dZr0 D16.S16
#define dZi0 D17.S16
#define qZ0  Q8.S16

#define dZr1 D18.S16
#define dZi1 D19.S16
#define qZ1  Q9.S16

#define dZr2 D20.S16
#define dZi2 D21.S16

#define dZr3 D22.S16
#define dZi3 D23.S16
114
115
@// ---------------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Expands to the first-stage radix-4 pass over 16-bit complex data. All
@// twiddle factors of this stage are 1, so the butterflies need only adds
@// and subtracts.
@//
@// Macro arguments:
@//   scaled   "TRUE" selects halving arithmetic (VHADD/VHSUB);
@//            anything else selects plain VADD/VSUB.
@//   inverse  "TRUE" exchanges the second and fourth stores of each set
@//            (dZ3 goes to output slot 1 and dZ2 to output slot 3);
@//            anything else stores dZ0, dZ2, dZ1, dZ3 in that order.
@//   name     suffix appended to the labels so every expansion is unique.
@//
@// On entry:
@//   pSrc          input data, read with 128-bit aligned VLD2
@//   pDst          output buffer, written with 128-bit aligned VST2
@//   pPingPongBuf  value handed back in pDst on exit
@//   subFFTNum     copied to pointStep (byte stride between data[0..3]);
@//                 subFFTNum/4 is the set count
@//
@// On exit:
@//   pSrc        = final pDst - pointStep
@//   pDst        = pPingPongBuf
@//   subFFTNum   = entry subFFTNum / 4
@//   subFFTSize  = 4
@//
@// Clobbers: grpSize/setCount, pointStep/outPointStep, setStep, step1, step3,
@//           D0-D23 and the condition flags.
@// ---------------------------------------------------------------------------
        .MACRO FFTSTAGE scaled, inverse, name

        @// pointStep (bytes) separates data[0], data[1], data[2] and data[3]
        MOV     pointStep,subFFTNum

        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @// data[0]

        @// setCount = subFFTNum/4 (setCount and grpSize are the same register);
        @// the quartered value is also written back to subFFTNum
        LSR     grpSize,subFFTNum,#2
        MOV     subFFTNum,grpSize

        @// Note: outPointStep = pointStep for the first stage
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @// data[1]

        @// Strides used to walk the input while loading the next set
        MOV     setStep,grpSize,LSL #3                  @// 8*grpSize
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @// data[2]
        MOV     step1,setStep                           @// step1 = 8*grpSize
        ADD     setStep,setStep,pointStep               @// setStep = 3*pointStep
        RSB     setStep,setStep,#16                     @// setStep = -3*pointStep + 16

        VLD2    {dXr3,dXi3},[pSrc :128],setStep         @// data[3]
        MOV     subFFTSize,#4                           @// subFFTSize = 4 after the first stage

        @// Start the first butterfly before entering the pipelined loop
        .ifeqs  "\scaled", "TRUE"
            VHADD   qY0,qX0,qX2                         @// u0
        .ELSE
            VADD    qY0,qX0,qX2                         @// u0
        .ENDIF
        RSB     step3,pointStep,#0                      @// step3 = -pointStep

        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets: 4 sets at a time
        @//
        @// The loop is software pipelined: every pass finishes the current set
        @// and loads the inputs of the next one. On the final pass there is no
        @// next set, and the data[3] load would read 16 bytes past the last
        @// data[3] element. That load is therefore skipped once SUBS has brought
        @// setCount down to zero. The NEON instructions between SUBS and the
        @// branches do not write the condition flags, so the same flags drive
        @// both the BLE guard and the closing BGT. The skipped post-increment of
        @// pSrc is harmless because pSrc is reassigned right after the loop.
        @// Only one of the two guarded copies below is assembled per expansion,
        @// so they can share a label.

grpZeroSetLoop\name:

        .ifeqs  "\scaled", "TRUE"

            @// finish first stage of 4 point FFT

            VHSUB   qY2,qX0,qX2                         @// u1
            SUBS    setCount,setCount,#4                @// decrement the set loop counter

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @// data[0]
            VHADD   qY1,qX1,qX3                         @// u2
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @// data[2]
            VHSUB   qY3,qX1,qX3                         @// u3

            @// finish second stage of 4 point FFT

            VLD2    {dXr1,dXi1},[pSrc :128],step1       @// data[1]
            VHADD   qZ0,qY0,qY1                         @// y0

            BLE     grpZeroSetLoadDone\name             @// final pass: nothing left to load
            VLD2    {dXr3,dXi3},[pSrc :128],setStep     @// data[3]
grpZeroSetLoadDone\name:

            .ifeqs  "\inverse", "TRUE"

                VHSUB   dZr3,dYr2,dYi3                  @// y3
                VHADD   dZi3,dYi2,dYr3
                VST2    {dZr0,dZi0},[pDst :128],outPointStep

                VHSUB   qZ1,qY0,qY1                     @// y2
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VHADD   dZr2,dYr2,dYi3                  @// y1
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHSUB   dZi2,dYi2,dYr3

                VHADD   qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr2,dZi2},[pDst :128],setStep

            .ELSE

                VHADD   dZr2,dYr2,dYi3                  @// y1
                VHSUB   dZi2,dYi2,dYr3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VHSUB   qZ1,qY0,qY1                     @// y2

                VST2    {dZr2,dZi2},[pDst :128],outPointStep
                VHSUB   dZr3,dYr2,dYi3                  @// y3
                VHADD   dZi3,dYi2,dYr3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VHADD   qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr3,dZi3},[pDst :128],setStep

            .ENDIF

        .ELSE

            @// finish first stage of 4 point FFT

            VSUB    qY2,qX0,qX2                         @// u1
            SUBS    setCount,setCount,#4                @// decrement the set loop counter

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @// data[0]
            VADD    qY1,qX1,qX3                         @// u2
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @// data[2]
            VSUB    qY3,qX1,qX3                         @// u3

            @// finish second stage of 4 point FFT

            VLD2    {dXr1,dXi1},[pSrc :128],step1       @// data[1]
            VADD    qZ0,qY0,qY1                         @// y0

            BLE     grpZeroSetLoadDone\name             @// final pass: nothing left to load
            VLD2    {dXr3,dXi3},[pSrc :128],setStep     @// data[3]
grpZeroSetLoadDone\name:

            .ifeqs  "\inverse", "TRUE"

                VSUB    dZr3,dYr2,dYi3                  @// y3
                VADD    dZi3,dYi2,dYr3
                VST2    {dZr0,dZi0},[pDst :128],outPointStep

                VSUB    qZ1,qY0,qY1                     @// y2
                VST2    {dZr3,dZi3},[pDst :128],outPointStep

                VADD    dZr2,dYr2,dYi3                  @// y1
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VSUB    dZi2,dYi2,dYr3

                VADD    qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr2,dZi2},[pDst :128],setStep

            .ELSE

                VADD    dZr2,dYr2,dYi3                  @// y1
                VSUB    dZi2,dYi2,dYr3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep
                VSUB    qZ1,qY0,qY1                     @// y2

                VST2    {dZr2,dZi2},[pDst :128],outPointStep
                VSUB    dZr3,dYr2,dYi3                  @// y3
                VADD    dZi3,dYi2,dYr3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep
                VADD    qY0,qX0,qX2                     @// u0 (next loop)
                VST2    {dZr3,dZi3},[pDst :128],setStep

            .ENDIF

        .ENDIF

        BGT     grpZeroSetLoop\name                     @// flags still those of SUBS

        @// reset pSrc to pDst for the next stage
        SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep
        MOV     pDst,pPingPongBuf

        .endm
287
288
289
@// Forward first stage without scaling:
@// FFTSTAGE expanded with scaled=FALSE, inverse=FALSE and label suffix FWD.
        M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","FALSE",FWD
        M_END
293
294
295
@// Inverse first stage without scaling:
@// FFTSTAGE expanded with scaled=FALSE, inverse=TRUE and label suffix INV.
        M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",INV
        M_END
299
300
@// Forward first stage with scaling (halving adds and subtracts):
@// FFTSTAGE expanded with scaled=TRUE, inverse=FALSE and label suffix FWDSFS.
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END
304
305
@// Inverse first stage with scaling (halving adds and subtracts):
@// FFTSTAGE expanded with scaled=TRUE, inverse=TRUE and label suffix INVSFS.
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","TRUE",INVSFS
        M_END
309
310
311
312
313
314 .END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698