Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12 @//
13
14 @//
15 @// File Name: armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7767
18 @// Last Modified Date: Thu, 27 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a Radix 4 FFT stage for a N point complex signal
26 @//
27
28
29 @// Include standard headers
30
31 #include "dl/api/armCOMM_s.h"
32 #include "dl/api/omxtypes_s.h"
33
34 @// Import symbols required from other files
35 @// (For example tables)
36
37
38
39
40 @// Set debugging level
41 @//DEBUG_ON SETL {TRUE}
42
43
44 @// Guarding implementation by the processor name
45
46
47 @// Import symbols required from other files
48 @// (For example tables)
49 @//IMPORT armAAC_constTable
50
51 @//Input Registers
52
@// Core register aliases consumed by FFTSTAGE.
@// pSrc, pTwiddle and pDst are used as post-incremented base addresses.
@// subFFTSize is read at the top of FFTSTAGE and then overwritten with
@// four times its entry value; subFFTNum is only ever written (set to 1).
@// Keep comments off the #define lines themselves: anything left after
@// the preprocessor strips the // part would become part of the alias.
53 #define pSrc r0
54 #define pDst r2
55 #define pTwiddle r1
56 #define subFFTNum r6
57 #define subFFTSize r7
58
59
60
61 @//Output Registers
62
63
64 @//Local Scratch Registers
65
@// Core scratch register aliases written by FFTSTAGE (r3-r5, r8-r12).
@// grpCount and pTmp both name r4: grpCount is the loop counter and pTmp
@// is only used after the loop has finished, so the two never overlap.
66 #define outPointStep r3
67 #define grpCount r4
68 #define dstStep r5
69 #define grpTwStep r8
70 #define stepTwiddle r9
71 #define twStep r10
72 #define pTmp r4
73 #define step16 r11
74 #define step24 r12
75
76
77 @// Neon Registers
78
@// NEON register aliases. Several names deliberately share a physical
@// register (Qn is the pair D(2n), D(2n+1)), so the order of instructions
@// in FFTSTAGE matters whenever two aliases of one register are live.
@//
@// D0-D7: raw VLD4 destinations (dButterfly*) and, after the VUZP step,
@// the four de-interleaved input points (dXr0..dXi3). Same registers.
79 #define dButterfly1Real02 D0.S32
80 #define dButterfly1Imag02 D1.S32
81 #define dButterfly1Real13 D2.S32
82 #define dButterfly1Imag13 D3.S32
83 #define dButterfly2Real02 D4.S32
84 #define dButterfly2Imag02 D5.S32
85 #define dButterfly2Real13 D6.S32
86 #define dButterfly2Imag13 D7.S32
87 #define dXr0 D0.S32
88 #define dXi0 D1.S32
89 #define dXr1 D2.S32
90 #define dXi1 D3.S32
91 #define dXr2 D4.S32
92 #define dXi2 D5.S32
93 #define dXr3 D6.S32
94 #define dXi3 D7.S32
95
@// D16-D23: first-stage butterfly results. These are the same registers
@// as Q8-Q11, i.e. qY0..qY3 and also the 64-bit products qT1..qT4.
96 #define dYr0 D16.S32
97 #define dYi0 D17.S32
98 #define dYr1 D18.S32
99 #define dYi1 D19.S32
100 #define dYr2 D20.S32
101 #define dYi2 D21.S32
102 #define dYr3 D22.S32
103 #define dYi3 D23.S32
104
@// D8-D13: twiddle factors for the three multiplied inputs.
@// Q7-Q12: 64-bit accumulators for the widening twiddle multiplies.
@// qT0 (Q7) is the same register as qZ0 / dZr0,dZi0 (D14,D15).
105 #define dW1r D8.S32
106 #define dW1i D9.S32
107 #define dW2r D10.S32
108 #define dW2i D11.S32
109 #define dW3r D12.S32
110 #define dW3i D13.S32
111 #define qT0 Q7.S64
112 #define qT1 Q8.S64
113 #define qT2 Q9.S64
114 #define qT3 Q10.S64
115 #define qT4 Q11.S64
116 #define qT5 Q12.S64
117
@// D14,D15 and D26-D31: twiddled inputs and final outputs (dZ*), which
@// are the halves of qZ0 (Q7) and qZ1..qZ3 (Q13..Q15) defined below.
118 #define dZr0 D14.S32
119 #define dZi0 D15.S32
120 #define dZr1 D26.S32
121 #define dZi1 D27.S32
122 #define dZr2 D28.S32
123 #define dZi2 D29.S32
124 #define dZr3 D30.S32
125 #define dZi3 D31.S32
126
@// Quad-register views of the D aliases above; qX0 is D0,D1 (dXr0,dXi0).
127 #define qX0 Q0.S32
128 #define qY0 Q8.S32
129 #define qY1 Q9.S32
130 #define qY2 Q10.S32
131 #define qY3 Q11.S32
132 #define qZ0 Q7.S32
133 #define qZ1 Q13.S32
134 #define qZ2 Q14.S32
135 #define qZ3 Q15.S32
136
137
138
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one radix-4 stage over 32-bit complex data. Each loop pass
@// handles two groups of four points, one group per 32-bit lane.
@//
@//   scaled  - "TRUE" builds the butterfly from halving VHADD/VHSUB,
@//             any other value uses plain VADD/VSUB.
@//   inverse - "TRUE" multiplies by the conjugated twiddle and stores the
@//             Z1 and Z3 results in swapped order; any other value
@//             multiplies by the twiddle as loaded and stores Z0..Z3.
@//   name    - suffix for the loop label, so that every expansion of the
@//             macro defines a distinct label.
@//
@// Reads:    pSrc, pDst, pTwiddle, subFFTSize.
@// On exit:  subFFTNum = 1, subFFTSize = 4 * entry value, pSrc and pDst
@//           exchanged and rewound, pTwiddle rewound.
@// Writes:   r3-r5 and r8-r12 (scratch aliases), D0-D31, condition flags.
@//
@// The loop is software pipelined: every pass loads the source points
@// and twiddles for the following pass, so the last pass loads 64 bytes
@// from pSrc that are never used. The instruction order also depends on
@// register aliasing (qT0 and qZ0 are both Q7), so do not reorder.
139 .MACRO FFTSTAGE scaled, inverse , name
140
141 @// Define stack arguments
142
143
144 @// pOut0+1 increments pOut0 by 8 bytes
145 @// pOut0+outPointStep == increment of 8*outPointStep bytes
146 MOV outPointStep,subFFTSize,LSL #3
147
148 @// Update grpCount and grpSize rightaway
149
150 VLD2 {dW1r,dW1i},[pTwiddle :128] @// [wi|wr]
151 MOV step16,#16
152 LSL grpCount,subFFTSize,#2 @// grpCount = 4*subFFTSize
153
154 VLD1 dW2r,[pTwiddle :64] @// [wi|wr]
155 MOV subFFTNum,#1 @//after the last stage
156
157 VLD1 dW3r,[pTwiddle :64],step16 @// [wi|wr]
158 MOV stepTwiddle,#0
159
160 VLD1 dW2i,[pTwiddle :64]! @// [wi|wr]
161 SUB grpTwStep,stepTwiddle,#8 @// grpTwStep = -8 to start with
162
163 @// update subFFTSize for the next stage
164 MOV subFFTSize,grpCount
165 VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
166 MOV dstStep,outPointStep,LSL #1
167
168 VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
169 ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
170 RSB dstStep,dstStep,#16 @// dstStep = -3*outPointStep+16
171 MOV step24,#24
172
173 VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
174
175
176 @// Process two groups at a time
177
178 grpLoop\name :
179
180 VZIP dW2r,dW2i
181 ADD stepTwiddle,stepTwiddle,#16 @// increment for the next iteration
182 VZIP dW3r,dW3i
183 ADD grpTwStep,stepTwiddle,#4
184 VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
185 SUB twStep,stepTwiddle,#16 @// -16+stepTwiddle
186 VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
187 MOV grpTwStep,grpTwStep,LSL #1
188 VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
189 RSB grpTwStep,grpTwStep,#0 @// -8-2*stepTwiddle
190
191
192 VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
193
194
@// Only NEON instructions follow until the BGT at the end of the loop,
@// so the flags set here are still valid when the branch is taken.
195 SUBS grpCount,grpCount,#8 @// grpCount is multiplied by 4
196
@// Twiddle multiply of input 1: widening 32x32 multiply into qT0/qT1.
@// Inverse uses conj(W)*X, forward uses W*X.
197 .ifeqs "\inverse", "TRUE"
198 VMULL qT0,dW1r,dXr1
199 VMLAL qT0,dW1i,dXi1 @// real part
200 VMULL qT1,dW1r,dXi1
201 VMLSL qT1,dW1i,dXr1 @// imag part
202
203 .else
204
205 VMULL qT0,dW1r,dXr1
206 VMLSL qT0,dW1i,dXi1 @// real part
207 VMULL qT1,dW1r,dXi1
208 VMLAL qT1,dW1i,dXr1 @// imag part
209
210 .endif
211
212 VLD2 {dW1r,dW1i},[pTwiddle :128],stepTwiddle @// [wi|wr]
213
@// Twiddle multiply of input 2; dW2r is reloaded for the next pass as
@// soon as its last use has issued.
214 .ifeqs "\inverse", "TRUE"
215 VMULL qT2,dW2r,dXr2
216 VMLAL qT2,dW2i,dXi2 @// real part
217 VMULL qT3,dW2r,dXi2
218 VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
219 VMLSL qT3,dW2i,dXr2 @// imag part
220
221 .else
222
223 VMULL qT2,dW2r,dXr2
224 VMLSL qT2,dW2i,dXi2 @// real part
225 VMULL qT3,dW2r,dXi2
226 VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
227 VMLAL qT3,dW2i,dXr2 @// imag part
228
229 .endif
230
231
@// Round and narrow the 64-bit products back to 32 bits (shift by 31).
@// qT0 must be narrowed before the VMOV below overwrites Q7 (qZ0).
232 VRSHRN dZr1,qT0,#31
233 VLD1 dW2i,[pTwiddle :64],twStep @// [wi|wr]
234 VRSHRN dZi1,qT1,#31
235
236 VMOV qZ0,qX0 @// move qX0 so as to load for the next iteration
237 VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
238
239
@// Twiddle multiply of input 3.
240 .ifeqs "\inverse", "TRUE"
241 VMULL qT4,dW3r,dXr3
242 VMLAL qT4,dW3i,dXi3 @// real part
243 VMULL qT5,dW3r,dXi3
244 VLD1 dW3r,[pTwiddle :64],step24
245 VMLSL qT5,dW3i,dXr3 @// imag part
246
247 .else
248
249 VMULL qT4,dW3r,dXr3
250 VMLSL qT4,dW3i,dXi3 @// real part
251 VMULL qT5,dW3r,dXi3
252 VLD1 dW3r,[pTwiddle :64],step24
253 VMLAL qT5,dW3i,dXr3 @// imag part
254
255 .endif
256
257 VRSHRN dZr2,qT2,#31
258 VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
259 VRSHRN dZi2,qT3,#31
260
261 VRSHRN dZr3,qT4,#31
262 VRSHRN dZi3,qT5,#31
263 VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
264
265
266 .ifeqs "\scaled", "TRUE"
267
268 @// finish first stage of 4 point FFT
269
270 VHADD qY0,qZ0,qZ2
271 VHSUB qY2,qZ0,qZ2
272 VHADD qY1,qZ1,qZ3
273 VHSUB qY3,qZ1,qZ3
274
275
276 @// finish second stage of 4 point FFT
277
278 .ifeqs "\inverse", "TRUE"
279
280 VHSUB qZ0,qY2,qY1
281
282 VHADD dZr3,dYr0,dYi3
283 VST2 {dZr0,dZi0},[pDst :128],outPointStep
284 VHSUB dZi3,dYi0,dYr3
285
286 VHADD qZ2,qY2,qY1
287 VST2 {dZr3,dZi3},[pDst :128],outPointStep
288
289 VHSUB dZr1,dYr0,dYi3
290 VST2 {dZr2,dZi2},[pDst :128],outPointStep
291 VHADD dZi1,dYi0,dYr3
292
293 VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
294
295
296 .else
297
298 VHSUB qZ0,qY2,qY1
299
300 VHSUB dZr1,dYr0,dYi3
301 VST2 {dZr0,dZi0},[pDst :128],outPointStep
302 VHADD dZi1,dYi0,dYr3
303
304 VHADD qZ2,qY2,qY1
305 VST2 {dZr1,dZi1},[pDst :128],outPointStep
306
307 VHADD dZr3,dYr0,dYi3
308 VST2 {dZr2,dZi2},[pDst :128],outPointStep
309 VHSUB dZi3,dYi0,dYr3
310
311 VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
312
313
314 .endif
315
316
317
318 .else
319
320 @// finish first stage of 4 point FFT
321
322 VADD qY0,qZ0,qZ2
323 VSUB qY2,qZ0,qZ2
324 VADD qY1,qZ1,qZ3
325 VSUB qY3,qZ1,qZ3
326
327
328 @// finish second stage of 4 point FFT
329
330 .ifeqs "\inverse", "TRUE"
331
332 VSUB qZ0,qY2,qY1
333
334 VADD dZr3,dYr0,dYi3
335 VST2 {dZr0,dZi0},[pDst :128],outPointStep
336 VSUB dZi3,dYi0,dYr3
337
338 VADD qZ2,qY2,qY1
339 VST2 {dZr3,dZi3},[pDst :128],outPointStep
340
341 VSUB dZr1,dYr0,dYi3
342 VST2 {dZr2,dZi2},[pDst :128],outPointStep
343 VADD dZi1,dYi0,dYr3
344
345 VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
346
347
348 .else
349
350 VSUB qZ0,qY2,qY1
351
352 VSUB dZr1,dYr0,dYi3
353 VST2 {dZr0,dZi0},[pDst :128],outPointStep
354 VADD dZi1,dYi0,dYr3
355
356 VADD qZ2,qY2,qY1
357 VST2 {dZr1,dZi1},[pDst :128],outPointStep
358
359 VADD dZr3,dYr0,dYi3
360 VST2 {dZr2,dZi2},[pDst :128],outPointStep
361 VSUB dZi3,dYi0,dYr3
362
363 VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
364
365
366 .endif
367
368 .endif
369
370 BGT grpLoop\name @// flags come from the SUBS on grpCount above
371
372
373 @// Reset and Swap pSrc and pDst for the next stage
374 MOV pTmp,pDst
375 SUB pSrc,pSrc,#64 @// Extra increment done in final iteration of the loop
376 SUB pDst,pSrc,outPointStep,LSL #2 @// new pDst = rewound pSrc - 4*outPointStep
377 SUB pSrc,pTmp,outPointStep @// new pSrc = old pDst - outPointStep
378 SUB pTwiddle,pTwiddle,subFFTSize,LSL #1 @// subFFTSize already holds the next-stage value here
379 SUB pTwiddle,pTwiddle,#16 @// Extra increment done in final iteration of the loop
380
381 .endm
382
383
@// Forward transform, unscaled: FFTSTAGE with scaled="FALSE" and
@// inverse="FALSE"; loop label suffix is fwd.
@// NOTE(review): M_START/M_END are presumably provided by armCOMM_s.h and
@// the r4 argument presumably limits which core registers are saved.
@// FFTSTAGE also writes r5-r12 and D8-D15 - confirm callers allow that.
384 M_START armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
385 FFTSTAGE "FALSE","FALSE",fwd
386 M_END
387
388
@// Inverse transform, unscaled: FFTSTAGE with scaled="FALSE" and
@// inverse="TRUE"; loop label suffix is inv.
@// NOTE(review): M_START/M_END are presumably provided by armCOMM_s.h and
@// the r4 argument presumably limits which core registers are saved.
@// FFTSTAGE also writes r5-r12 and D8-D15 - confirm callers allow that.
389 M_START armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
390 FFTSTAGE "FALSE","TRUE",inv
391 M_END
392
393
@// Forward transform, scaled: FFTSTAGE with scaled="TRUE" (halving
@// add/subtract in the butterfly) and inverse="FALSE"; label suffix fwdsfs.
@// NOTE(review): M_START/M_END are presumably provided by armCOMM_s.h and
@// the r4 argument presumably limits which core registers are saved.
@// FFTSTAGE also writes r5-r12 and D8-D15 - confirm callers allow that.
394 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
395 FFTSTAGE "TRUE","FALSE",fwdsfs
396 M_END
397
398
@// Inverse transform, scaled: FFTSTAGE with scaled="TRUE" (halving
@// add/subtract in the butterfly) and inverse="TRUE"; label suffix invsfs.
@// NOTE(review): M_START/M_END are presumably provided by armCOMM_s.h and
@// the r4 argument presumably limits which core registers are saved.
@// FFTSTAGE also writes r5-r12 and D8-D15 - confirm callers allow that.
399 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
400 FFTSTAGE "TRUE","TRUE",invsfs
401 M_END
402
403
404 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698