Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @//
11 @// This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
12 @// to support float instead of SC32.
13 @//
14
15 @//
16 @// Description:
17 @// Compute the first radix-4 stage of an FFT for an N-point complex signal
18 @//
19 @//
20
21
22 @// Include standard headers
23
24 #include "dl/api/armCOMM_s.h"
25 #include "dl/api/omxtypes_s.h"
26
27 @// Import symbols required from other files
28 @// (For example tables)
29
30
31
32
33 @// Set debugging level
34 @//DEBUG_ON SETL {TRUE}
35
36
37
38 @// Guarding implementation by the processor name
39
40
41
42 @// Guarding implementation by the processor name
43
44
45 @//Input Registers
@//
@// Preprocessor aliases for the ARM core registers used by FFTSTAGE.
@// Within FFTSTAGE: pSrc, pDst, pPingPongBuf and subFFTNum are read on
@// entry; subFFTSize is only written (set to 4); pTwiddle is defined here
@// but is not referenced by FFTSTAGE.
@// Keep comments off the #define lines themselves: the C preprocessor
@// strips the // part and would leave a stray @ inside the expansion.
46
47 #define pSrc r0
48 #define pDst r2
49 #define pTwiddle r1
50 #define pPingPongBuf r5
51 #define subFFTNum r6
52 #define subFFTSize r7
53
54
55 @//Output Registers
56
57
58 @//Local Scratch Registers
@//
@// Two pairs of names share one register each:
@//   grpSize / setCount       -> r3
@//   pointStep / outPointStep -> r4
@// so writing one name of a pair also changes the other.
59
60 #define grpSize r3
61 @// Reuse grpSize as setCount
62 #define setCount r3
63 #define pointStep r4
64 #define outPointStep r4
@// setStep, step1 and step3 hold address post-increment values that
@// FFTSTAGE derives from pointStep.
65 #define setStep r8
66 #define step1 r9
67 #define step3 r10
68
69 @// Neon Registers
@//
@// Naming: X = loaded inputs, Y = first-stage sums/differences,
@// Z = values that are stored. dXrN / dXiN are the D registers that VLD2
@// fills with the first and second element of each loaded pair (named as
@// real and imaginary parts); qXN is the Q register that overlaps that D
@// pair (Q0 = D0:D1, Q1 = D2:D3, ...), so a Q-register operation acts on
@// both halves at once. The same scheme applies to Y and Z.
@// The .F32 data-type suffix is part of every alias.
70
71 #define dXr0 D0.F32
72 #define dXi0 D1.F32
73 #define dXr1 D2.F32
74 #define dXi1 D3.F32
75 #define dXr2 D4.F32
76 #define dXi2 D5.F32
77 #define dXr3 D6.F32
78 #define dXi3 D7.F32
79 #define dYr0 D8.F32
80 #define dYi0 D9.F32
81 #define dYr1 D10.F32
82 #define dYi1 D11.F32
83 #define dYr2 D12.F32
84 #define dYi2 D13.F32
85 #define dYr3 D14.F32
86 #define dYi3 D15.F32
87 #define qX0 Q0.F32
88 #define qX1 Q1.F32
89 #define qX2 Q2.F32
90 #define qX3 Q3.F32
91 #define qY0 Q4.F32
92 #define qY1 Q5.F32
93 #define qY2 Q6.F32
94 #define qY3 Q7.F32
95 #define dZr0 D16.F32
96 #define dZi0 D17.F32
97 #define dZr1 D18.F32
98 #define dZi1 D19.F32
99 #define dZr2 D20.F32
100 #define dZi2 D21.F32
101 #define dZr3 D22.F32
102 #define dZi3 D23.F32
103 #define qZ0 Q8.F32
104 #define qZ1 Q9.F32
105 #define qZ2 Q10.F32
106 #define qZ3 Q11.F32
107
108
@// FFTSTAGE scaled, inverse, name
@//
@// First-stage radix-4 butterfly pass over interleaved complex F32 data.
@// Every VLD2/VST2 moves two complex values (first elements into one D
@// register, second elements into the next), so each loop iteration
@// processes two sets. Loads for the next iteration are interleaved with
@// the arithmetic of the current one, so statement order matters.
@//
@// Macro arguments:
@//   scaled  - accepted but not referenced anywhere in the macro body
@//   inverse - "TRUE" assembles the inverse butterfly; any other string
@//             assembles the forward butterfly. Both variants compute the
@//             same four values; they differ in whether Z2 or Z3 is the
@//             second value stored
@//   name    - suffix appended to the local labels so that each expansion
@//             gets unique label names
@//
@// Registers on entry:
@//   pSrc (r0)         - read with VLD2 using the :128 alignment qualifier
@//   pDst (r2)         - written with VST2 using the :128 alignment qualifier
@//   pPingPongBuf (r5) - copied into pDst on exit
@//   subFFTNum (r6)    - point count. NOTE(review): the loop subtracts 2 from
@//                       subFFTNum/4 and exits on <= 0, and the guard against
@//                       the extra load only triggers on exactly 0, so this
@//                       presumably must be a multiple of 8 - confirm with
@//                       the callers
@//
@// Registers on exit:
@//   subFFTSize (r7) = 4
@//   subFFTNum  (r6) = entry value / 4
@//   pSrc            = final pDst - pointStep
@//   pDst            = pPingPongBuf
@//
@// Also written: r3, r4, r8, r9, r10, Q0-Q11 and the condition flags.
@// pTwiddle (r1) is not used by this macro.
109 .MACRO FFTSTAGE scaled, inverse, name
110
111 @// Define stack arguments
112
113 @// pointStep is the byte distance between the four inputs of one butterfly;
114 @// it is used directly as the post-increment of the VLD2/VST2 addresses.
115 @// Note: outPointStep = pointStep for the first stage (both names alias r4)
116
117 MOV pointStep,subFFTNum,LSL #1 @// pointStep = 2*subFFTNum
118
119
120 @// Update pSubFFTSize and pSubFFTNum regs
121 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
122 @// subFFTSize = 4 on exit from the first stage
123 MOV subFFTSize,#4
124
125 @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
126 LSR grpSize,subFFTNum,#2
127 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
128 MOV subFFTNum,grpSize
129
130
131 @// Calculate the step of input data for the next set
132 @//MOV setStep,pointStep,LSL #1
133 MOV setStep,grpSize,LSL #4 @// 16*grpSize = 2*pointStep
134 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
135 @// setStep = 3*pointStep
136 ADD setStep,setStep,pointStep
137 @// setStep = - 3*pointStep+16
138 RSB setStep,setStep,#16
139
140 @// data[3] & update pSrc for the next set
141 VLD2 {dXr3,dXi3},[pSrc :128],setStep
142 @// step1 = 2*pointStep
143 MOV step1,pointStep,LSL #1
144
@// Prime Y0 for the first loop iteration; the loop recomputes it at the
@// bottom of each pass for the following one.
145 VADD qY0,qX0,qX2
146
147 @// step3 = -pointStep
148 RSB step3,pointStep,#0
149
150 @// grp = 0 a special case since all the twiddle factors are 1
151 @// Loop on the sets : 2 sets at a time
152
153 radix4fsGrpZeroSetLoop\name :
154
155
156
157 @// Decrement setcount
@// The flags set here are consumed by the BEQ and BGT further down; every
@// instruction in between is a NEON operation and leaves them unchanged.
158 SUBS setCount,setCount,#2
159
160
161 @// finish first stage of 4 point FFT
162
163
@// Each VSUB/VADD below reads the current X registers before the VLD2 that
@// follows it overwrites them with the next pair of sets.
164 VSUB qY2,qX0,qX2
165
166 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
167 VADD qY1,qX1,qX3
168 VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
169 VSUB qY3,qX1,qX3
170
171
172 @// finish second stage of 4 point FFT
173
174 .ifeqs "\inverse", "TRUE"
175
176 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
177 VADD qZ0,qY0,qY1
178
179 @// data[3] & update pSrc for the next set, but not if it's the
180 @// last iteration so that we don't read past the end of the
181 @// input array.
182 BEQ radix4SkipLastUpdateInv\name
183 VLD2 {dXr3,dXi3},[pSrc :128],setStep
184 radix4SkipLastUpdateInv\name:
@// Inverse store order: Z0, Z3, Z1, Z2.
185 VSUB dZr3,dYr2,dYi3
186
187 VST2 {dZr0,dZi0},[pDst :128],outPointStep
188 VADD dZi3,dYi2,dYr3
189
190 VSUB qZ1,qY0,qY1
191 VST2 {dZr3,dZi3},[pDst :128],outPointStep
192
193 VADD dZr2,dYr2,dYi3
194 VST2 {dZr1,dZi1},[pDst :128],outPointStep
195 VSUB dZi2,dYi2,dYr3
196
197 VADD qY0,qX0,qX2 @// u0 for next iteration
@// The final store steps pDst by setStep (16 - 3*pointStep).
198 VST2 {dZr2,dZi2},[pDst :128],setStep
199
200
201 .else
202
203 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
204 VADD qZ0,qY0,qY1
205
206 @// data[3] & update pSrc for the next set, but not if it's the
207 @// last iteration so that we don't read past the end of the
208 @// input array.
209 BEQ radix4SkipLastUpdateFwd\name
210 VLD2 {dXr3,dXi3},[pSrc :128],setStep
211 radix4SkipLastUpdateFwd\name:
@// Forward store order: Z0, Z2, Z1, Z3.
212 VADD dZr2,dYr2,dYi3
213
214 VST2 {dZr0,dZi0},[pDst :128],outPointStep
215 VSUB dZi2,dYi2,dYr3
216
217 VSUB qZ1,qY0,qY1
218 VST2 {dZr2,dZi2},[pDst :128],outPointStep
219
220 VSUB dZr3,dYr2,dYi3
221 VST2 {dZr1,dZi1},[pDst :128],outPointStep
222 VADD dZi3,dYi2,dYr3
223
224 VADD qY0,qX0,qX2 @// u0 for next iteration
@// The final store steps pDst by setStep (16 - 3*pointStep).
225 VST2 {dZr3,dZi3},[pDst :128],setStep
226
227 .endif
228
@// Loop while the SUBS at the top of the loop left setCount above zero.
229 BGT radix4fsGrpZeroSetLoop\name
230
231 @// reset pSrc to pDst for the next stage
232 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
233 MOV pDst,pPingPongBuf
234
235
236 .endm
237
238
239
@// Forward first-stage entry point. Expands FFTSTAGE with inverse = "FALSE",
@// which assembles the .else (forward) butterfly, and with label suffix fwd.
@// The first argument (scaled) is not referenced by FFTSTAGE.
@// NOTE(review): M_START/M_END are not defined in this file (presumably they
@// come from dl/api/armCOMM_s.h); confirm there what the r4 argument requests.
240 M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
241 FFTSTAGE "FALSE","FALSE",fwd
242 M_END
243
244
245
@// Inverse first-stage entry point. Expands FFTSTAGE with inverse = "TRUE",
@// which assembles the inverse butterfly, and with label suffix inv.
@// The first argument (scaled) is not referenced by FFTSTAGE.
@// NOTE(review): M_START/M_END are not defined in this file (presumably they
@// come from dl/api/armCOMM_s.h); confirm there what the r4 argument requests.
246 M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
247 FFTSTAGE "FALSE","TRUE",inv
248 M_END
249
250
251 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698