Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12 @//
13
14 @//
15 @// File Name: armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7485
18 @// Last Modified Date: Fri, 21 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
26 @// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
27 @// It implements both "scaled" (by 1/2) and "unscaled" versions of the above formula
28 @//
29
30
31 @// Include standard headers
32
33 #include "dl/api/armCOMM_s.h"
34 #include "dl/api/omxtypes_s.h"
35
36
37 @// Import symbols required from other files
38 @// (For example tables)
39
40
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50 @// Guarding implementation by the processor name
51
52
53
54 @// Input registers (r0-r3)
55
56 #define pSrc r0
57 #define pDst r1
58 #define pFFTSpec r2
59 #define scale r3
60
61
62 @// Output registers
63 #define result r0
64
65 @// Local scratch registers
66 @// Several names below alias the same core register (e.g. r4: argScale/tmpOrder/pTwiddle/x0r; r2: argDst/diffMinusOne/pOut1, which is also pFFTSpec), so they cannot all be live at once
67 #define argTwiddle r1
68 #define argDst r2
69 #define argScale r4
70 #define tmpOrder r4
71 #define pTwiddle r4
72 #define pOut r5
73 #define subFFTSize r7
74 #define subFFTNum r6
75 #define N r6
76 #define order r14
77 #define diff r9
78 #define count r8 @// Total num of radix stages required to complete the FFT
79 #define x0r r4
80 #define x0i r5
81 #define diffMinusOne r2
82 #define round r3
83 @// NOTE(review): 'count' above has a trailing comment on its #define line; the C preprocessor would presumably leave the '@' in its expansion, which is harmless only while 'count' stays unused - confirm
84 #define pOut1 r2
85 #define size r7
86 #define step r8
87 #define step1 r9
88 #define twStep r10
89 #define pTwiddleTmp r11
90 #define argTwiddle1 r12
91 #define zero r14
92
93 @// Neon registers
94 @// D-register names are typed .S32 and Q-register names .S64; many names share a register (D0-D3 serve as dX*/dY0/dY1, D4-D7 serve as both dW* and dY*)
95 #define dX0 D0.S32
96 #define dShift D1.S32
97 #define dX1 D1.S32
98 #define dY0 D2.S32
99 #define dY1 D3.S32
100 #define dX0r D0.S32
101 #define dX0i D1.S32
102 #define dX1r D2.S32
103 #define dX1i D3.S32
104 #define dW0r D4.S32
105 #define dW0i D5.S32
106 #define dW1r D6.S32
107 #define dW1i D7.S32
108 #define dT0 D8.S32
109 #define dT1 D9.S32
110 #define dT2 D10.S32
111 #define dT3 D11.S32
112 #define qT0 Q6.S64
113 #define qT1 Q7.S64
114 #define qT2 Q8.S64
115 #define qT3 Q9.S64
116 #define dY0r D4.S32
117 #define dY0i D5.S32
118 #define dY1r D6.S32
119 #define dY1i D7.S32
120 @// Alternate names for D4-D7 and D10-D11
121 #define dY2 D4.S32
122 #define dY3 D5.S32
123 #define dW0 D6.S32
124 #define dW1 D7.S32
125 #define dW0Tmp D10.S32
126 #define dW1Neg D11.S32
127
128
129 @ Byte offsets of the FFTSpec structure fields reached through pFFTSpec (FFTSTAGE reads N, pTwiddle and pBuf; pBitRev is not referenced by it)
130 .set ARMsFFTSpec_N, 0
131 .set ARMsFFTSpec_pBitRev, 4
132 .set ARMsFFTSpec_pTwiddle, 8
133 .set ARMsFFTSpec_pBuf, 12
134
135
136 .MACRO FFTSTAGE scaled, inverse, name
137 @// Emits the preTwiddleRadix2 pass. \scaled ("TRUE"/"FALSE") selects halving vs. plain add/sub in the final combine; \name is a suffix appended to the local labels so each expansion gets its own labels; \inverse is not referenced in the body.
138 @// Read the FFT size N from the spec structure (no log is computed here)
139 LDR N, [pFFTSpec, #ARMsFFTSpec_N]
140
141 @// Read other structure parameters
142 LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
143 LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
144
145
146
147 MOV size,N,ASR #1 @// size = N/2; preserve the contents of N
148 MOV step,N,LSL #2 @// step = N/2 * 8 bytes
149
150
151 @// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
152 @// Note: W^(k) is stored as negated value and also need to conjugate the values from the table
153
154 @// Z(0) : no need of twiddle multiply
155 @// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
156
157 VLD1 dX0,[pSrc],step @// load 8 bytes at pSrc, then pSrc += step
158 ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
159 @// pOut1 and pFFTSpec are both r2, so the spec pointer is overwritten by the ADD above
160 VLD1 dX1,[pSrc]! @// load 8 bytes at pSrc, then pSrc += 8
161 SUB twStep,step,size,LSL #1 @// twStep = 3N/8 * 8 bytes pointing to W^1
162
163 MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
164 SUB step1,step1,#8 @// (N/4-1)*8 bytes
165
166 VHADD dY0,dX0,dX1 @// [b+d | a+c]
167 VHSUB dY1,dX0,dX1 @// [b-d | a-c]
168 VZIP dY0,dY1 @// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
169 @// Final combine for Z(0): halving ops when scaled, plain ops otherwise; SUBS sets the flags tested by BLT/BEQ below
170 .ifeqs "\scaled", "TRUE"
171 VHSUB dX0,dY0,dY1
172 SUBS size,size,#2
173 VHADD dX1,dY0,dY1
174 .else
175 VSUB dX0,dY0,dY1
176 SUBS size,size,#2
177 VADD dX1,dY0,dY1
178 .endif
179
180 SUB pSrc,pSrc,step @// pSrc is now 8 bytes past its entry value (step + 8 were added by the two loads)
181
182 VST1 dX0[0],[pOut1]! @// store lane 0 of dX0 (4 bytes), pOut1 += 4
183 ADD pTwiddleTmp,pTwiddle,#8 @// W^2
184 VST1 dX1[1],[pOut1]! @// store lane 1 of dX1 (4 bytes), pOut1 += 4
185 ADD argTwiddle1,pTwiddle,twStep @// W^1
186
187 @// Flags still come from SUBS size,size,#2 (nothing in between sets them)
188 BLT decrementScale\name @// size went negative: skip the loop and the last element
189 BEQ lastElement\name @// size reached zero: only the last element remains
190
191
192 @// Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
193 @// Note: W^k is stored as negative values in the table and also need to conjugate the values from the table
194 @// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1) since both of them
195 @// require F(1),F(2) and F(N/2-2),F(N/2-1)
196
197
198 SUB step,step,#24 @// step = (N/2-3)*8 bytes: offset from the front pair at pSrc to the back pair
199 evenOddButterflyLoop\name :
200 @// Load two twiddle vectors step1 bytes apart through argTwiddle1 (net pointer advance: 8 bytes per pass)
201
202 VLD1 dW0r,[argTwiddle1],step1
203 VLD1 dW1r,[argTwiddle1]!
204 @// Load the front and back element pairs, de-interleaved into real and imaginary vectors (net pSrc advance: 16 bytes per pass)
205 VLD2 {dX0r,dX0i},[pSrc],step
206 SUB argTwiddle1,argTwiddle1,step1
207 VLD2 {dX1r,dX1i},[pSrc]!
208
209 SUB step1,step1,#8 @// (N/4-2)*8 bytes
210 VLD1 dW0i,[pTwiddleTmp],step1
211 VLD1 dW1i,[pTwiddleTmp]!
212 SUB pSrc,pSrc,step
213
214 SUB pTwiddleTmp,pTwiddleTmp,step1
215 VREV64 dX1r,dX1r
216 VREV64 dX1i,dX1i
217 SUBS size,size,#4 @// 4 elements consumed; these flags feed the BGT at the bottom of the loop
218
219 @// Halving butterflies between the front pair and the lane-reversed back pair
220 VHSUB dT2,dX0r,dX1r @// a-c
221 VHADD dT3,dX0i,dX1i @// b+d
222 SUB step1,step1,#8
223 VHADD dT0,dX0r,dX1r @// a+c
224 VHSUB dT1,dX0i,dX1i @// b-d
225
226 VZIP dW1r,dW1i
227 VZIP dW0r,dW0i
228
229 @// Widening multiplies of (dT2,dT3) by both twiddle pairs into 64-bit accumulators
230 VMULL qT0,dW1r,dT2
231 VMLSL qT0,dW1i,dT3 @// qT0 = dW1r*dT2 - dW1i*dT3
232 VMULL qT1,dW1r,dT3
233 VMLAL qT1,dW1i,dT2 @// qT1 = dW1r*dT3 + dW1i*dT2
234
235 VMULL qT2,dW0r,dT2
236 VMLAL qT2,dW0i,dT3 @// qT2 = dW0r*dT2 + dW0i*dT3
237 VMULL qT3,dW0r,dT3
238 VMLSL qT3,dW0i,dT2 @// qT3 = dW0r*dT3 - dW0i*dT2
239
240 @// Narrow the 64-bit products back to 32 bits with a rounding right shift by 31
241 VRSHRN dX1r,qT0,#31
242 VRSHRN dX1i,qT1,#31
243 @// dY1r/dY1i are D6/D7, the registers that held dW1r/dW1i; the twiddles are no longer needed here
244 .ifeqs "\scaled", "TRUE"
245 VHADD dY1r,dT0,dX1i @// F(N/2 -1)
246 VHSUB dY1i,dX1r,dT1
247 .else
248 VADD dY1r,dT0,dX1i @// F(N/2 -1)
249 VSUB dY1i,dX1r,dT1
250
251 .endif
252
253 @// Swap the lanes of the back-pair results before they are stored
254 VREV64 dY1r,dY1r
255 VREV64 dY1i,dY1i
256
257
258 VRSHRN dX0r,qT2,#31
259 VRSHRN dX0i,qT3,#31
260 @// dY0r/dY0i are D4/D5, the registers that held dW0r/dW0i
261 .ifeqs "\scaled", "TRUE"
262 VHADD dY0r,dT0,dX0i @// F(1)
263 VHSUB dY0i,dT1,dX0r
264 .else
265 VADD dY0r,dT0,dX0i @// F(1)
266 VSUB dY0i,dT1,dX0r
267 .endif
268
269 @// Store both result pairs interleaved; pOut1 nets +16 bytes per pass, mirroring pSrc
270 VST2 {dY0r,dY0i},[pOut1],step
271 VST2 {dY1r,dY1i},[pOut1]!
272 SUB pOut1,pOut1,step
273 SUB step,step,#32 @// front and back pairs move 4 elements (32 bytes) closer each pass
274
275
276 BGT evenOddButterflyLoop\name
277
278
279 SUB pSrc,pSrc,#8 @// set both the ptrs to the last element
280 SUB pOut1,pOut1,#8
281
282 @// Last element can be expanded as follows
283 @// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (since W^k is stored as -ve)
284 @// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
285 @// 1/2[2a+j0] - j (c-jd) [0+j2b]
286 @// (a+bc, -bd)
287 @// Since (c,d) = (0,1) for the last element, result is just (a,-b)
288
289 lastElement\name :
290 VLD1 dX0r,[pSrc]
291
292 .ifeqs "\scaled", "TRUE"
293 VSHR dX0r,dX0r,#1
294 .endif
295
296 VST1 dX0r[0],[pOut1]! @// store lane 0 unchanged (a)
297 VNEG dX0r,dX0r
298 VST1 dX0r[1],[pOut1] @// store negated lane 1 (-b)
299
300
301
302 decrementScale\name :
303 @// Scaled variant only: decrement scale (r3) by one, presumably to account for the 1/2 applied by this stage - confirm against the caller
304 .ifeqs "\scaled", "TRUE"
305 SUB scale,scale,#1
306 .endif
307
308 .endm
309
310 M_START armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe,r4
311 @// Unscaled entry point: expands FFTSTAGE with scaled="FALSE" and label suffix Inv. NOTE(review): M_START/M_END are not defined in this file; the macro body writes r0, r2, r4-r12 and D0-D19, so verify which of these the prologue saves and which the caller must preserve.
312 FFTSTAGE "FALSE","TRUE",Inv
313 M_END
314
315 M_START armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe,r4
316 @// Scaled entry point: expands FFTSTAGE with scaled="TRUE" and label suffix InvSfs, so results are halved and scale (r3) is decremented by one. NOTE(review): M_START/M_END are not defined in this file; the macro body writes r0, r2-r12 and D0-D19, so verify which of these the prologue saves and which the caller must preserve.
317 FFTSTAGE "TRUE","TRUE",InvSfs
318 M_END
319
320
321 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698