Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
11 @// to support float instead of SC32.
12 @//
13
14 @// Description:
15 @// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
16 @// complex signal. This handles the general stage, not the first or last
17 @// stage.
18 @//
19 @//
20
21
22 @// Include standard headers
23
24 #include "dl/api/armCOMM_s.h"
25 #include "dl/api/omxtypes_s.h"
26
27
28 @// Import symbols required from other files
29 @// (For example tables)
30
31
32
33 @// Set debugging level
34 @//DEBUG_ON SETL {TRUE}
35
36
37
38 @// Guarding implementation by the processor name
39
40
41
42
43 @// Guarding implementation by the processor name
44
45
46 @//Input Registers
@//   pSrc       - address the stage loads its input points from
@//   pDst       - address the stage stores its output points to
@//   pTwiddle   - address the stage loads its twiddle factors from
@//   subFFTNum  - halved at the start of the stage (then called grpSize)
@//   subFFTSize - doubled by the stage for the next one
47
48 #define pSrc r0
49 #define pDst r2
50 #define pTwiddle r1
51 #define subFFTNum r6
52 #define subFFTSize r7
53
54
55 @//Output Registers
56
57
58 @//Local Scratch Registers
@//   outPointStep  - byte offset between the two stores of a butterfly
@//   pointStep     - byte offset between the two loads of a butterfly
@//   grpCount      - counter of the outer (group) loop
@//   setCount      - counter of the inner (set) loop
@//   step, dstStep - 16 - pointStep and 16 - outPointStep, used as the
@//                   post-increment of the second load / store
@//   pTable, pTmp  - both alias r9; only pTmp is referenced by the code
@//                   visible in this file
59
60 #define outPointStep r3
61 #define pointStep r4
62 #define grpCount r5
63 #define setCount r8
64 @//const RN 9
65 #define step r10
66 #define dstStep r11
67 #define pTable r9
68 #define pTmp r9
69
70 @// Neon Registers
@//   dW        - twiddle factor of the current group, [wi | wr]
@//   dX0, dX1  - real and imaginary parts of point0 (two points at a time)
@//   dX2, dX3  - real and imaginary parts of point1 (two points at a time)
@//   dY0..dY3  - butterfly results: dY0/dY1 difference, dY2/dY3 sum
@//   qT0, qT1  - real and imaginary parts of point1 times the twiddle;
@//               despite the q prefix these are the D registers D10 and D11
71
72 #define dW D0.F32
73 #define dX0 D2.F32
74 #define dX1 D3.F32
75 #define dX2 D4.F32
76 #define dX3 D5.F32
77 #define dY0 D6.F32
78 #define dY1 D7.F32
79 #define dY2 D8.F32
80 #define dY3 D9.F32
81 #define qT0 D10.F32
82 #define qT1 D11.F32
83
84
85 .MACRO FFTSTAGE scaled, inverse, name
@// FFTSTAGE: body of one general radix-2 butterfly stage on complex F32 data.
@//
@// Macro arguments:
@//   scaled  - not referenced anywhere in this macro body.
@//   inverse - when it equals the string "TRUE" the twiddle multiply computes
@//             point1 * conj(W); any other value computes point1 * W.
@//   name    - suffix appended to the two loop labels so that every
@//             expansion of the macro gets distinct labels.
@//
@// Registers read on entry: pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
@// On exit: subFFTNum is halved, subFFTSize is doubled, pSrc and pDst have
@// exchanged roles and pTwiddle has been moved back by outPointStep bytes.
@// Overwrites outPointStep, pointStep, grpCount, setCount, step, dstStep,
@// pTmp, the NEON registers D0 and D2-D11, and the condition flags.
86
87 @// Define stack arguments
88
89
90 @// Update grpCount and grpSize right away in order to reuse the pGrpCount
91 @// and pGrpSize regs
92
93 LSR subFFTNum,subFFTNum,#1 @//grpSize = subFFTNum / 2
94 LSL grpCount,subFFTSize,#1 @//grpCount = 2 * subFFTSize
95
96
97 @// One complex F32 point occupies 8 bytes.
98 @// pointStep = 4*grpSize here; it is doubled to 8*grpSize bytes further down
99 MOV pointStep,subFFTNum,LSL #2
100
101 @// update subFFTSize for the next stage
102 MOV subFFTSize,grpCount
103
104 @// outPointStep = grpCount * pointStep = (2*subFFTSize) * (4*grpSize) bytes,
105 @// i.e. 4*size bytes with size = 2*grpSize*subFFTSize.
106 @// NOTE(review): SMULBB uses only the signed low 16 bits of each operand -
@// confirm that callers keep grpCount and pointStep below 2^15.
107 SMULBB outPointStep,grpCount,pointStep
108 LSL pointStep,pointStep,#1 @// pointStep = 8*grpSize bytes
109
110
111 RSB step,pointStep,#16 @// step = 16 - pointStep
112 RSB dstStep,outPointStep,#16 @// dstStep = 16 - outPointStep
113
114 @// Loop on the groups: one twiddle factor is loaded per group
115
116 radix2GrpLoop\name :
117 MOV setCount,pointStep,LSR #3 @// setCount = pointStep / 8
118 VLD1 dW,[pTwiddle],pointStep @//[wi | wr], then pTwiddle += pointStep
119
120
121 @// Loop on the sets: each iteration computes two butterflies
122
123
124 radix2SetLoop\name :
125
126
127 @// point0: dX0-real part dX1-img part, then pSrc += pointStep
128 VLD2 {dX0,dX1},[pSrc],pointStep
129 @// point1: dX2-real part dX3-img part, then pSrc += step (net +16 bytes)
130 VLD2 {dX2,dX3},[pSrc],step
131 @// The flags set here are consumed by the BGT that closes the set loop
132 SUBS setCount,setCount,#2
133 @// (qT0,qT1) = point1 times the twiddle, with dW[0] = wr and dW[1] = wi
134 .ifeqs "\inverse", "TRUE"
135 VMUL qT0,dX2,dW[0]
136 VMLA qT0,dX3,dW[1] @// real part = re*wr + im*wi
137 VMUL qT1,dX3,dW[0]
138 VMLS qT1,dX2,dW[1] @// imag part = im*wr - re*wi
139
140 .else
141
142 VMUL qT0,dX2,dW[0]
143 VMLS qT0,dX3,dW[1] @// real part = re*wr - im*wi
144 VMUL qT1,dX3,dW[0]
145 VMLA qT1,dX2,dW[1] @// imag part = im*wr + re*wi
146
147 .endif
148 @// Butterfly: (dY0,dY1) = point0 - product, (dY2,dY3) = point0 + product
149 VSUB dY0,dX0,qT0
150 VSUB dY1,dX1,qT1
151 VADD dY2,dX0,qT0
152 VADD dY3,dX1,qT1
153 @// The difference is stored at pDst, the sum outPointStep bytes after it
154 VST2 {dY0,dY1},[pDst],outPointStep
155 @// dstStep = -outPointStep + 16, so pDst ends 16 bytes past its start
156 VST2 {dY2,dY3},[pDst],dstStep
157
158 BGT radix2SetLoop\name
159 @// Next group: grpCount drops by 2, pSrc skips the point1 half just read
160 SUBS grpCount,grpCount,#2
161 ADD pSrc,pSrc,pointStep @// no S suffix: the SUBS flags reach the BGT
162 BGT radix2GrpLoop\name
163
164
165 @// Reset and Swap pSrc and pDst for the next stage
166 MOV pTmp,pDst
167 @// new pDst = pSrc - 2*outPointStep; new pSrc = old pDst - outPointStep
168 SUB pDst,pSrc,outPointStep,LSL #1
169 SUB pSrc,pTmp,outPointStep
170
171 @// Reset pTwiddle for the next stage
172 @// pTwiddle -= outPointStep bytes
173 SUB pTwiddle,pTwiddle,outPointStep
174
175
176 .endm
177
178
179
@// Forward-transform entry point: expands FFTSTAGE with inverse = "FALSE"
@// and the label suffix FWD.
@// M_START and M_END are not defined in this file (presumably provided by
@// dl/api/armCOMM_s.h); the meaning of the r4 argument should be confirmed
@// there.
180 M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
181 FFTSTAGE "FALSE","FALSE",FWD
182 M_END
183
184
185
@// Inverse-transform entry point: expands FFTSTAGE with inverse = "TRUE"
@// (conjugated twiddle multiply) and the label suffix INV.
@// M_START and M_END are not defined in this file (presumably provided by
@// dl/api/armCOMM_s.h); the meaning of the r4 argument should be confirmed
@// there.
186 M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
187 FFTSTAGE "FALSE","TRUE",INV
188 M_END
189
190
191 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698