Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(471)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12 @//
13
14 @//
15 @// File Name: armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 5638
18 @// Last Modified Date: Wed, 06 Jun 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point complex signal.
26 @// This handles the general stage, not the first or last stage.
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50
51 @// Guarding implementation by the processor name
52
53
54 @// Input registers
55 @// FFTSTAGE reads all five on entry and overwrites all five before it ends.
56 #define pSrc r0
57 #define pDst r2
58 #define pTwiddle r1
59 #define subFFTNum r6
60 #define subFFTSize r7
61
62
63 @// Output registers
64 @// None are defined separately; the stage hands back updated values in the input registers.
65
66 @// Local scratch registers
67
68 #define outPointStep r3
69 #define pointStep r4
70 #define grpCount r5
71 #define setCount r8
72 @//const RN 9
73 #define step r10
74 #define dstStep r11
75 #define pTable r9
76 #define pTmp r9
77 @// pTable and pTmp both name r9; FFTSTAGE only uses pTmp.
78 @// NEON registers
79 @// qT0 (Q3) occupies the same bits as dY0/dY1 (D6/D7), and qT1 (Q4) the same as dY2/dY3 (D8/D9).
80 #define dW D0.S32
81 #define dX0 D2.S32
82 #define dX1 D3.S32
83 #define dX2 D4.S32
84 #define dX3 D5.S32
85 #define dY0 D6.S32
86 #define dY1 D7.S32
87 #define dY2 D8.S32
88 #define dY3 D9.S32
89 #define qT0 Q3.S64
90 #define qT1 Q4.S64
91
92
93
94 .MACRO FFTSTAGE scaled, inverse, name
95 @// FFTSTAGE: one radix-2 butterfly pass from pSrc to pDst, two complex points per setLoop iteration.
96 @// Define stack arguments (none: the macro body makes no stack accesses)
97 @// scaled = "TRUE" selects the halving VHADD/VHSUB butterflies; inverse = "TRUE" flips the sign of the dW[1] terms; name is appended to the loop labels.
98 @// Writes r0-r11, D0, D2-D9 and the condition flags.
99 @// Update grpCount and grpSize right away in order to reuse the pGrpCount and pGrpSize regs
100
101 LSR subFFTNum,subFFTNum,#1 @// grpSize = subFFTNum/2
102 LSL grpCount,subFFTSize,#1 @// grpCount = 2*subFFTSize
103
104
105 @// pT0+1 increments pT0 by 8 bytes
106 @// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
107 MOV pointStep,subFFTNum,LSL #2 @// pointStep = 4*grpSize
108
109 @// update subFFTSize for the next stage
110 MOV subFFTSize,grpCount
111
112 @// pOut0+1 increments pOut0 by 8 bytes
113 @// pOut0+outPointStep == increment of 8*outPointStep bytes = 4*size bytes
114 SMULBB outPointStep,grpCount,pointStep @// signed 16x16 multiply: only the low halfwords of grpCount and pointStep are used
115 LSL pointStep,pointStep,#1 @// pointStep = 8*grpSize: byte distance between the two inputs of a butterfly
116
117
118 RSB step,pointStep,#16 @// step = 16 - pointStep: the two source loads together advance pSrc by 16 bytes
119 RSB dstStep,outPointStep,#16 @// dstStep = 16 - outPointStep: the two stores together advance pDst by 16 bytes
120
121 @// Loop on the groups
122
123 grpLoop\name :
124 MOV setCount,pointStep,LSR #3 @// setCount = pointStep/8 = grpSize
125 VLD1 dW,[pTwiddle],pointStep @//[wi | wr] one twiddle per group; pTwiddle += pointStep
126
127
128 @// Loop on the sets
129
130
131 setLoop\name :
132
133
134 VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-img part
135 VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-img part
136
137 SUBS setCount,setCount,#2 @// two butterflies per pass; these flags are consumed by the BGT after the stores
138 @// Twiddle multiply, 32x32 -> 64 bit, with w = dW[0] + i*dW[1]: inverse multiplies point1 by conj(w), forward by w
139 .ifeqs "\inverse", "TRUE"
140 VMULL qT0,dX2,dW[0]
141 VMLAL qT0,dX3,dW[1] @// real part
142 VMULL qT1,dX3,dW[0]
143 VMLSL qT1,dX2,dW[1] @// imag part
144
145 .else
146
147 VMULL qT0,dX2,dW[0]
148 VMLSL qT0,dX3,dW[1] @// real part
149 VMULL qT1,dX3,dW[0]
150 VMLAL qT1,dX2,dW[1] @// imag part
151
152 .endif
153 @// Round and narrow the 64-bit products back to 32 bits (>> 31); presumably the twiddles are Q31 - confirm against the table
154 VRSHRN dX2,qT0,#31
155 VRSHRN dX3,qT1,#31
156 @// Butterfly: dY0/dY1 = point0 - product, dY2/dY3 = point0 + product (halving forms when scaled)
157 .ifeqs "\scaled", "TRUE"
158 VHSUB dY0,dX0,dX2
159 VHSUB dY1,dX1,dX3
160 VHADD dY2,dX0,dX2
161 VHADD dY3,dX1,dX3
162
163 .else
164 VSUB dY0,dX0,dX2
165 VSUB dY1,dX1,dX3
166 VADD dY2,dX0,dX2
167 VADD dY3,dX1,dX3
168
169 .endif
170 @// The difference is stored at pDst and the sum at pDst + outPointStep
171 VST2 {dY0,dY1},[pDst],outPointStep
172 VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
173
174 BGT setLoop\name
175 @// Next group: the ADD skips the half of the group that the second VLD2 already consumed
176 SUBS grpCount,grpCount,#2 @// grpCount counts down in twos from 2*subFFTSize
177 ADD pSrc,pSrc,pointStep
178 BGT grpLoop\name
179
180
181 @// Reset and Swap pSrc and pDst for the next stage
182 MOV pTmp,pDst
183 SUB pDst,pSrc,outPointStep,LSL #1 @// new pDst = old pSrc - 2*outPointStep (8*size bytes)
184 SUB pSrc,pTmp,outPointStep @// new pSrc = old pDst - outPointStep (4*size bytes)
185
186 @// Reset pTwiddle for the next stage
187 SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 4*size bytes
188
189
190 .endm
191
192 @// Forward stage, no scaling: FFTSTAGE expands with the VSUB/VADD butterflies and the forward twiddle multiply; FWD is the label suffix.
193 @// NOTE(review): M_START/M_END are not defined in this file (presumably armCOMM_s.h) - confirm what the r4 argument saves.
194 M_START armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
195 FFTSTAGE "FALSE","FALSE",FWD
196 M_END
197
198 @// Inverse stage, no scaling: FFTSTAGE expands with the VSUB/VADD butterflies and the inverse twiddle multiply; INV is the label suffix.
199 @// NOTE(review): M_START/M_END are not defined in this file (presumably armCOMM_s.h) - confirm what the r4 argument saves.
200 M_START armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
201 FFTSTAGE "FALSE","TRUE",INV
202 M_END
203
204 @// Forward stage with scaling: FFTSTAGE expands with the halving VHSUB/VHADD butterflies and the forward twiddle multiply; FWDSFS is the label suffix.
205 @// NOTE(review): M_START/M_END are not defined in this file (presumably armCOMM_s.h) - confirm what the r4 argument saves.
206 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
207 FFTSTAGE "TRUE","FALSE",FWDSFS
208 M_END
209
210 @// Inverse stage with scaling: FFTSTAGE expands with the halving VHSUB/VHADD butterflies and the inverse twiddle multiply; INVSFS is the label suffix.
211 @// NOTE(review): M_START/M_END are not defined in this file (presumably armCOMM_s.h) - confirm what the r4 argument saves.
212 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
213 FFTSTAGE "TRUE","TRUE",INVSFS
214 M_END
215
216 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698