Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(631)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name: armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 6740
18 @// Last Modified Date: Wed, 18 Jul 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a Radix 2 FFT stage for a N point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40
41
42 @// Set debugging level
43 @//DEBUG_ON SETL {TRUE}
44
45
46
47
48 @// Guarding implementation by the processor name
49
50
51 @// Input registers
52 @// All five are read by FFTSTAGE before it writes them, and all five are left modified when the stage ends
53 #define pSrc r0
54 #define pDst r2
55 #define pTwiddle r1
56 #define subFFTNum r6
57 #define subFFTSize r7
58
59
60 @//Output Registers
61
62
63 @// Local scratch registers
64 @// grpCount and pTmp both map to r4: pTmp is only written after the group loop has finished using grpCount
65 #define outPointStep r3
66 #define grpCount r4
67 #define dstStep r5
68 #define twStep r8
69 #define pTmp r4
70
71 @// Neon registers
72 @// The S32 and S16 twiddle names refer to the same D0/D1 registers, viewed as 32-bit and 16-bit lanes
73 #define dW1S32 D0.S32
74 #define dW2S32 D1.S32
75 #define dW1 D0.S16
76 #define dW2 D1.S16
77 @// X registers receive the loaded points, Y registers the butterfly results, T registers the 32-bit products
78 #define dX0 D2.S16
79 #define dX1 D3.S16
80 #define dX2 D4.S16
81 #define dX3 D5.S16
82 #define dY0 D6.S16
83 #define dY1 D7.S16
84 #define dY2 D8.S16
85 #define dY3 D9.S16
86 #define qT0 Q5.S32
87 #define qT1 Q6.S32
88 @// NOTE(review): D8-D13 (D8, D9, Q5, Q6) are callee-saved under AAPCS and no explicit save is visible in this file (M_START is passed only r4); presumably the callers of these _unsafe routines preserve them - confirm
89
90 .MACRO FFTSTAGE scaled, inverse, name
91 @// FFTSTAGE: one radix-2 butterfly pass over 16-bit complex data, handling two groups per loop iteration
92 @// Define stack arguments
93 @// scaled = TRUE selects the halving add/sub (VHADD/VHSUB) instead of VADD/VSUB; inverse = TRUE swaps the VMLAL/VMLSL pair in the twiddle multiply
94 @// name is appended to the loop label so that every expansion gets its own grpLoop label
95 @// Update grpCount and grpSize right away in order to reuse the pGrpCount and pGrpSize regs
96
97
98 LSL grpCount,subFFTSize,#1 @// grpCount = 2 * subFFTSize
99
100
101 @// update subFFTSize for the next stage
102 MOV subFFTSize,grpCount @// subFFTSize is now twice its entry value
103
104 @// pOut0+1 increments pOut0 by 8 bytes
105 @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
106 SMULBB outPointStep,grpCount,subFFTNum @// signed 16x16 multiply of the two low halfwords
107 MOV twStep,subFFTNum,LSL #1 @// twStep = 2 * subFFTNum, taken before subFFTNum is halved
108 LSR subFFTNum,subFFTNum,#1 @//grpSize
109
110
111 RSB dstStep,outPointStep,#8 @// dstStep = 8 - outPointStep
112
113
114 @// Note: pointStep is 8 in this case, so no need of an extra reg (the source loads use a plain post-increment)
115 @// Loop on the groups: 2 groups at a time
116
117 grpLoop\name:
118 @// Load one 32-bit twiddle per group, replicated to all lanes; pTwiddle is post-incremented by twStep after each load
119 VLD1 dW1S32[],[pTwiddle],twStep @//[wi | wr]
120 VLD1 dW2S32[],[pTwiddle],twStep
121
122 @// Process the sets for each grp: 2 sets at a time (no set looping required)
123
124 VLD1 dX0,[pSrc]! @// point0: of set0,set1 of grp0
125 VLD1 dX1,[pSrc]! @// point1: of set0,set1 of grp0
126 VLD1 dX2,[pSrc]! @// point0: of set0,set1 of grp1
127 VLD1 dX3,[pSrc]! @// point1: of set0,set1 of grp1
128
129 SUBS grpCount,grpCount,#4 @// decrement the loop counter; the flags set here are the ones tested by the BGT at the loop end
130 VUZP dW1,dW2 @// de-interleave 16-bit lanes: even lanes to dW1, odd lanes to dW2 (presumably wr and wi, per the [wi | wr] note)
131 VUZP dX1,dX3 @// same split for the point1 data of both groups: even lanes to dX1, odd lanes to dX3
132
133 .ifeqs "\inverse", "TRUE"
134 VMULL qT0,dX1,dW1
135 VMLAL qT0,dX3,dW2 @// real part
136 VMULL qT1,dX3,dW1
137 VMLSL qT1,dX1,dW2 @// imag part
138
139 .ELSE
140 VMULL qT0,dX1,dW1
141 VMLSL qT0,dX3,dW2 @// real part
142 VMULL qT1,dX3,dW1
143 VMLAL qT1,dX1,dW2 @// imag part
144
145 .ENDIF
146 @// Round and narrow the 32-bit products back to 16 bits (shift right by 15)
147 VRSHRN dX1,qT0,#15
148 VRSHRN dX3,qT1,#15
149 @// Re-interleave the real and imag halves, undoing the VUZP split of dX1/dX3
150 VZIP dX1,dX3
151
152
153 .ifeqs "\scaled", "TRUE"
154 @// Scaled variant: halving subtract/add
155 VHSUB dY0,dX0,dX1
156 VHADD dY1,dX0,dX1
157 VHSUB dY2,dX2,dX3
158 VHADD dY3,dX2,dX3
159
160 .ELSE
161 @// Unscaled variant: plain subtract/add
162 VSUB dY0,dX0,dX1
163 VADD dY1,dX0,dX1
164 VSUB dY2,dX2,dX3
165 VADD dY3,dX2,dX3
166
167
168
169 .ENDIF
170 @// Each store pair moves pDst forward by 8 bytes in total, because outPointStep + dstStep = 8
171 VST1 dY0,[pDst],outPointStep @// point0: of set0,set1 of grp0
172 VST1 dY1,[pDst],dstStep @// dstStep = -outPointStep + 8
173 VST1 dY2,[pDst],outPointStep @// point0: of set0,set1 of grp1
174 VST1 dY3,[pDst],dstStep @// point1: of set0,set1 of grp1
175
176 @// Repeat while the SUBS result above was greater than zero
177 BGT grpLoop\name
178
179
180 @// Reset and Swap pSrc and pDst for the next stage
181 MOV pTmp,pDst @// pTmp shares r4 with grpCount, which is no longer needed here
182 SUB pDst,pSrc,outPointStep,LSL #1 @// pDst = advanced pSrc - 2*outPointStep
183 SUB pSrc,pTmp,outPointStep @// pSrc = advanced pDst - outPointStep
184
185 @// Reset pTwiddle for the next stage
186 SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
187
188 .endm
189
190
191 @// Forward, unscaled stage: FFTSTAGE with scaled=FALSE and inverse=FALSE, loop label suffix FWD. M_START/M_END come from the included headers; r4 is presumably the register list they save and restore - confirm
192 M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
193 FFTSTAGE "FALSE","FALSE",FWD
194 M_END
195
196
197 @// Inverse, unscaled stage: FFTSTAGE with scaled=FALSE and inverse=TRUE, loop label suffix INV. M_START/M_END come from the included headers; r4 is presumably the register list they save and restore - confirm
198 M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
199 FFTSTAGE "FALSE","TRUE",INV
200 M_END
201
202
203 @// Forward, scaled stage: FFTSTAGE with scaled=TRUE (halving butterflies) and inverse=FALSE, loop label suffix FWDSFS. M_START/M_END come from the included headers; r4 is presumably the register list they save and restore - confirm
204 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
205 FFTSTAGE "TRUE","FALSE",FWDSFS
206 M_END
207
208
209 @// Inverse, scaled stage: FFTSTAGE with scaled=TRUE (halving butterflies) and inverse=TRUE, loop label suffix INVSFS. M_START/M_END come from the included headers; r4 is presumably the register list they save and restore - confirm
210 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
211 FFTSTAGE "TRUE","TRUE",INVSFS
212 M_END
213
214
215
216 .END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698