Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name: armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 5892
18 @// Last Modified Date: Thu, 07 Jun 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a radix-2 FFT stage for an N-point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50
51 @// Guarding implementation by the processor name
52
53
54 @// Input registers
55 @// FFTSTAGE post-increments pSrc, pTwiddle and pDst and rewrites subFFTNum and subFFTSize in place.
56 #define pSrc r0
57 #define pDst r2
58 #define pTwiddle r1
59 #define subFFTNum r6
60 #define subFFTSize r7
61
62
63 @//Output Registers
64
65
66 @// Local scratch registers
67 @// Every register below is overwritten by FFTSTAGE; outPointStep, pointStep, step and dstStep are used as byte strides.
68 #define outPointStep r3
69 #define pointStep r4
70 #define grpCount r5
71 #define setCount r8
72 #define step r10
73 #define dstStep r11
74 #define pTmp r9
75
76 @// NEON registers
77 @// qT0 (Q3) occupies the same storage as dY0/dY1 (D6/D7), and qT1 (Q4) the same as dY2/dY3 (D8/D9).
78 #define dW D0.S16
79 #define dX0 D2.S16
80 #define dX1 D3.S16
81 #define dX2 D4.S16
82 #define dX3 D5.S16
83 #define dY0 D6.S16
84 #define dY1 D7.S16
85 #define dY2 D8.S16
86 #define dY3 D9.S16
87 #define qT0 Q3.S32
88 #define qT1 Q4.S32
89
90
91
92 .MACRO FFTSTAGE scaled, inverse, name
93 @// Emits one radix-2 butterfly stage. scaled=TRUE selects halving add/sub, inverse=TRUE selects the conjugate twiddle product, name is appended to the loop labels.
94 @// Define stack arguments
95 @// (none are defined here; all inputs arrive in pSrc, pDst, pTwiddle, subFFTNum and subFFTSize)
96
97 @// Update grpCount and grpSize right away in order to reuse the pGrpCount and pGrpSize regs
98
99 LSR subFFTNum,subFFTNum,#1 @// subFFTNum is halved; the result is the grpSize used below
100 LSL grpCount,subFFTSize,#1 @// grpCount = 2 * subFFTSize (the group loop counts down by 2)
101
102
103 @// pT0+1 increments pT0 by 8 bytes
104 @// pT0+pointStep = increment of 4*pointStep bytes = 2*grpSize bytes
105 MOV pointStep,subFFTNum,LSL #1 @// pointStep = 2 * grpSize at this point; it is doubled after the multiply below
106
107 @// update subFFTSize for the next stage
108 MOV subFFTSize,grpCount
109
110 @// pOut0+1 increments pOut0 by 8 bytes
111 @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
112 SMULBB outPointStep,grpCount,pointStep @// signed multiply of the low halfwords only. NOTE(review): presumably both operands always fit in 16 bits - confirm for the largest supported size
113 LSL pointStep,pointStep,#1 @// pointStep = 4 * grpSize, used from here on as a byte stride
114
115 @// step and dstStep are the second post-increments of each load/store pair, chosen so that each pair advances its pointer by 16 bytes in total
116 RSB step,pointStep,#16 @// step = 16 - pointStep
117 RSB dstStep,outPointStep,#16 @// dstStep = 16 - outPointStep
118
119 @// Loop on the groups
120 @// Each pass loads one twiddle, runs the set loop, then skips pSrc over the half that was read as point1.
121 grpLoop\name:
122
123 VLD1 dW,[pTwiddle],pointStep @//[wi | wr]; pTwiddle is post-incremented by pointStep bytes
124 MOV setCount,pointStep,LSR #2 @// setCount = pointStep / 4
125
126
127 @// Loop on the sets: 4 at a time
128 @// Each pass consumes 4 complex values from each of the two input points and decrements setCount by 4.
129 @// NOTE(review): the loop exits on setCount <= 0, so it presumably expects setCount to be a multiple of 4 - confirm with the caller.
130 setLoop\name:
131
132 @// VLD2 de-interleaves, so the even 16-bit lanes land in the first register and the odd lanes in the second.
133 VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-imag part; pSrc += pointStep
134 VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-imag part; pSrc += 16 - pointStep
135
136 SUBS setCount,setCount,#4 @// flags set here are consumed by the BGT at the bottom of the set loop
137
138 .ifeqs "\inverse", "TRUE"
139 VMULL qT0,dX2,dW[0]
140 VMLAL qT0,dX3,dW[1] @// real part = re*w[0] + im*w[1]
141 VMULL qT1,dX3,dW[0]
142 VMLSL qT1,dX2,dW[1] @// imag part = im*w[0] - re*w[1]
143
144 .ELSE
145
146 VMULL qT0,dX2,dW[0]
147 VMLSL qT0,dX3,dW[1] @// real part = re*w[0] - im*w[1]
148 VMULL qT1,dX3,dW[0]
149 VMLAL qT1,dX2,dW[1] @// imag part = im*w[0] + re*w[1]
150
151 .ENDIF
152 @// Narrow the 32-bit products back to 16 bits with a rounding right shift by 15; point1 now holds the twiddled value.
153 VRSHRN dX2,qT0,#15
154 VRSHRN dX3,qT1,#15
155 @// qT0/qT1 share storage with dY0-dY3, so the wide products must be narrowed before the butterfly outputs are written.
156 .ifeqs "\scaled", "TRUE"
157 VHSUB dY0,dX0,dX2 @// halving forms: (a - b) >> 1 and (a + b) >> 1
158 VHSUB dY1,dX1,dX3
159 VHADD dY2,dX0,dX2
160 VHADD dY3,dX1,dX3
161
162 .ELSE
163 VSUB dY0,dX0,dX2 @// plain forms: a - b and a + b
164 VSUB dY1,dX1,dX3
165 VADD dY2,dX0,dX2
166 VADD dY3,dX1,dX3
167
168 .ENDIF
169 @// The difference is stored at pDst and the sum outPointStep bytes further on; VST2 re-interleaves real and imaginary lanes.
170 VST2 {dY0,dY1},[pDst],outPointStep
171 VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
172
173 BGT setLoop\name
174
175 SUBS grpCount,grpCount,#2
176 ADD pSrc,pSrc,pointStep @// skip the half of the group already read as point1; ADD leaves the SUBS flags intact for the BGT
177 BGT grpLoop\name
178
179 @// When the loop counts divide evenly, pSrc has advanced 2*outPointStep bytes and pDst outPointStep bytes, so the subtractions below exchange the two buffer start addresses.
180 @// Reset and Swap pSrc and pDst for the next stage
181 MOV pTmp,pDst
182 SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
183 SUB pSrc,pTmp,outPointStep
184
185 @// Reset pTwiddle for the next stage
186 SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
187
188
189 .endm
190
191 @// Forward stage, unscaled: FFTSTAGE is expanded with scaled=FALSE, inverse=FALSE and label suffix FWD.
192 @// NOTE(review): M_START/M_END are defined outside this file; confirm what passing r4 saves, since the stage body writes r0-r11, D0 and D2-D9.
193 M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
194 FFTSTAGE "FALSE","FALSE",FWD
195 M_END
196
197 @// Inverse stage, unscaled: FFTSTAGE is expanded with scaled=FALSE, inverse=TRUE and label suffix INV.
198 @// NOTE(review): M_START/M_END are defined outside this file; confirm what passing r4 saves, since the stage body writes r0-r11, D0 and D2-D9.
199 M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
200 FFTSTAGE "FALSE","TRUE",INV
201 M_END
202
203 @// Forward stage, scaled: FFTSTAGE is expanded with scaled=TRUE (halving add/sub), inverse=FALSE and label suffix FWDSFS.
204 @// NOTE(review): M_START/M_END are defined outside this file; confirm what passing r4 saves, since the stage body writes r0-r11, D0 and D2-D9.
205 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
206 FFTSTAGE "TRUE","FALSE",FWDSFS
207 M_END
208
209 @// Inverse stage, scaled: FFTSTAGE is expanded with scaled=TRUE (halving add/sub), inverse=TRUE and label suffix INVSFS.
210 @// NOTE(review): M_START/M_END are defined outside this file; confirm what passing r4 saves, since the stage body writes r0-r11, D0 and D2-D9.
211 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
212 FFTSTAGE "TRUE","TRUE",INVSFS
213 M_END
214
215
216
217
218
219 .END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698