Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name: armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 6741
18 @// Last Modified Date: Wed, 18 Jul 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a radix-2 FFT stage for an N-point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40
41
42 @// Set debugging level
43 @//DEBUG_ON SETL {TRUE}
44
45
46 @// Guarding implementation by the processor name
47
48
49
50
51
52
53
54 @// Guarding implementation by the processor name
55
56
57 @// Input registers (core register aliases read by FFTSTAGE on entry)
58 @// pSrc, pDst and pTwiddle are used as post-incremented pointers; FFTSTAGE also overwrites subFFTNum and subFFTSize.
59 #define pSrc r0
60 #define pDst r2
61 #define pTwiddle r1
62 #define subFFTNum r6
63 #define subFFTSize r7
64
65
66 @// Output registers
67 @// None are declared here, but FFTSTAGE leaves updated values in subFFTNum, subFFTSize, pSrc, pDst and pTwiddle.
68
69 @// Local scratch registers
70 @// grpCount and pTmp both alias r4: grpCount is the loop counter, pTmp is only written after the loop has finished.
71
72 #define outPointStep r3
73 #define grpCount r4
74 #define dstStep r5
75 #define pTmp r4
76 #define step r8
77
78 @// Neon registers
79 @// D0-D9 are accessed as signed 16-bit lanes; Q5 and Q6 (which alias D10-D13) hold the widened 32-bit products.
80 #define dWr D0.S16
81 #define dWi D1.S16
82 #define dXr0 D2.S16
83 #define dXi0 D3.S16
84 #define dXr1 D4.S16
85 #define dXi1 D5.S16
86 #define dYr0 D6.S16
87 #define dYi0 D7.S16
88 #define dYr1 D8.S16
89 #define dYi1 D9.S16
90 #define qT0 Q5.S32
91 #define qT1 Q6.S32
92
93
94 .MACRO FFTSTAGE scaled, inverse, name
95 @// Radix-2 butterfly pass over 16-bit complex data, two groups per loop pass, written out of place from pSrc to pDst.
96 @// scaled = TRUE selects halving add/sub; inverse = TRUE multiplies by the conjugated twiddle; name is appended to the loop label to keep it unique.
97 MOV outPointStep,subFFTSize,LSL #2 @// outPointStep = 4*subFFTSize (bytes)
98 @// Update grpCount and grpSize right away
99
100 MOV subFFTNum,#1 @// subFFTNum is 1 after the last stage
101 LSL grpCount,subFFTSize,#1 @// grpCount = 2*subFFTSize
102
103 @// update subFFTSize for the next stage
104 MOV subFFTSize,grpCount @// subFFTSize is doubled
105
106 SUB step,outPointStep,#4 @// step = -4+outPointStep
107 RSB dstStep,step,#0 @// dstStep = -4-outPointStep+8 = -step
108 @//RSB dstStep,outPointStep,#16
109 @// Per group the loop reads one complex twiddle (4 bytes) and two complex inputs (8 bytes).
110 @// Only lanes 0 and 1 of each D register are loaded and stored; lanes 2 and 3 take part in the arithmetic but are never written out.
111 @// Loop on 2 grps at a time for the last stage
112
113 grpLoop\name:
114 VLD2 {dWr[0],dWi[0]},[pTwiddle]! @// grp 0
115 VLD2 {dWr[1],dWi[1]},[pTwiddle]! @// grp 1
116
117 @//VLD2 {dWr,dWi},[pTwiddle],#16
118
119 VLD4 {dXr0[0],dXi0[0],dXr1[0],dXi1[0]},[pSrc]! @// grp 0
120 VLD4 {dXr0[1],dXi0[1],dXr1[1],dXi1[1]},[pSrc]! @// grp 1
121
122
123 @//VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc],#32
124 SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2, so two groups cost 4; the flags set here are consumed by the BGT at the loop end
125 @// qT0/qT1 = 32-bit real/imag parts of X1*conj(W) when inverse is TRUE, of X1*W otherwise.
126 .ifeqs "\inverse", "TRUE"
127 VMULL qT0,dXr1,dWr
128 VMLAL qT0,dXi1,dWi @// real part
129 VMULL qT1,dXi1,dWr
130 VMLSL qT1,dXr1,dWi @// imag part
131
132 .ELSE
133 VMULL qT0,dXr1,dWr
134 VMLSL qT0,dXi1,dWi @// real part
135 VMULL qT1,dXi1,dWr
136 VMLAL qT1,dXr1,dWi @// imag part
137
138 .ENDIF
139 @// Round and narrow the products back to 16 bits (rounding shift right by 15; presumably the twiddles are Q15, confirm against the table).
140 VRSHRN dXr1,qT0,#15
141 VRSHRN dXi1,qT1,#15
142
143 @// Butterfly: Y0 = X0 - T and Y1 = X0 + T, where T is the rotated X1 computed above; the scaled variant halves both results.
144 .ifeqs "\scaled", "TRUE"
145
146 VHSUB dYr0,dXr0,dXr1
147 VHSUB dYi0,dXi0,dXi1
148 VHADD dYr1,dXr0,dXr1
149 VHADD dYi1,dXi0,dXi1
150
151 .ELSE
152
153 VSUB dYr0,dXr0,dXr1
154 VSUB dYi0,dXi0,dXi1
155 VADD dYr1,dXr0,dXr1
156 VADD dYi1,dXi0,dXi1
157
158
159 .ENDIF
160 @// Y0 of both groups is stored at pDst, Y1 of both groups outPointStep bytes further on; the net pDst advance per pass is 8 bytes.
161 VST2 {dYr0[0],dYi0[0]},[pDst]!
162 VST2 {dYr0[1],dYi0[1]},[pDst],step @// step = -4+outPointStep
163 @// NOTE(review): the difference lands in the first half and the sum in the second half; confirm this matches the twiddle table sign convention.
164 VST2 {dYr1[0],dYi1[0]},[pDst]!
165 VST2 {dYr1[1],dYi1[1]},[pDst],dstStep @// dstStep = -4-outPointStep+8 = -step
166
167 @//VST2 {dYr0,dYi0},[pDst],outPointStep
168 @//VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
169
170 BGT grpLoop\name
171 @// The loop runs grpCount/4 passes when grpCount is a positive multiple of 4; the pointer rewinds below assume that.
172
173 @// Reset and Swap pSrc and pDst for the next stage
174 MOV pTmp,pDst
175 SUB pDst,pSrc,outPointStep,LSL #1 @// pDst = pSrc - 2*outPointStep (pSrc advanced 16 bytes per loop pass)
176 SUB pSrc,pTmp,outPointStep @// pSrc = previous pDst - outPointStep (pDst advanced 8 bytes per loop pass)
177
178 @// Reset pTwiddle for the next stage
179 SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= outPointStep (it advanced 8 bytes per loop pass)
180
181 .endm
182
183 @// Forward, unscaled variant: expands FFTSTAGE with scaled=FALSE and inverse=FALSE, using FWD as the loop label suffix.
184 @// NOTE(review): M_START/M_END are not defined in this file (presumably in armCOMM_s.h); confirm which registers the r4 argument saves, since FFTSTAGE also writes r5, r8 and D8-D13.
185 M_START armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
186 FFTSTAGE "FALSE","FALSE",FWD
187 M_END
188
189 @// Inverse, unscaled variant: expands FFTSTAGE with scaled=FALSE and inverse=TRUE (conjugated twiddle), using INV as the loop label suffix.
190 @// NOTE(review): M_START/M_END are not defined in this file (presumably in armCOMM_s.h); confirm which registers the r4 argument saves, since FFTSTAGE also writes r5, r8 and D8-D13.
191 M_START armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
192 FFTSTAGE "FALSE","TRUE",INV
193 M_END
194
195 @// Forward, scaled variant: expands FFTSTAGE with scaled=TRUE (halving add/sub) and inverse=FALSE, using FWDSFS as the loop label suffix.
196 @// NOTE(review): M_START/M_END are not defined in this file (presumably in armCOMM_s.h); confirm which registers the r4 argument saves, since FFTSTAGE also writes r5, r8 and D8-D13.
197 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
198 FFTSTAGE "TRUE","FALSE",FWDSFS
199 M_END
200
201 @// Inverse, scaled variant: expands FFTSTAGE with scaled=TRUE (halving add/sub) and inverse=TRUE (conjugated twiddle), using INVSFS as the loop label suffix.
202 @// NOTE(review): M_START/M_END are not defined in this file (presumably in armCOMM_s.h); confirm which registers the r4 argument saves, since FFTSTAGE also writes r5, r8 and D8-D13.
203 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
204 FFTSTAGE "TRUE","TRUE",INVSFS
205 M_END
206
207
208
209
210 .END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698