Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(253)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12 @//
13
14 @//
15 @// File Name: armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7770
18 @// Last Modified Date: Thu, 27 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a first stage Radix 8 FFT stage for a N point complex signal
26 @//
27
28
29
30
31 @// Include standard headers
32
33 #include "dl/api/armCOMM_s.h"
34 #include "dl/api/omxtypes_s.h"
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40 @// Set debugging level
41 @//DEBUG_ON SETL {TRUE}
42
43
44
45 @// Guarding implementation by the processor name
46
47
48
49
50 @// Guarding implementation by the processor name
51
52 @// Input registers (core register aliases); pTwiddle is defined but not referenced by any instruction in this file
53
54 #define pSrc r0
55 #define pDst r2
56 #define pTwiddle r1
57 #define subFFTNum r6
58 #define subFFTSize r7
59 @// dest buffer for the next stage (not pSrc for first stage)
60 #define pPingPongBuf r5
61
62
63 @// Output registers: none are declared separately; FFTSTAGE rewrites subFFTNum, subFFTSize, pSrc and pDst in place
64
65
66 @// Local scratch registers
67 @// grpSize and setCount share r3; pointStep and outPointStep share r4 (outPointStep is not used by any instruction in this file)
68 #define grpSize r3
69 @// Reuse grpSize as setCount
70 #define setCount r3
71 #define pointStep r4
72 #define outPointStep r4
73 #define setStep r8
74 #define step1 r9
75 #define step2 r10
76 #define t0 r11
77
78
79 @// Neon registers: X0..X7 hold the loaded input points; qXn (Q0..Q7) pairs dXrn (low D, loaded first by VLD2) with dXin (high D), two S32 lanes each
80
81 #define dXr0 D0.S32
82 #define dXi0 D1.S32
83 #define dXr1 D2.S32
84 #define dXi1 D3.S32
85 #define dXr2 D4.S32
86 #define dXi2 D5.S32
87 #define dXr3 D6.S32
88 #define dXi3 D7.S32
89 #define dXr4 D8.S32
90 #define dXi4 D9.S32
91 #define dXr5 D10.S32
92 #define dXi5 D11.S32
93 #define dXr6 D12.S32
94 #define dXi6 D13.S32
95 #define dXr7 D14.S32
96 #define dXi7 D15.S32
97 #define qX0 Q0.S32
98 #define qX1 Q1.S32
99 #define qX2 Q2.S32
100 #define qX3 Q3.S32
101 #define qX4 Q4.S32
102 #define qX5 Q5.S32
103 #define qX6 Q6.S32
104 #define qX7 Q7.S32
105 @// U registers: results of the first butterfly stage; even-index U live in Q8..Q11, odd-index U in Q12..Q15
106 #define dUr0 D16.S32
107 #define dUi0 D17.S32
108 #define dUr2 D18.S32
109 #define dUi2 D19.S32
110 #define dUr4 D20.S32
111 #define dUi4 D21.S32
112 #define dUr6 D22.S32
113 #define dUi6 D23.S32
114 #define dUr1 D24.S32
115 #define dUi1 D25.S32
116 #define dUr3 D26.S32
117 #define dUi3 D27.S32
118 #define dUr5 D28.S32
119 #define dUi5 D29.S32
120 @// NOTE(review): the original note here said dUr7/dUi7 reuse dXr7/dXi7, but they map to D30/D31 while dXr7/dXi7 are D14/D15
121 #define dUr7 D30.S32
122 #define dUi7 D31.S32
123 #define qU0 Q8.S32
124 #define qU1 Q12.S32
125 #define qU2 Q9.S32
126 #define qU3 Q13.S32
127 #define qU4 Q10.S32
128 #define qU5 Q14.S32
129 #define qU6 Q11.S32
130 #define qU7 Q15.S32
131
132
133 @// V registers: results of the second butterfly stage; they overlay the U registers with the even/odd groups swapped (qV0 = Q12 = qU1, qV1 = Q8 = qU0)
134 #define dVr0 D24.S32
135 #define dVi0 D25.S32
136 #define dVr2 D26.S32
137 #define dVi2 D27.S32
138 #define dVr4 D28.S32
139 #define dVi4 D29.S32
140 #define dVr6 D30.S32
141 #define dVi6 D31.S32
142 #define dVr1 D16.S32
143 #define dVi1 D17.S32
144 #define dVr3 D18.S32
145 #define dVi3 D19.S32
146 #define dVr5 D20.S32
147 #define dVi5 D21.S32
148 #define dVr7 D22.S32
149 #define dVi7 D23.S32
150 #define qV0 Q12.S32
151 #define qV1 Q8.S32
152 #define qV2 Q13.S32
153 #define qV3 Q9.S32
154 #define qV4 Q14.S32
155 #define qV5 Q10.S32
156 #define qV6 Q15.S32
157 #define qV7 Q11.S32
158
159
160 @// Y registers: results of the third butterfly stage that get stored to pDst; same register assignment as the U registers (qY0 = Q8, qY1 = Q12)
161 #define dYr0 D16.S32
162 #define dYi0 D17.S32
163 #define dYr2 D18.S32
164 #define dYi2 D19.S32
165 #define dYr4 D20.S32
166 #define dYi4 D21.S32
167 #define dYr6 D22.S32
168 #define dYi6 D23.S32
169 #define dYr1 D24.S32
170 #define dYi1 D25.S32
171 #define dYr3 D26.S32
172 #define dYi3 D27.S32
173 #define dYr5 D28.S32
174 #define dYi5 D29.S32
175 #define dYr7 D30.S32
176 #define dYi7 D31.S32
177 #define qY0 Q8.S32
178 #define qY1 Q12.S32
179 #define qY2 Q9.S32
180 #define qY3 Q13.S32
181 #define qY4 Q10.S32
182 #define qY5 Q14.S32
183 #define qY6 Q11.S32
184 #define qY7 Q15.S32
185
186 @// Temporaries: dT0/dT1 are D14/D15, the same registers as dXr7/dXi7 (Q7); FFTSTAGE writes dT0[0] only after its last read of qX7 in each pass
187 #define dT0 D14.S32
188 #define dT1 D15.S32
189
190 @// Define constants
191 .set ONEBYSQRT2, 0x5A82799A @// Q31 format: 1/sqrt(2) scaled by 2^31 and rounded (0.70710678...)
192
193
194 .MACRO FFTSTAGE scaled, inverse, name
195 @// Body of one first-stage radix-8 pass over all sets. scaled = TRUE selects the halving VHADD/VHSUB butterflies, otherwise plain VADD/VSUB are used.
196 @// Define stack arguments
197 @// inverse = TRUE selects the inverse-transform branches; name is appended to the loop label so that each expansion gets its own label.
198 @// Update pSubFFTSize and pSubFFTNum regs
199 MOV subFFTSize,#8 @// subFFTSize = 8 after the first (radix-8) stage
200 LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q31 value
201
202 @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
203 LSR grpSize,subFFTNum,#3
204 MOV subFFTNum,grpSize
205
206
207 @// pT0+1 increments pT0 by 8 bytes
208 @// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
209 @// Note: outPointStep = pointStep for firststage
210
211 MOV pointStep,grpSize,LSL #3 @// pointStep = 8*grpSize, the byte stride used between data[k] and data[k+1]
212
213
214 @// Calculate the step of input data for the next set
215 @//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
216 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
217 MOV step1,grpSize,LSL #4 @// step1 = 16*grpSize = 2*pointStep (stride between consecutive stores)
218
219 MOV step2,pointStep,LSL #3
220 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
221 SUB step2,step2,pointStep @// step2 = 7*pointStep
222 RSB setStep,step2,#16 @// setStep = -7*pointStep+16
223
224 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
225 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
226 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
227 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
228 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
229 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
230 @// setStep = -7*pointStep + 16
231 @// grp = 0 a special case since all the twiddle factors are 1
232 @// Loop on the sets
233
234 grpZeroSetLoop\name :
235
236 @// Decrement setcount
237 SUBS setCount,setCount,#2 @// decrement the set loop counter by 2; the flags set here are consumed by the BGT at the loop end
238
239
240 .ifeqs "\scaled", "TRUE"
241 @// finish first stage of 8 point FFT
242
243 VHADD qU0,qX0,qX4
244 VHADD qU2,qX1,qX5
245 VHADD qU4,qX2,qX6
246 VHADD qU6,qX3,qX7
247
248 @// finish second stage of 8 point FFT
249
250 VHADD qV0,qU0,qU4
251 VHSUB qV2,qU0,qU4
252 VHADD qV4,qU2,qU6
253 VHSUB qV6,qU2,qU6
254
255 @// finish third stage of 8 point FFT
256
257 VHADD qY0,qV0,qV4
258 VHSUB qY4,qV0,qV4
259 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
260
261 .ifeqs "\inverse", "TRUE"
262
263 VHSUB dYr2,dVr2,dVi6
264 VHADD dYi2,dVi2,dVr6
265
266 VHADD dYr6,dVr2,dVi6
267 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
268 VHSUB dYi6,dVi2,dVr6
269
270 VHSUB qU1,qX0,qX4
271 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
272
273 VHSUB qU3,qX1,qX5
274 VHSUB qU5,qX2,qX6
275 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
276
277 .ELSE
278
279 VHADD dYr6,dVr2,dVi6
280 VHSUB dYi6,dVi2,dVr6
281
282 VHSUB dYr2,dVr2,dVi6
283 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2 (from the dY6 registers in this branch)
284 VHADD dYi2,dVi2,dVr6
285
286
287 VHSUB qU1,qX0,qX4
288 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
289 VHSUB qU3,qX1,qX5
290 VHSUB qU5,qX2,qX6
291 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6 (from the dY2 registers in this branch)
292
293
294 .ENDIF
295
296 @// finish first stage of 8 point FFT
297
298 VHSUB qU7,qX3,qX7
299 VMOV dT0[0],t0 @// dT0 is D14 (low half of qX7); written only after the last read of qX7 above
300
301 @// finish second stage of 8 point FFT
302
303 VHSUB dVr1,dUr1,dUi5
304 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration (issued on the final pass as well)
305 VHADD dVi1,dUi1,dUr5
306 VHADD dVr3,dUr1,dUi5
307 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
308 VHSUB dVi3,dUi1,dUr5
309
310 VHSUB dVr5,dUr3,dUi7
311 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
312 VHADD dVi5,dUi3,dUr7
313 VHADD dVr7,dUr3,dUi7
314 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
315 VHSUB dVi7,dUi3,dUr7
316
317 @// finish third stage of 8 point FFT
318
319 .ifeqs "\inverse", "TRUE"
320
321 @// calculate a*v5
322 VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) = dVr5 * (1/sqrt(2))
323 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
324 VQRDMULH dVi5,dVi5,dT0[0]
325
326 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
327 VSUB dVr5,dT1,dVi5 @// a * V5
328 VADD dVi5,dT1,dVi5
329
330 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
331
332 @// calculate b*v7
333 VQRDMULH dT1,dVr7,dT0[0]
334 VQRDMULH dVi7,dVi7,dT0[0]
335
336 VHADD qY1,qV1,qV5
337 VHSUB qY5,qV1,qV5
338
339
340 VADD dVr7,dT1,dVi7 @// b * V7
341 VSUB dVi7,dVi7,dT1
342 SUB pDst, pDst, step2 @// set pDst to y1
343
344 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D14/D15, so dT0/dT1 must not be read after this point
345
346
347 VHSUB dYr3,dVr3,dVr7
348 VHSUB dYi3,dVi3,dVi7
349 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
350 VHADD dYr7,dVr3,dVr7
351 VHADD dYi7,dVi3,dVi7
352
353
354 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
355 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
356 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
357
358 .ELSE
359
360 @// calculate b*v7
361 VQRDMULH dT1,dVr7,dT0[0]
362 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
363 VQRDMULH dVi7,dVi7,dT0[0]
364
365 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
366 VADD dVr7,dT1,dVi7 @// b * V7
367 VSUB dVi7,dVi7,dT1
368
369 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
370
371 @// calculate a*v5
372 VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) = dVr5 * (1/sqrt(2))
373 VQRDMULH dVi5,dVi5,dT0[0]
374
375 VHADD dYr7,dVr3,dVr7
376 VHADD dYi7,dVi3,dVi7
377 SUB pDst, pDst, step2 @// set pDst to y1
378
379 VSUB dVr5,dT1,dVi5 @// a * V5
380 VADD dVi5,dT1,dVi5
381 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D14/D15, so dT0/dT1 must not be read after this point
382
383 VHSUB qY5,qV1,qV5
384
385 VHSUB dYr3,dVr3,dVr7
386 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1 (from the dY7 registers in this branch)
387 VHSUB dYi3,dVi3,dVi7
388 VHADD qY1,qV1,qV5
389
390
391 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3 (from the dY5 registers in this branch)
392 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5 (from the dY3 registers in this branch)
393 VST2 {dYr1,dYi1},[pDst :128]! @// store y7 (from the dY1 registers in this branch)
394
395 .ENDIF
396
397
398
399 .ELSE
400 @// finish first stage of 8 point FFT
401
402 VADD qU0,qX0,qX4
403 VADD qU2,qX1,qX5
404 VADD qU4,qX2,qX6
405 VADD qU6,qX3,qX7
406
407 @// finish second stage of 8 point FFT
408
409 VADD qV0,qU0,qU4
410 VSUB qV2,qU0,qU4
411 VADD qV4,qU2,qU6
412 VSUB qV6,qU2,qU6
413
414 @// finish third stage of 8 point FFT
415
416 VADD qY0,qV0,qV4
417 VSUB qY4,qV0,qV4
418 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
419
420 .ifeqs "\inverse", "TRUE"
421
422 VSUB dYr2,dVr2,dVi6
423 VADD dYi2,dVi2,dVr6
424
425 VADD dYr6,dVr2,dVi6
426 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
427 VSUB dYi6,dVi2,dVr6
428
429 VSUB qU1,qX0,qX4
430 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
431
432 VSUB qU3,qX1,qX5
433 VSUB qU5,qX2,qX6
434 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
435
436 .ELSE
437
438 VADD dYr6,dVr2,dVi6
439 VSUB dYi6,dVi2,dVr6
440
441 VSUB dYr2,dVr2,dVi6
442 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2 (from the dY6 registers in this branch)
443 VADD dYi2,dVi2,dVr6
444
445
446 VSUB qU1,qX0,qX4
447 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
448 VSUB qU3,qX1,qX5
449 VSUB qU5,qX2,qX6
450 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6 (from the dY2 registers in this branch)
451
452
453 .ENDIF
454
455 @// finish first stage of 8 point FFT
456
457 VSUB qU7,qX3,qX7
458 VMOV dT0[0],t0 @// dT0 is D14 (low half of qX7); written only after the last read of qX7 above
459
460 @// finish second stage of 8 point FFT
461
462 VSUB dVr1,dUr1,dUi5
463 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration (issued on the final pass as well)
464 VADD dVi1,dUi1,dUr5
465 VADD dVr3,dUr1,dUi5
466 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
467 VSUB dVi3,dUi1,dUr5
468
469 VSUB dVr5,dUr3,dUi7
470 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
471 VADD dVi5,dUi3,dUr7
472 VADD dVr7,dUr3,dUi7
473 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
474 VSUB dVi7,dUi3,dUr7
475
476 @// finish third stage of 8 point FFT
477
478 .ifeqs "\inverse", "TRUE"
479
480 @// calculate a*v5
481 VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) = dVr5 * (1/sqrt(2))
482 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
483 VQRDMULH dVi5,dVi5,dT0[0]
484
485 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
486 VSUB dVr5,dT1,dVi5 @// a * V5
487 VADD dVi5,dT1,dVi5
488
489 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
490
491 @// calculate b*v7
492 VQRDMULH dT1,dVr7,dT0[0]
493 VQRDMULH dVi7,dVi7,dT0[0]
494
495 VADD qY1,qV1,qV5
496 VSUB qY5,qV1,qV5
497
498
499 VADD dVr7,dT1,dVi7 @// b * V7
500 VSUB dVi7,dVi7,dT1
501 SUB pDst, pDst, step2 @// set pDst to y1
502
503 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D14/D15, so dT0/dT1 must not be read after this point
504
505
506 VSUB dYr3,dVr3,dVr7
507 VSUB dYi3,dVi3,dVi7
508 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
509 VADD dYr7,dVr3,dVr7
510 VADD dYi7,dVi3,dVi7
511
512
513 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
514 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
515 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
516
517 .ELSE
518
519 @// calculate b*v7
520 VQRDMULH dT1,dVr7,dT0[0]
521 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
522 VQRDMULH dVi7,dVi7,dT0[0]
523
524 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
525 VADD dVr7,dT1,dVi7 @// b * V7
526 VSUB dVi7,dVi7,dT1
527
528 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
529
530 @// calculate a*v5
531 VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) = dVr5 * (1/sqrt(2))
532 VQRDMULH dVi5,dVi5,dT0[0]
533
534 VADD dYr7,dVr3,dVr7
535 VADD dYi7,dVi3,dVi7
536 SUB pDst, pDst, step2 @// set pDst to y1
537
538 VSUB dVr5,dT1,dVi5 @// a * V5
539 VADD dVi5,dT1,dVi5
540 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]; overwrites D14/D15, so dT0/dT1 must not be read after this point
541
542 VSUB qY5,qV1,qV5
543
544 VSUB dYr3,dVr3,dVr7
545 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1 (from the dY7 registers in this branch)
546 VSUB dYi3,dVi3,dVi7
547 VADD qY1,qV1,qV5
548
549
550 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3 (from the dY5 registers in this branch)
551 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5 (from the dY3 registers in this branch)
552 VST2 {dYr1,dYi1},[pDst :128]! @// store y7 (from the dY1 registers in this branch)
553
554 .ENDIF
555
556
557 .ENDIF
558
559 SUB pDst, pDst, step2 @// update pDst for the next set
560 BGT grpZeroSetLoop\name @// repeat while setCount > 0, using the flags from the SUBS at the loop top
561
562
563 @// reset pSrc to pDst for the next stage
564 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep (pointStep = 8*grpSize bytes)
565 MOV pDst,pPingPongBuf
566
567
568
569 .endm
570
571
572 @// Allocate stack memory required by the function
573
574 @// Forward entry point, non-halving path: expands FFTSTAGE with scaled=FALSE, inverse=FALSE and loop label suffix FWD. M_START/M_END come from the included headers; r4 is the argument passed to M_START.
575 M_START armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
576 FFTSTAGE "FALSE","FALSE",FWD
577 M_END
578
579 @// Inverse entry point, non-halving path: expands FFTSTAGE with scaled=FALSE, inverse=TRUE and loop label suffix INV.
580 M_START armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
581 FFTSTAGE "FALSE","TRUE",INV
582 M_END
583
584 @// Forward entry point, halving (VHADD/VHSUB) path: expands FFTSTAGE with scaled=TRUE, inverse=FALSE and loop label suffix FWDSFS.
585 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
586 FFTSTAGE "TRUE","FALSE",FWDSFS
587 M_END
588
589 @// Inverse entry point, halving (VHADD/VHSUB) path: expands FFTSTAGE with scaled=TRUE, inverse=TRUE and loop label suffix INVSFS.
590 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
591 FFTSTAGE "TRUE","TRUE",INVSFS
592 M_END
593
594
595 .end
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698