Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(169)

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 @//
2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
9 @//
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7766
18 @// Last Modified Date: Thu, 27 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a first stage Radix 8 FFT stage for a N point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/armCOMM_s.h"
33 #include "dl/api/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40 @// Set debugging level
41 @//DEBUG_ON SETL {TRUE}
42
43
44
45 @// Guarding implementation by the processor name
46
47
48
49
50 @// Guarding implementation by the processor name
51
52
53 @//Input Registers
@// Core-register aliases for the values FFTSTAGE reads or updates.
@// pTwiddle is only given a name here: the FFTSTAGE macro in this file
@// never references it (its group 0 twiddle factors are all 1).
54
55 #define pSrc r0
56 #define pDst r2
57 #define pTwiddle r1
58 #define subFFTNum r6
59 #define subFFTSize r7
60 @// dest buffer for the next stage (not pSrc for first stage)
61 #define pPingPongBuf r5
62
63
64 @//Output Registers
65
66
67 @//Local Scratch Registers
@// Two pairs of names deliberately share one register each:
@//   grpSize and setCount are both r3, pointStep and outPointStep are both r4.
@// outPointStep is declared but not referenced by the FFTSTAGE macro in this file.
68
69 #define grpSize r3
70 @// Reuse grpSize as setCount
71 #define setCount r3
72 #define pointStep r4
73 #define outPointStep r4
74 #define setStep r8
75 #define step1 r9
76 #define step2 r10
77 #define t0 r11
78
79
80 @// Neon Registers
@// X family: the eight inputs of a radix-8 butterfly. VLD2 de-interleaves
@// each load into a real register (dXrN) and an imaginary register (dXiN);
@// qXN is the Q register that covers that dXrN/dXiN pair.
@// X0 sits in D14/D15 (Q7) and X7 in D0/D1 (Q0); X1..X6 use D2..D13 in order.
81
82 #define dXr0 D14.S16
83 #define dXi0 D15.S16
84 #define dXr1 D2.S16
85 #define dXi1 D3.S16
86 #define dXr2 D4.S16
87 #define dXi2 D5.S16
88 #define dXr3 D6.S16
89 #define dXi3 D7.S16
90 #define dXr4 D8.S16
91 #define dXi4 D9.S16
92 #define dXr5 D10.S16
93 #define dXi5 D11.S16
94 #define dXr6 D12.S16
95 #define dXi6 D13.S16
96 #define dXr7 D0.S16
97 #define dXi7 D1.S16
98 #define qX0 Q7.S16
99 #define qX1 Q1.S16
100 #define qX2 Q2.S16
101 #define qX3 Q3.S16
102 #define qX4 Q4.S16
103 #define qX5 Q5.S16
104 #define qX6 Q6.S16
105 #define qX7 Q0.S16
106
@// U family: results of the first butterfly stage, held in D16-D31 (Q8-Q15).
@// Even-numbered U values use D16-D23, odd-numbered ones D24-D31.
107 #define dUr0 D16.S16
108 #define dUi0 D17.S16
109 #define dUr2 D18.S16
110 #define dUi2 D19.S16
111 #define dUr4 D20.S16
112 #define dUi4 D21.S16
113 #define dUr6 D22.S16
114 #define dUi6 D23.S16
115 #define dUr1 D24.S16
116 #define dUi1 D25.S16
117 #define dUr3 D26.S16
118 #define dUi3 D27.S16
119 #define dUr5 D28.S16
120 #define dUi5 D29.S16
121 @// NOTE(review): this comment used to say "reuse dXr7 and dXi7", but dXr7/dXi7 are D0/D1 while dUr7/dUi7 are D30/D31
122 #define dUr7 D30.S16
123 #define dUi7 D31.S16
124 #define qU0 Q8.S16
125 #define qU1 Q12.S16
126 #define qU2 Q9.S16
127 #define qU3 Q13.S16
128 #define qU4 Q10.S16
129 #define qU5 Q14.S16
130 #define qU6 Q11.S16
131 #define qU7 Q15.S16
132
133
134
@// V family: results of the second butterfly stage. It overlays the same
@// D16-D31 bank as the U family with the halves swapped: even-numbered V
@// values use D24-D31 and odd-numbered ones D16-D23.
135 #define dVr0 D24.S16
136 #define dVi0 D25.S16
137 #define dVr2 D26.S16
138 #define dVi2 D27.S16
139 #define dVr4 D28.S16
140 #define dVi4 D29.S16
141 #define dVr6 D30.S16
142 #define dVi6 D31.S16
143 #define dVr1 D16.S16
144 #define dVi1 D17.S16
145 #define dVr3 D18.S16
146 #define dVi3 D19.S16
147 #define dVr5 D20.S16
148 #define dVi5 D21.S16
149 @// shares D22 with dUr6 (the old comment named dUi7, which is D31)
150 #define dVr7 D22.S16
151 @// shares D23 with dUi6 (the old comment named dUr7, which is D30)
152 #define dVi7 D23.S16
153 #define qV0 Q12.S16
154 #define qV1 Q8.S16
155 #define qV2 Q13.S16
156 #define qV3 Q9.S16
157 #define qV4 Q14.S16
158 #define qV5 Q10.S16
159 #define qV6 Q15.S16
160 #define qV7 Q11.S16
161
162
163
@// Y family: the eight butterfly outputs that VST2 writes back. The register
@// assignment is identical to the U family (even Y in D16-D23, odd Y in D24-D31).
164 #define dYr0 D16.S16
165 #define dYi0 D17.S16
166 #define dYr2 D18.S16
167 #define dYi2 D19.S16
168 #define dYr4 D20.S16
169 #define dYi4 D21.S16
170 #define dYr6 D22.S16
171 #define dYi6 D23.S16
172 #define dYr1 D24.S16
173 #define dYi1 D25.S16
174 #define dYr3 D26.S16
175 #define dYi3 D27.S16
176 #define dYr5 D28.S16
177 #define dYi5 D29.S16
178 @// NOTE(review): this comment used to say "reuse dYr4 and dYi4", but dYr4/dYi4 are D20/D21 while dYr7/dYi7 are D30/D31 (shared with dUr7/dUi7 and dVr6/dVi6)
179 #define dYr7 D30.S16
180 #define dYi7 D31.S16
181 #define qY0 Q8.S16
182 #define qY1 Q12.S16
183 #define qY2 Q9.S16
184 #define qY3 Q13.S16
185 #define qY4 Q10.S16
186 #define qY5 Q14.S16
187 #define qY6 Q11.S16
188 #define qY7 Q15.S16
189
190
@// Temporaries for the 1/sqrt(2) multiplies: dT0[0] holds the constant and
@// dT1 a partial product. They are D0/D1, the same registers as dXr7/dXi7,
@// so they are only valid between the last use of X7 and the next data[7] load.
191 #define dT0 D0.S16
192 #define dT1 D1.S16
193
194
195 @// Define constants
196 .set ONEBYSQRT2, 0x00005A82 @// 1/sqrt(2) in Q15 format: 0x5A82 = 23170, and 23170/32768 = 0.70709
197
198
199 .MACRO FFTSTAGE scaled, inverse , name
@// FFTSTAGE: first-stage radix-8 butterfly pass over 16-bit complex data,
@// processing four sets (16 bytes per load or store) on every loop pass.
@//   scaled  - "TRUE" selects the halving VHADD/VHSUB butterflies,
@//             anything else selects the plain VADD/VSUB ones.
@//   inverse - "TRUE" selects the inverse-transform rotations and store
@//             order, anything else selects the forward ones.
@//   name    - suffix appended to the labels so each expansion is unique.
@// Reads:  pSrc, pDst, subFFTNum, pPingPongBuf.
@// Writes: pSrc, pDst, subFFTNum, subFFTSize, grpSize/setCount, pointStep,
@//         setStep, step1, step2, t0, the condition flags and NEON D0-D31.
@// On exit subFFTSize is 8, subFFTNum is its entry value divided by 8,
@// pDst is pPingPongBuf and pSrc is the final pDst minus pointStep (the
@// start of the data just written when the set count is a multiple of 4).
@// The loop is software pipelined: the inputs of pass k+1 are loaded while
@// pass k is being computed. The data[7] preload is skipped on the final
@// pass because it would start exactly at the end of the input buffer.
200
201 @// Define stack arguments
202
203 @// Update pSubFFTSize and pSubFFTNum regs
204 MOV subFFTSize,#8 @// subFFTSize = 8 for the first stage
205 LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q15 format
206
207 @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
208 LSR grpSize,subFFTNum,#3
209 MOV subFFTNum,grpSize
210
211
212 @// pT0+1 increments pT0 by 4 bytes
213 @// pT0+pointStep = increment of 4*pointStep bytes = grpSize/2 bytes
214 @// Note: outPointStep = pointStep for firststage
215
216 MOV pointStep,grpSize,LSL #2 @// pointStep = 4*grpSize
217
218
219 @// Calculate the step of input data for the next set
220 @//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
221 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
222 MOV step1,grpSize,LSL #3 @// step1 = 8*grpSize = 2*pointStep
223
224 MOV step2,pointStep,LSL #3
225 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
226 SUB step2,step2,pointStep @// step2 = 7*pointStep
227 RSB setStep,step2,#16 @// setStep = -7*pointStep+16
228
229
230
231 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
232 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
233 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
234 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
235 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
236 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
237 @// setStep = -7*pointStep + 16
238 @// grp = 0 a special case since all the twiddle factors are 1
239 @// Loop on the sets : 4 sets at a time
240
241 grpZeroSetLoop\name:
242
243 @// Decrement setcount
@// The flags set here are consumed twice further down: by the BLE that
@// guards the data[7] preload and by the BGT that closes the loop. No
@// instruction in between may set the flags.
244 SUBS setCount,setCount,#4 @// decrement the set loop counter
245
246
247 .ifeqs "\scaled", "TRUE"
248 @// finish first stage of 8 point FFT
249
250 VHADD qU0,qX0,qX4
251 VHADD qU2,qX1,qX5
252 VHADD qU4,qX2,qX6
253 VHADD qU6,qX3,qX7
254
255 @// finish second stage of 8 point FFT
256
257 VHADD qV0,qU0,qU4
258 VHSUB qV2,qU0,qU4
259 VHADD qV4,qU2,qU6
260 VHSUB qV6,qU2,qU6
261
262 @// finish third stage of 8 point FFT
263
264 VHADD qY0,qV0,qV4
265 VHSUB qY4,qV0,qV4
266 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
267
268 .ifeqs "\inverse", "TRUE"
269
270 VHSUB dYr2,dVr2,dVi6
271 VHADD dYi2,dVi2,dVr6
272
273 VHADD dYr6,dVr2,dVi6
274 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
275 VHSUB dYi6,dVi2,dVr6
276
277 VHSUB qU1,qX0,qX4
278 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
279
280 VHSUB qU3,qX1,qX5
281 VHSUB qU5,qX2,qX6
282 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
283
284 .ELSE
285
286 VHADD dYr6,dVr2,dVi6
287 VHSUB dYi6,dVi2,dVr6
288
289 VHSUB dYr2,dVr2,dVi6
290 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
291 VHADD dYi2,dVi2,dVr6
292
293
294 VHSUB qU1,qX0,qX4
295 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
296 VHSUB qU3,qX1,qX5
297 VHSUB qU5,qX2,qX6
298 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
299
300
301 .ENDIF
302
303 @// finish first stage of 8 point FFT
304
305 VHSUB qU7,qX3,qX7
306 VMOV dT0[0],t0 @// X7 (D0/D1) is consumed, so D0 can now hold 1/sqrt(2)
307
308 @// finish second stage of 8 point FFT
309
310 VHSUB dVr1,dUr1,dUi5
311 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
312 VHADD dVi1,dUi1,dUr5
313 VHADD dVr3,dUr1,dUi5
314 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
315 VHSUB dVi3,dUi1,dUr5
316
317 VHSUB dVr5,dUr3,dUi7
318 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
319 VHADD dVi5,dUi3,dUr7
320 VHADD dVr7,dUr3,dUi7
321 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
322 VHSUB dVi7,dUi3,dUr7
323
324 @// finish third stage of 8 point FFT
325
326 .ifeqs "\inverse", "TRUE"
327
328 @// calculate a*v5
329 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
330 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
331 VQRDMULH dVi5,dVi5,dT0[0]
332
333 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
334 VSUB dVr5,dT1,dVi5 @// a * V5
335 VADD dVi5,dT1,dVi5
336
337 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
338
339 @// calculate b*v7
340 VQRDMULH dT1,dVr7,dT0[0]
341 VQRDMULH dVi7,dVi7,dT0[0]
342
343 VHADD qY1,qV1,qV5
344 VHSUB qY5,qV1,qV5
345
346
347 VADD dVr7,dT1,dVi7 @// b * V7
348 VSUB dVi7,dVi7,dT1
349 SUB pDst, pDst, step2 @// set pDst to y1
350
@// Review finding (aedla, 2013/06/26): on the last iteration this preload
@// read 16 bytes past the end of the input. Skip it when the SUBS at the
@// loop top left no further pass; pSrc is recomputed after the loop, so the
@// missed post-increment does not matter.
BLE skipLastPreload\name
351 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
skipLastPreload\name:
352
353
354 VHSUB dYr3,dVr3,dVr7
355 VHSUB dYi3,dVi3,dVi7
356 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
357 VHADD dYr7,dVr3,dVr7
358 VHADD dYi7,dVi3,dVi7
359
360
361 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
362 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
363 #if 0
364 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
365 #else
366 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
367 #endif
368 .ELSE
369
370 @// calculate b*v7
371 VQRDMULH dT1,dVr7,dT0[0]
372 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
373 VQRDMULH dVi7,dVi7,dT0[0]
374
375 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
376 VADD dVr7,dT1,dVi7 @// b * V7
377 VSUB dVi7,dVi7,dT1
378
379 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
380
381 @// calculate a*v5
382 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
383 VQRDMULH dVi5,dVi5,dT0[0]
384
385 VHADD dYr7,dVr3,dVr7
386 VHADD dYi7,dVi3,dVi7
387 SUB pDst, pDst, step2 @// set pDst to y1
388
389 VSUB dVr5,dT1,dVi5 @// a * V5
390 VADD dVi5,dT1,dVi5
@// Review finding (aedla, 2013/06/26): on the last iteration this preload
@// read 16 bytes past the end of the input. Skip it when the SUBS at the
@// loop top left no further pass; pSrc is recomputed after the loop, so the
@// missed post-increment does not matter.
BLE skipLastPreload\name
391 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
skipLastPreload\name:
392
393 VHSUB qY5,qV1,qV5
394
395 VHSUB dYr3,dVr3,dVr7
396 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
397 VHSUB dYi3,dVi3,dVi7
398 VHADD qY1,qV1,qV5
399
400
401 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
402 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
403 #if 0
404 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
405 #else
406 VST2 {dYr1,dYi1},[pDst :128]! @// store y7
407 #endif
408
409 .ENDIF
410
411
412
413 .ELSE
414 @// finish first stage of 8 point FFT
415
416 VADD qU0,qX0,qX4
417 VADD qU2,qX1,qX5
418 VADD qU4,qX2,qX6
419 VADD qU6,qX3,qX7
420
421 @// finish second stage of 8 point FFT
422
423 VADD qV0,qU0,qU4
424 VSUB qV2,qU0,qU4
425 VADD qV4,qU2,qU6
426 VSUB qV6,qU2,qU6
427
428 @// finish third stage of 8 point FFT
429
430 VADD qY0,qV0,qV4
431 VSUB qY4,qV0,qV4
432 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
433
434 .ifeqs "\inverse", "TRUE"
435
436 VSUB dYr2,dVr2,dVi6
437 VADD dYi2,dVi2,dVr6
438
439 VADD dYr6,dVr2,dVi6
440 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
441 VSUB dYi6,dVi2,dVr6
442
443 VSUB qU1,qX0,qX4
444 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
445
446 VSUB qU3,qX1,qX5
447 VSUB qU5,qX2,qX6
448 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
449
450 .ELSE
451
452 VADD dYr6,dVr2,dVi6
453 VSUB dYi6,dVi2,dVr6
454
455 VSUB dYr2,dVr2,dVi6
456 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
457 VADD dYi2,dVi2,dVr6
458
459
460 VSUB qU1,qX0,qX4
461 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
462 VSUB qU3,qX1,qX5
463 VSUB qU5,qX2,qX6
464 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
465
466
467 .ENDIF
468
469 @// finish first stage of 8 point FFT
470
471 VSUB qU7,qX3,qX7
472 VMOV dT0[0],t0 @// X7 (D0/D1) is consumed, so D0 can now hold 1/sqrt(2)
473
474 @// finish second stage of 8 point FFT
475
476 VSUB dVr1,dUr1,dUi5
477 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
478 VADD dVi1,dUi1,dUr5
479 VADD dVr3,dUr1,dUi5
480 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
481 VSUB dVi3,dUi1,dUr5
482
483 VSUB dVr5,dUr3,dUi7
484 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
485 VADD dVi5,dUi3,dUr7
486 VADD dVr7,dUr3,dUi7
487 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
488 VSUB dVi7,dUi3,dUr7
489
490 @// finish third stage of 8 point FFT
491
492 .ifeqs "\inverse", "TRUE"
493
494 @// calculate a*v5
495 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
496 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
497 VQRDMULH dVi5,dVi5,dT0[0]
498
499 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
500 VSUB dVr5,dT1,dVi5 @// a * V5
501 VADD dVi5,dT1,dVi5
502
503 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
504
505 @// calculate b*v7
506 VQRDMULH dT1,dVr7,dT0[0]
507 VQRDMULH dVi7,dVi7,dT0[0]
508
509 VADD qY1,qV1,qV5
510 VSUB qY5,qV1,qV5
511
512
513 VADD dVr7,dT1,dVi7 @// b * V7
514 VSUB dVi7,dVi7,dT1
515 SUB pDst, pDst, step2 @// set pDst to y1
516
@// Review finding (aedla, 2013/06/26): on the last iteration this preload
@// read 16 bytes past the end of the input. Skip it when the SUBS at the
@// loop top left no further pass; pSrc is recomputed after the loop, so the
@// missed post-increment does not matter.
BLE skipLastPreload\name
517 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
skipLastPreload\name:
518
519
520 VSUB dYr3,dVr3,dVr7
521 VSUB dYi3,dVi3,dVi7
522 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
523 VADD dYr7,dVr3,dVr7
524 VADD dYi7,dVi3,dVi7
525
526
527 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
528 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
529 #if 0
530 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
531 #else
532 VST2 {dYr7,dYi7},[pDst :128]! @// store y7
533 #endif
534 .ELSE
535
536 @// calculate b*v7
537 VQRDMULH dT1,dVr7,dT0[0]
538 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
539 VQRDMULH dVi7,dVi7,dT0[0]
540
541 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
542 VADD dVr7,dT1,dVi7 @// b * V7
543 VSUB dVi7,dVi7,dT1
544
545 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
546
547 @// calculate a*v5
548 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1
549 VQRDMULH dVi5,dVi5,dT0[0]
550
551 VADD dYr7,dVr3,dVr7
552 VADD dYi7,dVi3,dVi7
553 SUB pDst, pDst, step2 @// set pDst to y1
554
555 VSUB dVr5,dT1,dVi5 @// a * V5
556 VADD dVi5,dT1,dVi5
@// Review finding (aedla, 2013/06/26): on the last iteration this preload
@// read 16 bytes past the end of the input. Skip it when the SUBS at the
@// loop top left no further pass; pSrc is recomputed after the loop, so the
@// missed post-increment does not matter.
BLE skipLastPreload\name
557 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
skipLastPreload\name:
558
559 VSUB qY5,qV1,qV5
560
561 VSUB dYr3,dVr3,dVr7
562 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
563 VSUB dYi3,dVi3,dVi7
564 VADD qY1,qV1,qV5
565
566
567 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
568 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
569 #if 0
570 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
571 #else
572 VST2 {dYr1,dYi1},[pDst :128]! @// store y7
573 #endif
574
575 .ENDIF
576
577
578 .ENDIF
579
580 SUB pDst, pDst, step2 @// update pDst for the next set
581 BGT grpZeroSetLoop\name
582
583
584 @// reset pSrc to pDst for the next stage
585 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
586 MOV pDst,pPingPongBuf
587
588
589
590 .endm
591
592
593 @// Allocate stack memory required by the function
594
595
@// Forward, unscaled first-stage radix-8 pass: expands FFTSTAGE with
@// scaled="FALSE" (plain VADD/VSUB butterflies) and inverse="FALSE".
@// M_START and M_END are defined outside this file (see the #include lines).
@// NOTE(review): FFTSTAGE writes r0, r2-r4 and r6-r11; presumably the r4
@// argument tells M_START which registers to preserve - confirm against
@// armCOMM_s.h and the callers of this "unsafe" routine.
596 M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
597 FFTSTAGE "FALSE","FALSE",FWD
598 M_END
599
600
@// Inverse, unscaled first-stage radix-8 pass: expands FFTSTAGE with
@// scaled="FALSE" (plain VADD/VSUB butterflies) and inverse="TRUE".
@// M_START and M_END are defined outside this file (see the #include lines).
601 M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
602 FFTSTAGE "FALSE","TRUE",INV
603 M_END
604
605
@// Forward, scaled first-stage radix-8 pass: expands FFTSTAGE with
@// scaled="TRUE" (halving VHADD/VHSUB butterflies) and inverse="FALSE".
@// M_START and M_END are defined outside this file (see the #include lines).
606 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
607 FFTSTAGE "TRUE","FALSE",FWDSFS
608 M_END
609
610
@// Inverse, scaled first-stage radix-8 pass: expands FFTSTAGE with
@// scaled="TRUE" (halving VHADD/VHSUB butterflies) and inverse="TRUE".
@// M_START and M_END are defined outside this file (see the #include lines).
611 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
612 FFTSTAGE "TRUE","TRUE",INVSFS
613 M_END
614
615
616
617
618
619 .END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698