Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 @// | |
| 2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
| 3 @// | |
| 4 @// Use of this source code is governed by a BSD-style license | |
| 5 @// that can be found in the LICENSE file in the root of the source | |
| 6 @// tree. An additional intellectual property rights grant can be found | |
| 7 @// in the file PATENTS. All contributing project authors may | |
| 8 @// be found in the AUTHORS file in the root of the source tree. | |
| 9 @// | |
| 10 @// This file was originally licensed as follows. It has been | |
| 11 @// relicensed with permission from the copyright holders. | |
| 12 | |
| 13 @// | |
| 14 @// | |
| 15 @// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s | |
| 16 @// OpenMAX DL: v1.0.2 | |
| 17 @// Last Modified Revision: 7766 | |
| 18 @// Last Modified Date: Thu, 27 Sep 2007 | |
| 19 @// | |
| 20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. | |
| 21 @// | |
| 22 @// | |
| 23 @// | |
| 24 @// Description: | |
| 25 @// Compute a first stage Radix 8 FFT stage for a N point complex signal | |
| 26 @// | |
| 27 @// | |
| 28 | |
| 29 | |
| 30 @// Include standard headers | |
| 31 | |
| 32 #include "dl/api/armCOMM_s.h" | |
| 33 #include "dl/api/omxtypes_s.h" | |
| 34 | |
| 35 | |
| 36 @// Import symbols required from other files | |
| 37 @// (For example tables) | |
| 38 | |
| 39 | |
| 40 @// Set debugging level | |
| 41 @//DEBUG_ON SETL {TRUE} | |
| 42 | |
| 43 | |
| 44 | |
| 45 @// Guarding implementation by the processor name | |
| 46 | |
| 47 | |
| 48 | |
| 49 | |
| 50 @// Guarding implementation by the processor name | |
| 51 | |
| 52 | |
| 53 @//Input Registers | |
| 54 | |
| 55 #define pSrc r0 | |
| 56 #define pDst r2 | |
| 57 #define pTwiddle r1 | |
| 58 #define subFFTNum r6 | |
| 59 #define subFFTSize r7 | |
| 60 @// dest buffer for the next stage (not pSrc for first stage) | |
| 61 #define pPingPongBuf r5 | |
| 62 | |
| 63 | |
| 64 @//Output Registers | |
| 65 | |
| 66 | |
| 67 @//Local Scratch Registers | |
| 68 | |
| 69 #define grpSize r3 | |
| 70 @// Reuse grpSize as setCount | |
| 71 #define setCount r3 | |
| 72 #define pointStep r4 | |
| 73 #define outPointStep r4 | |
| 74 #define setStep r8 | |
| 75 #define step1 r9 | |
| 76 #define step2 r10 | |
| 77 #define t0 r11 | |
| 78 | |
| 79 | |
| 80 @// Neon Registers | |
| 81 | |
| 82 #define dXr0 D14.S16 | |
| 83 #define dXi0 D15.S16 | |
| 84 #define dXr1 D2.S16 | |
| 85 #define dXi1 D3.S16 | |
| 86 #define dXr2 D4.S16 | |
| 87 #define dXi2 D5.S16 | |
| 88 #define dXr3 D6.S16 | |
| 89 #define dXi3 D7.S16 | |
| 90 #define dXr4 D8.S16 | |
| 91 #define dXi4 D9.S16 | |
| 92 #define dXr5 D10.S16 | |
| 93 #define dXi5 D11.S16 | |
| 94 #define dXr6 D12.S16 | |
| 95 #define dXi6 D13.S16 | |
| 96 #define dXr7 D0.S16 | |
| 97 #define dXi7 D1.S16 | |
| 98 #define qX0 Q7.S16 | |
| 99 #define qX1 Q1.S16 | |
| 100 #define qX2 Q2.S16 | |
| 101 #define qX3 Q3.S16 | |
| 102 #define qX4 Q4.S16 | |
| 103 #define qX5 Q5.S16 | |
| 104 #define qX6 Q6.S16 | |
| 105 #define qX7 Q0.S16 | |
| 106 | |
| 107 #define dUr0 D16.S16 | |
| 108 #define dUi0 D17.S16 | |
| 109 #define dUr2 D18.S16 | |
| 110 #define dUi2 D19.S16 | |
| 111 #define dUr4 D20.S16 | |
| 112 #define dUi4 D21.S16 | |
| 113 #define dUr6 D22.S16 | |
| 114 #define dUi6 D23.S16 | |
| 115 #define dUr1 D24.S16 | |
| 116 #define dUi1 D25.S16 | |
| 117 #define dUr3 D26.S16 | |
| 118 #define dUi3 D27.S16 | |
| 119 #define dUr5 D28.S16 | |
| 120 #define dUi5 D29.S16 | |
| 121 @// reuse dXr7 and dXi7 | |
| 122 #define dUr7 D30.S16 | |
| 123 #define dUi7 D31.S16 | |
| 124 #define qU0 Q8.S16 | |
| 125 #define qU1 Q12.S16 | |
| 126 #define qU2 Q9.S16 | |
| 127 #define qU3 Q13.S16 | |
| 128 #define qU4 Q10.S16 | |
| 129 #define qU5 Q14.S16 | |
| 130 #define qU6 Q11.S16 | |
| 131 #define qU7 Q15.S16 | |
| 132 | |
| 133 | |
| 134 | |
| 135 #define dVr0 D24.S16 | |
| 136 #define dVi0 D25.S16 | |
| 137 #define dVr2 D26.S16 | |
| 138 #define dVi2 D27.S16 | |
| 139 #define dVr4 D28.S16 | |
| 140 #define dVi4 D29.S16 | |
| 141 #define dVr6 D30.S16 | |
| 142 #define dVi6 D31.S16 | |
| 143 #define dVr1 D16.S16 | |
| 144 #define dVi1 D17.S16 | |
| 145 #define dVr3 D18.S16 | |
| 146 #define dVi3 D19.S16 | |
| 147 #define dVr5 D20.S16 | |
| 148 #define dVi5 D21.S16 | |
| 149 @// reuse dUi7 | |
| 150 #define dVr7 D22.S16 | |
| 151 @// reuse dUr7 | |
| 152 #define dVi7 D23.S16 | |
| 153 #define qV0 Q12.S16 | |
| 154 #define qV1 Q8.S16 | |
| 155 #define qV2 Q13.S16 | |
| 156 #define qV3 Q9.S16 | |
| 157 #define qV4 Q14.S16 | |
| 158 #define qV5 Q10.S16 | |
| 159 #define qV6 Q15.S16 | |
| 160 #define qV7 Q11.S16 | |
| 161 | |
| 162 | |
| 163 | |
| 164 #define dYr0 D16.S16 | |
| 165 #define dYi0 D17.S16 | |
| 166 #define dYr2 D18.S16 | |
| 167 #define dYi2 D19.S16 | |
| 168 #define dYr4 D20.S16 | |
| 169 #define dYi4 D21.S16 | |
| 170 #define dYr6 D22.S16 | |
| 171 #define dYi6 D23.S16 | |
| 172 #define dYr1 D24.S16 | |
| 173 #define dYi1 D25.S16 | |
| 174 #define dYr3 D26.S16 | |
| 175 #define dYi3 D27.S16 | |
| 176 #define dYr5 D28.S16 | |
| 177 #define dYi5 D29.S16 | |
| 178 @// reuse dYr4 and dYi4 | |
| 179 #define dYr7 D30.S16 | |
| 180 #define dYi7 D31.S16 | |
| 181 #define qY0 Q8.S16 | |
| 182 #define qY1 Q12.S16 | |
| 183 #define qY2 Q9.S16 | |
| 184 #define qY3 Q13.S16 | |
| 185 #define qY4 Q10.S16 | |
| 186 #define qY5 Q14.S16 | |
| 187 #define qY6 Q11.S16 | |
| 188 #define qY7 Q15.S16 | |
| 189 | |
| 190 | |
| 191 #define dT0 D0.S16 | |
| 192 #define dT1 D1.S16 | |
| 193 | |
| 194 | |
| 195 @// Define constants | |
| 196 .set ONEBYSQRT2, 0x00005A82 @// Q15 format | |
| 197 | |
| 198 | |
| 199 .MACRO FFTSTAGE scaled, inverse , name | |
| 200 | |
| 201 @// Define stack arguments | |
| 202 | |
| 203 @// Update pSubFFTSize and pSubFFTNum regs | |
| 204 MOV subFFTSize,#8 @// subFFTSize = 8 for the first stage | |
| 205 LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q15 format | |
| 206 | |
| 207 @// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount) | |
| 208 LSR grpSize,subFFTNum,#3 | |
| 209 MOV subFFTNum,grpSize | |
| 210 | |
| 211 | |
| 212 @// pT0+1 increments pT0 by 4 bytes | |
| 213 @// pT0+pointStep = increment of 4*pointStep bytes = grpSize/2 bytes | |
| 214 @// Note: outPointStep = pointStep for firststage | |
| 215 | |
| 216 MOV pointStep,grpSize,LSL #2 | |
| 217 | |
| 218 | |
| 219 @// Calculate the step of input data for the next set | |
| 220 @//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep | |
| 221 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] | |
| 222 MOV step1,grpSize,LSL #3 | |
| 223 | |
| 224 MOV step2,pointStep,LSL #3 | |
| 225 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1] | |
| 226 SUB step2,step2,pointStep @// step2 = 7*pointStep | |
| 227 RSB setStep,step2,#16 @// setStep = -7*pointStep+16 | |
| 228 | |
| 229 | |
| 230 | |
| 231 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2] | |
| 232 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3] | |
| 233 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4] | |
| 234 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5] | |
| 235 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6] | |
| 236 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set | |
| 237 @// setStep = -7*pointStep + 16 | |
| 238 @// grp = 0 a special case since all the twiddle factors are 1 | |
| 239 @// Loop on the sets : 4 sets at a time | |
| 240 | |
| 241 grpZeroSetLoop\name: | |
| 242 | |
| 243 @// Decrement setcount | |
| 244 SUBS setCount,setCount,#4 @// decrement the set loop counter | |
| 245 | |
| 246 | |
| 247 .ifeqs "\scaled", "TRUE" | |
| 248 @// finish first stage of 8 point FFT | |
| 249 | |
| 250 VHADD qU0,qX0,qX4 | |
| 251 VHADD qU2,qX1,qX5 | |
| 252 VHADD qU4,qX2,qX6 | |
| 253 VHADD qU6,qX3,qX7 | |
| 254 | |
| 255 @// finish second stage of 8 point FFT | |
| 256 | |
| 257 VHADD qV0,qU0,qU4 | |
| 258 VHSUB qV2,qU0,qU4 | |
| 259 VHADD qV4,qU2,qU6 | |
| 260 VHSUB qV6,qU2,qU6 | |
| 261 | |
| 262 @// finish third stage of 8 point FFT | |
| 263 | |
| 264 VHADD qY0,qV0,qV4 | |
| 265 VHSUB qY4,qV0,qV4 | |
| 266 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0 | |
| 267 | |
| 268 .ifeqs "\inverse", "TRUE" | |
| 269 | |
| 270 VHSUB dYr2,dVr2,dVi6 | |
| 271 VHADD dYi2,dVi2,dVr6 | |
| 272 | |
| 273 VHADD dYr6,dVr2,dVi6 | |
| 274 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2 | |
| 275 VHSUB dYi6,dVi2,dVr6 | |
| 276 | |
| 277 VHSUB qU1,qX0,qX4 | |
| 278 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4 | |
| 279 | |
| 280 VHSUB qU3,qX1,qX5 | |
| 281 VHSUB qU5,qX2,qX6 | |
| 282 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6 | |
| 283 | |
| 284 .ELSE | |
| 285 | |
| 286 VHADD dYr6,dVr2,dVi6 | |
| 287 VHSUB dYi6,dVi2,dVr6 | |
| 288 | |
| 289 VHSUB dYr2,dVr2,dVi6 | |
| 290 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2 | |
| 291 VHADD dYi2,dVi2,dVr6 | |
| 292 | |
| 293 | |
| 294 VHSUB qU1,qX0,qX4 | |
| 295 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4 | |
| 296 VHSUB qU3,qX1,qX5 | |
| 297 VHSUB qU5,qX2,qX6 | |
| 298 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6 | |
| 299 | |
| 300 | |
| 301 .ENDIF | |
| 302 | |
| 303 @// finish first stage of 8 point FFT | |
| 304 | |
| 305 VHSUB qU7,qX3,qX7 | |
| 306 VMOV dT0[0],t0 | |
| 307 | |
| 308 @// finish second stage of 8 point FFT | |
| 309 | |
| 310 VHSUB dVr1,dUr1,dUi5 | |
| 311 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration | |
| 312 VHADD dVi1,dUi1,dUr5 | |
| 313 VHADD dVr3,dUr1,dUi5 | |
| 314 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1] | |
| 315 VHSUB dVi3,dUi1,dUr5 | |
| 316 | |
| 317 VHSUB dVr5,dUr3,dUi7 | |
| 318 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2] | |
| 319 VHADD dVi5,dUi3,dUr7 | |
| 320 VHADD dVr7,dUr3,dUi7 | |
| 321 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3] | |
| 322 VHSUB dVi7,dUi3,dUr7 | |
| 323 | |
| 324 @// finish third stage of 8 point FFT | |
| 325 | |
| 326 .ifeqs "\inverse", "TRUE" | |
| 327 | |
| 328 @// calculate a*v5 | |
| 329 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1 | |
| 330 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4] | |
| 331 VQRDMULH dVi5,dVi5,dT0[0] | |
| 332 | |
| 333 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5] | |
| 334 VSUB dVr5,dT1,dVi5 @// a * V5 | |
| 335 VADD dVi5,dT1,dVi5 | |
| 336 | |
| 337 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6] | |
| 338 | |
| 339 @// calculate b*v7 | |
| 340 VQRDMULH dT1,dVr7,dT0[0] | |
| 341 VQRDMULH dVi7,dVi7,dT0[0] | |
| 342 | |
| 343 VHADD qY1,qV1,qV5 | |
| 344 VHSUB qY5,qV1,qV5 | |
| 345 | |
| 346 | |
| 347 VADD dVr7,dT1,dVi7 @// b * V7 | |
| 348 VSUB dVi7,dVi7,dT1 | |
| 349 SUB pDst, pDst, step2 @// set pDst to y1 | |
| 350 | |
| 351 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] | |
|
aedla
2013/06/26 12:52:17
Last iteration 16-byte OOB read here,
| |
| 352 | |
| 353 | |
| 354 VHSUB dYr3,dVr3,dVr7 | |
| 355 VHSUB dYi3,dVi3,dVi7 | |
| 356 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1 | |
| 357 VHADD dYr7,dVr3,dVr7 | |
| 358 VHADD dYi7,dVi3,dVi7 | |
| 359 | |
| 360 | |
| 361 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3 | |
| 362 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5 | |
| 363 #if 0 | |
| 364 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7 | |
| 365 #else | |
| 366 VST2 {dYr7,dYi7},[pDst :128]! @// store y7 | |
| 367 #endif | |
| 368 .ELSE | |
| 369 | |
| 370 @// calculate b*v7 | |
| 371 VQRDMULH dT1,dVr7,dT0[0] | |
| 372 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4] | |
| 373 VQRDMULH dVi7,dVi7,dT0[0] | |
| 374 | |
| 375 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5] | |
| 376 VADD dVr7,dT1,dVi7 @// b * V7 | |
| 377 VSUB dVi7,dVi7,dT1 | |
| 378 | |
| 379 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6] | |
| 380 | |
| 381 @// calculate a*v5 | |
| 382 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1 | |
| 383 VQRDMULH dVi5,dVi5,dT0[0] | |
| 384 | |
| 385 VHADD dYr7,dVr3,dVr7 | |
| 386 VHADD dYi7,dVi3,dVi7 | |
| 387 SUB pDst, pDst, step2 @// set pDst to y1 | |
| 388 | |
| 389 VSUB dVr5,dT1,dVi5 @// a * V5 | |
| 390 VADD dVi5,dT1,dVi5 | |
| 391 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] | |
|
aedla
2013/06/26 12:52:17
here,
| |
| 392 | |
| 393 VHSUB qY5,qV1,qV5 | |
| 394 | |
| 395 VHSUB dYr3,dVr3,dVr7 | |
| 396 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1 | |
| 397 VHSUB dYi3,dVi3,dVi7 | |
| 398 VHADD qY1,qV1,qV5 | |
| 399 | |
| 400 | |
| 401 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3 | |
| 402 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5 | |
| 403 #if 0 | |
| 404 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7 | |
| 405 #else | |
| 406 VST2 {dYr1,dYi1},[pDst :128]! @// store y7 | |
| 407 #endif | |
| 408 | |
| 409 .ENDIF | |
| 410 | |
| 411 | |
| 412 | |
| 413 .ELSE | |
| 414 @// finish first stage of 8 point FFT | |
| 415 | |
| 416 VADD qU0,qX0,qX4 | |
| 417 VADD qU2,qX1,qX5 | |
| 418 VADD qU4,qX2,qX6 | |
| 419 VADD qU6,qX3,qX7 | |
| 420 | |
| 421 @// finish second stage of 8 point FFT | |
| 422 | |
| 423 VADD qV0,qU0,qU4 | |
| 424 VSUB qV2,qU0,qU4 | |
| 425 VADD qV4,qU2,qU6 | |
| 426 VSUB qV6,qU2,qU6 | |
| 427 | |
| 428 @// finish third stage of 8 point FFT | |
| 429 | |
| 430 VADD qY0,qV0,qV4 | |
| 431 VSUB qY4,qV0,qV4 | |
| 432 VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0 | |
| 433 | |
| 434 .ifeqs "\inverse", "TRUE" | |
| 435 | |
| 436 VSUB dYr2,dVr2,dVi6 | |
| 437 VADD dYi2,dVi2,dVr6 | |
| 438 | |
| 439 VADD dYr6,dVr2,dVi6 | |
| 440 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2 | |
| 441 VSUB dYi6,dVi2,dVr6 | |
| 442 | |
| 443 VSUB qU1,qX0,qX4 | |
| 444 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4 | |
| 445 | |
| 446 VSUB qU3,qX1,qX5 | |
| 447 VSUB qU5,qX2,qX6 | |
| 448 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6 | |
| 449 | |
| 450 .ELSE | |
| 451 | |
| 452 VADD dYr6,dVr2,dVi6 | |
| 453 VSUB dYi6,dVi2,dVr6 | |
| 454 | |
| 455 VSUB dYr2,dVr2,dVi6 | |
| 456 VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2 | |
| 457 VADD dYi2,dVi2,dVr6 | |
| 458 | |
| 459 | |
| 460 VSUB qU1,qX0,qX4 | |
| 461 VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4 | |
| 462 VSUB qU3,qX1,qX5 | |
| 463 VSUB qU5,qX2,qX6 | |
| 464 VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6 | |
| 465 | |
| 466 | |
| 467 .ENDIF | |
| 468 | |
| 469 @// finish first stage of 8 point FFT | |
| 470 | |
| 471 VSUB qU7,qX3,qX7 | |
| 472 VMOV dT0[0],t0 | |
| 473 | |
| 474 @// finish second stage of 8 point FFT | |
| 475 | |
| 476 VSUB dVr1,dUr1,dUi5 | |
| 477 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration | |
| 478 VADD dVi1,dUi1,dUr5 | |
| 479 VADD dVr3,dUr1,dUi5 | |
| 480 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1] | |
| 481 VSUB dVi3,dUi1,dUr5 | |
| 482 | |
| 483 VSUB dVr5,dUr3,dUi7 | |
| 484 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2] | |
| 485 VADD dVi5,dUi3,dUr7 | |
| 486 VADD dVr7,dUr3,dUi7 | |
| 487 VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3] | |
| 488 VSUB dVi7,dUi3,dUr7 | |
| 489 | |
| 490 @// finish third stage of 8 point FFT | |
| 491 | |
| 492 .ifeqs "\inverse", "TRUE" | |
| 493 | |
| 494 @// calculate a*v5 | |
| 495 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1 | |
| 496 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4] | |
| 497 VQRDMULH dVi5,dVi5,dT0[0] | |
| 498 | |
| 499 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5] | |
| 500 VSUB dVr5,dT1,dVi5 @// a * V5 | |
| 501 VADD dVi5,dT1,dVi5 | |
| 502 | |
| 503 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6] | |
| 504 | |
| 505 @// calculate b*v7 | |
| 506 VQRDMULH dT1,dVr7,dT0[0] | |
| 507 VQRDMULH dVi7,dVi7,dT0[0] | |
| 508 | |
| 509 VADD qY1,qV1,qV5 | |
| 510 VSUB qY5,qV1,qV5 | |
| 511 | |
| 512 | |
| 513 VADD dVr7,dT1,dVi7 @// b * V7 | |
| 514 VSUB dVi7,dVi7,dT1 | |
| 515 SUB pDst, pDst, step2 @// set pDst to y1 | |
| 516 | |
| 517 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] | |
|
aedla
2013/06/26 12:52:17
here,
| |
| 518 | |
| 519 | |
| 520 VSUB dYr3,dVr3,dVr7 | |
| 521 VSUB dYi3,dVi3,dVi7 | |
| 522 VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1 | |
| 523 VADD dYr7,dVr3,dVr7 | |
| 524 VADD dYi7,dVi3,dVi7 | |
| 525 | |
| 526 | |
| 527 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3 | |
| 528 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5 | |
| 529 #if 0 | |
| 530 VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7 | |
| 531 #else | |
| 532 VST2 {dYr7,dYi7},[pDst :128]! @// store y7 | |
| 533 #endif | |
| 534 .ELSE | |
| 535 | |
| 536 @// calculate b*v7 | |
| 537 VQRDMULH dT1,dVr7,dT0[0] | |
| 538 VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4] | |
| 539 VQRDMULH dVi7,dVi7,dT0[0] | |
| 540 | |
| 541 VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5] | |
| 542 VADD dVr7,dT1,dVi7 @// b * V7 | |
| 543 VSUB dVi7,dVi7,dT1 | |
| 544 | |
| 545 VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6] | |
| 546 | |
| 547 @// calculate a*v5 | |
| 548 VQRDMULH dT1,dVr5,dT0[0] @// use dVi0 for dT1 | |
| 549 VQRDMULH dVi5,dVi5,dT0[0] | |
| 550 | |
| 551 VADD dYr7,dVr3,dVr7 | |
| 552 VADD dYi7,dVi3,dVi7 | |
| 553 SUB pDst, pDst, step2 @// set pDst to y1 | |
| 554 | |
| 555 VSUB dVr5,dT1,dVi5 @// a * V5 | |
| 556 VADD dVi5,dT1,dVi5 | |
| 557 VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] | |
|
aedla
2013/06/26 12:52:17
and here.
| |
| 558 | |
| 559 VSUB qY5,qV1,qV5 | |
| 560 | |
| 561 VSUB dYr3,dVr3,dVr7 | |
| 562 VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1 | |
| 563 VSUB dYi3,dVi3,dVi7 | |
| 564 VADD qY1,qV1,qV5 | |
| 565 | |
| 566 | |
| 567 VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3 | |
| 568 VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5 | |
| 569 #if 0 | |
| 570 VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7 | |
| 571 #else | |
| 572 VST2 {dYr1,dYi1},[pDst :128]! @// store y7 | |
| 573 #endif | |
| 574 | |
| 575 .ENDIF | |
| 576 | |
| 577 | |
| 578 .ENDIF | |
| 579 | |
| 580 SUB pDst, pDst, step2 @// update pDst for the next set | |
| 581 BGT grpZeroSetLoop\name | |
| 582 | |
| 583 | |
| 584 @// reset pSrc to pDst for the next stage | |
| 585 SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize | |
| 586 MOV pDst,pPingPongBuf | |
| 587 | |
| 588 | |
| 589 | |
| 590 .endm | |
| 591 | |
| 592 | |
| 593 @// Allocate stack memory required by the function | |
| 594 | |
| 595 | |
| 596 M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4 | |
| 597 FFTSTAGE "FALSE","FALSE",FWD | |
| 598 M_END | |
| 599 | |
| 600 | |
| 601 M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4 | |
| 602 FFTSTAGE "FALSE","TRUE",INV | |
| 603 M_END | |
| 604 | |
| 605 | |
| 606 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4 | |
| 607 FFTSTAGE "TRUE","FALSE",FWDSFS | |
| 608 M_END | |
| 609 | |
| 610 | |
| 611 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4 | |
| 612 FFTSTAGE "TRUE","TRUE",INVSFS | |
| 613 M_END | |
| 614 | |
| 615 | |
| 616 | |
| 617 | |
| 618 | |
| 619 .END | |
| OLD | NEW |