| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 @// | 
|  | 2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 
|  | 3 @// | 
|  | 4 @//  Use of this source code is governed by a BSD-style license | 
|  | 5 @//  that can be found in the LICENSE file in the root of the source | 
|  | 6 @//  tree. An additional intellectual property rights grant can be found | 
|  | 7 @//  in the file PATENTS.  All contributing project authors may | 
|  | 8 @//  be found in the AUTHORS file in the root of the source tree. | 
|  | 9 @// | 
|  | 10 @// | 
|  | 11 @//  This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s | 
|  | 12 @//  to support float instead of SC32. | 
|  | 13 @// | 
|  | 14 | 
|  | 15 @// | 
|  | 16 @// Description: | 
|  | 17 @// Compute the first-stage radix-4 FFT pass for an N-point complex signal | 
|  | 18 @// | 
|  | 19 @// | 
|  | 20 | 
|  | 21 | 
|  | 22 @// Include standard headers | 
|  | 23 | 
|  | 24 #include "dl/api/armCOMM_s.h" | 
|  | 25 #include "dl/api/omxtypes_s.h" | 
|  | 26 | 
|  | 27 @// Import symbols required from other files | 
|  | 28 @// (For example tables) | 
|  | 29 | 
|  | 30 | 
|  | 31 | 
|  | 32 | 
|  | 33 @// Set debugging level | 
|  | 34 @//DEBUG_ON    SETL {TRUE} | 
|  | 35 | 
|  | 36 | 
|  | 37 | 
|  | 38 @// Guarding implementation by the processor name | 
|  | 39 | 
|  | 40 | 
|  | 41 | 
|  | 42 @// Guarding implementation by the processor name | 
|  | 43 | 
|  | 44 | 
|  | 45 @// Input registers: core registers that FFTSTAGE reads before writing them | 
|  | 46 @// (pTwiddle is declared but not referenced in this file; subFFTSize is only written) | 
|  | 47 #define pSrc            r0 | 
|  | 48 #define pDst            r2 | 
|  | 49 #define pTwiddle        r1 | 
|  | 50 #define pPingPongBuf    r5 | 
|  | 51 #define subFFTNum       r6 | 
|  | 52 #define subFFTSize      r7 | 
|  | 53 | 
|  | 54 | 
|  | 55 @// Output registers: none are declared separately; FFTSTAGE leaves updated | 
|  | 56 @// values in pSrc, pDst, subFFTNum and subFFTSize | 
|  | 57 | 
|  | 58 @// Local scratch registers (all of them are overwritten by FFTSTAGE) | 
|  | 59 @// grpSize/setCount share r3 and pointStep/outPointStep share r4 | 
|  | 60 #define grpSize         r3 | 
|  | 61 @// Reuse grpSize as setCount | 
|  | 62 #define setCount        r3 | 
|  | 63 #define pointStep       r4 | 
|  | 64 #define outPointStep    r4 | 
|  | 65 #define setStep         r8 | 
|  | 66 #define step1           r9 | 
|  | 67 #define step3           r10 | 
|  | 68 | 
|  | 69 @// NEON register aliases, all typed as F32. Each qXn, qYn, qZn overlays the dr/di pair with the same n. | 
|  | 70 @// dXrN/dXiN receive the even/odd 32-bit elements that VLD2 de-interleaves (named as real/imaginary parts) | 
|  | 71 #define dXr0    D0.F32 | 
|  | 72 #define dXi0    D1.F32 | 
|  | 73 #define dXr1    D2.F32 | 
|  | 74 #define dXi1    D3.F32 | 
|  | 75 #define dXr2    D4.F32 | 
|  | 76 #define dXi2    D5.F32 | 
|  | 77 #define dXr3    D6.F32 | 
|  | 78 #define dXi3    D7.F32 | 
|  | 79 #define dYr0    D8.F32 | 
|  | 80 #define dYi0    D9.F32 | 
|  | 81 #define dYr1    D10.F32 | 
|  | 82 #define dYi1    D11.F32 | 
|  | 83 #define dYr2    D12.F32 | 
|  | 84 #define dYi2    D13.F32 | 
|  | 85 #define dYr3    D14.F32 | 
|  | 86 #define dYi3    D15.F32 | 
|  | 87 #define qX0     Q0.F32 | 
|  | 88 #define qX1     Q1.F32 | 
|  | 89 #define qX2     Q2.F32 | 
|  | 90 #define qX3     Q3.F32 | 
|  | 91 #define qY0     Q4.F32 | 
|  | 92 #define qY1     Q5.F32 | 
|  | 93 #define qY2     Q6.F32 | 
|  | 94 #define qY3     Q7.F32 | 
|  | 95 #define dZr0    D16.F32 | 
|  | 96 #define dZi0    D17.F32 | 
|  | 97 #define dZr1    D18.F32 | 
|  | 98 #define dZi1    D19.F32 | 
|  | 99 #define dZr2    D20.F32 | 
|  | 100 #define dZi2    D21.F32 | 
|  | 101 #define dZr3    D22.F32 | 
|  | 102 #define dZi3    D23.F32 | 
|  | 103 #define qZ0     Q8.F32 | 
|  | 104 #define qZ1     Q9.F32 | 
|  | 105 #define qZ2     Q10.F32 | 
|  | 106 #define qZ3     Q11.F32 | 
|  | 107 | 
|  | 108 | 
|  | 109         .MACRO FFTSTAGE scaled, inverse, name | 
|  | 110         @// Radix-4 first-stage butterflies; each loop pass handles two sets (setCount -= 2). | 
|  | 111         @// Macro args: scaled is not referenced below; inverse = "TRUE" selects the inverse | 
|  | 112         @// branch of the .ifeqs, any other value the forward one; name suffixes the labels. | 
|  | 113         @// Each VLD2/VST2 below transfers two D registers (16 bytes). | 
|  | 114         @// pointStep = 2*subFFTNum is the byte stride between data[0..3] of one set. | 
|  | 115         @// Note: outPointStep = pointStep for firststage | 
|  | 116 | 
|  | 117         MOV     pointStep,subFFTNum,LSL #1 | 
|  | 118 | 
|  | 119 | 
|  | 120         @// Update the subFFTSize and subFFTNum registers, interleaved with the initial loads | 
|  | 121         VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0] | 
|  | 122         @// subFFTSize is set to 4 (presumably its value after the first radix-4 stage -- confirm) | 
|  | 123         MOV     subFFTSize,#4 | 
|  | 124 | 
|  | 125         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount) | 
|  | 126         LSR     grpSize,subFFTNum,#2 | 
|  | 127         VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1] | 
|  | 128         MOV     subFFTNum,grpSize | 
|  | 129 | 
|  | 130         @// grpSize LSL #4 is 2*pointStep when the incoming subFFTNum is a multiple of 4 | 
|  | 131         @// Calculate the step of input data for the next set | 
|  | 132         @//MOV     setStep,pointStep,LSL #1 | 
|  | 133         MOV     setStep,grpSize,LSL #4 | 
|  | 134         VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2] | 
|  | 135         @// setStep = 3*pointStep | 
|  | 136         ADD     setStep,setStep,pointStep | 
|  | 137         @// setStep = - 3*pointStep+16 | 
|  | 138         RSB     setStep,setStep,#16 | 
|  | 139 | 
|  | 140         @//  data[3] & update pSrc for the next set | 
|  | 141         VLD2    {dXr3,dXi3},[pSrc :128],setStep | 
|  | 142         @// step1 = 2*pointStep | 
|  | 143         MOV     step1,pointStep,LSL #1 | 
|  | 144 | 
|  | 145         VADD    qY0,qX0,qX2 | 
|  | 146 | 
|  | 147         @// step3 = -pointStep | 
|  | 148         RSB     step3,pointStep,#0 | 
|  | 149 | 
|  | 150         @// grp = 0 a special case since all the twiddle factors are 1 | 
|  | 151         @// Loop on the sets : 2 sets at a time | 
|  | 152         @// NOTE(review): BEQ/BGT only see zero when setCount starts even -- presumably guaranteed by the caller | 
|  | 153 radix4fsGrpZeroSetLoop\name : | 
|  | 154 | 
|  | 155 | 
|  | 156 | 
|  | 157         @// Decrement setcount; the flags set here are tested by the BEQ and BGT further down | 
|  | 158         SUBS    setCount,setCount,#2 | 
|  | 159         @// (the NEON loads, stores and arithmetic in between do not change the flags) | 
|  | 160 | 
|  | 161         @// finish first stage of 4 point FFT | 
|  | 162         @// qY0 = X0 + X2 is already available (computed before the loop or at the end of the previous pass) | 
|  | 163 | 
|  | 164         VSUB    qY2,qX0,qX2 | 
|  | 165 | 
|  | 166         VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0] | 
|  | 167         VADD    qY1,qX1,qX3 | 
|  | 168         VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2] | 
|  | 169         VSUB    qY3,qX1,qX3 | 
|  | 170 | 
|  | 171 | 
|  | 172         @// finish second stage of 4 point FFT | 
|  | 173 | 
|  | 174         .ifeqs "\inverse", "TRUE" | 
|  | 175             @// Inverse: outputs are stored as Z0, Z3, Z1, Z2 with Z3 = Y2 + j*Y3 and Z2 = Y2 - j*Y3 | 
|  | 176             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1] | 
|  | 177             VADD    qZ0,qY0,qY1 | 
|  | 178 | 
|  | 179             @//  data[3] & update pSrc for the next set, but not if it's the | 
|  | 180             @//  last iteration so that we don't read past the end of the | 
|  | 181             @//  input array. | 
|  | 182             BEQ     radix4SkipLastUpdateInv\name | 
|  | 183             VLD2    {dXr3,dXi3},[pSrc :128],setStep | 
|  | 184 radix4SkipLastUpdateInv\name: | 
|  | 185             VSUB    dZr3,dYr2,dYi3 | 
|  | 186 | 
|  | 187             VST2    {dZr0,dZi0},[pDst :128],outPointStep | 
|  | 188             VADD    dZi3,dYi2,dYr3 | 
|  | 189 | 
|  | 190             VSUB    qZ1,qY0,qY1 | 
|  | 191             VST2    {dZr3,dZi3},[pDst :128],outPointStep | 
|  | 192 | 
|  | 193             VADD    dZr2,dYr2,dYi3 | 
|  | 194             VST2    {dZr1,dZi1},[pDst :128],outPointStep | 
|  | 195             VSUB    dZi2,dYi2,dYr3 | 
|  | 196 | 
|  | 197             VADD    qY0,qX0,qX2                     @// u0 for next iteration | 
|  | 198             VST2    {dZr2,dZi2},[pDst :128],setStep | 
|  | 199 | 
|  | 200 | 
|  | 201         .else | 
|  | 202             @// Forward: outputs are stored as Z0, Z2, Z1, Z3 with Z2 = Y2 - j*Y3 and Z3 = Y2 + j*Y3 | 
|  | 203             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1] | 
|  | 204             VADD    qZ0,qY0,qY1 | 
|  | 205 | 
|  | 206             @//  data[3] & update pSrc for the next set, but not if it's the | 
|  | 207             @//  last iteration so that we don't read past the end of the | 
|  | 208             @//  input array. | 
|  | 209             BEQ     radix4SkipLastUpdateFwd\name | 
|  | 210             VLD2    {dXr3,dXi3},[pSrc :128],setStep | 
|  | 211 radix4SkipLastUpdateFwd\name: | 
|  | 212             VADD    dZr2,dYr2,dYi3 | 
|  | 213 | 
|  | 214             VST2    {dZr0,dZi0},[pDst :128],outPointStep | 
|  | 215             VSUB    dZi2,dYi2,dYr3 | 
|  | 216 | 
|  | 217             VSUB    qZ1,qY0,qY1 | 
|  | 218             VST2    {dZr2,dZi2},[pDst :128],outPointStep | 
|  | 219 | 
|  | 220             VSUB    dZr3,dYr2,dYi3 | 
|  | 221             VST2    {dZr1,dZi1},[pDst :128],outPointStep | 
|  | 222             VADD    dZi3,dYi2,dYr3 | 
|  | 223 | 
|  | 224             VADD    qY0,qX0,qX2                     @// u0 for next iteration | 
|  | 225             VST2    {dZr3,dZi3},[pDst :128],setStep | 
|  | 226 | 
|  | 227         .endif | 
|  | 228 | 
|  | 229         BGT     radix4fsGrpZeroSetLoop\name | 
|  | 230 | 
|  | 231         @// reset pSrc to pDst for the next stage | 
|  | 232         SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep | 
|  | 233         MOV     pDst,pPingPongBuf | 
|  | 234 | 
|  | 235 | 
|  | 236         .endm | 
|  | 237 | 
|  | 238         @// Forward entry point: expands FFTSTAGE with inverse = "FALSE" and label suffix fwd. | 
|  | 239         @// NOTE(review): FFTSTAGE writes r0, r2-r4, r6-r10 and D0-D23; what M_START ...,r4 preserves is defined in armCOMM_s.h -- confirm. | 
|  | 240         M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4 | 
|  | 241         FFTSTAGE "FALSE","FALSE",fwd | 
|  | 242         M_END | 
|  | 243 | 
|  | 244         @// Inverse entry point: expands FFTSTAGE with inverse = "TRUE" and label suffix inv. | 
|  | 245         @// NOTE(review): FFTSTAGE writes r0, r2-r4, r6-r10 and D0-D23; what M_START ...,r4 preserves is defined in armCOMM_s.h -- confirm. | 
|  | 246         M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4 | 
|  | 247         FFTSTAGE "FALSE","TRUE",inv | 
|  | 248         M_END | 
|  | 249 | 
|  | 250 | 
|  | 251         .end | 
| OLD | NEW | 
|---|