| Index: dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix4_s.S
|
| diff --git a/dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix4_s.S b/dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix4_s.S
|
| index 7442e0d51303c0bcbc64d5f0d65e5c557abc8c3d..047597dc15c081cd04facb66c9649bf7269c05e6 100644
|
| --- a/dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix4_s.S
|
| +++ b/dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix4_s.S
|
| @@ -80,8 +80,11 @@
|
| // Neon Registers
|
|
|
| #define dW1 v0.2s
|
| +#define dW1s v0.s
|
| #define dW2 v1.2s
|
| +#define dW2s v1.s
|
| #define dW3 v2.2s
|
| +#define dW3s v2.s
|
|
|
| #define dXr0 v4.2s
|
| #define dXi0 v5.2s
|
| @@ -182,49 +185,49 @@ radix4SetLoop\name :
|
|
|
|
|
| .ifeqs "\inverse", "TRUE"
|
| - fmul dZr1,dXr1,dW1[0]
|
| - fmul dZi1,dXi1,dW1[0]
|
| - fmul dZr2,dXr2,dW2[0]
|
| - fmul dZi2,dXi2,dW2[0]
|
| - fmul dZr3,dXr3,dW3[0]
|
| - fmul dZi3,dXi3,dW3[0]
|
| + fmul dZr1,dXr1,dW1s[0]
|
| + fmul dZi1,dXi1,dW1s[0]
|
| + fmul dZr2,dXr2,dW2s[0]
|
| + fmul dZi2,dXi2,dW2s[0]
|
| + fmul dZr3,dXr3,dW3s[0]
|
| + fmul dZi3,dXi3,dW3s[0]
|
|
|
| - fmla dZr1,dXi1,dW1[1] // real part
|
| - fmls dZi1,dXr1,dW1[1] // imag part
|
| + fmla dZr1,dXi1,dW1s[1] // real part
|
| + fmls dZi1,dXr1,dW1s[1] // imag part
|
|
|
| // data[1] for next iteration
|
| ld2 {dXr1,dXi1},[pSrc],pointStep
|
|
|
| - fmla dZr2,dXi2,dW2[1] // real part
|
| - fmls dZi2,dXr2,dW2[1] // imag part
|
| + fmla dZr2,dXi2,dW2s[1] // real part
|
| + fmls dZi2,dXr2,dW2s[1] // imag part
|
|
|
| // data[2] for next iteration
|
| ld2 {dXr2,dXi2},[pSrc],pointStep
|
|
|
| - fmla dZr3,dXi3,dW3[1] // real part
|
| - fmls dZi3,dXr3,dW3[1] // imag part
|
| + fmla dZr3,dXi3,dW3s[1] // real part
|
| + fmls dZi3,dXr3,dW3s[1] // imag part
|
| .else
|
| - fmul dZr1,dXr1,dW1[0]
|
| - fmul dZi1,dXi1,dW1[0]
|
| - fmul dZr2,dXr2,dW2[0]
|
| - fmul dZi2,dXi2,dW2[0]
|
| - fmul dZr3,dXr3,dW3[0]
|
| - fmul dZi3,dXi3,dW3[0]
|
| + fmul dZr1,dXr1,dW1s[0]
|
| + fmul dZi1,dXi1,dW1s[0]
|
| + fmul dZr2,dXr2,dW2s[0]
|
| + fmul dZi2,dXi2,dW2s[0]
|
| + fmul dZr3,dXr3,dW3s[0]
|
| + fmul dZi3,dXi3,dW3s[0]
|
|
|
| - fmls dZr1,dXi1,dW1[1] // real part
|
| - fmla dZi1,dXr1,dW1[1] // imag part
|
| + fmls dZr1,dXi1,dW1s[1] // real part
|
| + fmla dZi1,dXr1,dW1s[1] // imag part
|
|
|
| // data[1] for next iteration
|
| ld2 {dXr1,dXi1},[pSrc],pointStep
|
|
|
| - fmls dZr2,dXi2,dW2[1] // real part
|
| - fmla dZi2,dXr2,dW2[1] // imag part
|
| + fmls dZr2,dXi2,dW2s[1] // real part
|
| + fmla dZi2,dXr2,dW2s[1] // imag part
|
|
|
| // data[2] for next iteration
|
| ld2 {dXr2,dXi2},[pSrc],pointStep
|
|
|
| - fmls dZr3,dXi3,dW3[1] // real part
|
| - fmla dZi3,dXr3,dW3[1] // imag part
|
| + fmls dZr3,dXi3,dW3s[1] // real part
|
| + fmla dZi3,dXr3,dW3s[1] // imag part
|
| .endif
|
|
|
| // data[3] & update pSrc to data[0]
|
|
|