third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S - Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED

Unified Diff: third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S ('K') | « third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c ('k') | third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S » ('j') | third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

diff --git a/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S b/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

new file mode 100644

index 0000000000000000000000000000000000000000..2616506ac065ba5b6d804f1e13eef7ef229ac9ea

--- /dev/null

+++ b/third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

@@ -0,0 +1,283 @@

+@//

+@// Use of this source code is governed by a BSD-style license

+@// that can be found in the LICENSE file in the root of the source

+@// tree. An additional intellectual property rights grant can be found

+@// in the file PATENTS. All contributing project authors may

+@// be found in the AUTHORS file in the root of the source tree.

+@//

+@// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s

+@// to support float instead of SC32.

+@//

+@// Description:

+@// Compute an inverse FFT for a complex signal

+@//

+@// Include standard headers

+#include "dl/api/armCOMM_s.h"

+#include "dl/api/omxtypes_s.h"

+@// Import symbols required from other files

+@// (For example tables)

+ .extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe

+@// Set debugging level

+@//DEBUG_ON SETL {TRUE}

+@// Guarding implementation by the processor name

+ @// Guarding implementation by the processor name

+@// Import symbols required from other files

+@// (For example tables)

+ .extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe

+ .extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe

+@//Input Registers

+#define pSrc r0

+#define pDst r1

+#define pFFTSpec r2

+#define scale r3

+@// Output registers

+#define result r0

+@//Local Scratch Registers

+#define argTwiddle r1

+#define argDst r2

+#define argScale r4

+#define tmpOrder r4

+#define pTwiddle r4

+#define pOut r5

+#define subFFTSize r7

+#define subFFTNum r6

+#define N r6

+#define order r14

+#define diff r9

+@// Total num of radix stages required to comple the FFT

+#define count r8

+#define x0r r4

+#define x0i r5

+#define diffMinusOne r2

+#define round r3

+#define pOut1 r2

+#define size r7

+#define step r8

+#define step1 r9

+#define twStep r10

+#define pTwiddleTmp r11

+#define argTwiddle1 r12

+#define zero r14

+@// Neon registers

+#define dX0 D0.F32

+#define dShift D1.F32

+#define dX1 D1.F32

+#define dY0 D2.F32

+#define dY1 D3.F32

+#define dX0r D0.F32

+#define dX0i D1.F32

+#define dX1r D2.F32

+#define dX1i D3.F32

+#define dW0r D4.F32

+#define dW0i D5.F32

+#define dW1r D6.F32

+#define dW1i D7.F32

+#define dT0 D8.F32

+#define dT1 D9.F32

+#define dT2 D10.F32

+#define dT3 D11.F32

+#define qT0 d12.F32

+#define qT1 d14.F32

+#define qT2 d16.F32

+#define qT3 d18.F32

+#define dY0r D4.F32

+#define dY0i D5.F32

+#define dY1r D6.F32

+#define dY1i D7.F32

+#define dzero D20.F32

+#define dY2 D4.F32

+#define dY3 D5.F32

+#define dW0 D6.F32

+#define dW1 D7.F32

+#define dW0Tmp D10.F32

+#define dW1Neg D11.F32

+#define sN S0.S32

+#define fN S1.F32

+@// one must be the same as dScale[0]!

+#define dScale D2.F32

+#define one S4.F32

+ @// Allocate stack memory required by the function

+ M_ALLOC4 complexFFTSize, 4

+ @// Write function header

+ M_START omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15

+@ Structure offsets for the FFTSpec

+ .set ARMsFFTSpec_N, 0

+ .set ARMsFFTSpec_pBitRev, 4

+ .set ARMsFFTSpec_pTwiddle, 8

+ .set ARMsFFTSpec_pBuf, 12

+ @// Define stack arguments

+ @// Read the size from structure and take log

+ LDR N, [pFFTSpec, #ARMsFFTSpec_N]

+ @// Read other structure parameters

+ LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]

+ LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

+ @// N=1 Treat seperately

+ CMP N,#1

+ BGT sizeGreaterThanOne

+ VLD1 dX0[0],[pSrc]

+ VST1 dX0[0],[pDst]

+ B End

+sizeGreaterThanOne:

+ @// Call the preTwiddle Radix2 stage before doing the compledIFFT

+ BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe

+complexIFFT:

+ ASR N,N,#1 @// N/2 point complex IFFT

+ M_STR N, complexFFTSize @ Save N for scaling later

+ ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1

+ CLZ order,N @// N = 2^order

+ RSB order,order,#31

+ MOV subFFTSize,#1

+ @//MOV subFFTNum,N

+ CMP order,#3

+ BGT orderGreaterthan3 @// order > 3

+ CMP order,#1

+ BGE orderGreaterthan0 @// order > 0

+ VLD1 dX0,[pSrc]

+ VST1 dX0,[pDst]

+ MOV pSrc,pDst

+ BLT FFTEnd

+orderGreaterthan0:

+ @// set the buffers appropriately for various orders

+ CMP order,#2

+ MOVNE argDst,pDst

+ MOVEQ argDst,pOut

+ @// Pass the first stage destination in RN5

+ MOVEQ pOut,pDst

+ MOV argTwiddle,pTwiddle

+ BGE orderGreaterthan1

+ BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1

+ B FFTEnd

+orderGreaterthan1:

+ MOV tmpOrder,order @// tmpOrder = RN 4

+ BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe

+ CMP tmpOrder,#2

+ BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe

+ BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe

+ B FFTEnd

+orderGreaterthan3:

+specialScaleCase:

+ @// Set input args to fft stages

+ TST order, #2

+ MOVNE argDst,pDst

+ MOVEQ argDst,pOut

+ @// Pass the first stage destination in RN5

+ MOVEQ pOut,pDst

+ MOV argTwiddle,pTwiddle

+ @//check for even or odd order

+ @// NOTE: The following combination of BL's would work fine even though

+ @// the first BL would corrupt the flags. This is because the end of

+ @// the "grpZeroSetLoop" loop inside

+ @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag

+ @// to EQ

+ TST order,#0x00000001

+ BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe

+ BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

+ CMP subFFTNum,#4

+ BLT FFTEnd

+unscaledRadix4Loop:

+ BEQ lastStageUnscaledRadix4

+ BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe

+ CMP subFFTNum,#4

+ B unscaledRadix4Loop

+lastStageUnscaledRadix4:

+ BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe

+ B FFTEnd

+FFTEnd: @// Does only the scaling

+ @ Scale inverse FFT result by 1/N

+ M_LDR N, complexFFTSize

+ VMOV sN,N

+ VCVT fN, sN @ fn = fftSize, as a float

+ VMOV one, 1.0

+ VDIV one, one, fN @ one = dScale[0] = 1 / fftSize

+ @// N = subFFTSize ; dataptr = pDst

+scaleFFTData:

+ VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer

+ SUBS subFFTSize,subFFTSize,#1

+ VMUL dX0, dX0, dScale[0]

+ VST1 {dX0},[pSrc]!

+ BGT scaleFFTData

+End:

+ @// Set return value

+ MOV result, #OMX_Sts_NoErr

+ @// Write function tail

+ M_END

+ .end