third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S - Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED

Side by Side Diff: third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S ('K') | « third_party/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c ('k') | third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S » ('j') | third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 @//

	2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

	3 @//

	4 @// Use of this source code is governed by a BSD-style license

	5 @// that can be found in the LICENSE file in the root of the source

	6 @// tree. An additional intellectual property rights grant can be found

	7 @// in the file PATENTS. All contributing project authors may

	8 @// be found in the AUTHORS file in the root of the source tree.

	9 @//

	10 @// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s

	11 @// to support float instead of SC32.

	12 @//

	13

	14 @//

	15 @// Description:

	16 @// Compute an inverse FFT for a complex signal

	17 @//

	18 @//

	19

	20

	21 @// Include standard headers

	22

	23 #include "dl/api/armCOMM_s.h"

	24 #include "dl/api/omxtypes_s.h"

	25

	26

	27 @// Import symbols required from other files

	28 @// (For example tables)

	29

	30 .extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe

	31 .extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe

	32 .extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

	33 .extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe

	34 .extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe

	35 .extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe

	36

	37

	38 @// Set debugging level

	39 @//DEBUG_ON SETL {TRUE}

	40

	41

	42

	43 @// Guarding implementation by the processor name

	44

	45

	46

	47 @// Guarding implementation by the processor name

	48

	49 @// Import symbols required from other files

	50 @// (For example tables)

	51 .extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe

	52 .extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe

	53

	54

	55 @// Input registers

	56 @// NOTE: scale (r3) is not referenced as an operand by the function body shown in this file.

	57 #define pSrc r0

	58 #define pDst r1

	59 #define pFFTSpec r2

	60 #define scale r3

	61

	62

	63 @// Output registers (result shares r0 with pSrc)

	64 #define result r0

	65 @// argTwiddle reuses r1 (pDst) and argDst reuses r2 (pFFTSpec).

	66 @// Local scratch registers

	67 @// Several names alias one register (r4 is argScale, tmpOrder, pTwiddle and x0r), so only one meaning is live at a time.

	68 #define argTwiddle r1

	69 #define argDst r2

	70 #define argScale r4

	71 #define tmpOrder r4

	72 #define pTwiddle r4

	73 #define pOut r5

	74 #define subFFTSize r7

	75 #define subFFTNum r6

	76 #define N r6

	77 #define order r14

	78 #define diff r9

	79 @// Total number of radix stages required to complete the FFT

	80 #define count r8

	81 #define x0r r4

	82 #define x0i r5

	83 #define diffMinusOne r2

	84 #define round r3

	85 @// order (above) and zero (below) both name r14, the ARM link register, which every BL overwrites.

	86 #define pOut1 r2

	87 #define size r7

	88 #define step r8

	89 #define step1 r9

	90 #define twStep r10

	91 #define pTwiddleTmp r11

	92 #define argTwiddle1 r12

	93 #define zero r14

	94

	95 @// Neon registers

	96 @// As with the core registers, many names below overlay the same D register.

	97 #define dX0 D0.F32

	98 #define dShift D1.F32

	99 #define dX1 D1.F32

	100 #define dY0 D2.F32

	101 #define dY1 D3.F32

	102 #define dX0r D0.F32

	103 #define dX0i D1.F32

	104 #define dX1r D2.F32

	105 #define dX1i D3.F32

	106 #define dW0r D4.F32

	107 #define dW0i D5.F32

	108 #define dW1r D6.F32

	109 #define dW1i D7.F32

	110 #define dT0 D8.F32

	111 #define dT1 D9.F32

	112 #define dT2 D10.F32

	113 #define dT3 D11.F32

	114 #define qT0 d12.F32

	115 #define qT1 d14.F32

	116 #define qT2 d16.F32

	117 #define qT3 d18.F32

	118 #define dY0r D4.F32

	119 #define dY0i D5.F32

	120 #define dY1r D6.F32

	121 #define dY1i D7.F32

	122 #define dzero D20.F32

	123 @// NOTE(review): qT0-qT3 above are named like Q registers but are defined as D registers (d12, d14, d16, d18) - confirm intent.

	124 #define dY2 D4.F32

	125 #define dY3 D5.F32

	126 #define dW0 D6.F32

	127 #define dW1 D7.F32

	128 #define dW0Tmp D10.F32

	129 #define dW1Neg D11.F32

	130 @// Scalar registers used to build the 1/N scale factor: sN (S0) and fN (S1) overlay D0.

	131 #define sN S0.S32

	132 #define fN S1.F32

	133 @// one must be the same as dScale[0]! (S4 is the low half of D2)

	134 #define dScale D2.F32

	135 #define one S4.F32

	136

	137

	138 @// Allocate stack memory required by the function: the slot complexFFTSize is written with M_STR and read back with M_LDR below

	139 M_ALLOC4 complexFFTSize, 4

	140

	141 @// Write function header (NOTE(review): r11 and d15 are presumably the highest registers M_START saves - confirm in armCOMM_s.h)

	142 M_START omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15

	143

	144 @ Structure offsets for the FFTSpec

	145 .set ARMsFFTSpec_N, 0

	146 .set ARMsFFTSpec_pBitRev, 4

	147 .set ARMsFFTSpec_pTwiddle, 8

	148 .set ARMsFFTSpec_pBuf, 12

	149

	150 @// Define stack arguments

	151

	152 @// Read the size N from the spec structure (the log2 is taken later, at complexIFFT)

	153 LDR N, [pFFTSpec, #ARMsFFTSpec_N]

	154

	155 @// Read other structure parameters

	156 LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]

	157 LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

	158

	159 @// N <= 1: treat separately - copy a single 32-bit lane from pSrc to pDst and return

	160 CMP N,#1

	161 BGT sizeGreaterThanOne

	162 VLD1 dX0[0],[pSrc]

	163 VST1 dX0[0],[pDst]

	164

	165 B End

	166

	167 sizeGreaterThanOne:

	168

	169 @// Call the preTwiddle Radix2 stage before doing the complex IFFT

	170 @// NOTE(review): the routine is external; which registers it reads and writes is not visible in this file.

	171

	172 BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe

	173

	174

	175 complexIFFT:

	176

	177 ASR N,N,#1 @// N/2 point complex IFFT

	178 M_STR N, complexFFTSize @ Save N for scaling later

	179 ADD pSrc,pOut,N,LSL #3 @// pSrc = pOut + 8*N (set pSrc as pOut1)

	180

	181 CLZ order,N @// N = 2^order

	182 RSB order,order,#31 @// order = 31 - clz(N)

	183 MOV subFFTSize,#1

	184 @//MOV subFFTNum,N

	185 @// Dispatch on order: > 3 uses the radix-4/8 path, 1..3 the radix-2 path, 0 is a plain copy

	186 CMP order,#3

	187 BGT orderGreaterthan3 @// order > 3

	188

	189 CMP order,#1

	190 BGE orderGreaterthan0 @// order > 0

	191 @// order = 0: copy one D register (two floats) from pSrc to pDst, then scale in place at pDst

	192 VLD1 dX0,[pSrc]

	193 VST1 dX0,[pDst]

	194 MOV pSrc,pDst

	195 BLT FFTEnd @// LT from CMP order,#1 still holds (no flag-setting instruction in between)

	196

	197 orderGreaterthan0:

	198 @// set the buffers appropriately for various orders

	199 CMP order,#2

	200 MOVNE argDst,pDst

	201 MOVEQ argDst,pOut

	202 @// Pass the first stage destination in RN5

	203 MOVEQ pOut,pDst

	204 MOV argTwiddle,pTwiddle @// argTwiddle is r1, so pDst is overwritten here; flags from CMP order,#2 are kept

	205

	206 BGE orderGreaterthan1

	207 BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1

	208 B FFTEnd

	209

	210 orderGreaterthan1:

	211 MOV tmpOrder,order @// tmpOrder = RN 4; order lives in r14 (LR), which the BL below overwrites

	212 BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe

	213 CMP tmpOrder,#2 @// NOTE(review): relies on r4 surviving the call above - confirm against the callee

	214 BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe

	215 BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe

	216 B FFTEnd

	217

	218

	219 orderGreaterthan3:

	220 specialScaleCase:

	221

	222 @// Set input args to fft stages

	223 TST order, #2

	224 MOVNE argDst,pDst

	225 MOVEQ argDst,pOut

	226 @// Pass the first stage destination in RN5

	227 MOVEQ pOut,pDst

	228 MOV argTwiddle,pTwiddle

	229

	230 @//check for even or odd order

	231 @// NOTE: The following combination of BL's would work fine even though

	232 @// the first BL would corrupt the flags. This is because the end of

	233 @// the "grpZeroSetLoop" loop inside

	234 @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag

	235 @// to EQ

	236

	237 TST order,#0x00000001

	238 BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe

	239 BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

	240

	241 CMP subFFTNum,#4

	242 BLT FFTEnd

	243 @// Loop: run Radix4 stages until subFFTNum equals 4, then run the last-stage (ls) routine.

	244 @// NOTE(review): subFFTNum is presumably updated by each called stage - confirm against the callees.

	245 unscaledRadix4Loop:

	246 BEQ lastStageUnscaledRadix4

	247 BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe

	248 CMP subFFTNum,#4

	249 B unscaledRadix4Loop

	250

	251 lastStageUnscaledRadix4:

	252 BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe

	253 B FFTEnd

	254

	255 FFTEnd: @// Does only the scaling

	256 @ Scale inverse FFT result by 1/N, where N is the value saved in complexFFTSize (half the size read from the spec)

	257

	258 M_LDR N, complexFFTSize

	259 VMOV sN,N

	260 VCVT fN, sN @ fn = fftSize, as a float

	261 VMOV one, 1.0

	262 VDIV one, one, fN @ one = dScale[0] = 1 / fftSize

	263

	264

	265 @// N = subFFTSize ; dataptr = pDst

	266 scaleFFTData:

	267 VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer

	268 SUBS subFFTSize,subFFTSize,#1

	269 VMUL dX0, dX0, dScale[0]

	270 VST1 {dX0},[pSrc]!

	271 @// One D register (two floats) is scaled per iteration; BGT uses the flags set by SUBS above

	272 BGT scaleFFTData

	273

	274 End:

	275 @// Set return value

	276 MOV result, #OMX_Sts_NoErr

	277

	278 @// Write function tail

	279 M_END

	280

	281

	282

	283 .end

OLD	NEW