third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S - Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED

Side by Side Diff: third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S

Issue 12317152: Add openmax dl routines for review. MUST NOT BE LANDED (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S ('K') | « third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S ('k') | third_party/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S » ('j') | third_party/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 @//

	2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

	3 @//

	4 @// Use of this source code is governed by a BSD-style license

	5 @// that can be found in the LICENSE file in the root of the source

	6 @// tree. An additional intellectual property rights grant can be found

	7 @// in the file PATENTS. All contributing project authors may

	8 @// be found in the AUTHORS file in the root of the source tree.

	9 @//

	10 @// This file was originally licensed as follows. It has been

	11 @// relicensed with permission from the copyright holders.

	12 @//

	13

	14 @//

	15 @// File Name: armSP_FFT_CToC_SC32_Radix4_unsafe_s.s

	16 @// OpenMAX DL: v1.0.2

	17 @// Last Modified Revision: 7767

	18 @// Last Modified Date: Thu, 27 Sep 2007

	19 @//

	20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.

	21 @//

	22 @//

	23 @//

	24 @// Description:

	25 @// Compute a Radix 4 FFT stage for a N point complex signal

	26 @//

	27

	28

	29

	30

	31 @// Include standard headers

	32

	33 #include "dl/api/armCOMM_s.h"

	34 #include "dl/api/omxtypes_s.h"

	35

	36

	37 @// Import symbols required from other files

	38 @// (For example tables)

	39

	40

	41

	42

	43 @// Set debugging level

	44 @//DEBUG_ON SETL {TRUE}

	45

	46

	47

	48 @// Guarding implementation by the processor name

	49

	50

	51

	52

	53 @// Guarding implementation by the processor name

	54

	55

	56 @// Import symbols required from other files

	57 @// (For example tables)

	58

	59

	60 @// Input registers: three pointers (r0-r2) plus the sub-FFT count and size (r6, r7)

	61

	62 #define pSrc r0

	63 #define pDst r2

	64 #define pTwiddle r1

	65 #define subFFTNum r6

	66 #define subFFTSize r7

	67

	68

	69

	70 @// Output Registers

	71

	72

	73 @// Local scratch registers. t1 shares r3 with grpCount, and setCount lives in r14 (the link register).

	74

	75 #define grpCount r3

	76 #define pointStep r4

	77 #define outPointStep r5

	78 #define stepTwiddle r12

	79 #define setCount r14

	80 #define srcStep r8

	81 #define setStep r9

	82 #define dstStep r10

	83 #define twStep r11

	84 #define t1 r3

	85

	86 @// Neon registers. Architecturally Qn overlays the pair D(2n), D(2n+1), so several names below share storage.

	87 @// NOTE(review): D8-D15 are used below and are callee-saved under AAPCS - confirm who preserves them.

	88 #define dW1 D0.S32

	89 #define dW2 D1.S32

	90 #define dW3 D2.S32

	91

	92 #define dXr0 D4.S32

	93 #define dXi0 D5.S32

	94 #define dXr1 D6.S32

	95 #define dXi1 D7.S32

	96 #define dXr2 D8.S32

	97 #define dXi2 D9.S32

	98 #define dXr3 D10.S32

	99 #define dXi3 D11.S32

	100 #define dYr0 D12.S32

	101 #define dYi0 D13.S32

	102 #define dYr1 D14.S32

	103 #define dYi1 D15.S32

	104 #define dYr2 D16.S32

	105 #define dYi2 D17.S32

	106 #define dYr3 D18.S32

	107 #define dYi3 D19.S32

	108 #define qT0 Q8.S64

	109 #define qT1 Q9.S64

	110 #define qT2 Q6.S64

	111 #define qT3 Q7.S64

	112 @// qT0/qT1 (Q8/Q9) and qT2/qT3 (Q6/Q7) above are 64-bit views of the registers also named qY2/qY3 and qY0/qY1.

	113 #define dZr0 D20.S32

	114 #define dZi0 D21.S32

	115 #define dZr1 D22.S32

	116 #define dZi1 D23.S32

	117 #define dZr2 D24.S32

	118 #define dZi2 D25.S32

	119 #define dZr3 D26.S32

	120 #define dZi3 D27.S32

	121 @// Q views of the D pairs above: qX0 = {dXr0,dXi0}, qYn = {dYrn,dYin}, qZn = {dZrn,dZin}.

	122 #define qY0 Q6.S32

	123 #define qY1 Q7.S32

	124 #define qY2 Q8.S32

	125 #define qY3 Q9.S32

	126 #define qX0 Q2.S32

	127 #define qZ0 Q10.S32

	128 #define qZ1 Q11.S32

	129 #define qZ2 Q12.S32

	130 #define qZ3 Q13.S32

	131

	132

	133 .MACRO FFTSTAGE scaled, inverse , name

	134 @// One radix-4 stage. scaled=TRUE selects the halving VHADD/VHSUB butterflies, inverse=TRUE the alternate VMLAL/VMLSL pairing in the twiddle multiply, and name suffixes the loop labels.

	135 @// Define stack arguments

	136 @// On exit subFFTNum has been shifted right by 2 and subFFTSize shifted left by 2,

	137 @// and pSrc and pDst have been rewound and exchanged (see the end of the macro).

	138 @// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs

	139

	140 LSL grpCount,subFFTSize,#2 @// grpCount = 4*subFFTSize

	141 LSR subFFTNum,subFFTNum,#2 @// subFFTNum /= 4

	142 MOV subFFTSize,grpCount @// subFFTSize = 4 * its value on entry

	143

	144 VLD1 dW1,[pTwiddle] @//[wi | wr]

	145 @// pT0+1 increments pT0 by 8 bytes

	146 @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes

	147 MOV pointStep,subFFTNum,LSL #1

	148

	149

	150 @// pOut0+1 increments pOut0 by 8 bytes

	151 @// pOut0+outPointStep == increment of 8*outPointStep bytes = 2*size bytes

	152

	153 MOV stepTwiddle,#0

	154 VLD1 dW2,[pTwiddle] @//[wi | wr]

	155 SMULBB outPointStep,grpCount,pointStep @// signed 16x16 multiply: only the low halfword of each operand is used

	156 LSL pointStep,pointStep,#2 @// 2*grpSize

	157

	158 VLD1 dW3,[pTwiddle] @//[wi | wr]

	159 MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep

	160 ADD setStep,srcStep,pointStep @// setStep = 3*pointStep

	161 @//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16

	162 RSB setStep,setStep,#0 @// setStep = - 3*pointStep

	163 SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16

	164

	165 MOV dstStep,outPointStep,LSL #1

	166 ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep

	167 RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16

	168

	169

	170

	171 grpLoop\name :

	172

	173 VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]

	174 ADD stepTwiddle,stepTwiddle,pointStep

	175 VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]

	176 ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point

	177 VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]

	178 MOV twStep,stepTwiddle,LSL #2

	179

	180 VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc for the next set

	181 SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle

	182

	183 MOV setCount,pointStep,LSR #3 @// setCount = pointStep/8

	184 ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set

	185 ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set

	186

	187

	188 @// Loop on the sets

	189

	190 setLoop\name :

	191

	192

	193

	194 SUBS setCount,setCount,#2 @// decrement the loop counter (flags are consumed by the BGT at the bottom of the loop)

	195

	196 .ifeqs "\inverse", "TRUE"

	197 VMULL qT0,dXr1,dW1[0]

	198 VMLAL qT0,dXi1,dW1[1] @// real part

	199 VMULL qT1,dXi1,dW1[0]

	200 VMLSL qT1,dXr1,dW1[1] @// imag part

	201

	202 .else

	203 VMULL qT0,dXr1,dW1[0]

	204 VMLSL qT0,dXi1,dW1[1] @// real part

	205 VMULL qT1,dXi1,dW1[0]

	206 VMLAL qT1,dXr1,dW1[1] @// imag part

	207

	208 .endif

	209

	210 VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1] for next iteration

	211

	212 .ifeqs "\inverse", "TRUE"

	213 VMULL qT2,dXr2,dW2[0]

	214 VMLAL qT2,dXi2,dW2[1] @// real part

	215 VMULL qT3,dXi2,dW2[0]

	216 VMLSL qT3,dXr2,dW2[1] @// imag part

	217

	218 .else

	219 VMULL qT2,dXr2,dW2[0]

	220 VMLSL qT2,dXi2,dW2[1] @// real part

	221 VMULL qT3,dXi2,dW2[0]

	222 VMLAL qT3,dXr2,dW2[1] @// imag part

	223

	224 .endif

	225

	226 VRSHRN dZr1,qT0,#31 @// rounding narrow: 64-bit sum >> 31 back to 32 bits

	227 VRSHRN dZi1,qT1,#31

	228 VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2] for next iteration

	229

	230

	231 .ifeqs "\inverse", "TRUE"

	232 VMULL qT0,dXr3,dW3[0]

	233 VMLAL qT0,dXi3,dW3[1] @// real part

	234 VMULL qT1,dXi3,dW3[0]

	235 VMLSL qT1,dXr3,dW3[1] @// imag part

	236

	237 .else

	238 VMULL qT0,dXr3,dW3[0]

	239 VMLSL qT0,dXi3,dW3[1] @// real part

	240 VMULL qT1,dXi3,dW3[0]

	241 VMLAL qT1,dXr3,dW3[1] @// imag part

	242

	243 .endif

	244

	245 VRSHRN dZr2,qT2,#31

	246 VRSHRN dZi2,qT3,#31

	247

	248

	249 VRSHRN dZr3,qT0,#31

	250 VRSHRN dZi3,qT1,#31

	251 VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc to data[0]

	252

	253 .ifeqs "\scaled", "TRUE"

	254

	255 @// finish first stage of 4 point FFT

	256 VHADD qY0,qX0,qZ2

	257 VHSUB qY2,qX0,qZ2

	258

	259 VLD2 {dXr0,dXi0},[pSrc]! @// data[0] for next iteration

	260 VHADD qY1,qZ1,qZ3

	261 VHSUB qY3,qZ1,qZ3

	262

	263 @// finish second stage of 4 point FFT

	264

	265 VHSUB qZ0,qY2,qY1

	266

	267

	268 .ifeqs "\inverse", "TRUE"

	269

	270 VHADD dZr3,dYr0,dYi3

	271 VST2 {dZr0,dZi0},[pDst :128],outPointStep

	272 VHSUB dZi3,dYi0,dYr3

	273

	274 VHADD qZ2,qY2,qY1

	275 VST2 {dZr3,dZi3},[pDst :128],outPointStep

	276

	277 VHSUB dZr1,dYr0,dYi3

	278 VST2 {dZr2,dZi2},[pDst :128],outPointStep

	279 VHADD dZi1,dYi0,dYr3

	280

	281 VST2 {dZr1,dZi1},[pDst :128],dstStep

	282

	283

	284 .else

	285

	286 VHSUB dZr1,dYr0,dYi3

	287 VST2 {dZr0,dZi0},[pDst :128],outPointStep

	288 VHADD dZi1,dYi0,dYr3

	289

	290 VHADD qZ2,qY2,qY1

	291 VST2 {dZr1,dZi1},[pDst :128],outPointStep

	292

	293 VHADD dZr3,dYr0,dYi3

	294 VST2 {dZr2,dZi2},[pDst :128],outPointStep

	295 VHSUB dZi3,dYi0,dYr3

	296

	297 VST2 {dZr3,dZi3},[pDst :128],dstStep

	298

	299

	300 .endif

	301

	302

	303 .else

	304

	305 @// finish first stage of 4 point FFT

	306 VADD qY0,qX0,qZ2

	307 VSUB qY2,qX0,qZ2

	308

	309 VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0] for next iteration

	310 VADD qY1,qZ1,qZ3

	311 VSUB qY3,qZ1,qZ3

	312

	313 @// finish second stage of 4 point FFT

	314

	315 VSUB qZ0,qY2,qY1

	316

	317

	318 .ifeqs "\inverse", "TRUE"

	319

	320 VADD dZr3,dYr0,dYi3

	321 VST2 {dZr0,dZi0},[pDst :128],outPointStep

	322 VSUB dZi3,dYi0,dYr3

	323

	324 VADD qZ2,qY2,qY1

	325 VST2 {dZr3,dZi3},[pDst :128],outPointStep

	326

	327 VSUB dZr1,dYr0,dYi3

	328 VST2 {dZr2,dZi2},[pDst :128],outPointStep

	329 VADD dZi1,dYi0,dYr3

	330

	331 VST2 {dZr1,dZi1},[pDst :128],dstStep

	332

	333

	334 .else

	335

	336 VSUB dZr1,dYr0,dYi3

	337 VST2 {dZr0,dZi0},[pDst :128],outPointStep

	338 VADD dZi1,dYi0,dYr3

	339

	340 VADD qZ2,qY2,qY1

	341 VST2 {dZr1,dZi1},[pDst :128],outPointStep

	342

	343 VADD dZr3,dYr0,dYi3

	344 VST2 {dZr2,dZi2},[pDst :128],outPointStep

	345 VSUB dZi3,dYi0,dYr3

	346

	347 VST2 {dZr3,dZi3},[pDst :128],dstStep

	348

	349

	350 .endif

	351

	352 .endif

	353

	354 ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set

	355 BGT setLoop\name @// flags still come from the SUBS at the top of setLoop (nothing in between sets them)

	356

	357

	358 VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]

	359 SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4

	360 VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]

	361 ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp

	362 VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]

	363 BGT grpLoop\name @// flags from the SUBS on grpCount above

	364

	365

	366 @// Reset and Swap pSrc and pDst for the next stage

	367 MOV t1,pDst @// t1 is r3, which grpCount no longer needs

	368 SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 2*size; pSrc -= 8*size bytes

	369 SUB pSrc,t1,outPointStep

	370

	371

	372 .endm

	373

	374 @// Forward, unscaled stage: FFTSTAGE with scaled=FALSE, inverse=FALSE, loop labels suffixed FWD. NOTE(review): M_START/M_END are not defined in this file (presumably from armCOMM_s.h) - confirm what the r4 argument saves.

	375 M_START armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe,r4

	376 FFTSTAGE "FALSE","FALSE",FWD

	377 M_END

	378

	379 @// Inverse, unscaled stage: FFTSTAGE with scaled=FALSE, inverse=TRUE, loop labels suffixed INV.

	380 M_START armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe,r4

	381 FFTSTAGE "FALSE","TRUE",INV

	382 M_END

	383

	384 @// Forward, scaled stage: FFTSTAGE with scaled=TRUE (halving VHADD/VHSUB butterflies), inverse=FALSE, loop labels suffixed FWDSFS.

	385 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4

	386 FFTSTAGE "TRUE","FALSE",FWDSFS

	387 M_END

	388

	389 @// Inverse, scaled stage: FFTSTAGE with scaled=TRUE (halving VHADD/VHSUB butterflies), inverse=TRUE, loop labels suffixed INVSFS.

	390 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4

	391 FFTSTAGE "TRUE","TRUE",INVSFS

	392 M_END

	393

	394

	395 .end

OLD	NEW