gcc/gmp/mpn/alpha/ev6/sub_n.asm - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/gmp/mpn/alpha/ev6/sub_n.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/gmp/mpn/alpha/ev6/sub_n.asm

diff --git a/gcc/gmp/mpn/alpha/ev6/sub_n.asm b/gcc/gmp/mpn/alpha/ev6/sub_n.asm

deleted file mode 100644

index f23ad44a15a7619e691526b503199a9eb2bdd306..0000000000000000000000000000000000000000

--- a/gcc/gmp/mpn/alpha/ev6/sub_n.asm

+++ /dev/null

@@ -1,272 +0,0 @@

-dnl Alpha ev6 mpn_sub_n -- Subtract two limb vectors of the same length > 0

-dnl and store difference in a third limb vector.

-dnl This file is part of the GNU MP Library.

-dnl The GNU MP Library is free software; you can redistribute it and/or modify

-dnl it under the terms of the GNU Lesser General Public License as published

-dnl by the Free Software Foundation; either version 3 of the License, or (at

-dnl your option) any later version.

-dnl The GNU MP Library is distributed in the hope that it will be useful, but

-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public

-dnl License for more details.

-dnl You should have received a copy of the GNU Lesser General Public License

-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

-include(`../config.m4')

-C cycles/limb

-C EV4: ?

-C EV5: 5.4

-C EV6: 2.125

-C INPUT PARAMETERS

-C rp r16

-C up r17

-C vp r18

-C n r19

-C cy r20 (for mpn_add_nc)

-C TODO

-C Finish cleaning up cy registers r22, r23 (make them use cy0/cy1)

-C Use multi-pronged feed-in.

-C Perform additional micro-tuning

-C This code was written in cooperation with ev6 pipeline expert Steve Root.

-C Pair loads and stores where possible

-C Store pairs oct-aligned where possible (didn't need it here)

-C Stores are delayed every third cycle

-C Loads and stores are delayed by fills

-C U stays still, put code there where possible (note alternation of U1 and U0)

-C L moves because of loads and stores

-C Note dampers in L to limit damage

-C This odd-looking optimization expects that were having random bits in our

-C data, so that a pure zero result is unlikely. so we penalize the unlikely

-C case to help the common case.

-define(`u0', `r0') define(`u1', `r3')

-define(`v0', `r1') define(`v1', `r4')

-define(`cy0', `r20') define(`cy1', `r21')

-MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)

-ASM_START()

-PROLOGUE(mpn_sub_nc)

- br r31, $entry

-EPILOGUE()

-PROLOGUE(mpn_sub_n)

- bis r31, r31, cy0 C clear carry in

-$entry: cmpult r19, 5, r22 C L1 move counter

- ldq u1, 0(r17) C L0 get next ones

- ldq v1, 0(r18) C L1

- bne r22, $Lsmall

- ldq u0, 8(r17) C L0 get next ones

- ldq v0, 8(r18) C L1

- subq u1, v1, r5 C U0 sub two data

- cmpult u1, v1, r23 C U0 did it borrow

- ldq u1, 16(r17) C L0 get next ones

- ldq v1, 16(r18) C L1

- subq u0, v0, r8 C U1 sub two data

- subq r5, cy0, r24 C U0 borrow in

- cmpult u0, v0, r22 C U1 did it borrow

- beq r5, $fix5f C U0 fix exact zero

-$ret5f: ldq u0, 24(r17) C L0 get next ones

- ldq v0, 24(r18) C L1

- subq r8, r23, r25 C U1 borrow from last

- subq u1, v1, r7 C U0 sub two data

- beq r8, $fix6f C U1 fix exact zero

-$ret6f: cmpult u1, v1, r23 C U0 did it borrow

- ldq u1, 32(r17) C L0 get next ones

- ldq v1, 32(r18) C L1

- lda r17, 40(r17) C L0 move pointer

- lda r18, 40(r18) C L1 move pointer

- lda r16, -8(r16)

- lda r19, -13(r19) C L1 move counter

- blt r19, $Lend C U1 loop control

-C Main loop. 8-way unrolled.

- ALIGN(16)

-$Loop: subq u0, v0, r2 C U1 sub two data

- stq r24, 8(r16) C L0 put an answer

- subq r7, r22, r24 C U0 borrow from last

- stq r25, 16(r16) C L1 pair

- cmpult u0, v0, cy1 C U1 did it borrow

- beq r7, $fix7 C U0 fix exact 0

-$ret7: ldq u0, 0(r17) C L0 get next ones

- ldq v0, 0(r18) C L1

- bis r31, r31, r31 C L damp out

- subq r2, r23, r25 C U1 borrow from last

- bis r31, r31, r31 C L moves in L !

- subq u1, v1, r5 C U0 sub two data

- beq r2, $fix0 C U1 fix exact zero

-$ret0: cmpult u1, v1, cy0 C U0 did it borrow

- ldq u1, 8(r17) C L0 get next ones

- ldq v1, 8(r18) C L1

- subq u0, v0, r8 C U1 sub two data

- stq r24, 24(r16) C L0 store pair

- subq r5, cy1, r24 C U0 borrow from last

- stq r25, 32(r16) C L1

- cmpult u0, v0, r22 C U1 did it borrow

- beq r5, $fix1 C U0 fix exact zero

-$ret1: ldq u0, 16(r17) C L0 get next ones

- ldq v0, 16(r18) C L1

- lda r16, 64(r16) C L0 move pointer

- subq r8, cy0, r25 C U1 borrow from last

- lda r19, -8(r19) C L1 move counter

- subq u1, v1, r7 C U0 sub two data

- beq r8, $fix2 C U1 fix exact zero

-$ret2: cmpult u1, v1, r23 C U0 did it borrow

- ldq u1, 24(r17) C L0 get next ones

- ldq v1, 24(r18) C L1

- subq u0, v0, r2 C U1 sub two data

- stq r24, -24(r16) C L0 put an answer

- subq r7, r22, r24 C U0 borrow from last

- stq r25, -16(r16) C L1 pair

- cmpult u0, v0, cy1 C U1 did it borrow

- beq r7, $fix3 C U0 fix exact 0

-$ret3: ldq u0, 32(r17) C L0 get next ones

- ldq v0, 32(r18) C L1

- bis r31, r31, r31 C L damp out

- subq r2, r23, r25 C U1 borrow from last

- bis r31, r31, r31 C L moves in L !

- subq u1, v1, r5 C U0 sub two data

- beq r2, $fix4 C U1 fix exact zero

-$ret4: cmpult u1, v1, cy0 C U0 did it borrow

- ldq u1, 40(r17) C L0 get next ones

- ldq v1, 40(r18) C L1

- subq u0, v0, r8 C U1 sub two data

- stq r24, -8(r16) C L0 store pair

- subq r5, cy1, r24 C U0 borrow from last

- stq r25, 0(r16) C L1

- cmpult u0, v0, r22 C U1 did it borrow

- beq r5, $fix5 C U0 fix exact zero

-$ret5: ldq u0, 48(r17) C L0 get next ones

- ldq v0, 48(r18) C L1

- ldl r31, 256(r17) C L0 prefetch

- subq r8, cy0, r25 C U1 borrow from last

- ldl r31, 256(r18) C L1 prefetch

- subq u1, v1, r7 C U0 sub two data

- beq r8, $fix6 C U1 fix exact zero

-$ret6: cmpult u1, v1, r23 C U0 did it borrow

- ldq u1, 56(r17) C L0 get next ones

- ldq v1, 56(r18) C L1

- lda r17, 64(r17) C L0 move pointer

- bis r31, r31, r31 C U

- lda r18, 64(r18) C L1 move pointer

- bge r19, $Loop C U1 loop control

-C ==== main loop end

-$Lend: subq u0, v0, r2 C U1 sub two data

- stq r24, 8(r16) C L0 put an answer

- subq r7, r22, r24 C U0 borrow from last

- stq r25, 16(r16) C L1 pair

- cmpult u0, v0, cy1 C U1 did it borrow

- beq r7, $fix7c C U0 fix exact 0

-$ret7c: subq r2, r23, r25 C U1 borrow from last

- subq u1, v1, r5 C U0 sub two data

- beq r2, $fix0c C U1 fix exact zero

-$ret0c: cmpult u1, v1, cy0 C U0 did it borrow

- stq r24, 24(r16) C L0 store pair

- subq r5, cy1, r24 C U0 borrow from last

- stq r25, 32(r16) C L1

- beq r5, $fix1c C U0 fix exact zero

-$ret1c: stq r24, 40(r16) C L0 put an answer

- lda r16, 48(r16) C L0 move pointer

- lda r19, 8(r19)

- beq r19, $Lret

- ldq u1, 0(r17)

- ldq v1, 0(r18)

-$Lsmall:

- lda r19, -1(r19)

- beq r19, $Lend0

- ALIGN(8)

-$Loop0: subq u1, v1, r2 C main sub

- cmpult u1, v1, r8 C compute bw from last sub

- ldq u1, 8(r17)

- ldq v1, 8(r18)

- subq r2, cy0, r5 C borrow sub

- lda r17, 8(r17)

- lda r18, 8(r18)

- stq r5, 0(r16)

- cmpult r2, cy0, cy0 C compute bw from last sub

- lda r19, -1(r19) C decr loop cnt

- bis r8, cy0, cy0 C combine bw from the two subs

- lda r16, 8(r16)

- bne r19, $Loop0

-$Lend0: subq u1, v1, r2 C main sub

- subq r2, cy0, r5 C borrow sub

- cmpult u1, v1, r8 C compute bw from last sub

- cmpult r2, cy0, cy0 C compute bw from last sub

- stq r5, 0(r16)

- bis r8, cy0, r0 C combine bw from the two subs

- ret r31,(r26),1

- ALIGN(8)

-$Lret: lda r0, 0(cy0) C copy borrow into return register

- ret r31,(r26),1

-$fix5f: bis r23, cy0, r23 C bring forward borrow

- br r31, $ret5f

-$fix6f: bis r22, r23, r22 C bring forward borrow

- br r31, $ret6f

-$fix0: bis cy1, r23, cy1 C bring forward borrow

- br r31, $ret0

-$fix1: bis cy0, cy1, cy0 C bring forward borrow

- br r31, $ret1

-$fix2: bis r22, cy0, r22 C bring forward borrow

- br r31, $ret2

-$fix3: bis r23, r22, r23 C bring forward borrow

- br r31, $ret3

-$fix4: bis cy1, r23, cy1 C bring forward borrow

- br r31, $ret4

-$fix5: bis cy1, cy0, cy0 C bring forward borrow

- br r31, $ret5

-$fix6: bis r22, cy0, r22 C bring forward borrow

- br r31, $ret6

-$fix7: bis r23, r22, r23 C bring forward borrow

- br r31, $ret7

-$fix0c: bis cy1, r23, cy1 C bring forward borrow

- br r31, $ret0c

-$fix1c: bis cy0, cy1, cy0 C bring forward borrow

- br r31, $ret1c

-$fix7c: bis r23, r22, r23 C bring forward borrow

- br r31, $ret7c

-EPILOGUE()

-ASM_END()

« no previous file with comments | « gcc/gmp/mpn/alpha/ev6/nails/gmp-mparam.h ('k') | gcc/gmp/mpn/alpha/ev67/gcd_1.asm » ('j') | no next file with comments »