Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Unified Diff: gcc/gmp/mpn/alpha/diveby3.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gcc/gmp/mpn/alpha/copyi.asm ('k') | gcc/gmp/mpn/alpha/divrem_2.asm » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gcc/gmp/mpn/alpha/diveby3.asm
diff --git a/gcc/gmp/mpn/alpha/diveby3.asm b/gcc/gmp/mpn/alpha/diveby3.asm
deleted file mode 100644
index e2d1c6beee9e9c6f2af8fb891133616f99342f89..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/alpha/diveby3.asm
+++ /dev/null
@@ -1,322 +0,0 @@
-dnl Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.
-
-dnl Copyright 2004, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: 22
-C EV5: 11.5
-C EV6: 6.3
-
-C TODO
-C * Trim this to 6.0 c/l for ev6.
-C * Write special ev5 version, should reach 9 c/l, and could be smaller.
-C * Try prefetch for destination, using lds.
-C * Improve feed-in code, by moving initial mulq earlier; make initial load
-C to u0/u0 to save some copying.
-C * Combine u0 and u2, u1 and u3.
-
-C INPUT PARAMETERS
-define(`rp', `r16')
-define(`up', `r17')
-define(`n', `r18')
-define(`cy', `r19')
-
-ASM_START()
-
-DATASTART(L(LC))
- .quad 0xAAAAAAAAAAAAAAAB
- .quad 0x5555555555555555
- .quad 0xAAAAAAAAAAAAAAAA
-DATAEND()
-
-define(`xAAAAAAAAAAAAAAAB', `r20')
-define(`x5555555555555555', `r21')
-define(`xAAAAAAAAAAAAAAAA', `r22')
-define(`u0', `r0') define(`u1', `r1')
-define(`u2', `r2') define(`u3', `r3')
-define(`l0', `r25') define(`x', `r8')
-define(`q0', `r4') define(`q1', `r5')
-define(`p6', `r6') define(`p7', `r7')
-define(`t0', `r23') define(`t1', `r24')
-define(`cymask',`r28')
-
-
-PROLOGUE(mpn_divexact_by3c,gp)
-
- ldq r28, 0(up) C load first limb early
-
-C Put magic constants in registers
- lda r0, L(LC)
- ldq xAAAAAAAAAAAAAAAB, 0(r0)
- ldq x5555555555555555, 8(r0)
- ldq xAAAAAAAAAAAAAAAA, 16(r0)
-
-C Compute initial l0 value
- cmpeq cy, 1, p6
- cmpeq cy, 2, p7
- negq p6, p6
- and p6, x5555555555555555, l0
- cmovne p7, xAAAAAAAAAAAAAAAA, l0
-
-C Feed-in depending on (n mod 4)
- and n, 3, r8
- lda n, -3(n)
- cmpeq r8, 1, r4
- cmpeq r8, 2, r5
- bne r4, $Lb01
- bne r5, $Lb10
- beq r8, $Lb00
-
-$Lb11: ldq u3, 8(up)
- lda up, -24(up)
- lda rp, -24(rp)
- mulq r28, xAAAAAAAAAAAAAAAB, q0
- mov r28, u2
- br r31, $L11
-
-$Lb00: ldq u2, 8(up)
- lda up, -16(up)
- lda rp, -16(rp)
- mulq r28, xAAAAAAAAAAAAAAAB, q1
- mov r28, u1
- br r31, $L00
-
-$Lb01: lda rp, -8(rp)
- mulq r28, xAAAAAAAAAAAAAAAB, q0
- mov r28, u0
- blt n, $Lcj1
- ldq u1, 8(up)
- lda up, -8(up)
- br r31, $L01
-
-$Lb10: ldq u0, 8(up)
- mulq r28, xAAAAAAAAAAAAAAAB, q1
- mov r28, u3
- blt n, $Lend
-
- ALIGN(16)
-$Ltop:
-C 0
- cmpult u3, cy, cy C L0
- mulq u0, xAAAAAAAAAAAAAAAB, q0 C U1
- ldq u1, 16(up) C L1
- addq q1, l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- unop
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, 0(rp) C L1
- unop
-$L01:
-C 0
- cmpult u0, cy, cy C L0
- mulq u1, xAAAAAAAAAAAAAAAB, q1 C U1
- ldq u2, 24(up) C L1
- addq q0, l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- unop
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, 8(rp) C L1
- unop
-$L00:
-C 0
- cmpult u1, cy, cy C L0
- mulq u2, xAAAAAAAAAAAAAAAB, q0 C U1
- ldq u3, 32(up) C L1
- addq q1, l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- unop
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, 16(rp) C L1
- unop
-$L11:
-C 0
- cmpult u2, cy, cy C L0
- mulq u3, xAAAAAAAAAAAAAAAB, q1 C U1
- ldq u0, 40(up) C L1
- addq q0, l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- lda n, -4(n) C L1 bookkeeping
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, 24(rp) C L1
- lda up, 32(up)
-C
- ldl r31, 256(up) C prefetch
- unop
- lda rp, 32(rp)
- bge n, $Ltop C U1
-C *** MAIN LOOP END ***
-$Lend:
-
- cmpult u3, cy, cy C L0
- mulq u0, xAAAAAAAAAAAAAAAB, q0 C U1
- unop
- addq q1, l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- unop
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, 0(rp) C L1
- unop
-$Lcj1:
- cmpult u0, cy, cy C L0
- addq q0, l0, x C U0
- cmpult x5555555555555555, x, p6 C U0
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- addq p6, cy, cy
- addq p7, cy, r0
- stq x, 8(rp) C L1
-
- ret r31,(r26),1
-EPILOGUE()
-ASM_END()
-
-C This is useful for playing with various schedules.
-C Expand as: one(0)one(1)one(2)one(3)
-define(`one',`
-C 0
- cmpult `$'eval(($1+3)%4), cy, cy C L0
- mulq `$'$1, xAAAAAAAAAAAAAAAB, `$'eval(4+$1%2) C U1
- ldq `$'eval(($1+1)%4), eval($1*8+16)(up) C L1
- addq `$'eval(4+($1+1)%2), l0, x C U0
-C 1
- negq cy, cymask C L0
- unop C U1
- unop C L1
- cmpult x5555555555555555, x, p6 C U0
-C 2
- cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
- unop
- unop
- negq p6, t0 C L0
-C 3
- negq p7, t1 C L0
- and cymask, x5555555555555555, l0 C U1
- addq p6, cy, cy
- and t0, x5555555555555555, t0
-C 4
- and t1, x5555555555555555, t1
- addq p7, cy, cy
- unop
- addq t0, l0, l0
-C 5
- addq t1, l0, l0
- unop
- stq x, eval($1*8)(rp) C L1
- unop
-')
« no previous file with comments | « gcc/gmp/mpn/alpha/copyi.asm ('k') | gcc/gmp/mpn/alpha/divrem_2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698