gcc/gmp/mpn/alpha/divrem_2.asm - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/gmp/mpn/alpha/divrem_2.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/gmp/mpn/alpha/divrem_2.asm

diff --git a/gcc/gmp/mpn/alpha/divrem_2.asm b/gcc/gmp/mpn/alpha/divrem_2.asm

deleted file mode 100644

index b68468bca03f1780fcee47f4ca9780d411b4fe4d..0000000000000000000000000000000000000000

--- a/gcc/gmp/mpn/alpha/divrem_2.asm

+++ /dev/null

@@ -1,167 +0,0 @@

-dnl Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

-dnl This file is part of the GNU MP Library.

-dnl The GNU MP Library is free software; you can redistribute it and/or modify

-dnl it under the terms of the GNU Lesser General Public License as published

-dnl by the Free Software Foundation; either version 3 of the License, or (at

-dnl your option) any later version.

-dnl The GNU MP Library is distributed in the hope that it will be useful, but

-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public

-dnl License for more details.

-dnl You should have received a copy of the GNU Lesser General Public License

-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

-include(`../config.m4')

-C norm frac

-C ev4

-C ev5 70 70

-C ev6 29 29

-C TODO

-C * Perhaps inline mpn_invert_limb, that would allow us to not save/restore

-C any registers (thus save ~10 cycles per call).

-C * Use negated d1 and/or d0 to speed carry propagation. Might save a cycle

-C or two.

-C * Check cluster delays (for ev6). We very likely could save some cycles.

-C * Use branch-free code for computing di.

-C * CAVEAT: We rely on r19 not being clobbered by mpn_invert_limb call.

-C INPUT PARAMETERS

-define(`qp', `r16')

-define(`fn', `r17')

-define(`up_param', `r18')

-define(`un_param', `r19')

-define(`dp', `r20')

-ASM_START()

-PROLOGUE(mpn_divrem_2)

- ldgp r29, 0(r27)

- lda r30, -80(r30)

- stq r26, 0(r30)

- stq r9, 8(r30)

- stq r10, 16(r30)

- stq r11, 24(r30)

- stq r12, 32(r30)

- stq r13, 40(r30)

-C stq r14, 48(r30)

- stq r15, 56(r30)

- .prologue 1

- stq r16, 64(r30)

- bis r31, r17, r15

- s8addq r19, r18, r13

- lda r13, -24(r13)

- ldq r12, 8(r20)

- ldq r10, 0(r20)

- ldq r11, 16(r13)

- ldq r9, 8(r13)

- bis r31, r31, r3 C most_significant_q_limb = 0

- cmpult r11, r12, r1

- bne r1, L(L8)

- cmpule r11, r12, r1

- cmpult r9, r10, r2

- and r1, r2, r1

- bne r1, L(L8)

- subq r11, r12, r11

- subq r11, r2, r11

- subq r9, r10, r9

- lda r3, 1(r31) C most_significant_q_limb = 1

-L(L8): stq r3, 72(r30)

- addq r15, r19, r19

- lda r19, -3(r19)

- blt r19, L(L10)

- bis r31, r12, r16

- jsr r26, mpn_invert_limb

- ldgp r29, 0(r26)

- mulq r0, r12, r4 C t0 = LO(di * d1)

- umulh r0, r10, r2 C s1 = HI(di * d0)

- addq r4, r10, r4 C t0 += d0

- cmpule r10, r4, r7 C (t0 < d0)

- addq r4, r2, r4 C t0 += s1

- cmpult r4, r2, r1

- subq r1, r7, r7 C t1 (-1, 0, or 1)

- blt r7, L(L42)

-L(L22):

- lda r0, -1(r0) C di--

- cmpult r4, r12, r1 C cy for: t0 -= d1 (below)

- subq r7, r1, r7 C t1 -= cy

- subq r4, r12, r4 C t0 -= d1

- bge r7, L(L22)

-L(L42):

- ldq r16, 64(r30)

- s8addq r19, r16, r16

- ALIGN(16)

-L(loop):

- mulq r11, r0, r5 C q0 (early)

- umulh r11, r0, r6 C q (early)

- addq r5, r9, r8 C q0 += n1

- addq r6, r11, r6 C q += n2

- cmpult r8, r5, r1 C cy for: q0 += n1

- addq r6, r1, r6 C q += cy

- unop

- mulq r12, r6, r1 C LO(d1 * q)

- umulh r10, r6, r7 C t1 = HI(d0 * q)

- subq r9, r1, r9 C n1 -= LO(d1 * q)

- mulq r10, r6, r4 C t0 = LO(d0 * q)

- unop

- cmple r15, r19, r5 C condition and n0...

- beq r5, L(L31)

- ldq r5, 0(r13)

- lda r13, -8(r13)

-L(L31): subq r9, r12, r9 C n1 -= d1

- cmpult r5, r10, r1 C

- subq r9, r1, r9 C

- subq r5, r10, r5 C n0 -= d0

- subq r9, r7, r9 C n1 -= t0

- cmpult r5, r4, r1 C

- subq r9, r1, r2 C

- subq r5, r4, r5 C n0 -= t1

- cmpult r2, r8, r1 C (n1 < q0)

- addq r6, r1, r6 C q += cond

- lda r1, -1(r1) C -(n1 >= q0)

- and r1, r10, r4 C

- addq r5, r4, r9 C n0 += mask & d0

- and r1, r12, r1 C

- cmpult r9, r5, r11 C cy for: n0 += mask & d0

- addq r2, r1, r1 C n1 += mask & d1

- addq r1, r11, r11 C n1 += cy

- cmpult r11, r12, r1 C

- beq r1, L(fix) C

-L(bck): stq r6, 0(r16)

- lda r16, -8(r16)

- lda r19, -1(r19)

- bge r19, L(loop)

-L(L10): stq r9, 8(r13)

- stq r11, 16(r13)

- ldq r0, 72(r30)

- ldq r26, 0(r30)

- ldq r9, 8(r30)

- ldq r10, 16(r30)

- ldq r11, 24(r30)

- ldq r12, 32(r30)

- ldq r13, 40(r30)

-C ldq r14, 48(r30)

- ldq r15, 56(r30)

- lda r30, 80(r30)

- ret r31, (r26), 1

-L(fix): cmpule r11, r12, r1

- cmpult r9, r10, r2

- and r1, r2, r1

- bne r1, L(bck)

- subq r11, r12, r11

- subq r11, r2, r11

- subq r9, r10, r9

- lda r6, 1(r6)

- br L(bck)

-EPILOGUE()

-ASM_END()

« no previous file with comments | « gcc/gmp/mpn/alpha/diveby3.asm ('k') | gcc/gmp/mpn/alpha/ev5/add_n.asm » ('j') | no next file with comments »