| Index: gcc/gmp/mpn/powerpc32/divrem_2.asm
|
| diff --git a/gcc/gmp/mpn/powerpc32/divrem_2.asm b/gcc/gmp/mpn/powerpc32/divrem_2.asm
|
| deleted file mode 100644
|
| index 3261cbd727ee2e7aa6889b86a8e526ced8705955..0000000000000000000000000000000000000000
|
| --- a/gcc/gmp/mpn/powerpc32/divrem_2.asm
|
| +++ /dev/null
|
| @@ -1,171 +0,0 @@
|
| -dnl PPC-32 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
|
| -
|
| -dnl Copyright 2007, 2008 Free Software Foundation, Inc.
|
| -
|
| -dnl This file is part of the GNU MP Library.
|
| -
|
| -dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
| -dnl it under the terms of the GNU Lesser General Public License as published
|
| -dnl by the Free Software Foundation; either version 3 of the License, or (at
|
| -dnl your option) any later version.
|
| -
|
| -dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
| -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
| -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
| -dnl License for more details.
|
| -
|
| -dnl You should have received a copy of the GNU Lesser General Public License
|
| -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
| -
|
| -include(`../config.m4')
|
| -
|
| -C cycles/limb
|
| -C norm frac
|
| -C 7410 ~36.5 ~36.5
|
| -C 744x, 745x 29 29
|
| -
|
| -C INPUT PARAMETERS
|
| -C qp = r3
|
| -C fn = r4
|
| -C up = r5
|
| -C un = r6
|
| -C d = r7
|
| -
|
| -C TODO
|
| -C * Decrease register usage.
|
| -C * Make sure mul operands and optimal for early-out.
|
| -C * Check that things work well for a shared library build.
|
| -C * Write an invert_limb, perhaps inline, perhaps as a private call. Or at
|
| -C least vastly improve the current __udiv_qrnnd_c based code.
|
| -
|
| -
|
| -ASM_START()
|
| -PROLOGUE(mpn_divrem_2)
|
| - stwu r1, -32(r1)
|
| - slwi r0, r6, 2
|
| - add r5, r5, r0
|
| - stmw r28, 8(r1)
|
| - addi r29, r5, -8 C up = up_param + un - 2
|
| - lwz r10, 4(r7)
|
| - lwz r12, 4(r29)
|
| - addi r8, r3, -12
|
| - lwz r7, 0(r7)
|
| - cmplw cr7, r12, r10
|
| - lwz r28, 0(r29)
|
| - blt- cr7, L(2)
|
| - bgt+ cr7, L(4)
|
| - cmplw cr7, r28, r7
|
| - blt- cr7, L(2)
|
| -L(4): subfc r28, r7, r28
|
| - subfe r12, r10, r12
|
| - li r3, 1
|
| - b L(6)
|
| -L(2): li r3, 0
|
| -
|
| -L(6): add r0, r4, r6
|
| - addic. r30, r0, -2
|
| - ble- cr0, L(ret)
|
| -
|
| - slwi r9, r0, 2
|
| - add r8, r8, r9 C rp += un + fn
|
| - mtctr r30
|
| -
|
| -C Compute di from d1
|
| - srwi r11, r10, 16
|
| - nor r0, r10, r10
|
| - divwu r31, r0, r11
|
| - rlwinm r5, r10, 0, 16, 31
|
| - mullw r9, r11, r31
|
| - mullw r6, r5, r31
|
| - subf r0, r9, r0
|
| - slwi r0, r0, 16
|
| - ori r0, r0, 65535
|
| - cmplw cr7, r0, r6
|
| - bge- cr7, L(9)
|
| - add r0, r0, r10
|
| - cmplw cr7, r0, r10
|
| - cmplw cr6, r0, r6
|
| - addi r31, r31, -1 C q1--
|
| - cror 28, 28, 25
|
| - bc+ 12, 28, L(9)
|
| - addi r31, r31, -1 C q1--
|
| - add r0, r0, r10
|
| -L(9): subf r0, r6, r0
|
| - divwu r6, r0, r11
|
| - mullw r9, r11, r6
|
| - mullw r11, r5, r6
|
| - subf r0, r9, r0
|
| - slwi r0, r0, 16
|
| - ori r0, r0, 65535
|
| - cmplw cr7, r0, r11
|
| - bge- cr7, L(13)
|
| - add r0, r0, r10
|
| - cmplw cr7, r0, r10
|
| - cmplw cr6, r0, r11
|
| - addi r6, r6, -1 C q0--
|
| - cror 28, 28, 25
|
| - bc+ 12, 28, L(13)
|
| -C add r0, r0, r10 C final remainder
|
| - addi r6, r6, -1 C q0--
|
| -L(13): rlwimi r6, r31, 16, 0, 15 C assemble final quotient
|
| -
|
| -C Adjust di by including d0
|
| - mullw r9, r10, r6 C t0 = LO(di * d1)
|
| - addc r11, r9, r7
|
| - subfe r0, r1, r1
|
| - mulhwu r9, r6, r7 C s1 = HI(di * d0)
|
| - addc r9, r11, r9
|
| - addze. r0, r0
|
| - blt cr0, L(17)
|
| -L(18): subfc r9, r10, r9
|
| - addi r6, r6, -1
|
| - addme. r0, r0
|
| - bge+ cr0, L(18)
|
| -L(17):
|
| -
|
| -C r0 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r28 r29 r30 r31
|
| -C msl di d0 qp d1 fn up un
|
| -L(loop):
|
| - mullw r0, r12, r6 C q0 = LO(n2 * di)
|
| - cmpw cr7, r30, r4
|
| - addc r31, r0, r28 C q0 += n1
|
| - mulhwu r9, r12, r6 C q = HI(n2 * di)
|
| - adde r12, r9, r12 C q += n2
|
| - addi r30, r30, -1
|
| - mullw r0, r10, r12 C d1 * q
|
| - li r9, 0
|
| - subf r0, r0, r28 C n1 -= d1 * q
|
| - addi r5, r12, 1
|
| - ble- cr7, L(23)
|
| - lwzu r9, -4(r29)
|
| -L(23): mullw r11, r12, r7 C t0 = LO(d0 * q)
|
| - subfc r28, r7, r9 C n0 -= d0
|
| - subfe r0, r10, r0 C n1 -= d1
|
| - mulhwu r12, r12, r7 C t1 = HI(d0 * q)
|
| - subfc r28, r11, r28 C n0 -= t0
|
| - subfe r12, r12, r0 C n1 -= t1
|
| - cmplw cr7, r12, r31
|
| - blt+ cr7, L(24)
|
| - addc r28, r28, r7
|
| - adde r12, r12, r10
|
| - addi r5, r5, -1
|
| -L(24): cmplw cr7, r12, r10
|
| - bge- cr7, L(fix)
|
| -L(bck): stw r5, 0(r8)
|
| - addi r8, r8, -4
|
| - bdnz L(loop)
|
| -
|
| -L(ret): stw r28, 0(r29)
|
| - stw r12, 4(r29)
|
| - lmw r28, 8(r1)
|
| - addi r1, r1, 32
|
| - blr
|
| -
|
| -L(fix): cmplw cr6, r28, r7
|
| - bgt+ cr7, L(28)
|
| - blt- cr6, L(bck)
|
| -L(28): subfc r28, r7, r28
|
| - subfe r12, r10, r12
|
| - addi r5, r5, 1
|
| - b L(bck)
|
| -EPILOGUE()
|
|
|