Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: gcc/gmp/mpn/ia64/divrem_1.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gcc/gmp/mpn/ia64/copyi.asm ('k') | gcc/gmp/mpn/ia64/gmp-mparam.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gcc/gmp/mpn/ia64/divrem_1.asm
diff --git a/gcc/gmp/mpn/ia64/divrem_1.asm b/gcc/gmp/mpn/ia64/divrem_1.asm
deleted file mode 100644
index aa50ac902bceb43dbcce898209b4d1b4abe43db6..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/ia64/divrem_1.asm
+++ /dev/null
@@ -1,464 +0,0 @@
-dnl IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
-dnl unnormalized limb.
-
-dnl Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C Itanium: 40-42
-C Itanium 2: 29-30
-
-C This was generated by gcc, then the loops were optimized. The preinv entry
-C point was shoehorned into the file. Lots of things outside the loops could
-C be streamlined. It would probably be a good idea to merge the loops for
-C normalized and unnormalized divisor, since the shifting stuff is done for
-C free in parallel with other operations. It would even be possible to merge
-C all loops, if the ld8 were made conditional.
-
-C TODO
-C * Consider delaying inversion for normalized mpn_divrem_1 entry till after
-C computing leading limb.
-C * Inline and interleave limb inversion code with loop setup code.
-
-ASM_START()
-
-C HP's assembler requires these declarations for importing mpn_invert_limb
- .global mpn_invert_limb
- .type mpn_invert_limb,@function
-
-C INPUT PARAMETERS
-C rp = r32
-C qxn = r33
-C up = r34
-C n = r35
-C vl = r36
-C vlinv = r37 (preinv only)
-C cnt = r38 (preinv only)
-
-PROLOGUE(mpn_preinv_divrem_1)
- .prologue
- .save ar.pfs, r42
- alloc r42 = ar.pfs, 7, 8, 1, 0
- .save ar.lc, r44
- mov r44 = ar.lc
- .save rp, r41
- mov r41 = b0
- .body
-ifdef(`HAVE_ABI_32',
-` addp4 r32 = 0, r32
- sxt4 r33 = r33
- addp4 r34 = 0, r34
- sxt4 r35 = r35
- ;;
-')
- mov r40 = r38
- shladd r34 = r35, 3, r34
- ;;
- adds r34 = -8, r34
- ;;
- ld8 r39 = [r34], -8
- ;;
-
- add r15 = r35, r33
- ;;
- mov r8 = r37
- shladd r32 = r15, 3, r32 C r32 = rp + n + qxn
- cmp.le p8, p0 = 0, r36
- ;;
- adds r32 = -8, r32 C r32 = rp + n + qxn - 1
- cmp.leu p6, p7 = r36, r39
- (p8) br.cond.dpnt .Lpunnorm
- ;;
-
- (p6) addl r15 = 1, r0
- (p7) mov r15 = r0
- ;;
- (p6) sub r38 = r39, r36
- (p7) mov r38 = r39
- st8 [r32] = r15, -8
- adds r35 = -2, r35 C un -= 2
- br .Lpn
-
-.Lpunnorm:
- (p6) add r34 = 8, r34
- mov r38 = 0 C r = 0
- shl r36 = r36, r40
- (p6) br.cond.dptk .Lpu
- ;;
- shl r38 = r39, r40 C r = ahigh << cnt
- cmp.ne p8, p0 = 1, r35
- st8 [r32] = r0, -8
- adds r35 = -1, r35 C un--
- (p8) br.cond.dpnt .Lpu
-
- mov r23 = 1
- ;;
- setf.sig f6 = r8
- setf.sig f12 = r23
- br .L435
-EPILOGUE()
-
-
-PROLOGUE(mpn_divrem_1)
- .prologue
- .save ar.pfs, r42
- alloc r42 = ar.pfs, 5, 8, 1, 0
- .save ar.lc, r44
- mov r44 = ar.lc
- .save rp, r41
- mov r41 = b0
- .body
-ifdef(`HAVE_ABI_32',
-` addp4 r32 = 0, r32
- sxt4 r33 = r33
- addp4 r34 = 0, r34
- sxt4 r35 = r35
- ;;
-')
- mov r38 = r0
- add r15 = r35, r33
- ;;
- cmp.ne p6, p7 = 0, r15
- ;;
- (p7) mov r8 = r0
- (p7) br.cond.dpnt .Lret
- shladd r14 = r15, 3, r32 C r14 = rp + n + qxn
- cmp.le p6, p7 = 0, r36
- ;;
- adds r32 = -8, r14 C r32 = rp + n + qxn - 1
- (p6) br.cond.dpnt .Lunnorm
- cmp.eq p6, p7 = 0, r35
- (p6) br.cond.dpnt .L179
- shladd r14 = r35, 3, r34
- ;;
- adds r14 = -8, r14
- adds r35 = -1, r35
- ;;
- ld8 r38 = [r14]
- ;;
- cmp.leu p6, p7 = r36, r38
- ;;
- (p6) addl r15 = 1, r0
- (p7) mov r15 = r0
- ;;
- st8 [r32] = r15, -8
- (p6) sub r38 = r38, r36
-
-.L179:
- mov r45 = r36
- adds r35 = -1, r35
- br.call.sptk.many b0 = mpn_invert_limb
- ;;
- shladd r34 = r35, 3, r34
-.Lpn:
- mov r23 = 1
- ;;
- setf.sig f6 = r8
- setf.sig f12 = r23
- cmp.le p6, p7 = 0, r35
- mov r40 = 0
- (p7) br.cond.dpnt .L435
- setf.sig f10 = r36
- mov ar.lc = r35
- setf.sig f7 = r38
- ;;
- sub r28 = -1, r36
-C Develop quotient limbs for normalized divisor
-.Loop1: C 00 C q=r18 nh=r38/f7
- ld8 r20 = [r34], -8
- xma.hu f11 = f7, f6, f0
- ;; C 04
- xma.l f8 = f11, f12, f7 C q = q + nh
- ;; C 08
- getf.sig r18 = f8
- xma.hu f9 = f8, f10, f0
- xma.l f8 = f8, f10, f0
- ;; C 12
- getf.sig r16 = f9
- C 13
- getf.sig r15 = f8
- ;; C 18
- cmp.ltu p6, p7 = r20, r15
- sub r15 = r20, r15
- sub r16 = r38, r16
- ;; C 19
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
- (p6) add r16 = -1, r16
- (p0) cmp.ne.unc p6, p7 = r0, r0
- ;; C 20
- (p8) cmp.ltu p6, p7 = r15, r36
- (p8) sub r15 = r15, r36
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;; C 21
- .pred.rel "mutex",p6,p7
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
- cmp.ltu p6, p7 = r15, r36 C speculative
- sub r28 = r15, r36 C speculative, just for cmp
- ;; C 22
- (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
- (p8) mov r15 = r28
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;; C 23
- (p6) setf.sig f7 = r15
- (p7) sub r15 = r15, r36
- (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;; C 24
- (p7) setf.sig f7 = r15
- st8 [r32] = r18, -8
- mov r38 = r15
- br.cloop.dptk .Loop1
- C 29/30
- br.sptk .L435
- ;;
-.Lunnorm:
- mux1 r16 = r36, @rev
- cmp.eq p6, p7 = 0, r35
- (p6) br.cond.dpnt .L322
- shladd r34 = r35, 3, r34
- ;;
- adds r34 = -8, r34
- ;;
- ld8 r39 = [r34]
- ;;
- cmp.leu p6, p7 = r36, r39
- (p6) br.cond.dptk .L322
- adds r34 = -8, r34
- ;;
- mov r38 = r39
- ;;
- cmp.ne p6, p7 = 1, r15
- st8 [r32] = r0, -8
- ;;
- (p7) mov r8 = r38
- (p7) br.cond.dpnt .Lret
- adds r35 = -1, r35
-.L322:
- sub r14 = r0, r16
- ;;
- or r14 = r16, r14
- ;;
- mov r16 = -8
- czx1.l r14 = r14
- ;;
- shladd r16 = r14, 3, r16
- ;;
- shr.u r14 = r36, r16
- ;;
- cmp.geu p6, p7 = 15, r14
- ;;
- (p7) shr.u r14 = r14, 4
- (p7) adds r16 = 4, r16
- ;;
- cmp.geu p6, p7 = 3, r14
- ;;
- (p7) shr.u r14 = r14, 2
- (p7) adds r16 = 2, r16
- ;;
- tbit.nz p6, p7 = r14, 1
- ;;
- .pred.rel "mutex",p6,p7
- (p6) sub r40 = 62, r16
- (p7) sub r40 = 63, r16
- ;;
- shl r45 = r36, r40
- shl r36 = r36, r40
- shl r38 = r38, r40
- br.call.sptk.many b0 = mpn_invert_limb
- ;;
-.Lpu:
- mov r23 = 1
- ;;
- setf.sig f6 = r8
- setf.sig f12 = r23
- cmp.eq p6, p7 = 0, r35
- (p6) br.cond.dpnt .L435
- sub r16 = 64, r40
- adds r35 = -2, r35
- ;;
- ld8 r39 = [r34], -8
- cmp.le p6, p7 = 0, r35
- ;;
- shr.u r14 = r39, r16
- ;;
- or r38 = r14, r38
- (p7) br.cond.dpnt .Lend3
- ;;
- mov r22 = r16
- setf.sig f10 = r36
- setf.sig f7 = r38
- mov ar.lc = r35
- ;;
-C Develop quotient limbs for unnormalized divisor
-.Loop3:
- ld8 r14 = [r34], -8
- xma.hu f11 = f7, f6, f0
- ;;
- xma.l f8 = f11, f12, f7 C q = q + nh
- ;;
- getf.sig r18 = f8
- xma.hu f9 = f8, f10, f0
- shl r20 = r39, r40
- xma.l f8 = f8, f10, f0
- shr.u r24 = r14, r22
- ;;
- getf.sig r16 = f9
- getf.sig r15 = f8
- or r20 = r24, r20
- ;;
- cmp.ltu p6, p7 = r20, r15
- sub r15 = r20, r15
- sub r16 = r38, r16
- ;;
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
- (p6) add r16 = -1, r16
- (p0) cmp.ne.unc p6, p7 = r0, r0
- ;;
- (p8) cmp.ltu p6, p7 = r15, r36
- (p8) sub r15 = r15, r36
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- .pred.rel "mutex",p6,p7
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
- cmp.ltu p6, p7 = r15, r36 C speculative
- sub r28 = r15, r36 C speculative, just for cmp
- ;;
- (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
- (p8) mov r15 = r28
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- (p6) setf.sig f7 = r15
- (p7) sub r15 = r15, r36
- (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- (p7) setf.sig f7 = r15
- st8 [r32] = r18, -8
- mov r39 = r14
- mov r38 = r15
- br.cloop.dptk .Loop3
- ;;
-.Lend3:
- setf.sig f10 = r36
- setf.sig f7 = r38
- ;;
- xma.hu f11 = f7, f6, f0
- ;;
- xma.l f8 = f11, f12, f7 C q = q + nh
- ;;
- getf.sig r18 = f8
- xma.hu f9 = f8, f10, f0
- shl r20 = r39, r40
- xma.l f8 = f8, f10, f0
- ;;
- getf.sig r16 = f9
- getf.sig r15 = f8
- ;;
- cmp.ltu p6, p7 = r20, r15
- sub r15 = r20, r15
- sub r16 = r38, r16
- ;;
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
- (p6) add r16 = -1, r16
- (p0) cmp.ne.unc p6, p7 = r0, r0
- ;;
- (p8) cmp.ltu p6, p7 = r15, r36
- (p8) sub r15 = r15, r36
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- .pred.rel "mutex",p6,p7
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
- ;;
- (p8) sub r15 = r15, r36
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- cmp.ltu p6, p7 = r15, r36
- ;;
- (p7) sub r15 = r15, r36
- (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- st8 [r32] = r18, -8
- mov r38 = r15
-.L435:
- adds r35 = -1, r33
- cmp.le p6, p7 = 1, r33
- (p7) br.cond.dpnt .Lend4
- ;;
- setf.sig f7 = r38
- setf.sig f10 = r36
- mov ar.lc = r35
- ;;
-.Loop4:
- xma.hu f11 = f7, f6, f0
- ;;
- xma.l f8 = f11, f12, f7 C q = q + nh
- ;;
- getf.sig r18 = f8
- xma.hu f9 = f8, f10, f0
- xma.l f8 = f8, f10, f0
- ;;
- getf.sig r16 = f9
- getf.sig r15 = f8
- ;;
- cmp.ltu p6, p7 = 0, r15
- sub r15 = 0, r15
- sub r16 = r38, r16
- ;;
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
- (p6) add r16 = -1, r16
- (p0) cmp.ne.unc p6, p7 = r0, r0
- ;;
- (p8) cmp.ltu p6, p7 = r15, r36
- (p8) sub r15 = r15, r36
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- .pred.rel "mutex",p6,p7
- (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
- (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
- cmp.ltu p6, p7 = r15, r36 C speculative
- sub r28 = r15, r36 C speculative, just for cmp
- ;;
- (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
- (p8) mov r15 = r28
- (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- (p6) setf.sig f7 = r15
- (p7) sub r15 = r15, r36
- (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
- ;;
- (p7) setf.sig f7 = r15
- st8 [r32] = r18, -8
- mov r38 = r15
- br.cloop.dptk .Loop4
- ;;
-.Lend4:
- shr.u r8 = r38, r40
-.Lret:
- mov ar.pfs = r42
- mov ar.lc = r44
- mov b0 = r41
- br.ret.sptk.many b0
-EPILOGUE()
-ASM_END()
« no previous file with comments | « gcc/gmp/mpn/ia64/copyi.asm ('k') | gcc/gmp/mpn/ia64/gmp-mparam.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698