Index: gcc/gmp/mpn/ia64/bdiv_dbm1c.asm |
diff --git a/gcc/gmp/mpn/ia64/bdiv_dbm1c.asm b/gcc/gmp/mpn/ia64/bdiv_dbm1c.asm |
deleted file mode 100644 |
index 6ff4fdaaf9c9f258dea70f8e660ca3dc2c33c666..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/ia64/bdiv_dbm1c.asm |
+++ /dev/null |
@@ -1,503 +0,0 @@ |
-dnl IA-64 mpn_bdiv_dbm1. |
- |
-dnl Copyright 2008, 2009 Free Software Foundation, Inc. |
- |
-dnl This file is part of the GNU MP Library. |
- |
-dnl The GNU MP Library is free software; you can redistribute it and/or modify |
-dnl it under the terms of the GNU Lesser General Public License as published |
-dnl by the Free Software Foundation; either version 3 of the License, or (at |
-dnl your option) any later version. |
- |
-dnl The GNU MP Library is distributed in the hope that it will be useful, but |
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
-dnl License for more details. |
- |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
-C cycles/limb |
-C Itanium: 4 |
-C Itanium 2: 2 |
- |
-C TODO |
-C * Optimize feed-in and wind-down code, both for speed and code size. |
- |
-C INPUT PARAMETERS |
-define(`rp', `r32') |
-define(`up', `r33') |
-define(`n', `r34') |
-define(`bd', `r35') |
- |
-ASM_START() |
-PROLOGUE(mpn_bdiv_dbm1c) |
- .prologue |
- .save ar.lc, r2 |
- .body |
- |
-ifdef(`HAVE_ABI_32', |
-` addp4 rp = 0, rp C M I |
- addp4 up = 0, up C M I |
- zxt4 n = n C I |
- ;; |
-') |
-{.mmb |
- mov r15 = r36 C M I |
- ldf8 f9 = [up], 8 C M |
- nop.b 0 C B |
-} |
-.Lcommon: |
-{.mii |
- adds r16 = -1, n C M I |
- mov r2 = ar.lc C I0 |
- and r14 = 3, n C M I |
- ;; |
-} |
-{.mii |
- setf.sig f6 = bd C M2 M3 |
- shr.u r31 = r16, 2 C I0 |
- cmp.eq p10, p0 = 0, r14 C M I |
-} |
-{.mii |
- nop.m 0 C M |
- cmp.eq p11, p0 = 2, r14 C M I |
- cmp.eq p12, p0 = 3, r14 C M I |
- ;; |
-} |
-{.mii |
- cmp.ne p6, p7 = r0, r0 C M I |
- mov.i ar.lc = r31 C I0 |
- cmp.ne p8, p9 = r0, r0 C M I |
-} |
-{.bbb |
- (p10) br.dptk .Lb00 C B |
- (p11) br.dptk .Lb10 C B |
- (p12) br.dptk .Lb11 C B |
- ;; |
-} |
- |
-.Lb01: br.cloop.dptk .grt1 |
- ;; |
- xma.l f38 = f9, f6, f0 |
- xma.hu f39 = f9, f6, f0 |
- ;; |
- getf.sig r26 = f38 |
- getf.sig r27 = f39 |
- br .Lcj1 |
- |
-.grt1: ldf8 f10 = [r33], 8 |
- ;; |
- ldf8 f11 = [r33], 8 |
- ;; |
- ldf8 f12 = [r33], 8 |
- ;; |
- xma.l f38 = f9, f6, f0 |
- xma.hu f39 = f9, f6, f0 |
- ;; |
- ldf8 f13 = [r33], 8 |
- ;; |
- xma.l f32 = f10, f6, f0 |
- xma.hu f33 = f10, f6, f0 |
- br.cloop.dptk .grt5 |
- |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- getf.sig r27 = f39 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- getf.sig r21 = f33 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- br .Lcj5 |
- |
-.grt5: ldf8 f10 = [r33], 8 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- getf.sig r27 = f39 |
- ldf8 f11 = [r33], 8 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- getf.sig r21 = f33 |
- ldf8 f12 = [r33], 8 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- br .LL01 |
- |
-.Lb10: ldf8 f13 = [r33], 8 |
- br.cloop.dptk .grt2 |
- ;; |
- |
- xma.l f36 = f9, f6, f0 |
- xma.hu f37 = f9, f6, f0 |
- ;; |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- getf.sig r24 = f36 |
- ;; |
- getf.sig r25 = f37 |
- ;; |
- getf.sig r26 = f38 |
- ;; |
- getf.sig r27 = f39 |
- br .Lcj2 |
- |
-.grt2: ldf8 f10 = [r33], 8 |
- ;; |
- ldf8 f11 = [r33], 8 |
- ;; |
- xma.l f36 = f9, f6, f0 |
- xma.hu f37 = f9, f6, f0 |
- ;; |
- ldf8 f12 = [r33], 8 |
- ;; |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- ldf8 f13 = [r33], 8 |
- ;; |
- getf.sig r24 = f36 |
- xma.l f32 = f10, f6, f0 |
- xma.hu f33 = f10, f6, f0 |
- br.cloop.dptk .grt6 |
- |
- getf.sig r25 = f37 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- getf.sig r27 = f39 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- br .Lcj6 |
- |
-.grt6: getf.sig r25 = f37 |
- ldf8 f10 = [r33], 8 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- getf.sig r27 = f39 |
- ldf8 f11 = [r33], 8 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- br .LL10 |
- |
- |
-.Lb11: ldf8 f12 = [r33], 8 |
- ;; |
- ldf8 f13 = [r33], 8 |
- br.cloop.dptk .grt3 |
- ;; |
- |
- xma.l f34 = f9, f6, f0 |
- xma.hu f35 = f9, f6, f0 |
- ;; |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- getf.sig r23 = f35 |
- ;; |
- getf.sig r24 = f36 |
- ;; |
- getf.sig r25 = f37 |
- ;; |
- getf.sig r26 = f38 |
- br .Lcj3 |
- |
-.grt3: ldf8 f10 = [r33], 8 |
- ;; |
- xma.l f34 = f9, f6, f0 |
- xma.hu f35 = f9, f6, f0 |
- ;; |
- ldf8 f11 = [r33], 8 |
- ;; |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- ldf8 f12 = [r33], 8 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- getf.sig r23 = f35 |
- ldf8 f13 = [r33], 8 |
- ;; |
- getf.sig r24 = f36 |
- xma.l f32 = f10, f6, f0 |
- xma.hu f33 = f10, f6, f0 |
- br.cloop.dptk .grt7 |
- |
- getf.sig r25 = f37 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- br .Lcj7 |
- |
-.grt7: getf.sig r25 = f37 |
- ldf8 f10 = [r33], 8 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- br .LL11 |
- |
- |
-.Lb00: ldf8 f11 = [r33], 8 |
- ;; |
- ldf8 f12 = [r33], 8 |
- ;; |
- ldf8 f13 = [r33], 8 |
- br.cloop.dptk .grt4 |
- ;; |
- |
- xma.l f32 = f9, f6, f0 |
- xma.hu f33 = f9, f6, f0 |
- ;; |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- getf.sig r21 = f33 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- getf.sig r23 = f35 |
- ;; |
- getf.sig r24 = f36 |
- br .Lcj4 |
- |
-.grt4: xma.l f32 = f9, f6, f0 |
- xma.hu f33 = f9, f6, f0 |
- ;; |
- ldf8 f10 = [r33], 8 |
- ;; |
- xma.l f34 = f11, f6, f0 |
- xma.hu f35 = f11, f6, f0 |
- ;; |
- ldf8 f11 = [r33], 8 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- xma.hu f37 = f12, f6, f0 |
- ;; |
- getf.sig r21 = f33 |
- ldf8 f12 = [r33], 8 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- xma.hu f39 = f13, f6, f0 |
- ;; |
- getf.sig r23 = f35 |
- ldf8 f13 = [r33], 8 |
- ;; |
- getf.sig r24 = f36 |
- xma.l f32 = f10, f6, f0 |
- xma.hu f33 = f10, f6, f0 |
- br.cloop.dptk .LL00 |
- br .Lcj8 |
- |
-C *** MAIN LOOP START *** |
- ALIGN(32) |
-.Ltop: |
- .pred.rel "mutex",p6,p7 |
-C .mfi |
- getf.sig r24 = f36 |
- xma.l f32 = f10, f6, f0 |
- (p6) sub r15 = r19, r27, 1 |
-C .mfi |
- st8 [r32] = r19, 8 |
- xma.hu f33 = f10, f6, f0 |
- (p7) sub r15 = r19, r27 |
- ;; |
-.LL00: |
-C .mfi |
- getf.sig r25 = f37 |
- nop.f 0 |
- cmp.ltu p6, p7 = r15, r20 |
-C .mib |
- ldf8 f10 = [r33], 8 |
- sub r16 = r15, r20 |
- nop.b 0 |
- ;; |
- |
-C .mfi |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- (p6) sub r15 = r16, r21, 1 |
-C .mfi |
- st8 [r32] = r16, 8 |
- xma.hu f35 = f11, f6, f0 |
- (p7) sub r15 = r16, r21 |
- ;; |
-.LL11: |
-C .mfi |
- getf.sig r27 = f39 |
- nop.f 0 |
- cmp.ltu p6, p7 = r15, r22 |
-C .mib |
- ldf8 f11 = [r33], 8 |
- sub r17 = r15, r22 |
- nop.b 0 |
- ;; |
- |
-C .mfi |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- (p6) sub r15 = r17, r23, 1 |
-C .mfi |
- st8 [r32] = r17, 8 |
- xma.hu f37 = f12, f6, f0 |
- (p7) sub r15 = r17, r23 |
- ;; |
-.LL10: |
-C .mfi |
- getf.sig r21 = f33 |
- nop.f 0 |
- cmp.ltu p6, p7 = r15, r24 |
-C .mib |
- ldf8 f12 = [r33], 8 |
- sub r18 = r15, r24 |
- nop.b 0 |
- ;; |
- |
-C .mfi |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- (p6) sub r15 = r18, r25, 1 |
-C .mfi |
- st8 [r32] = r18, 8 |
- xma.hu f39 = f13, f6, f0 |
- (p7) sub r15 = r18, r25 |
- ;; |
-.LL01: |
-C .mfi |
- getf.sig r23 = f35 |
- nop.f 0 |
- cmp.ltu p6, p7 = r15, r26 |
-C .mib |
- ldf8 f13 = [r33], 8 |
- sub r19 = r15, r26 |
- br.cloop.sptk.few .Ltop |
-C *** MAIN LOOP END *** |
- ;; |
- |
- getf.sig r24 = f36 |
- xma.l f32 = f10, f6, f0 |
- (p6) sub r15 = r19, r27, 1 |
- st8 [r32] = r19, 8 |
- xma.hu f33 = f10, f6, f0 |
- (p7) sub r15 = r19, r27 |
- ;; |
-.Lcj8: getf.sig r25 = f37 |
- cmp.ltu p6, p7 = r15, r20 |
- sub r16 = r15, r20 |
- ;; |
- getf.sig r26 = f38 |
- xma.l f34 = f11, f6, f0 |
- (p6) sub r15 = r16, r21, 1 |
- st8 [r32] = r16, 8 |
- xma.hu f35 = f11, f6, f0 |
- (p7) sub r15 = r16, r21 |
- ;; |
-.Lcj7: getf.sig r27 = f39 |
- cmp.ltu p6, p7 = r15, r22 |
- sub r17 = r15, r22 |
- ;; |
- getf.sig r20 = f32 |
- xma.l f36 = f12, f6, f0 |
- (p6) sub r15 = r17, r23, 1 |
- st8 [r32] = r17, 8 |
- xma.hu f37 = f12, f6, f0 |
- (p7) sub r15 = r17, r23 |
- ;; |
-.Lcj6: getf.sig r21 = f33 |
- cmp.ltu p6, p7 = r15, r24 |
- sub r18 = r15, r24 |
- ;; |
- getf.sig r22 = f34 |
- xma.l f38 = f13, f6, f0 |
- (p6) sub r15 = r18, r25, 1 |
- st8 [r32] = r18, 8 |
- xma.hu f39 = f13, f6, f0 |
- (p7) sub r15 = r18, r25 |
- ;; |
-.Lcj5: getf.sig r23 = f35 |
- cmp.ltu p6, p7 = r15, r26 |
- sub r19 = r15, r26 |
- ;; |
- getf.sig r24 = f36 |
- (p6) sub r15 = r19, r27, 1 |
- st8 [r32] = r19, 8 |
- (p7) sub r15 = r19, r27 |
- ;; |
-.Lcj4: getf.sig r25 = f37 |
- cmp.ltu p6, p7 = r15, r20 |
- sub r16 = r15, r20 |
- ;; |
- getf.sig r26 = f38 |
- (p6) sub r15 = r16, r21, 1 |
- st8 [r32] = r16, 8 |
- (p7) sub r15 = r16, r21 |
- ;; |
-.Lcj3: getf.sig r27 = f39 |
- cmp.ltu p6, p7 = r15, r22 |
- sub r17 = r15, r22 |
- ;; |
- (p6) sub r15 = r17, r23, 1 |
- st8 [r32] = r17, 8 |
- (p7) sub r15 = r17, r23 |
- ;; |
-.Lcj2: cmp.ltu p6, p7 = r15, r24 |
- sub r18 = r15, r24 |
- ;; |
- (p6) sub r15 = r18, r25, 1 |
- st8 [r32] = r18, 8 |
- (p7) sub r15 = r18, r25 |
- ;; |
-.Lcj1: cmp.ltu p6, p7 = r15, r26 |
- sub r19 = r15, r26 |
- ;; |
- (p6) sub r8 = r19, r27, 1 |
- st8 [r32] = r19 |
- (p7) sub r8 = r19, r27 |
- mov ar.lc = r2 |
- br.ret.sptk.many b0 |
-EPILOGUE() |
-ASM_END() |