Index: gcc/gmp/mpn/ia64/mul_1.asm |
diff --git a/gcc/gmp/mpn/ia64/mul_1.asm b/gcc/gmp/mpn/ia64/mul_1.asm |
deleted file mode 100644 |
index 8df8d93f8e3e48154847d5b7418d66afd8412392..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/ia64/mul_1.asm |
+++ /dev/null |
@@ -1,573 +0,0 @@ |
-dnl IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and |
-dnl store the result in a second limb vector. |
- |
-dnl Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software |
-dnl Foundation, Inc. |
- |
-dnl This file is part of the GNU MP Library. |
- |
-dnl The GNU MP Library is free software; you can redistribute it and/or modify |
-dnl it under the terms of the GNU Lesser General Public License as published |
-dnl by the Free Software Foundation; either version 3 of the License, or (at |
-dnl your option) any later version. |
- |
-dnl The GNU MP Library is distributed in the hope that it will be useful, but |
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
-dnl License for more details. |
- |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
-C cycles/limb |
-C Itanium: 4.0 |
-C Itanium 2: 2.0 |
- |
-C TODO |
-C * Further optimize feed-in and wind-down code, both for speed and code size. |
-C * Handle low limb input and results specially, using a common stf8 in the |
-C epilogue. |
-C * Use 1 c/l carry propagation scheme in wind-down code. |
-C * Use extra pointer register for `up' to speed up feed-in loads. |
-C * Work out final differences with addmul_1.asm. |
- |
-C INPUT PARAMETERS |
-define(`rp', `r32') |
-define(`up', `r33') |
-define(`n', `r34') |
-define(`vl', `r35') |
-define(`cy', `r36') C for mpn_mul_1c |
- |
-ASM_START() |
-PROLOGUE(mpn_mul_1) |
- .prologue |
- .save ar.lc, r2 |
- .body |
- |
-ifdef(`HAVE_ABI_32', |
-` addp4 rp = 0, rp C M I |
- addp4 up = 0, up C M I |
- zxt4 n = n C I |
- ;; |
-') |
-{.mfi |
- adds r15 = -1, n C M I |
- mov f9 = f0 C F |
- mov.i r2 = ar.lc C I0 |
-} |
-{.mmi |
- ldf8 f7 = [up], 8 C M |
- nop.m 0 C M |
- and r14 = 3, n C M I |
- ;; |
-} |
-.Lcommon: |
-{.mii |
- setf.sig f6 = vl C M2 M3 |
- shr.u r31 = r15, 2 C I0 |
- cmp.eq p10, p0 = 0, r14 C M I |
-} |
-{.mii |
- cmp.eq p11, p0 = 2, r14 C M I |
- cmp.eq p12, p0 = 3, r14 C M I |
- nop.i 0 C I |
- ;; |
-} |
-{.mii |
- cmp.ne p6, p7 = r0, r0 C M I |
- mov.i ar.lc = r31 C I0 |
- cmp.ne p8, p9 = r0, r0 C M I |
-} |
-{.bbb |
- (p10) br.dptk .Lb00 C B |
- (p11) br.dptk .Lb10 C B |
- (p12) br.dptk .Lb11 C B |
- ;; |
-} |
- |
-.Lb01: mov r20 = 0 |
- br.cloop.dptk .grt1 C B |
- |
- xma.l f39 = f7, f6, f9 C F |
- xma.hu f43 = f7, f6, f9 C F |
- ;; |
- getf.sig r8 = f43 C M2 |
- stf8 [rp] = f39 C M2 M3 |
- mov.i ar.lc = r2 C I0 |
- br.ret.sptk.many b0 C B |
- |
-.grt1: |
- ldf8 f32 = [up], 8 |
- ;; |
- ldf8 f33 = [up], 8 |
- ;; |
- ldf8 f34 = [up], 8 |
- xma.l f39 = f7, f6, f9 |
- xma.hu f43 = f7, f6, f9 |
- ;; |
- ldf8 f35 = [up], 8 |
- br.cloop.dptk .grt5 |
- |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- stf8 [rp] = f39, 8 |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r21 = f43 |
- getf.sig r18 = f36 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r22 = f40 |
- getf.sig r19 = f37 |
- xma.l f39 = f35, f6, f0 |
- xma.hu f43 = f35, f6, f0 |
- ;; |
- getf.sig r23 = f41 |
- getf.sig r16 = f38 |
- br .Lcj5 |
- |
-.grt5: |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r17 = f39 |
- ldf8 f32 = [up], 8 |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r21 = f43 |
- ldf8 f33 = [up], 8 |
- xma.l f38 = f34, f6, f0 |
- ;; |
- getf.sig r18 = f36 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r22 = f40 |
- ldf8 f34 = [up], 8 |
- xma.l f39 = f35, f6, f0 |
- ;; |
- getf.sig r19 = f37 |
- xma.hu f43 = f35, f6, f0 |
- br .LL01 |
- |
- |
-.Lb10: ldf8 f35 = [up], 8 |
- mov r23 = 0 |
- br.cloop.dptk .grt2 |
- |
- xma.l f38 = f7, f6, f9 |
- xma.hu f42 = f7, f6, f9 |
- ;; |
- stf8 [rp] = f38, 8 |
- xma.l f39 = f35, f6, f42 |
- xma.hu f43 = f35, f6, f42 |
- ;; |
- getf.sig r8 = f43 |
- stf8 [rp] = f39 |
- mov.i ar.lc = r2 |
- br.ret.sptk.many b0 |
- |
- |
-.grt2: |
- ldf8 f32 = [up], 8 |
- ;; |
- ldf8 f33 = [up], 8 |
- xma.l f38 = f7, f6, f9 |
- xma.hu f42 = f7, f6, f9 |
- ;; |
- ldf8 f34 = [up], 8 |
- xma.l f39 = f35, f6, f0 |
- xma.hu f43 = f35, f6, f0 |
- ;; |
- ldf8 f35 = [up], 8 |
- br.cloop.dptk .grt6 |
- |
- stf8 [rp] = f38, 8 |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r20 = f42 |
- getf.sig r17 = f39 |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r21 = f43 |
- getf.sig r18 = f36 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r22 = f40 |
- getf.sig r19 = f37 |
- xma.l f39 = f35, f6, f0 |
- xma.hu f43 = f35, f6, f0 |
- br .Lcj6 |
- |
-.grt6: |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r20 = f42 |
- ldf8 f32 = [up], 8 |
- xma.l f37 = f33, f6, f0 |
- ;; |
- getf.sig r17 = f39 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r21 = f43 |
- ldf8 f33 = [up], 8 |
- xma.l f38 = f34, f6, f0 |
- ;; |
- getf.sig r18 = f36 |
- xma.hu f42 = f34, f6, f0 |
- br .LL10 |
- |
- |
-.Lb11: ldf8 f34 = [up], 8 |
- mov r22 = 0 |
- ;; |
- ldf8 f35 = [up], 8 |
- br.cloop.dptk .grt3 |
- ;; |
- |
- xma.l f37 = f7, f6, f9 |
- xma.hu f41 = f7, f6, f9 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- xma.l f39 = f35, f6, f0 |
- xma.hu f43 = f35, f6, f0 |
- ;; |
- getf.sig r23 = f41 |
- stf8 [rp] = f37, 8 |
- getf.sig r16 = f38 |
- getf.sig r20 = f42 |
- getf.sig r17 = f39 |
- getf.sig r8 = f43 |
- br .Lcj3 |
- |
-.grt3: |
- ldf8 f32 = [up], 8 |
- xma.l f37 = f7, f6, f9 |
- xma.hu f41 = f7, f6, f9 |
- ;; |
- ldf8 f33 = [up], 8 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r19 = f37 |
- ldf8 f34 = [up], 8 |
- xma.l f39 = f35, f6, f0 |
- xma.hu f43 = f35, f6, f0 |
- ;; |
- getf.sig r23 = f41 |
- ldf8 f35 = [up], 8 |
- br.cloop.dptk .grt7 |
- |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- getf.sig r20 = f42 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r17 = f39 |
- xma.l f37 = f33, f6, f0 |
- getf.sig r21 = f43 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r18 = f36 |
- st8 [rp] = r19, 8 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- br .Lcj7 |
- |
-.grt7: |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r20 = f42 |
- ldf8 f32 = [up], 8 |
- xma.l f37 = f33, f6, f0 |
- ;; |
- getf.sig r17 = f39 |
- xma.hu f41 = f33, f6, f0 |
- br .LL11 |
- |
- |
-.Lb00: ldf8 f33 = [up], 8 |
- mov r21 = 0 |
- ;; |
- ldf8 f34 = [up], 8 |
- ;; |
- ldf8 f35 = [up], 8 |
- xma.l f36 = f7, f6, f9 |
- xma.hu f40 = f7, f6, f9 |
- br.cloop.dptk .grt4 |
- |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r22 = f40 |
- stf8 [rp] = f36, 8 |
- xma.l f39 = f35, f6, f0 |
- getf.sig r19 = f37 |
- xma.hu f43 = f35, f6, f0 |
- ;; |
- getf.sig r23 = f41 |
- getf.sig r16 = f38 |
- getf.sig r20 = f42 |
- getf.sig r17 = f39 |
- br .Lcj4 |
- |
-.grt4: |
- ldf8 f32 = [up], 8 |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- ;; |
- getf.sig r18 = f36 |
- ldf8 f33 = [up], 8 |
- xma.l f38 = f34, f6, f0 |
- xma.hu f42 = f34, f6, f0 |
- ;; |
- getf.sig r22 = f40 |
- ldf8 f34 = [up], 8 |
- xma.l f39 = f35, f6, f0 |
- ;; |
- getf.sig r19 = f37 |
- getf.sig r23 = f41 |
- xma.hu f43 = f35, f6, f0 |
- ldf8 f35 = [up], 8 |
- br.cloop.dptk .grt8 |
- |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- getf.sig r20 = f42 |
- xma.hu f40 = f32, f6, f0 |
- ;; |
- getf.sig r17 = f39 |
- st8 [rp] = r18, 8 |
- xma.l f37 = f33, f6, f0 |
- xma.hu f41 = f33, f6, f0 |
- br .Lcj8 |
- |
-.grt8: |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- xma.hu f40 = f32, f6, f0 |
- br .LL00 |
- |
- |
-C *** MAIN LOOP START *** |
- ALIGN(32) |
-.Loop: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- (p6) cmp.leu p8, p9 = r24, r17 |
- st8 [rp] = r24, 8 |
- xma.hu f40 = f32, f6, f0 |
- (p7) cmp.ltu p8, p9 = r24, r17 |
- ;; |
-.LL00: |
- .pred.rel "mutex",p8,p9 |
- getf.sig r20 = f42 |
- (p8) add r24 = r18, r21, 1 |
- nop.b 0 |
- ldf8 f32 = [up], 8 |
- (p9) add r24 = r18, r21 |
- nop.b 0 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- getf.sig r17 = f39 |
- xma.l f37 = f33, f6, f0 |
- (p8) cmp.leu p6, p7 = r24, r18 |
- st8 [rp] = r24, 8 |
- xma.hu f41 = f33, f6, f0 |
- (p9) cmp.ltu p6, p7 = r24, r18 |
- ;; |
-.LL11: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r21 = f43 |
- (p6) add r24 = r19, r22, 1 |
- nop.b 0 |
- ldf8 f33 = [up], 8 |
- (p7) add r24 = r19, r22 |
- nop.b 0 |
- ;; |
- .pred.rel "mutex",p6,p7 |
- getf.sig r18 = f36 |
- xma.l f38 = f34, f6, f0 |
- (p6) cmp.leu p8, p9 = r24, r19 |
- st8 [rp] = r24, 8 |
- xma.hu f42 = f34, f6, f0 |
- (p7) cmp.ltu p8, p9 = r24, r19 |
- ;; |
-.LL10: |
- .pred.rel "mutex",p8,p9 |
- getf.sig r22 = f40 |
- (p8) add r24 = r16, r23, 1 |
- nop.b 0 |
- ldf8 f34 = [up], 8 |
- (p9) add r24 = r16, r23 |
- nop.b 0 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- getf.sig r19 = f37 |
- xma.l f39 = f35, f6, f0 |
- (p8) cmp.leu p6, p7 = r24, r16 |
- st8 [rp] = r24, 8 |
- xma.hu f43 = f35, f6, f0 |
- (p9) cmp.ltu p6, p7 = r24, r16 |
- ;; |
-.LL01: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r23 = f41 |
- (p6) add r24 = r17, r20, 1 |
- nop.b 0 |
- ldf8 f35 = [up], 8 |
- (p7) add r24 = r17, r20 |
- br.cloop.dptk .Loop |
-C *** MAIN LOOP END *** |
- ;; |
- |
-.Lcj9: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r16 = f38 |
- xma.l f36 = f32, f6, f0 |
- (p6) cmp.leu p8, p9 = r24, r17 |
- st8 [rp] = r24, 8 |
- xma.hu f40 = f32, f6, f0 |
- (p7) cmp.ltu p8, p9 = r24, r17 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- getf.sig r20 = f42 |
- (p8) add r24 = r18, r21, 1 |
- (p9) add r24 = r18, r21 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- getf.sig r17 = f39 |
- xma.l f37 = f33, f6, f0 |
- (p8) cmp.leu p6, p7 = r24, r18 |
- st8 [rp] = r24, 8 |
- xma.hu f41 = f33, f6, f0 |
- (p9) cmp.ltu p6, p7 = r24, r18 |
- ;; |
-.Lcj8: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r21 = f43 |
- (p6) add r24 = r19, r22, 1 |
- (p7) add r24 = r19, r22 |
- ;; |
- .pred.rel "mutex",p6,p7 |
- getf.sig r18 = f36 |
- xma.l f38 = f34, f6, f0 |
- (p6) cmp.leu p8, p9 = r24, r19 |
- st8 [rp] = r24, 8 |
- xma.hu f42 = f34, f6, f0 |
- (p7) cmp.ltu p8, p9 = r24, r19 |
- ;; |
-.Lcj7: |
- .pred.rel "mutex",p8,p9 |
- getf.sig r22 = f40 |
- (p8) add r24 = r16, r23, 1 |
- (p9) add r24 = r16, r23 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- getf.sig r19 = f37 |
- xma.l f39 = f35, f6, f0 |
- (p8) cmp.leu p6, p7 = r24, r16 |
- st8 [rp] = r24, 8 |
- xma.hu f43 = f35, f6, f0 |
- (p9) cmp.ltu p6, p7 = r24, r16 |
- ;; |
-.Lcj6: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r23 = f41 |
- (p6) add r24 = r17, r20, 1 |
- (p7) add r24 = r17, r20 |
- ;; |
- .pred.rel "mutex",p6,p7 |
- (p6) cmp.leu p8, p9 = r24, r17 |
- (p7) cmp.ltu p8, p9 = r24, r17 |
- getf.sig r16 = f38 |
- st8 [rp] = r24, 8 |
- ;; |
-.Lcj5: |
- .pred.rel "mutex",p8,p9 |
- getf.sig r20 = f42 |
- (p8) add r24 = r18, r21, 1 |
- (p9) add r24 = r18, r21 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- (p8) cmp.leu p6, p7 = r24, r18 |
- (p9) cmp.ltu p6, p7 = r24, r18 |
- getf.sig r17 = f39 |
- st8 [rp] = r24, 8 |
- ;; |
-.Lcj4: |
- .pred.rel "mutex",p6,p7 |
- getf.sig r8 = f43 |
- (p6) add r24 = r19, r22, 1 |
- (p7) add r24 = r19, r22 |
- ;; |
- .pred.rel "mutex",p6,p7 |
- st8 [rp] = r24, 8 |
- (p6) cmp.leu p8, p9 = r24, r19 |
- (p7) cmp.ltu p8, p9 = r24, r19 |
- ;; |
-.Lcj3: |
- .pred.rel "mutex",p8,p9 |
- (p8) add r24 = r16, r23, 1 |
- (p9) add r24 = r16, r23 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- st8 [rp] = r24, 8 |
- (p8) cmp.leu p6, p7 = r24, r16 |
- (p9) cmp.ltu p6, p7 = r24, r16 |
- ;; |
-.Lcj2: |
- .pred.rel "mutex",p6,p7 |
- (p6) add r24 = r17, r20, 1 |
- (p7) add r24 = r17, r20 |
- ;; |
- .pred.rel "mutex",p6,p7 |
- st8 [rp] = r24, 8 |
- (p6) cmp.leu p8, p9 = r24, r17 |
- (p7) cmp.ltu p8, p9 = r24, r17 |
- ;; |
- .pred.rel "mutex",p8,p9 |
- (p8) add r8 = 1, r8 |
- mov.i ar.lc = r2 |
- br.ret.sptk.many b0 |
-EPILOGUE() |
- |
-PROLOGUE(mpn_mul_1c) |
- .prologue |
- .save ar.lc, r2 |
- .body |
- |
-ifdef(`HAVE_ABI_32', |
-` addp4 rp = 0, rp C M I |
- addp4 up = 0, up C M I |
- zxt4 n = n C I |
- ;; |
-') |
-{.mmi |
- adds r15 = -1, n C M I |
- setf.sig f9 = cy C M2 M3 |
- mov.i r2 = ar.lc C I0 |
-} |
-{.mmb |
- ldf8 f7 = [up], 8 C M |
- and r14 = 3, n C M I |
- br.sptk .Lcommon |
- ;; |
-} |
-EPILOGUE() |
-ASM_END() |