| OLD | NEW |
| (Empty) |
| 1 dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store | |
| 2 dnl sum in a third limb vector. | |
| 3 | |
| 4 dnl Copyright 2001 Free Software Foundation, Inc. | |
| 5 | |
| 6 dnl This file is part of the GNU MP Library. | |
| 7 | |
| 8 dnl The GNU MP Library is free software; you can redistribute it and/or modify | |
| 9 dnl it under the terms of the GNU Lesser General Public License as published | |
| 10 dnl by the Free Software Foundation; either version 3 of the License, or (at | |
| 11 dnl your option) any later version. | |
| 12 | |
| 13 dnl The GNU MP Library is distributed in the hope that it will be useful, but | |
| 14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| 15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
| 16 dnl License for more details. | |
| 17 | |
| 18 dnl You should have received a copy of the GNU Lesser General Public License | |
| 19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. | |
| 20 | |
| 21 | |
| 22 include(`../config.m4') | |
| 23 | |
| 24 C INPUT PARAMETERS (rp, s1p, s2p, n) AND CARRY SCRATCH (cy) | |
| 25 define(rp,%o0) C sums are stored through this pointer; %o0 also receives the returned carry | |
| 26 define(s1p,%o1) C first source operand, read with lduw | |
| 27 define(s2p,%o2) C second source operand, read with lduw | |
| 28 define(n,%o3) C count of limbs still to process; decremented with addcc | |
| 29 define(cy,%g1) C not an argument: holds the carry between limb additions | |
| 30 | |
| 31 C This code uses 64-bit operations on `o' and `g' registers. It doesn't | |
| 32 C require that `o' registers' upper 32 bits are preserved by the operating | |
| 33 C system, but if they are not, they must be zeroed. That is indeed what | |
| 34 C happens at least on Solaris 2.5 and 2.6. | |
| 35 | |
| 36 C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at | |
| 37 C about 10 cycles/limb from the Ecache. | |
| 38 | |
| 39 ASM_START() | |
| 40 C mpn_add_n: store at rp the sum of the n-limb vectors at s1p and s2p, and | |
| 41 C return the carry out of the top limb in %o0. n must be at least 1. | |
| 42 C | |
| 43 C Limbs are 32-bit words with the least significant at the lowest address. | |
| 44 C lduw zero-extends each limb and the adds are 64 bits wide, so the carry of | |
| 45 C a limb sum lands in bit 32. srlx by 32 moves it into cy for the next limb | |
| 46 C and stw stores only the low word. | |
| 47 C | |
| 48 C The loop does two limbs per pass: one pair lives in %o4/%o5, the next in | |
| 49 C %g2/%g3. The fitod instructions write %f2, which nothing here reads; | |
| 50 C presumably they only pad the instruction groups marked by the C --- lines. | |
| 51 C TODO confirm before removing them. | |
| 52 PROLOGUE(mpn_add_n) | |
| 53 lduw [s1p+0],%o4 C limb 0 of each operand | |
| 54 lduw [s2p+0],%o5 | |
| 55 addcc n,-2,n C negative: n was 1, zero: n was 2 | |
| 56 bl,pn %icc,L(end1) | |
| 57 mov 0,cy C delay slot: touches no memory, so n = 1 never reads limb 1 | |
| 58 lduw [s1p+4],%g2 C limb 1, reached only when n >= 2 | |
| 59 be,pn %icc,L(end2) C flags still from addcc; mov and lduw do not set them | |
| 60 lduw [s2p+4],%g3 C delay slot | |
| 61 | |
| 62 .align 16 | |
| 63 L(loop): | |
| 64 add %o4,%o5,%g4 C sum of the pair held in %o4/%o5 | |
| 65 add rp,8,rp C rp moves first; the stores below use rp-8 and rp-4 | |
| 66 lduw [s1p+8],%o4 C refill %o4/%o5 with the limbs two positions on | |
| 67 fitod %f0,%f2 | |
| 68 C --- | |
| 69 add cy,%g4,%g4 C carry in | |
| 70 addcc n,-1,n C flags consumed by the be below | |
| 71 lduw [s2p+8],%o5 | |
| 72 fitod %f0,%f2 | |
| 73 C --- | |
| 74 srlx %g4,32,cy C carry out | |
| 75 add s2p,8,s2p | |
| 76 stw %g4,[rp-8] | |
| 77 be,pn %icc,L(exito)+4 C target skips the first add of exito: the delay slot already did it | |
| 78 C --- | |
| 79 add %g2,%g3,%g4 C delay slot of be; sum of the pair held in %g2/%g3 | |
| 80 addcc n,-1,n C flags consumed by the bne below | |
| 81 lduw [s1p+12],%g2 C refill %g2/%g3 | |
| 82 fitod %f0,%f2 | |
| 83 C --- | |
| 84 add cy,%g4,%g4 | |
| 85 add s1p,8,s1p | |
| 86 lduw [s2p+4],%g3 C s2p has advanced already, so +4 matches s1p+12 above | |
| 87 fitod %f0,%f2 | |
| 88 C --- | |
| 89 srlx %g4,32,cy | |
| 90 bne,pt %icc,L(loop) | |
| 91 stw %g4,[rp-4] C delay slot: runs whether or not the branch is taken | |
| 92 C --- | |
| 93 L(exite): C fell out of the loop: %o4/%o5 then %g2/%g3 are still to be added | |
| 94 add %o4,%o5,%g4 | |
| 95 add cy,%g4,%g4 | |
| 96 srlx %g4,32,cy | |
| 97 stw %g4,[rp+0] | |
| 98 add %g2,%g3,%g4 | |
| 99 add cy,%g4,%g4 | |
| 100 stw %g4,[rp+4] | |
| 101 retl | |
| 102 srlx %g4,32,%o0 C delay slot: return the final carry | |
| 103 | |
| 104 L(exito): C entered at +4 from the be in the loop: %g2/%g3 then %o4/%o5 remain | |
| 105 add %g2,%g3,%g4 | |
| 106 add cy,%g4,%g4 | |
| 107 srlx %g4,32,cy | |
| 108 stw %g4,[rp-4] | |
| 109 add %o4,%o5,%g4 | |
| 110 add cy,%g4,%g4 | |
| 111 stw %g4,[rp+0] | |
| 112 retl | |
| 113 srlx %g4,32,%o0 C delay slot: return the final carry | |
| 114 | |
| 115 L(end1): C n = 1: one limb, no carry in | |
| 116 add %o4,%o5,%g4 | |
| 117 stw %g4,[rp+0] | |
| 118 retl | |
| 119 srlx %g4,32,%o0 C delay slot: return the carry | |
| 120 | |
| 121 L(end2): C n = 2: both limb pairs are already in registers | |
| 122 add %o4,%o5,%g4 | |
| 123 srlx %g4,32,cy | |
| 124 stw %g4,[rp+0] | |
| 125 add %g2,%g3,%g4 | |
| 126 add cy,%g4,%g4 | |
| 127 stw %g4,[rp+4] | |
| 128 retl | |
| 129 srlx %g4,32,%o0 C delay slot: return the final carry | |
| 130 EPILOGUE(mpn_add_n) | |
| OLD | NEW |