OLD | NEW |
| (Empty) |
1 dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store | |
2 dnl sum in a third limb vector. | |
3 | |
4 dnl Copyright 2001 Free Software Foundation, Inc. | |
5 | |
6 dnl This file is part of the GNU MP Library. | |
7 | |
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify | |
9 dnl it under the terms of the GNU Lesser General Public License as published | |
10 dnl by the Free Software Foundation; either version 3 of the License, or (at | |
11 dnl your option) any later version. | |
12 | |
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but | |
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
16 dnl License for more details. | |
17 | |
18 dnl You should have received a copy of the GNU Lesser General Public License | |
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. | |
20 | |
21 | |
22 include(`../config.m4') | |
23 | |
24 C INPUT PARAMETERS | |
C rp is the destination limb pointer, s1p and s2p are the two source limb
C pointers, and n is the limb count, which must be greater than zero.
C rp is also %o0, the register in which the carry-out is returned.
25 define(rp,%o0) | |
26 define(s1p,%o1) | |
27 define(s2p,%o2) | |
28 define(n,%o3) | |
C cy is not an argument: it is the scratch register that holds the carry
C passed from one limb addition to the next.
29 define(cy,%g1) | |
30 | |
31 C This code uses 64-bit operations on `o' and `g' registers. It doesn't | |
32 C require that `o' registers' upper 32 bits are preserved by the operating | |
33 C system, but if they are not, they must be zeroed. That is indeed what | |
34 C happens at least on Slowaris 2.5 and 2.6. | |
35 | |
36 C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at | |
37 C about 10 cycles/limb from the Ecache. | |
38 | |
C mpn_add_n: add the n-limb vectors at s1p and s2p, store the 32-bit limbs of
C the sum at rp, and return the carry-out of the top limb in %o0.
C
C Limbs are loaded with lduw, so each one sits zero-extended in a 64-bit
C register.  A 64-bit add of two limbs plus the incoming carry leaves the
C carry-out in bit 32; srlx by 32 moves it into cy and stw stores the low
C 32 bits.  The integer condition codes are set only by the addcc
C instructions that count n down.
C
C The entry code replaces n by n-2.  n = 1 goes to L(end1) and n = 2 goes to
C L(end2).  Larger counts enter the loop, which handles two limbs per pass:
C the next even-indexed operand pair is kept in %o4/%o5 and the next
C odd-indexed pair in %g2/%g3, each loaded one step ahead of its use.
C
C Registers written: %o0 %o1 %o2 %o3 %o4 %o5 %g1 %g2 %g3 %g4 and %f2.
39 ASM_START() | |
40 PROLOGUE(mpn_add_n) | |
C Load limb 0 of both operands.
41 lduw [s1p+0],%o4 | |
42 lduw [s2p+0],%o5 | |
C n = n-2, setting the condition codes tested by both branches below.
43 addcc n,-2,n | |
C Negative result means n was 1.
44 bl,pn %icc,L(end1) | |
C Delay slot: load limb 1 of s1p.
C NOTE(review): the branch has no annul bit, so this load also executes when
C n = 1 and then reads one limb past the end of s1p.
45 lduw [s1p+4],%g2 | |
46 lduw [s2p+4],%g3 | |
C Zero result means n was 2.  The loads above do not alter the condition codes.
47 be,pn %icc,L(end2) | |
C Delay slot: clear the carry; executed whether or not the branch is taken.
48 mov 0,cy | |
49 | |
50 .align 16 | |
C Main loop, two limbs per pass.  The existing C --- lines split the body
C into groups of four instructions, presumably one issue group each - confirm
C against the UltraSPARC grouping rules before reordering anything.
C The fitod instructions write %f2, which nothing in this routine reads;
C presumably they are fillers that keep the groups aligned - confirm before
C removing them.
51 L(loop): | |
C Even limb: 64-bit sum of the pair in %o4/%o5.
52 add %o4,%o5,%g4 | |
C Advance rp now; the stores in this pass use rp-8 and rp-4.
53 add rp,8,rp | |
C Preload the next even limb of s1p.
54 lduw [s1p+8],%o4 | |
55 fitod %f0,%f2 | |
56 C --- | |
C Add the carry from the previous limb.
57 add cy,%g4,%g4 | |
C Count the even limb; the be below tests this result.
58 addcc n,-1,n | |
C Preload the next even limb of s2p.
59 lduw [s2p+8],%o5 | |
60 fitod %f0,%f2 | |
61 C --- | |
C Carry-out of the even limb is bit 32 of the sum.
62 srlx %g4,32,cy | |
63 add s2p,8,s2p | |
C Store the low 32 bits of the even limb.
64 stw %g4,[rp-8] | |
C If n reached zero, finish in L(exito).  The target skips the first
C instruction of L(exito) because the delay slot below is that same add.
65 be,pn %icc,L(exito)+4 | |
66 C --- | |
C Delay slot and start of the odd limb: 64-bit sum of the pair in %g2/%g3.
67 add %g2,%g3,%g4 | |
C Count the odd limb; the bne below tests this result.
68 addcc n,-1,n | |
C Preload the next odd limb of s1p; s1p has not been advanced yet, hence +12.
69 lduw [s1p+12],%g2 | |
70 fitod %f0,%f2 | |
71 C --- | |
72 add cy,%g4,%g4 | |
73 add s1p,8,s1p | |
C Preload the next odd limb of s2p; s2p was already advanced, hence +4.
74 lduw [s2p+4],%g3 | |
75 fitod %f0,%f2 | |
76 C --- | |
C Carry-out of the odd limb.
77 srlx %g4,32,cy | |
C Loop while n is nonzero.
78 bne,pt %icc,L(loop) | |
C Delay slot: store the low 32 bits of the odd limb.
79 stw %g4,[rp-4] | |
80 C --- | |
C Fall-through exit: two preloaded limbs remain, the pair in %o4/%o5 and then
C the pair in %g2/%g3.  They are stored at rp+0 and rp+4.
81 L(exite): | |
82 add %o4,%o5,%g4 | |
83 add cy,%g4,%g4 | |
84 srlx %g4,32,cy | |
85 stw %g4,[rp+0] | |
86 add %g2,%g3,%g4 | |
87 add cy,%g4,%g4 | |
88 stw %g4,[rp+4] | |
89 retl | |
C Delay slot: return the final carry-out in %o0.
90 srlx %g4,32,%o0 | |
91 | |
C Exit taken from the middle of the loop: the pending %g2/%g3 limb is
C finished and stored at rp-4, then the preloaded %o4/%o5 limb at rp+0.
C The loop enters at L(exito)+4, having already executed this first add
C in its branch delay slot.
92 L(exito): | |
93 add %g2,%g3,%g4 | |
94 add cy,%g4,%g4 | |
95 srlx %g4,32,cy | |
96 stw %g4,[rp-4] | |
97 add %o4,%o5,%g4 | |
98 add cy,%g4,%g4 | |
99 stw %g4,[rp+0] | |
100 retl | |
C Delay slot: return the final carry-out in %o0.
101 srlx %g4,32,%o0 | |
102 | |
C n = 1: a single limb with no carry-in; cy is neither set nor read here.
103 L(end1): | |
104 add %o4,%o5,%g4 | |
105 stw %g4,[rp+0] | |
106 retl | |
C Delay slot: return the carry-out in %o0.
107 srlx %g4,32,%o0 | |
108 | |
C n = 2: limb 0 from %o4/%o5, then limb 1 from %g2/%g3 plus the carry.
109 L(end2): | |
110 add %o4,%o5,%g4 | |
111 srlx %g4,32,cy | |
112 stw %g4,[rp+0] | |
113 add %g2,%g3,%g4 | |
114 add cy,%g4,%g4 | |
115 stw %g4,[rp+4] | |
116 retl | |
C Delay slot: return the carry-out in %o0.
117 srlx %g4,32,%o0 | |
118 EPILOGUE(mpn_add_n) | |
OLD | NEW |