OLD | NEW |
| (Empty) |
1 dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1). | |
2 | |
3 dnl Copyright 2003 Free Software Foundation, Inc. | |
4 | |
5 dnl This file is part of the GNU MP Library. | |
6 | |
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify | |
8 dnl it under the terms of the GNU Lesser General Public License as published | |
9 dnl by the Free Software Foundation; either version 3 of the License, or (at | |
10 dnl your option) any later version. | |
11 | |
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but | |
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
15 dnl License for more details. | |
16 | |
17 dnl You should have received a copy of the GNU Lesser General Public License | |
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. | |
19 | |
20 include(`../config.m4') | |
21 | |
22 C cycles/limb | |
23 C 8000,8200: 2 | |
24 C 8500,8600,8700: 1.75 | |
25 | |
26 C TODO | |
27 C * Write special feed-in code for each (n mod 8). (See the ia64 code.) | |
28 C * Try to make this run at closer to 1.5 c/l. | |
29 C * Set up register aliases (define(`u0',`%r19')). | |
30 C * Explicitly align loop. | |
31 | |
32 dnl INPUT PARAMETERS | |
C Register aliases for the four arguments.  Per the header line of this file
C the operation is rp[] = up[] +- (vp[] << 1): rp is the result pointer, up
C the unshifted source and vp the source that gets shifted left by one bit.
C n is the limb count; the code below splits it into (n mod 8) single limbs
C followed by blocks of 8 limbs.
33 define(`rp',`%r26') | |
34 define(`up',`%r25') | |
35 define(`vp',`%r24') | |
36 define(`n',`%r23') | |
37 | |
C Select the operation at build time.
C NOTE(review): presumably exactly one of OPERATION_addlsh1_n and
C OPERATION_sublsh1_n is defined by the build system -- confirm.
C   ADCSBC  the carry-propagating arithmetic instruction: add,dc or sub,db
C   INITC   instruction prefix that loads the initial saved-carry value,
C           0 for the add variant and 1 for the subtract variant.  The 1
C           presumably reflects that sub,db treats a set carry bit as
C           "no borrow" -- verify against the PA-RISC manual.
C   func    name of the function emitted by PROLOGUE(func) below
38 ifdef(`OPERATION_addlsh1_n',` | |
39 define(ADCSBC, `add,dc') | |
40 define(INITC, `ldi 0,') | |
41 define(func, mpn_addlsh1_n) | |
42 ') | |
43 ifdef(`OPERATION_sublsh1_n',` | |
44 define(ADCSBC, `sub,db') | |
45 define(INITC, `ldi 1,') | |
46 define(func, mpn_sublsh1_n) | |
47 ') | |
48 | |
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it appears to
C announce the entry points this multi-function source can provide -- confirm.
49 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) | |
50 | |
C Select the ABI-dependent pieces, keyed on HAVE_ABI_2_0w or HAVE_ABI_2_0n.
C   LEVEL    the assembler .level directive emitted before the function
C   RETREG   register that receives the carry-out computed at function exit:
C            %r28 under 2.0w, %r29 under 2.0n
C   CLRRET1  extra instruction emitted at function exit: expands to dnl
C            (nothing) under 2.0w, clears %r28 under 2.0n
C NOTE(review): presumably %r28 is the high half of the return value pair
C in the 2.0n ABI, which is why it is cleared there -- confirm.
51 ifdef(`HAVE_ABI_2_0w',` | |
52 define(LEVEL, `.level 2.0w') | |
53 define(RETREG, `%r28') | |
54 define(CLRRET1, `dnl') | |
55 ') | |
56 ifdef(`HAVE_ABI_2_0n',` | |
57 define(LEVEL, `.level 2.0') | |
58 define(RETREG, `%r29') | |
59 define(CLRRET1, `ldi 0, %r28') | |
60 ') | |
61 | |
C func -- rp[] = up[] +- (vp[] << 1) over n limbs (see the header line of
C this file).  The carry-out is left in RETREG.
C
C Register usage as established by the code below:
C   %r1        carry saved between groups of limbs ("save cy"/"restore cy")
C   %r28       last vp limb loaded so far; its top bit is shifted into the
C              low bit of the next shifted limb
C   %r31, %r4-%r9, %r3   the 8 shifted vp limbs of a block, in that order
C   %r19-%r22  up limbs
C   %r29       result limb on its way to rp
C   %r3-%r9    saved in the 0x100-byte frame and reloaded before returning
C              (%r4-%r9 only on the unrolled path)
C
C NOTE(review): the code appears to assume n >= 1 -- with n = 0 the comib
C below would enter the 8-limb unrolled path.  Confirm against callers.
62 LEVEL | |
63 PROLOGUE(func) | |
C Store r3 at the current r30 and then advance r30 by 0x100.
64 std,ma %r3, 0x100(%r30) C save reg | |
65 | |
66 INITC %r1 C init saved cy | |
67 | |
68 C Primitive code for the first (n mod 8) limbs: | |
69 extrd,u n, 63, 3, %r22 C count for loop0 | |
70 comib,= 0, %r22, L(unrolled) C skip loop0? | |
C Delay slot of the comib: there is no previous vp limb yet, so the bit
C shifted into the first limb is zero.
71 copy %r0, %r28 | |
72 LDEF(loop0) | |
73 ldd 0(vp), %r21 | |
74 ldo 8(vp), vp | |
75 ldd 0(up), %r19 | |
76 ldo 8(up), up | |
C r31 = (r21 << 1) | (r28 >> 63): the current vp limb doubled, with the top
C bit of the previous vp limb as its low bit.
77 shrpd %r21, %r28, 63, %r31 | |
C Adding -1 to r1 carries out exactly when r1 is nonzero, which re-creates
C the saved carry; the sum itself is discarded into r0.
78 addi -1, %r1, %r0 C restore cy | |
79 ADCSBC %r19, %r31, %r29 | |
80 std %r29, 0(rp) | |
C r1 = 0 + 0 + carry.
81 add,dc %r0, %r0, %r1 C save cy | |
C Remember this vp limb so its top bit feeds the next limb.
82 copy %r21, %r28 | |
83 addib,> -1, %r22, L(loop0) | |
C Delay slot: advance rp on every iteration.
84 ldo 8(rp), rp | |
85 | |
C n -= 8; if n was at least 8 continue with the unrolled code.  The delay
C slot restores the carry for whichever path is taken.
86 addib,>= -8, n, L(unrolled) | |
87 addi -1, %r1, %r0 C restore cy | |
88 | |
C Fewer than 8 limbs in total, so finish here.  r28 = top bit of the last
C vp limb, i.e. the bit shifted out of the top.  RETREG = 0 +- that bit with
C carry; the subtract variant then negates the value by subtracting it
C from r0.
89 shrpd %r0, %r28, 63, %r28 | |
90 ADCSBC %r0, %r28, RETREG | |
91 ifdef(`OPERATION_sublsh1_n', | |
92 ` sub %r0, RETREG, RETREG') | |
93 CLRRET1 | |
94 | |
C Return; the delay slot steps r30 back by 0x100 and reloads r3 from there.
95 bve (%r2) | |
96 ldd,mb -0x100(%r30), %r3 | |
97 | |
98 | |
C Unrolled code, 8 limbs per pass.  Save r4-r9 in the frame, load the first
C 8 vp limbs and form their shifted values in r31, r4-r9, r3, and preload
C the first 4 up limbs.  Each shrpd combines a limb with the one below it;
C the first one uses r28 as left by loop0 (or zero) before r28 is reloaded
C with the last vp limb of this block.
99 LDEF(unrolled) | |
100 std %r4, -0xf8(%r30) C save reg | |
101 ldd 0(vp), %r4 | |
102 std %r5, -0xf0(%r30) C save reg | |
103 ldd 8(vp), %r5 | |
104 std %r6, -0xe8(%r30) C save reg | |
105 ldd 16(vp), %r6 | |
106 std %r7, -0xe0(%r30) C save reg | |
107 | |
108 ldd 24(vp), %r7 | |
109 shrpd %r4, %r28, 63, %r31 | |
110 std %r8, -0xd8(%r30) C save reg | |
111 ldd 32(vp), %r8 | |
112 shrpd %r5, %r4, 63, %r4 | |
113 std %r9, -0xd0(%r30) C save reg | |
114 ldd 40(vp), %r9 | |
115 shrpd %r6, %r5, 63, %r5 | |
116 ldd 48(vp), %r3 | |
117 shrpd %r7, %r6, 63, %r6 | |
118 ldd 56(vp), %r28 | |
119 shrpd %r8, %r7, 63, %r7 | |
120 ldd 0(up), %r19 | |
121 shrpd %r9, %r8, 63, %r8 | |
122 ldd 8(up), %r20 | |
123 shrpd %r3, %r9, 63, %r9 | |
124 ldd 16(up), %r21 | |
125 shrpd %r28, %r3, 63, %r3 | |
126 ldd 24(up), %r22 | |
127 | |
128 nop C alignment FIXME | |
C n -= 8; if nothing remains beyond the block already loaded, go straight
C to the tail.  The delay slot restores the carry for either path.
129 addib,<= -8, n, L(end) | |
130 addi -1, %r1, %r0 C restore cy | |
C Main loop: add/subtract the 8 prepared limb pairs in a single carry chain
C while loading the second 4 up limbs of this block and the first vp limbs
C of the next block.
131 LDEF(loop) | |
132 ADCSBC %r19, %r31, %r29 | |
133 ldd 32(up), %r19 | |
134 std %r29, 0(rp) | |
135 ADCSBC %r20, %r4, %r29 | |
136 ldd 40(up), %r20 | |
137 std %r29, 8(rp) | |
138 ADCSBC %r21, %r5, %r29 | |
139 ldd 48(up), %r21 | |
140 std %r29, 16(rp) | |
141 ADCSBC %r22, %r6, %r29 | |
142 ldd 56(up), %r22 | |
143 std %r29, 24(rp) | |
144 ADCSBC %r19, %r7, %r29 | |
145 ldd 64(vp), %r4 | |
146 std %r29, 32(rp) | |
147 ADCSBC %r20, %r8, %r29 | |
148 ldd 72(vp), %r5 | |
149 std %r29, 40(rp) | |
150 ADCSBC %r21, %r9, %r29 | |
151 ldd 80(vp), %r6 | |
152 std %r29, 48(rp) | |
153 ADCSBC %r22, %r3, %r29 | |
154 std %r29, 56(rp) | |
155 | |
C Park the carry in r1; it is restored in the delay slot of the loop branch.
156 add,dc %r0, %r0, %r1 C save cy | |
157 | |
C Load the rest of the next block of vp limbs, form its shifted values
C (r28 still holds the last vp limb of the block just finished when the
C first shrpd reads it) and preload the next 4 up limbs.
158 ldd 88(vp), %r7 | |
159 shrpd %r4, %r28, 63, %r31 | |
160 ldd 96(vp), %r8 | |
161 shrpd %r5, %r4, 63, %r4 | |
162 ldd 104(vp), %r9 | |
163 shrpd %r6, %r5, 63, %r5 | |
164 ldd 112(vp), %r3 | |
165 shrpd %r7, %r6, 63, %r6 | |
166 ldd 120(vp), %r28 | |
167 shrpd %r8, %r7, 63, %r7 | |
168 ldd 64(up), %r19 | |
169 shrpd %r9, %r8, 63, %r8 | |
170 ldd 72(up), %r20 | |
171 shrpd %r3, %r9, 63, %r9 | |
172 ldd 80(up), %r21 | |
173 shrpd %r28, %r3, 63, %r3 | |
174 ldd 88(up), %r22 | |
175 | |
C Advance all three pointers by one block (64 bytes), then n -= 8 and loop
C while n stays positive.
176 ldo 64(vp), vp | |
177 ldo 64(rp), rp | |
178 ldo 64(up), up | |
179 addib,> -8, n, L(loop) | |
180 addi -1, %r1, %r0 C restore cy | |
C Tail: the same 8 operations for the final block, with no further loads
C from vp.  The saved registers are reloaded from the frame in the gaps,
C each one after its last use as an operand.
181 LDEF(end) | |
182 ADCSBC %r19, %r31, %r29 | |
183 ldd 32(up), %r19 | |
184 std %r29, 0(rp) | |
185 ADCSBC %r20, %r4, %r29 | |
186 ldd 40(up), %r20 | |
187 std %r29, 8(rp) | |
188 ADCSBC %r21, %r5, %r29 | |
189 ldd 48(up), %r21 | |
190 std %r29, 16(rp) | |
191 ADCSBC %r22, %r6, %r29 | |
192 ldd 56(up), %r22 | |
193 std %r29, 24(rp) | |
194 ADCSBC %r19, %r7, %r29 | |
195 ldd -0xf8(%r30), %r4 C restore reg | |
196 std %r29, 32(rp) | |
197 ADCSBC %r20, %r8, %r29 | |
198 ldd -0xf0(%r30), %r5 C restore reg | |
199 std %r29, 40(rp) | |
200 ADCSBC %r21, %r9, %r29 | |
201 ldd -0xe8(%r30), %r6 C restore reg | |
202 std %r29, 48(rp) | |
203 ADCSBC %r22, %r3, %r29 | |
204 ldd -0xe0(%r30), %r7 C restore reg | |
205 std %r29, 56(rp) | |
206 | |
C Carry-out, computed the same way as on the short path: top bit of the
C last vp limb combined with the pending carry, negated for the subtract
C variant.
207 shrpd %r0, %r28, 63, %r28 | |
208 ldd -0xd8(%r30), %r8 C restore reg | |
209 ADCSBC %r0, %r28, RETREG | |
210 ifdef(`OPERATION_sublsh1_n', | |
211 ` sub %r0, RETREG, RETREG') | |
212 CLRRET1 | |
213 | |
214 ldd -0xd0(%r30), %r9 C restore reg | |
C Return; the delay slot steps r30 back by 0x100 and reloads r3 from there.
215 bve (%r2) | |
216 ldd,mb -0x100(%r30), %r3 C restore reg | |
217 EPILOGUE() | |
OLD | NEW |