Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: gcc/gmp/mpn/sparc64/sqr_diagonal.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gcc/gmp/mpn/sparc64/sparc64.h ('k') | gcc/gmp/mpn/thumb/add_n.s » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 dnl SPARC v9 64-bit mpn_sqr_diagonal.
2
3 dnl Copyright 2001, 2002 Free Software Foundation, Inc.
4
5 dnl This file is part of the GNU MP Library.
6
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
11
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
16
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C cycles/limb
23 C UltraSPARC 1&2: 22
24 C UltraSPARC 3: 36
25
26 C This was generated by the Sun C compiler. It runs at 22 cycles/limb on the
27 C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
28 C code using the same algorithm. For 1-3 limbs, a special loop was generated,
29 C which causes performance problems in particular for 2 and 3 limbs.
30 C Ultimately, this should be replaced by hand-written code in the same software
31 C pipeline style as e.g., addmul_1.asm.
32
33 ASM_START()
34 REGISTER(%g2,#scratch)
35 REGISTER(%g3,#scratch)
C mpn_sqr_diagonal: for each of the %i2 64-bit limbs read through %i1, compute
C the 128-bit square of that limb and store it through %i0 as two 64-bit
C words, low word first.
C
C Register roles after the save instruction below:
C   %i0  destination pointer on entry; copied to %o0, then scratch in .Loop
C   %i1  source pointer on entry; copied to %g1, then scratch in .Loop
C   %i2  limb count
C   %o7  0x1fffff, the 21-bit mask for the lo and mid pieces of a limb
C   %o4  0x3fffff, the 22-bit mask used when computing the carry
C   %g2  number of limbs fetched so far
C
C Method: a limb x is split into lo = x & 0x1fffff, mid = (x >> 21) & 0x1fffff
C and hi = x >> 42, so that x = lo + 2^21*mid + 2^42*hi and
C   x^2 = lo^2 + 2^22*mid*lo + 2^42*(mid^2 + 2*hi*lo) + 2^64*hi*mid + 2^84*hi^2.
C The pieces are passed to the FPU through the stack, converted with fxtod,
C multiplied with fmuld, converted back with fdtox and passed back through the
C stack to the integer unit, which shifts the terms into place and adds them.
C
C Stack slots:
C   [%sp+2279] hi piece      [%sp+2255] hi*hi
C   [%sp+2271] mid piece     [%sp+2247] hi*mid
C   [%sp+2263] lo piece      [%sp+2239] lo*lo
C                            [%sp+2231] mid*lo
C                            [%sp+2223] mid*mid + 2*hi*lo
C NOTE(review): the offsets presumably include the SPARC V9 stack bias of 2047,
C which would place all slots inside the 240-byte frame - confirm against the ABI.
36 PROLOGUE(mpn_sqr_diagonal)
C Allocate a 240-byte frame that holds the scratch slots listed above.
37 save %sp, -240, %sp
38
C Build the masks %o7 = 0x1fffff and %o4 = 0x3fffff, copy the source pointer
C to %g1 and the destination pointer to %o0, and branch to .Lsmall when the
C count is below 4.  The branch is not annulled, so the or in its delay slot
C zeroes %g2 on both paths.
39 sethi %hi(0x1ffc00), %o0
40 sethi %hi(0x3ffc00), %o1
41 add %o0, 1023, %o7
42 cmp %i2, 4
43 add %o1, 1023, %o4
44 or %g0, %i1, %g1
45 or %g0, %i0, %o0
46 bl,pn %xcc, .Lsmall
47 or %g0, 0, %g2
48
C Count >= 4: start-up code.  Limb 0 is split and all of its products are
C computed and stored to the product slots.  Limb 1 is split and its raw
C pieces are left loaded in %f0 (hi), %f2 (mid) and %f4 (lo).  Limb 2 is
C fetched into %o2 and its hi piece is left in %o1.  %g1 is advanced to the
C fourth limb and %g2 is set to 3.
49 ldx [%i1], %o1
50 add %i1, 24, %g1
51 or %g0, 3, %g2
52 srlx %o1, 42, %g3
53 stx %g3, [%sp+2279]
54 and %o1, %o7, %o2
55 stx %o2, [%sp+2263]
56 srlx %o1, 21, %o1
57 ldd [%sp+2279], %f0
58 and %o1, %o7, %o1
59 stx %o1, [%sp+2271]
60 ldx [%i1+8], %o2
61 fxtod %f0, %f12
62 srlx %o2, 21, %o1
63 and %o2, %o7, %g3
64 ldd [%sp+2263], %f2
65 fmuld %f12, %f12, %f10
66 srlx %o2, 42, %o2
67 ldd [%sp+2271], %f0
68 and %o1, %o7, %o1
69 fxtod %f2, %f8
70 stx %o2, [%sp+2279]
71 stx %o1, [%sp+2271]
72 fxtod %f0, %f0
73 stx %g3, [%sp+2263]
74 fdtox %f10, %f14
75 fmuld %f12, %f8, %f6
76 ldx [%i1+16], %o2
77 std %f14, [%sp+2255]
78 fmuld %f0, %f0, %f2
79 fmuld %f8, %f8, %f10
80 srlx %o2, 42, %o1
81 faddd %f6, %f6, %f6
82 fmuld %f12, %f0, %f12
83 fmuld %f0, %f8, %f8
84 ldd [%sp+2279], %f0
85 ldd [%sp+2263], %f4
86 fdtox %f10, %f10
87 std %f10, [%sp+2239]
88 faddd %f2, %f6, %f6
89 ldd [%sp+2271], %f2
90 fdtox %f12, %f12
91 std %f12, [%sp+2247]
92 fdtox %f8, %f8
93 std %f8, [%sp+2231]
94 fdtox %f6, %f6
95 std %f6, [%sp+2223]
96
C Main loop: two result words are stored per iteration, with the work for
C three consecutive limbs interleaved.  At the top of an iteration:
C   - %o2 is the most recently fetched limb and %o1 its hi piece,
C   - %f0/%f2/%f4 hold the raw hi/mid/lo pieces of the limb before it,
C   - the product slots hold the fdtox results of the limb before that.
C The iteration stores the pieces of %o2 to the piece slots, reads the old
C products out of the product slots before overwriting them with the products
C of the pieces in the FP registers, combines the old products into a low
C word in %o1 and a high word in %g3, and fetches the next limb through %g1.
C %i0 and %i1 no longer hold the arguments here and are used as temporaries.
97 .Loop: srlx %o2, 21, %g3
98 stx %o1, [%sp+2279]
99 add %g2, 1, %g2
100 and %g3, %o7, %o1
101 ldx [%sp+2255], %g4
102 cmp %g2, %i2
103 stx %o1, [%sp+2271]
104 add %g1, 8, %g1
105 add %o0, 16, %o0
106 ldx [%sp+2239], %o1
107 fxtod %f0, %f10
108 fxtod %f4, %f14
109 ldx [%sp+2231], %i0
110 ldx [%sp+2223], %g5
111 ldx [%sp+2247], %g3
112 and %o2, %o7, %o2
113 fxtod %f2, %f8
114 fmuld %f10, %f10, %f0
115 stx %o2, [%sp+2263]
116 fmuld %f10, %f14, %f6
117 ldx [%g1-8], %o2
118 fmuld %f10, %f8, %f12
119 fdtox %f0, %f2
120 ldd [%sp+2279], %f0
121 fmuld %f8, %f8, %f4
122 faddd %f6, %f6, %f6
123 fmuld %f14, %f14, %f10
124 std %f2, [%sp+2255]
125 sllx %g4, 20, %g4
126 ldd [%sp+2271], %f2
127 fmuld %f8, %f14, %f8
128 sllx %i0, 22, %i1
129 fdtox %f12, %f12
130 std %f12, [%sp+2247]
131 sllx %g5, 42, %i0
132 add %o1, %i1, %o1
133 faddd %f4, %f6, %f6
134 ldd [%sp+2263], %f4
135 add %o1, %i0, %o1
136 add %g3, %g4, %g3
137 fdtox %f10, %f10
138 std %f10, [%sp+2239]
C Carry out of the low word: bits 42..63 of the sum in %o1 are compared with
C the low 22 bits of the 2^42 term in %g5.  If the sum bits are smaller the
C addition wrapped, the subtraction goes negative, and the shift by 63
C extracts that sign bit as the carry into the high word.
139 srlx %o1, 42, %g4
140 and %g5, %o4, %i0
141 fdtox %f8, %f8
142 std %f8, [%sp+2231]
143 srlx %g5, 22, %g5
144 sub %g4, %i0, %g4
145 fdtox %f6, %f6
146 std %f6, [%sp+2223]
147 srlx %g4, 63, %g4
148 add %g3, %g5, %g3
149 add %g3, %g4, %g3
150 stx %o1, [%o0-16]
151 srlx %o2, 42, %o1
C The branch tests the condition codes set by the cmp of %g2 against %i2 near
C the top of the loop.  The store in the delay slot writes the high word.
152 bl,pt %xcc, .Loop
153 stx %g3, [%o0-8]
154
C Wind-down: no further limbs are fetched.  Three limbs are still in flight:
C one with its products in the product slots, one with its pieces in
C %f0/%f2/%f4, and the last fetched limb in %o2 with its hi piece in %o1.
C %o0 is advanced by 48 and the three results are stored at [%o0-48],
C [%o0-32] and [%o0-16], each followed by its high word 8 bytes later.
155 stx %o1, [%sp+2279]
156 srlx %o2, 21, %o1
157 fxtod %f0, %f16
158 ldx [%sp+2223], %g3
159 fxtod %f4, %f6
160 and %o2, %o7, %o3
161 stx %o3, [%sp+2263]
162 fxtod %f2, %f4
163 and %o1, %o7, %o1
164 ldx [%sp+2231], %o2
165 sllx %g3, 42, %g4
166 fmuld %f16, %f16, %f14
167 stx %o1, [%sp+2271]
168 fmuld %f16, %f6, %f8
169 add %o0, 48, %o0
170 ldx [%sp+2239], %o1
171 sllx %o2, 22, %o2
172 fmuld %f4, %f4, %f10
173 ldx [%sp+2255], %o3
174 fdtox %f14, %f14
175 fmuld %f4, %f6, %f2
176 std %f14, [%sp+2255]
177 faddd %f8, %f8, %f12
178 add %o1, %o2, %o2
179 fmuld %f16, %f4, %f4
180 ldd [%sp+2279], %f0
181 sllx %o3, 20, %g5
182 add %o2, %g4, %o2
183 fmuld %f6, %f6, %f6
184 srlx %o2, 42, %o3
185 and %g3, %o4, %g4
186 srlx %g3, 22, %g3
187 faddd %f10, %f12, %f16
188 ldd [%sp+2271], %f12
189 ldd [%sp+2263], %f8
190 fxtod %f0, %f0
191 sub %o3, %g4, %o3
192 ldx [%sp+2247], %o1
193 srlx %o3, 63, %o3
194 fdtox %f2, %f10
195 fxtod %f8, %f8
196 std %f10, [%sp+2231]
197 fdtox %f6, %f6
198 std %f6, [%sp+2239]
199 add %o1, %g5, %o1
200 fmuld %f0, %f0, %f2
201 fdtox %f16, %f16
202 std %f16, [%sp+2223]
203 add %o1, %g3, %o1
204 fdtox %f4, %f4
205 std %f4, [%sp+2247]
206 fmuld %f0, %f8, %f10
207 fxtod %f12, %f12
208 add %o1, %o3, %o1
C First of the three remaining results.
209 stx %o2, [%o0-48]
210 fmuld %f8, %f8, %f6
211 stx %o1, [%o0-40]
212 fdtox %f2, %f2
213 ldx [%sp+2231], %o2
214 faddd %f10, %f10, %f10
215 ldx [%sp+2223], %g3
216 fmuld %f12, %f12, %f4
217 fdtox %f6, %f6
218 ldx [%sp+2239], %o1
219 sllx %o2, 22, %o2
220 fmuld %f12, %f8, %f8
221 sllx %g3, 42, %g5
222 ldx [%sp+2255], %o3
223 fmuld %f0, %f12, %f0
224 add %o1, %o2, %o2
225 faddd %f4, %f10, %f4
226 ldx [%sp+2247], %o1
227 add %o2, %g5, %o2
228 and %g3, %o4, %g4
229 fdtox %f8, %f8
230 sllx %o3, 20, %g5
231 std %f8, [%sp+2231]
232 fdtox %f0, %f0
233 srlx %o2, 42, %o3
234 add %o1, %g5, %o1
235 fdtox %f4, %f4
236 srlx %g3, 22, %g3
237 sub %o3, %g4, %o3
238 std %f6, [%sp+2239]
239 std %f4, [%sp+2223]
240 srlx %o3, 63, %o3
241 add %o1, %g3, %o1
242 std %f2, [%sp+2255]
243 add %o1, %o3, %o1
244 std %f0, [%sp+2247]
C Second of the three remaining results.
245 stx %o2, [%o0-32]
246 stx %o1, [%o0-24]
C Last result: only the integer recombination of the final products is left.
247 ldx [%sp+2231], %o2
248 ldx [%sp+2223], %o3
249 ldx [%sp+2239], %o1
250 sllx %o2, 22, %o2
251 sllx %o3, 42, %g5
252 ldx [%sp+2255], %g4
253 and %o3, %o4, %g3
254 add %o1, %o2, %o2
255 ldx [%sp+2247], %o1
256 add %o2, %g5, %o2
257 stx %o2, [%o0-16]
258 sllx %g4, 20, %g4
259 srlx %o2, 42, %o2
260 add %o1, %g4, %o1
261 srlx %o3, 22, %o3
262 sub %o2, %g3, %o2
263 srlx %o2, 63, %o2
264 add %o1, %o3, %o1
265 add %o1, %o2, %o1
266 stx %o1, [%o0-8]
C Return to the caller; the restore sits in the delay slot of ret.
267 ret
268 restore %g0, %g0, %g0
C Simple loop for counts below 4.  One limb is processed from start to finish
C per iteration, with no overlap between limbs.  On entry %g1 is the source
C pointer, %o0 the destination pointer and %g2 is 0.  The loop body runs
C before the count is tested, so at least one limb is always processed.
269 .Lsmall:
270 ldx [%g1], %o2
271 .Loop0:
C Split the limb in %o2 into lo, mid and hi and hand the pieces to the FPU
C through the piece slots.  The cmp here sets the condition codes tested by
C the branch at the bottom of the loop.
272 and %o2, %o7, %o1
273 stx %o1, [%sp+2263]
274 add %g2, 1, %g2
275 srlx %o2, 21, %o1
276 add %g1, 8, %g1
277 srlx %o2, 42, %o2
278 stx %o2, [%sp+2279]
279 and %o1, %o7, %o1
280 ldd [%sp+2263], %f0
281 cmp %g2, %i2
282 stx %o1, [%sp+2271]
C %f6 = lo, %f2 = hi and %f10 = mid as doubles.  Form the products and store
C their integer values to the product slots.
283 fxtod %f0, %f6
284 ldd [%sp+2279], %f0
285 ldd [%sp+2271], %f4
286 fxtod %f0, %f2
287 fmuld %f6, %f6, %f0
288 fxtod %f4, %f10
289 fmuld %f2, %f6, %f4
290 fdtox %f0, %f0
291 std %f0, [%sp+2239]
292 fmuld %f10, %f6, %f8
293 fmuld %f10, %f10, %f0
294 faddd %f4, %f4, %f6
295 fmuld %f2, %f2, %f4
296 fdtox %f8, %f8
297 std %f8, [%sp+2231]
298 fmuld %f2, %f10, %f2
299 faddd %f0, %f6, %f0
300 fdtox %f4, %f4
301 std %f4, [%sp+2255]
302 fdtox %f2, %f2
303 std %f2, [%sp+2247]
304 fdtox %f0, %f0
305 std %f0, [%sp+2223]
C Recombine in the integer unit, writing t for mid*mid + 2*hi*lo:
C   low word  = lo*lo + (mid*lo << 22) + (t << 42), truncated to 64 bits
C   high word = hi*mid + (hi*hi << 20) + (t >> 22) + carry
C The carry is obtained by comparing bits 42..63 of the low word with the low
C 22 bits of t: the sign bit of their difference is 1 when the sum wrapped.
306 ldx [%sp+2239], %o1
307 ldx [%sp+2255], %g4
308 ldx [%sp+2231], %o2
309 sllx %g4, 20, %g4
310 ldx [%sp+2223], %o3
311 sllx %o2, 22, %o2
312 sllx %o3, 42, %g5
313 add %o1, %o2, %o2
314 ldx [%sp+2247], %o1
315 add %o2, %g5, %o2
316 stx %o2, [%o0]
317 and %o3, %o4, %g3
318 srlx %o2, 42, %o2
319 add %o1, %g4, %o1
320 srlx %o3, 22, %o3
321 sub %o2, %g3, %o2
322 srlx %o2, 63, %o2
323 add %o1, %o3, %o1
324 add %o1, %o2, %o1
325 stx %o1, [%o0+8]
326 add %o0, 16, %o0
C Annulled branch: the load of the next limb in the delay slot is executed
C only when the branch back to .Loop0 is taken.
327 bl,a,pt %xcc, .Loop0
328 ldx [%g1], %o2
329 ret
330 restore %g0, %g0, %g0
331 EPILOGUE(mpn_sqr_diagonal)
OLDNEW
« no previous file with comments | « gcc/gmp/mpn/sparc64/sparc64.h ('k') | gcc/gmp/mpn/thumb/add_n.s » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698