OLD | NEW |
| (Empty) |
1 .ident "s390x.S, version 1.1" | |
2 // ==================================================================== | |
3 // Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
4 // project. | |
5 // | |
6 // Rights for redistribution and usage in source and binary forms are | |
7 // granted according to the OpenSSL license. Warranty of any kind is | |
8 // disclaimed. | |
9 // ==================================================================== | |
10 | |
.text

// zero is only a name for %r0. Each routine that relies on it loads 0 into
// the register itself (lghi zero,0) before the first use.
#define zero    %r0
14 | |
// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] += ap[i]*w for i in [0,len), carrying between words.
//   in:  %r2 = rp, %r3 = ap, %r4 = len, %r5 = w
//   out: %r2 = carry word out of the last position (0 when len<=0)
// Register roles: %r1 = rp, %r2 = byte offset, %r4 = number of 4-word
// groups, %r10 = number of leftover words, %r6:%r7 and %r8:%r9 = products,
// %r8 = carry word handed from one group to the next, zero (%r0) = 0.
// %r6-%r10 are spilled to 48(%r15) and reloaded on the exit path.
// The carry bit of every alg is left in the condition code for the next
// alcgr, so nothing that sets the condition code may be inserted between
// the arithmetic steps or between the two loops.
.globl  bn_mul_add_words
.type   bn_mul_add_words,@function
.align  4
bn_mul_add_words:
        la      %r1,0(%r2)              // rp moves to %r1
        lghi    %r2,0                   // offset = 0, also the early return value
        lghi    zero,0
        ltgfr   %r4,%r4                 // sign-extend len and test it
        bler    %r14                    // len<=0: return 0

        stmg    %r6,%r10,48(%r15)
        lghi    %r8,0                   // carry word = 0
        lghi    %r10,3
        nr      %r10,%r4                // leftover = len%4
        sra     %r4,2                   // groups = len/4, sets condition code
        jz      .Lmadd_tail             // no full group; carry bit is clear on this path
        algr    zero,zero               // 0+0 clears the carry bit left by sra

.Lmadd_by4:
        lg      %r7,0(%r2,%r3)          // word 0: ap[i]
        mlgr    %r6,%r5                 // %r6:%r7 = ap[i]*w
        alcgr   %r7,%r8                 // low += carry word + carry bit
        alcgr   %r6,zero                // ripple into high
        alg     %r7,0(%r2,%r1)          // low += rp[i]
        stg     %r7,0(%r2,%r1)          // rp[i] = low

        lg      %r9,8(%r2,%r3)          // word 1, high of word 0 is the carry word
        mlgr    %r8,%r5
        alcgr   %r9,%r6
        alcgr   %r8,zero
        alg     %r9,8(%r2,%r1)
        stg     %r9,8(%r2,%r1)

        lg      %r7,16(%r2,%r3)         // word 2
        mlgr    %r6,%r5
        alcgr   %r7,%r8
        alcgr   %r6,zero
        alg     %r7,16(%r2,%r1)
        stg     %r7,16(%r2,%r1)

        lg      %r9,24(%r2,%r3)         // word 3, leaves carry word in %r8
        mlgr    %r8,%r5
        alcgr   %r9,%r6
        alcgr   %r8,zero
        alg     %r9,24(%r2,%r1)
        stg     %r9,24(%r2,%r1)

        la      %r2,32(%r2)             // advance by four words
        brct    %r4,.Lmadd_by4

        la      %r10,1(%r10)            // bias so brct falls through when len%4 is 0
        brct    %r10,.Lmadd_tail        // la and brct keep the condition code

.Lmadd_done:
        alcgr   %r8,zero                // fold the pending carry bit into the carry word
        lgr     %r2,%r8                 // return value, must precede the reload of %r8
        lmg     %r6,%r10,48(%r15)
        br      %r14

.Lmadd_tail:
        lg      %r7,0(%r2,%r3)          // ap[i]
        mlgr    %r6,%r5                 // %r6:%r7 = ap[i]*w
        alcgr   %r7,%r8                 // low += carry word + carry bit
        alcgr   %r6,zero
        alg     %r7,0(%r2,%r1)          // low += rp[i]
        stg     %r7,0(%r2,%r1)          // rp[i] = low

        lgr     %r8,%r6                 // high becomes the next carry word
        la      %r2,8(%r2)              // advance by one word
        brct    %r10,.Lmadd_tail

        j       .Lmadd_done
.size   bn_mul_add_words,.-bn_mul_add_words
89 | |
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] = low word of (ap[i]*w + carry) for i in [0,len).
//   in:  %r2 = rp, %r3 = ap, %r4 = len, %r5 = w
//   out: %r2 = carry word out of the last position (0 when len<=0)
// Register roles: %r1 = rp, %r2 = byte offset, %r4 = number of 4-word
// groups, %r10 = number of leftover words, %r6:%r7 and %r8:%r9 = products,
// %r8 = carry word handed from one group to the next, zero (%r0) = 0.
// %r6-%r10 are spilled to 48(%r15) and reloaded on the exit path.
// The carry bit of each alcgr stays in the condition code and is picked up
// by the alcgr of the following word, so nothing that sets the condition
// code may be inserted between the steps or between the two loops.
.globl  bn_mul_words
.type   bn_mul_words,@function
.align  4
bn_mul_words:
        la      %r1,0(%r2)              // rp moves to %r1
        lghi    %r2,0                   // offset = 0, also the early return value
        lghi    zero,0
        ltgfr   %r4,%r4                 // sign-extend len and test it
        bler    %r14                    // len<=0: return 0

        stmg    %r6,%r10,48(%r15)
        lghi    %r8,0                   // carry word = 0
        lghi    %r10,3
        nr      %r10,%r4                // leftover = len%4
        sra     %r4,2                   // groups = len/4, sets condition code
        jz      .Lmul_tail              // no full group; carry bit is clear on this path
        algr    zero,zero               // 0+0 clears the carry bit left by sra

.Lmul_by4:
        lg      %r7,0(%r2,%r3)          // word 0: ap[i]
        mlgr    %r6,%r5                 // %r6:%r7 = ap[i]*w
        alcgr   %r7,%r8                 // low += carry word + carry bit
        stg     %r7,0(%r2,%r1)          // rp[i] = low

        lg      %r9,8(%r2,%r3)          // word 1, high of word 0 is the carry word
        mlgr    %r8,%r5
        alcgr   %r9,%r6
        stg     %r9,8(%r2,%r1)

        lg      %r7,16(%r2,%r3)         // word 2
        mlgr    %r6,%r5
        alcgr   %r7,%r8
        stg     %r7,16(%r2,%r1)

        lg      %r9,24(%r2,%r3)         // word 3, leaves carry word in %r8
        mlgr    %r8,%r5
        alcgr   %r9,%r6
        stg     %r9,24(%r2,%r1)

        la      %r2,32(%r2)             // advance by four words
        brct    %r4,.Lmul_by4

        la      %r10,1(%r10)            // bias so brct falls through when len%4 is 0
        brct    %r10,.Lmul_tail         // la and brct keep the condition code

.Lmul_done:
        alcgr   %r8,zero                // fold the pending carry bit into the carry word
        lgr     %r2,%r8                 // return value, must precede the reload of %r8
        lmg     %r6,%r10,48(%r15)
        br      %r14

.Lmul_tail:
        lg      %r7,0(%r2,%r3)          // ap[i]
        mlgr    %r6,%r5                 // %r6:%r7 = ap[i]*w
        alcgr   %r7,%r8                 // low += carry word + carry bit
        stg     %r7,0(%r2,%r1)          // rp[i] = low

        lgr     %r8,%r6                 // high becomes the next carry word
        la      %r2,8(%r2)              // advance by one word
        brct    %r10,.Lmul_tail

        j       .Lmul_done
.size   bn_mul_words,.-bn_mul_words
154 | |
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4);
//
// For every i in [0,len) the 128-bit square of ap[i] is stored as
// rp[2*i] (low word) and rp[2*i+1] (high word). Nothing is done when len<=0.
//   in: %r2 = rp, %r3 = ap, %r4 = len
// Both pointers are advanced in place. %r1 counts 4-word groups, %r4 is
// reused as the leftover counter, %r6:%r7 hold the product and are spilled
// to 48(%r15) for the duration of the routine.
.globl  bn_sqr_words
.type   bn_sqr_words,@function
.align  4
bn_sqr_words:
        ltgfr   %r4,%r4                 // sign-extend len and test it
        bler    %r14                    // len<=0: nothing to do

        stmg    %r6,%r7,48(%r15)
        srag    %r1,%r4,2               // groups = len/4, sets condition code
        jz      .Lsqr_tail              // len<4: %r4 already equals the leftover count

.Lsqr_by4:
        lg      %r7,0(%r3)              // ap[0]
        mlgr    %r6,%r7                 // %r6:%r7 = ap[0]^2
        stg     %r7,0(%r2)              // low word
        stg     %r6,8(%r2)              // high word

        lg      %r7,8(%r3)              // ap[1]
        mlgr    %r6,%r7
        stg     %r7,16(%r2)
        stg     %r6,24(%r2)

        lg      %r7,16(%r3)             // ap[2]
        mlgr    %r6,%r7
        stg     %r7,32(%r2)
        stg     %r6,40(%r2)

        lg      %r7,24(%r3)             // ap[3]
        mlgr    %r6,%r7
        stg     %r7,48(%r2)
        stg     %r6,56(%r2)

        la      %r3,32(%r3)             // ap += 4
        la      %r2,64(%r2)             // rp += 8
        brct    %r1,.Lsqr_by4

        lghi    %r1,3
        nr      %r4,%r1                 // leftover = len%4
        jz      .Lsqr_done

.Lsqr_tail:
        lg      %r7,0(%r3)
        mlgr    %r6,%r7                 // %r6:%r7 = ap[0]^2
        stg     %r7,0(%r2)
        stg     %r6,8(%r2)

        la      %r3,8(%r3)              // ap += 1
        la      %r2,16(%r2)             // rp += 2
        brct    %r4,.Lsqr_tail

.Lsqr_done:
        lmg     %r6,%r7,48(%r15)
        br      %r14
.size   bn_sqr_words,.-bn_sqr_words
210 | |
// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
//
// Returns the quotient of the 128-bit value h:l divided by d.
//   in:  %r2 = h, %r3 = l, %r4 = d
//   out: %r2 = quotient
// dlgr divides the pair %r2:%r3 and leaves the remainder in %r2 and the
// quotient in %r3, hence the final move. The remainder is discarded.
// No check of d is made here; presumably the caller guarantees h < d so
// that the quotient fits in one word - verify against callers.
.globl  bn_div_words
.type   bn_div_words,@function
.align  4
bn_div_words:
        dlgr    %r2,%r4                 // %r3 = h:l / d, %r2 = h:l % d
        lgr     %r2,%r3                 // quotient is the return value
        br      %r14
.size   bn_div_words,.-bn_div_words
220 | |
// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] + bp[i] + carry for i in [0,len).
//   in:  %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len
//   out: %r2 = final carry, 0 or 1 (0 when len<=0)
// Register roles: %r1 = rp, %r2 = byte offset, %r5 = number of 4-word
// groups, %r6 = number of leftover words (spilled to 48(%r15)), %r0 = scratch.
// The carry lives in the condition code from one alcg to the next, so only
// instructions that leave the condition code alone appear inside the loops.
.globl  bn_add_words
.type   bn_add_words,@function
.align  4
bn_add_words:
        la      %r1,0(%r2)              // rp moves to %r1
        lghi    %r2,0                   // offset = 0, also the early return value
        ltgfr   %r5,%r5                 // sign-extend len and test it
        bler    %r14                    // len<=0: return 0

        stg     %r6,48(%r15)
        lghi    %r6,3
        nr      %r6,%r5                 // leftover = len%4
        sra     %r5,2                   // groups = len/4, sra because it sets the condition code
        jz      .Ladd_tail              // no full group; carry is clear on this path
        algr    %r2,%r2                 // offset is 0, so 0+0 just clears the carry

.Ladd_by4:
        lg      %r0,0(%r2,%r3)
        alcg    %r0,0(%r2,%r4)
        stg     %r0,0(%r2,%r1)
        lg      %r0,8(%r2,%r3)
        alcg    %r0,8(%r2,%r4)
        stg     %r0,8(%r2,%r1)
        lg      %r0,16(%r2,%r3)
        alcg    %r0,16(%r2,%r4)
        stg     %r0,16(%r2,%r1)
        lg      %r0,24(%r2,%r3)
        alcg    %r0,24(%r2,%r4)
        stg     %r0,24(%r2,%r1)

        la      %r2,32(%r2)             // advance by four words
        brct    %r5,.Ladd_by4

        la      %r6,1(%r6)              // bias so brct falls through when len%4 is 0
        brct    %r6,.Ladd_tail          // la and brct keep the condition code

.Ladd_done:
        lghi    %r2,0
        alcgr   %r2,%r2                 // 0+0+carry = carry as the return value
        lg      %r6,48(%r15)
        br      %r14

.Ladd_tail:
        lg      %r0,0(%r2,%r3)
        alcg    %r0,0(%r2,%r4)
        stg     %r0,0(%r2,%r1)

        la      %r2,8(%r2)              // advance by one word
        brct    %r6,.Ladd_tail

        j       .Ladd_done
.size   bn_add_words,.-bn_add_words
274 | |
// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] - bp[i] - borrow for i in [0,len).
//   in:  %r2 = rp, %r3 = ap, %r4 = bp, %r5 = len
//   out: %r2 = final borrow, 0 or 1 (0 when len<=0)
// Register roles: %r1 = rp, %r2 = byte offset, %r5 = number of 4-word
// groups, %r6 = number of leftover words (spilled to 48(%r15)), %r0 = scratch.
// The borrow lives in the condition code from one slbg to the next, so only
// instructions that leave the condition code alone appear inside the loops.
.globl  bn_sub_words
.type   bn_sub_words,@function
.align  4
bn_sub_words:
        la      %r1,0(%r2)              // rp moves to %r1
        lghi    %r2,0                   // offset = 0, also the early return value
        ltgfr   %r5,%r5                 // sign-extend len and test it
        bler    %r14                    // len<=0: return 0

        stg     %r6,48(%r15)
        lghi    %r6,3
        nr      %r6,%r5                 // leftover = len%4
        sra     %r5,2                   // groups = len/4, sra because it sets the condition code
        jnz     .Lsub_by4               // borrow is incidentally cleared if branch taken
        slgr    %r2,%r2                 // offset is 0, so 0-0 just clears the borrow

.Lsub_tail:
        lg      %r0,0(%r2,%r3)
        slbg    %r0,0(%r2,%r4)
        stg     %r0,0(%r2,%r1)

        la      %r2,8(%r2)              // advance by one word
        brct    %r6,.Lsub_tail
        j       .Lsub_done

.Lsub_by4:
        lg      %r0,0(%r2,%r3)
        slbg    %r0,0(%r2,%r4)
        stg     %r0,0(%r2,%r1)
        lg      %r0,8(%r2,%r3)
        slbg    %r0,8(%r2,%r4)
        stg     %r0,8(%r2,%r1)
        lg      %r0,16(%r2,%r3)
        slbg    %r0,16(%r2,%r4)
        stg     %r0,16(%r2,%r1)
        lg      %r0,24(%r2,%r3)
        slbg    %r0,24(%r2,%r4)
        stg     %r0,24(%r2,%r1)

        la      %r2,32(%r2)             // advance by four words
        brct    %r5,.Lsub_by4

        la      %r6,1(%r6)              // bias so brct falls through when len%4 is 0
        brct    %r6,.Lsub_tail          // la and brct keep the condition code

.Lsub_done:
        lghi    %r2,0
        slbgr   %r2,%r2                 // 0-0-borrow = -borrow
        lcgr    %r2,%r2                 // negate: return value is borrow
        lg      %r6,48(%r15)
        br      %r14
.size   bn_sub_words,.-bn_sub_words
328 | |
// Accumulator registers used by the comba routines. The routines rotate
// their roles (low, middle, high word) from one result column to the next.
#define c1      %r1
#define c2      %r5
#define c3      %r8

// mul_add_c(ai,bi,acc0,acc1,acc2)
// Adds the 128-bit product a[ai]*b[bi] to the three-word accumulator
// acc2:acc1:acc0. a is read through %r3 and b through %r4. %r6:%r7 receive
// the product and are clobbered; zero must hold 0; the condition code is
// clobbered.
#define mul_add_c(ai,bi,acc0,acc1,acc2) \
        lg      %r7,ai*8(%r3);          \
        mlg     %r6,bi*8(%r4);          \
        algr    acc0,%r7;               \
        alcgr   acc1,%r6;               \
        alcgr   acc2,zero
339 | |
// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// r[0..15] = a[0..7] * b[0..7], computed one result column at a time: all
// products a[i]*b[j] with i+j = k are summed into a three-word accumulator,
// the low word is stored as r[k], and the accumulator roles rotate.
//   in: %r2 = r, %r3 = a, %r4 = b
// c1/c2/c3 are the accumulator words, mul_add_c clobbers %r6:%r7, and
// zero (%r0) is loaded with 0. %r6-%r8 are spilled to 48(%r15) on entry and
// reloaded before returning.
.globl  bn_mul_comba8
.type   bn_mul_comba8,@function
.align  4
bn_mul_comba8:
        stmg    %r6,%r8,48(%r15)

        lghi    zero,0
        lghi    c1,0
        lghi    c2,0
        lghi    c3,0

        // column 0
        mul_add_c(0,0,c1,c2,c3);
        stg     c1,0*8(%r2)
        lghi    c1,0

        // column 1
        mul_add_c(0,1,c2,c3,c1);
        mul_add_c(1,0,c2,c3,c1);
        stg     c2,1*8(%r2)
        lghi    c2,0

        // column 2
        mul_add_c(0,2,c3,c1,c2);
        mul_add_c(1,1,c3,c1,c2);
        mul_add_c(2,0,c3,c1,c2);
        stg     c3,2*8(%r2)
        lghi    c3,0

        // column 3
        mul_add_c(0,3,c1,c2,c3);
        mul_add_c(1,2,c1,c2,c3);
        mul_add_c(2,1,c1,c2,c3);
        mul_add_c(3,0,c1,c2,c3);
        stg     c1,3*8(%r2)
        lghi    c1,0

        // column 4
        mul_add_c(0,4,c2,c3,c1);
        mul_add_c(1,3,c2,c3,c1);
        mul_add_c(2,2,c2,c3,c1);
        mul_add_c(3,1,c2,c3,c1);
        mul_add_c(4,0,c2,c3,c1);
        stg     c2,4*8(%r2)
        lghi    c2,0

        // column 5
        mul_add_c(0,5,c3,c1,c2);
        mul_add_c(1,4,c3,c1,c2);
        mul_add_c(2,3,c3,c1,c2);
        mul_add_c(3,2,c3,c1,c2);
        mul_add_c(4,1,c3,c1,c2);
        mul_add_c(5,0,c3,c1,c2);
        stg     c3,5*8(%r2)
        lghi    c3,0

        // column 6
        mul_add_c(0,6,c1,c2,c3);
        mul_add_c(1,5,c1,c2,c3);
        mul_add_c(2,4,c1,c2,c3);
        mul_add_c(3,3,c1,c2,c3);
        mul_add_c(4,2,c1,c2,c3);
        mul_add_c(5,1,c1,c2,c3);
        mul_add_c(6,0,c1,c2,c3);
        stg     c1,6*8(%r2)
        lghi    c1,0

        // column 7
        mul_add_c(0,7,c2,c3,c1);
        mul_add_c(1,6,c2,c3,c1);
        mul_add_c(2,5,c2,c3,c1);
        mul_add_c(3,4,c2,c3,c1);
        mul_add_c(4,3,c2,c3,c1);
        mul_add_c(5,2,c2,c3,c1);
        mul_add_c(6,1,c2,c3,c1);
        mul_add_c(7,0,c2,c3,c1);
        stg     c2,7*8(%r2)
        lghi    c2,0

        // column 8
        mul_add_c(1,7,c3,c1,c2);
        mul_add_c(2,6,c3,c1,c2);
        mul_add_c(3,5,c3,c1,c2);
        mul_add_c(4,4,c3,c1,c2);
        mul_add_c(5,3,c3,c1,c2);
        mul_add_c(6,2,c3,c1,c2);
        mul_add_c(7,1,c3,c1,c2);
        stg     c3,8*8(%r2)
        lghi    c3,0

        // column 9
        mul_add_c(2,7,c1,c2,c3);
        mul_add_c(3,6,c1,c2,c3);
        mul_add_c(4,5,c1,c2,c3);
        mul_add_c(5,4,c1,c2,c3);
        mul_add_c(6,3,c1,c2,c3);
        mul_add_c(7,2,c1,c2,c3);
        stg     c1,9*8(%r2)
        lghi    c1,0

        // column 10
        mul_add_c(3,7,c2,c3,c1);
        mul_add_c(4,6,c2,c3,c1);
        mul_add_c(5,5,c2,c3,c1);
        mul_add_c(6,4,c2,c3,c1);
        mul_add_c(7,3,c2,c3,c1);
        stg     c2,10*8(%r2)
        lghi    c2,0

        // column 11
        mul_add_c(4,7,c3,c1,c2);
        mul_add_c(5,6,c3,c1,c2);
        mul_add_c(6,5,c3,c1,c2);
        mul_add_c(7,4,c3,c1,c2);
        stg     c3,11*8(%r2)
        lghi    c3,0

        // column 12
        mul_add_c(5,7,c1,c2,c3);
        mul_add_c(6,6,c1,c2,c3);
        mul_add_c(7,5,c1,c2,c3);
        stg     c1,12*8(%r2)
        lghi    c1,0

        // column 13
        mul_add_c(6,7,c2,c3,c1);
        mul_add_c(7,6,c2,c3,c1);
        stg     c2,13*8(%r2)
        lghi    c2,0

        // columns 14 and 15: the middle word of the last sum is r[15]
        mul_add_c(7,7,c3,c1,c2);
        stg     c3,14*8(%r2)
        stg     c1,15*8(%r2)

        lmg     %r6,%r8,48(%r15)
        br      %r14
.size   bn_mul_comba8,.-bn_mul_comba8
465 | |
// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// r[0..7] = a[0..3] * b[0..3], computed one result column at a time: all
// products a[i]*b[j] with i+j = k are summed into a three-word accumulator,
// the low word is stored as r[k], and the accumulator roles rotate.
//   in: %r2 = r, %r3 = a, %r4 = b
// c1/c2/c3 are the accumulator words, mul_add_c clobbers %r6:%r7, and
// zero (%r0) is loaded with 0. %r6-%r8 are spilled to 48(%r15) on entry and
// reloaded before returning.
// Two defects of the earlier text are corrected here:
//  - column 0 is stored through %r2 (r[0]); it was stored through %r3,
//    which overwrote a[0] before later columns read it and left r[0]
//    unwritten;
//  - the epilogue reloads %r6-%r8 with lmg; it repeated the stmg instead,
//    so the caller got its saved registers back clobbered.
.globl  bn_mul_comba4
.type   bn_mul_comba4,@function
.align  4
bn_mul_comba4:
        stmg    %r6,%r8,48(%r15)

        lghi    c1,0
        lghi    c2,0
        lghi    c3,0
        lghi    zero,0

        // column 0
        mul_add_c(0,0,c1,c2,c3);
        stg     c1,0*8(%r2)             // r[0], the result pointer is %r2
        lghi    c1,0

        // column 1
        mul_add_c(0,1,c2,c3,c1);
        mul_add_c(1,0,c2,c3,c1);
        stg     c2,1*8(%r2)
        lghi    c2,0

        // column 2
        mul_add_c(2,0,c3,c1,c2);
        mul_add_c(1,1,c3,c1,c2);
        mul_add_c(0,2,c3,c1,c2);
        stg     c3,2*8(%r2)
        lghi    c3,0

        // column 3
        mul_add_c(0,3,c1,c2,c3);
        mul_add_c(1,2,c1,c2,c3);
        mul_add_c(2,1,c1,c2,c3);
        mul_add_c(3,0,c1,c2,c3);
        stg     c1,3*8(%r2)
        lghi    c1,0

        // column 4
        mul_add_c(3,1,c2,c3,c1);
        mul_add_c(2,2,c2,c3,c1);
        mul_add_c(1,3,c2,c3,c1);
        stg     c2,4*8(%r2)
        lghi    c2,0

        // column 5
        mul_add_c(2,3,c3,c1,c2);
        mul_add_c(3,2,c3,c1,c2);
        stg     c3,5*8(%r2)
        lghi    c3,0

        // columns 6 and 7: the middle word of the last sum is r[7]
        mul_add_c(3,3,c1,c2,c3);
        stg     c1,6*8(%r2)
        stg     c2,7*8(%r2)

        lmg     %r6,%r8,48(%r15)        // restore the registers saved on entry
        br      %r14
.size   bn_mul_comba4,.-bn_mul_comba4
518 | |
// sqr_add_c(ai,acc0,acc1,acc2)
// Adds the 128-bit square a[ai]*a[ai] to the three-word accumulator
// acc2:acc1:acc0. a is read through %r3. %r6:%r7 receive the product and
// are clobbered; zero must hold 0; the condition code is clobbered.
#define sqr_add_c(ai,acc0,acc1,acc2)    \
        lg      %r7,ai*8(%r3);          \
        mlgr    %r6,%r7;                \
        algr    acc0,%r7;               \
        alcgr   acc1,%r6;               \
        alcgr   acc2,zero

// sqr_add_c2(ai,aj,acc0,acc1,acc2)
// Adds twice the 128-bit product a[ai]*a[aj] to acc2:acc1:acc0 by adding
// the product two times in a row. Same register use as sqr_add_c.
#define sqr_add_c2(ai,aj,acc0,acc1,acc2) \
        lg      %r7,ai*8(%r3);          \
        mlg     %r6,aj*8(%r3);          \
        algr    acc0,%r7;               \
        alcgr   acc1,%r6;               \
        alcgr   acc2,zero;              \
        algr    acc0,%r7;               \
        alcgr   acc1,%r6;               \
        alcgr   acc2,zero
535 | |
// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
//
// r[0..15] = a[0..7] squared, computed one result column at a time. Each
// column k sums twice every cross product a[i]*a[j] (i>j, i+j = k) plus,
// for even k, the square of a[k/2]; the low accumulator word is stored as
// r[k] and the accumulator roles rotate.
//   in: %r2 = r, %r3 = a
// c1/c2/c3 are the accumulator words, the sqr_add_c macros clobber
// %r6:%r7, and zero (%r0) is loaded with 0. %r6-%r8 are spilled to
// 48(%r15) on entry and reloaded before returning.
.globl  bn_sqr_comba8
.type   bn_sqr_comba8,@function
.align  4
bn_sqr_comba8:
        stmg    %r6,%r8,48(%r15)

        lghi    zero,0
        lghi    c1,0
        lghi    c2,0
        lghi    c3,0

        // column 0
        sqr_add_c(0,c1,c2,c3);
        stg     c1,0*8(%r2)
        lghi    c1,0

        // column 1
        sqr_add_c2(1,0,c2,c3,c1);
        stg     c2,1*8(%r2)
        lghi    c2,0

        // column 2
        sqr_add_c2(2,0,c3,c1,c2);
        sqr_add_c(1,c3,c1,c2);
        stg     c3,2*8(%r2)
        lghi    c3,0

        // column 3
        sqr_add_c2(3,0,c1,c2,c3);
        sqr_add_c2(2,1,c1,c2,c3);
        stg     c1,3*8(%r2)
        lghi    c1,0

        // column 4
        sqr_add_c2(4,0,c2,c3,c1);
        sqr_add_c2(3,1,c2,c3,c1);
        sqr_add_c(2,c2,c3,c1);
        stg     c2,4*8(%r2)
        lghi    c2,0

        // column 5
        sqr_add_c2(5,0,c3,c1,c2);
        sqr_add_c2(4,1,c3,c1,c2);
        sqr_add_c2(3,2,c3,c1,c2);
        stg     c3,5*8(%r2)
        lghi    c3,0

        // column 6
        sqr_add_c2(6,0,c1,c2,c3);
        sqr_add_c2(5,1,c1,c2,c3);
        sqr_add_c2(4,2,c1,c2,c3);
        sqr_add_c(3,c1,c2,c3);
        stg     c1,6*8(%r2)
        lghi    c1,0

        // column 7
        sqr_add_c2(7,0,c2,c3,c1);
        sqr_add_c2(6,1,c2,c3,c1);
        sqr_add_c2(5,2,c2,c3,c1);
        sqr_add_c2(4,3,c2,c3,c1);
        stg     c2,7*8(%r2)
        lghi    c2,0

        // column 8
        sqr_add_c2(7,1,c3,c1,c2);
        sqr_add_c2(6,2,c3,c1,c2);
        sqr_add_c2(5,3,c3,c1,c2);
        sqr_add_c(4,c3,c1,c2);
        stg     c3,8*8(%r2)
        lghi    c3,0

        // column 9
        sqr_add_c2(7,2,c1,c2,c3);
        sqr_add_c2(6,3,c1,c2,c3);
        sqr_add_c2(5,4,c1,c2,c3);
        stg     c1,9*8(%r2)
        lghi    c1,0

        // column 10
        sqr_add_c2(7,3,c2,c3,c1);
        sqr_add_c2(6,4,c2,c3,c1);
        sqr_add_c(5,c2,c3,c1);
        stg     c2,10*8(%r2)
        lghi    c2,0

        // column 11
        sqr_add_c2(7,4,c3,c1,c2);
        sqr_add_c2(6,5,c3,c1,c2);
        stg     c3,11*8(%r2)
        lghi    c3,0

        // column 12
        sqr_add_c2(7,5,c1,c2,c3);
        sqr_add_c(6,c1,c2,c3);
        stg     c1,12*8(%r2)
        lghi    c1,0

        // column 13
        sqr_add_c2(7,6,c2,c3,c1);
        stg     c2,13*8(%r2)
        lghi    c2,0

        // columns 14 and 15: the middle word of the last sum is r[15]
        sqr_add_c(7,c3,c1,c2);
        stg     c3,14*8(%r2)
        stg     c1,15*8(%r2)

        lmg     %r6,%r8,48(%r15)
        br      %r14
.size   bn_sqr_comba8,.-bn_sqr_comba8
632 | |
// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
//
// r[0..7] = a[0..3] squared, computed one result column at a time. Each
// column k sums twice every cross product a[i]*a[j] (i>j, i+j = k) plus,
// for even k, the square of a[k/2]; the low accumulator word is stored as
// r[k] and the accumulator roles rotate.
//   in: %r2 = r, %r3 = a
// c1/c2/c3 are the accumulator words, the sqr_add_c macros clobber
// %r6:%r7, and zero (%r0) is loaded with 0. %r6-%r8 are spilled to
// 48(%r15) on entry and reloaded before returning.
.globl  bn_sqr_comba4
.type   bn_sqr_comba4,@function
.align  4
bn_sqr_comba4:
        stmg    %r6,%r8,48(%r15)

        lghi    zero,0
        lghi    c1,0
        lghi    c2,0
        lghi    c3,0

        // column 0
        sqr_add_c(0,c1,c2,c3);
        stg     c1,0*8(%r2)
        lghi    c1,0

        // column 1
        sqr_add_c2(1,0,c2,c3,c1);
        stg     c2,1*8(%r2)
        lghi    c2,0

        // column 2
        sqr_add_c2(2,0,c3,c1,c2);
        sqr_add_c(1,c3,c1,c2);
        stg     c3,2*8(%r2)
        lghi    c3,0

        // column 3
        sqr_add_c2(3,0,c1,c2,c3);
        sqr_add_c2(2,1,c1,c2,c3);
        stg     c1,3*8(%r2)
        lghi    c1,0

        // column 4
        sqr_add_c2(3,1,c2,c3,c1);
        sqr_add_c(2,c2,c3,c1);
        stg     c2,4*8(%r2)
        lghi    c2,0

        // column 5
        sqr_add_c2(3,2,c3,c1,c2);
        stg     c3,5*8(%r2)
        lghi    c3,0

        // columns 6 and 7: the middle word of the last sum is r[7]
        sqr_add_c(3,c1,c2,c3);
        stg     c1,6*8(%r2)
        stg     c2,7*8(%r2)

        lmg     %r6,%r8,48(%r15)
        br      %r14
.size   bn_sqr_comba4,.-bn_sqr_comba4
OLD | NEW |