OLD | NEW |
1 .ident "sparcv8plus.s, Version 1.4" | 1 .ident "sparcv8plus.s, Version 1.4" |
2 .ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | 2 .ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" |
3 | 3 |
4 /* | 4 /* |
5 * ==================================================================== | 5 * ==================================================================== |
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | 6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
7 * project. | 7 * project. |
8 * | 8 * |
9 * Rights for redistribution and usage in source and binary forms are | 9 * Rights for redistribution and usage in source and binary forms are |
10 * granted according to the OpenSSL license. Warranty of any kind is | 10 * granted according to the OpenSSL license. Warranty of any kind is |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
137 * op(p+0); op(p+1); op(p+2); op(p+3); | 137 * op(p+0); op(p+1); op(p+2); op(p+3); |
138 * p+=4; n-=4; | 138 * p+=4; n-=4; |
139 * } | 139 * } |
140 * if (n) { | 140 * if (n) { |
141 * op(p+0); if (--n==0) return; | 141 * op(p+0); if (--n==0) return; |
142 * op(p+1); if (--n==0) return; | 142 * op(p+1); if (--n==0) return; |
143 * op(p+2); return; | 143 * op(p+2); return; |
144 * } | 144 * } |
145 */ | 145 */ |
146 | 146 |
| 147 #if defined(__SUNPRO_C) && defined(__sparcv9) |
| 148 /* They've said -xarch=v9 at command line */ |
| 149 .register %g2,#scratch |
| 150 .register %g3,#scratch |
| 151 # define FRAME_SIZE -192 |
| 152 #elif defined(__GNUC__) && defined(__arch64__) |
| 153 /* They've said -m64 at command line */ |
| 154 .register %g2,#scratch |
| 155 .register %g3,#scratch |
| 156 # define FRAME_SIZE -192 |
| 157 #else |
| 158 # define FRAME_SIZE -96 |
| 159 #endif |
147 /* | 160 /* |
148 * GNU assembler can't stand stuw:-( | 161 * GNU assembler can't stand stuw:-( |
149 */ | 162 */ |
150 #define stuw st | 163 #define stuw st |
151 | 164 |
152 .section ".text",#alloc,#execinstr | 165 .section ".text",#alloc,#execinstr |
153 .file "bn_asm.sparc.v8plus.S" | 166 .file "bn_asm.sparc.v8plus.S" |
154 | 167 |
155 .align 32 | 168 .align 32 |
156 | 169 |
(...skipping 455 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
612 * previous versions this comment stated that "the trouble is that | 625 * previous versions this comment stated that "the trouble is that |
613 * it's not feasible to implement the mumbo-jumbo in less V9 | 626 * it's not feasible to implement the mumbo-jumbo in less V9 |
614 * instructions:-(" which apparently isn't true thanks to | 627 * instructions:-(" which apparently isn't true thanks to |
615 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement | 628 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement |
616 * results not from the shorter code, but from elimination of | 629 * results not from the shorter code, but from elimination of |
617 * multicycle non-pairable 'rd %y,%rd' instructions. | 630 * multicycle non-pairable 'rd %y,%rd' instructions. |
618 * | 631 * |
619 * Andy. | 632 * Andy. |
620 */ | 633 */ |
621 | 634 |
622 #define FRAME_SIZE -96 | |
623 | |
624 /* | 635 /* |
625 * Here is register usage map for *all* routines below. | 636 * Here is register usage map for *all* routines below. |
626 */ | 637 */ |
627 #define t_1 %o0 | 638 #define t_1 %o0 |
628 #define t_2 %o1 | 639 #define t_2 %o1 |
629 #define c_12 %o2 | 640 #define c_12 %o2 |
630 #define c_3 %o3 | 641 #define c_3 %o3 |
631 | 642 |
632 #define ap(I) [%i1+4*I] | 643 #define ap(I) [%i1+4*I] |
633 #define bp(I) [%i2+4*I] | 644 #define bp(I) [%i2+4*I] |
(...skipping 904 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1538 stuw t_1,rp(6) !r[6]=c1; | 1549 stuw t_1,rp(6) !r[6]=c1; |
1539 stuw c_12,rp(7) !r[7]=c2; | 1550 stuw c_12,rp(7) !r[7]=c2; |
1540 | 1551 |
1541 ret | 1552 ret |
1542 restore %g0,%g0,%o0 | 1553 restore %g0,%g0,%o0 |
1543 | 1554 |
1544 .type bn_sqr_comba4,#function | 1555 .type bn_sqr_comba4,#function |
1545 .size bn_sqr_comba4,(.-bn_sqr_comba4) | 1556 .size bn_sqr_comba4,(.-bn_sqr_comba4) |
1546 | 1557 |
1547 .align 32 | 1558 .align 32 |
OLD | NEW |