OLD | NEW |
(Empty) | |
| 1 .ident "s390x.S, version 1.1" |
| 2 // ==================================================================== |
| 3 // Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
| 4 // project. |
| 5 // |
| 6 // Rights for redistribution and usage in source and binary forms are |
| 7 // granted according to the OpenSSL license. Warranty of any kind is |
| 8 // disclaimed. |
| 9 // ==================================================================== |
| 10 |
| 11 .text |
| 12 |
| 13 #define zero %r0 // alias for %r0; every routine that uses it first loads it with 0 (lghi zero,0) |
| 14 // bn_mul_add_words: rp[i] += ap[i]*w over len words, chaining a 64-bit carry word; returns the final carry. |
| 15 // BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); r2=rp, r3=ap, r4=len, r5=w |
| 16 .globl bn_mul_add_words |
| 17 .type bn_mul_add_words,@function |
| 18 .align 4 |
| 19 bn_mul_add_words: |
| 20 lghi zero,0 // zero = 0, the addend for the carry-only alcgr steps |
| 21 la %r1,0(%r2) // put rp aside; r2 becomes the index and the return value |
| 22 lghi %r2,0 // i=0; (byte offset, also the 0 returned on early exit) |
| 23 ltgfr %r4,%r4 // sign-extend the int len and set CC from it |
| 24 bler %r14 // if (len<=0) return 0; |
| 25 |
| 26 stmg %r6,%r10,48(%r15) // save r6-r10; the lmg at .Lend_madd restores them |
| 27 lghi %r10,3 |
| 28 lghi %r8,0 // carry = 0 |
| 29 nr %r10,%r4 // len%4 |
| 30 sra %r4,2 // cnt=len/4 |
| 31 jz .Loop1_madd // carry is incidentally cleared if branch taken |
| 32 algr zero,zero // clear carry |
| 33 // 4x unrolled: r6:r7 and r8:r9 alternate as the hi:lo product, so one step's hi word is the next step's carry-in |
| 34 .Loop4_madd: |
| 35 lg %r7,0(%r2,%r3) // ap[i] |
| 36 mlgr %r6,%r5 // *=w (128-bit product in r6:r7) |
| 37 alcgr %r7,%r8 // +=carry (carry word plus the carry bit still held in CC) |
| 38 alcgr %r6,zero // fold the carry out of the low word into the high word |
| 39 alg %r7,0(%r2,%r1) // +=rp[i]; its carry stays in CC for the next alcgr |
| 40 stg %r7,0(%r2,%r1) // rp[i]= |
| 41 // word i+1: same steps, product in r8:r9, carry-in from r6 |
| 42 lg %r9,8(%r2,%r3) |
| 43 mlgr %r8,%r5 |
| 44 alcgr %r9,%r6 |
| 45 alcgr %r8,zero |
| 46 alg %r9,8(%r2,%r1) |
| 47 stg %r9,8(%r2,%r1) |
| 48 // word i+2: product in r6:r7, carry-in from r8 |
| 49 lg %r7,16(%r2,%r3) |
| 50 mlgr %r6,%r5 |
| 51 alcgr %r7,%r8 |
| 52 alcgr %r6,zero |
| 53 alg %r7,16(%r2,%r1) |
| 54 stg %r7,16(%r2,%r1) |
| 55 // word i+3: product in r8:r9, so the carry word ends the iteration in r8 |
| 56 lg %r9,24(%r2,%r3) |
| 57 mlgr %r8,%r5 |
| 58 alcgr %r9,%r6 |
| 59 alcgr %r8,zero |
| 60 alg %r9,24(%r2,%r1) |
| 61 stg %r9,24(%r2,%r1) |
| 62 |
| 63 la %r2,32(%r2) // i+=4 |
| 64 brct %r4,.Loop4_madd |
| 65 |
| 66 la %r10,1(%r10) // see if len%4 is zero ... (r10+1, so brct falls through only when len%4==0) |
| 67 brct %r10,.Loop1_madd // without touching condition code:-) |
| 68 |
| 69 .Lend_madd: |
| 70 alcgr %r8,zero // collect carry bit |
| 71 lgr %r2,%r8 // return value = final carry word |
| 72 lmg %r6,%r10,48(%r15) // restore r6-r10 |
| 73 br %r14 |
| 74 // tail loop: one word per iteration, r10 = number of words left (len%4) |
| 75 .Loop1_madd: |
| 76 lg %r7,0(%r2,%r3) // ap[i] |
| 77 mlgr %r6,%r5 // *=w |
| 78 alcgr %r7,%r8 // +=carry |
| 79 alcgr %r6,zero |
| 80 alg %r7,0(%r2,%r1) // +=rp[i] |
| 81 stg %r7,0(%r2,%r1) // rp[i]= |
| 82 |
| 83 lgr %r8,%r6 // high word becomes the carry word for the next iteration |
| 84 la %r2,8(%r2) // i++ |
| 85 brct %r10,.Loop1_madd |
| 86 |
| 87 j .Lend_madd |
| 88 .size bn_mul_add_words,.-bn_mul_add_words |
| 89 // bn_mul_words: rp[i] = ap[i]*w + carry over len words; returns the final carry word. |
| 90 // BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); r2=rp, r3=ap, r4=len, r5=w |
| 91 .globl bn_mul_words |
| 92 .type bn_mul_words,@function |
| 93 .align 4 |
| 94 bn_mul_words: |
| 95 lghi zero,0 // zero = 0 |
| 96 la %r1,0(%r2) // put rp aside; r2 becomes the index and the return value |
| 97 lghi %r2,0 // i=0; (byte offset, also the 0 returned on early exit) |
| 98 ltgfr %r4,%r4 // sign-extend the int len and set CC from it |
| 99 bler %r14 // if (len<=0) return 0; |
| 100 |
| 101 stmg %r6,%r10,48(%r15) // save r6-r10; the lmg at .Lend_mul restores them |
| 102 lghi %r10,3 |
| 103 lghi %r8,0 // carry = 0 |
| 104 nr %r10,%r4 // len%4 |
| 105 sra %r4,2 // cnt=len/4 |
| 106 jz .Loop1_mul // carry is incidentally cleared if branch taken |
| 107 algr zero,zero // clear carry |
| 108 // 4x unrolled: r6:r7 and r8:r9 alternate as the hi:lo product; the carry bit of each low-word add rides in CC into the next alcgr |
| 109 .Loop4_mul: |
| 110 lg %r7,0(%r2,%r3) // ap[i] |
| 111 mlgr %r6,%r5 // *=w (128-bit product in r6:r7) |
| 112 alcgr %r7,%r8 // +=carry (previous high word plus the carry bit in CC) |
| 113 stg %r7,0(%r2,%r1) // rp[i]= |
| 114 // word i+1: product in r8:r9, carry-in from r6 |
| 115 lg %r9,8(%r2,%r3) |
| 116 mlgr %r8,%r5 |
| 117 alcgr %r9,%r6 |
| 118 stg %r9,8(%r2,%r1) |
| 119 // word i+2: product in r6:r7, carry-in from r8 |
| 120 lg %r7,16(%r2,%r3) |
| 121 mlgr %r6,%r5 |
| 122 alcgr %r7,%r8 |
| 123 stg %r7,16(%r2,%r1) |
| 124 // word i+3: product in r8:r9, so the carry word ends the iteration in r8 |
| 125 lg %r9,24(%r2,%r3) |
| 126 mlgr %r8,%r5 |
| 127 alcgr %r9,%r6 |
| 128 stg %r9,24(%r2,%r1) |
| 129 |
| 130 la %r2,32(%r2) // i+=4 |
| 131 brct %r4,.Loop4_mul |
| 132 |
| 133 la %r10,1(%r10) // see if len%4 is zero ... (r10+1, so brct falls through only when len%4==0) |
| 134 brct %r10,.Loop1_mul // without touching condition code:-) |
| 135 |
| 136 .Lend_mul: |
| 137 alcgr %r8,zero // collect carry bit |
| 138 lgr %r2,%r8 // return value = final carry word |
| 139 lmg %r6,%r10,48(%r15) // restore r6-r10 |
| 140 br %r14 |
| 141 // tail loop: one word per iteration, r10 = number of words left (len%4) |
| 142 .Loop1_mul: |
| 143 lg %r7,0(%r2,%r3) // ap[i] |
| 144 mlgr %r6,%r5 // *=w |
| 145 alcgr %r7,%r8 // +=carry |
| 146 stg %r7,0(%r2,%r1) // rp[i]= |
| 147 |
| 148 lgr %r8,%r6 // high word becomes the carry word for the next iteration |
| 149 la %r2,8(%r2) // i++ |
| 150 brct %r10,.Loop1_mul |
| 151 |
| 152 j .Lend_mul |
| 153 .size bn_mul_words,.-bn_mul_words |
| 154 // bn_sqr_words: for each of len source words, store its 128-bit square as two result words (low word first). |
| 155 // void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4) r2=result (2 words per input word), r3=source, r4=len |
| 156 .globl bn_sqr_words |
| 157 .type bn_sqr_words,@function |
| 158 .align 4 |
| 159 bn_sqr_words: |
| 160 ltgfr %r4,%r4 // sign-extend the int len and set CC from it |
| 161 bler %r14 // nothing to do when len<=0 |
| 162 |
| 163 stmg %r6,%r7,48(%r15) // save r6,r7; the lmg at .Lend_sqr restores them |
| 164 srag %r1,%r4,2 // cnt=len/4 |
| 165 jz .Loop1_sqr // fewer than 4 words: r4 still holds len for the tail loop |
| 166 // 4x unrolled: both pointers advance once per iteration, so the offsets are fixed |
| 167 .Loop4_sqr: |
| 168 lg %r7,0(%r3) // source word |
| 169 mlgr %r6,%r7 // r6:r7 = r7*r7 |
| 170 stg %r7,0(%r2) // low word of the square |
| 171 stg %r6,8(%r2) // high word of the square |
| 172 |
| 173 lg %r7,8(%r3) |
| 174 mlgr %r6,%r7 |
| 175 stg %r7,16(%r2) |
| 176 stg %r6,24(%r2) |
| 177 |
| 178 lg %r7,16(%r3) |
| 179 mlgr %r6,%r7 |
| 180 stg %r7,32(%r2) |
| 181 stg %r6,40(%r2) |
| 182 |
| 183 lg %r7,24(%r3) |
| 184 mlgr %r6,%r7 |
| 185 stg %r7,48(%r2) |
| 186 stg %r6,56(%r2) |
| 187 |
| 188 la %r3,32(%r3) // source += 4 words |
| 189 la %r2,64(%r2) // result += 8 words |
| 190 brct %r1,.Loop4_sqr |
| 191 |
| 192 lghi %r1,3 |
| 193 nr %r4,%r1 // cnt=len%4 |
| 194 jz .Lend_sqr |
| 195 // tail loop: one source word per iteration, r4 = words left |
| 196 .Loop1_sqr: |
| 197 lg %r7,0(%r3) |
| 198 mlgr %r6,%r7 |
| 199 stg %r7,0(%r2) |
| 200 stg %r6,8(%r2) |
| 201 |
| 202 la %r3,8(%r3) |
| 203 la %r2,16(%r2) |
| 204 brct %r4,.Loop1_sqr |
| 205 |
| 206 .Lend_sqr: |
| 207 lmg %r6,%r7,48(%r15) // restore r6,r7 |
| 208 br %r14 |
| 209 .size bn_sqr_words,.-bn_sqr_words |
| 210 // bn_div_words: divides the 128-bit value h:l by d and returns the 64-bit quotient. NOTE(review): no check for d==0 or h>=d here - presumably callers guarantee both; confirm. |
| 211 // BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d); r2=h, r3=l, r4=d |
| 212 .globl bn_div_words |
| 213 .type bn_div_words,@function |
| 214 .align 4 |
| 215 bn_div_words: |
| 216 dlgr %r2,%r4 // r2:r3 (h:l) / d -> remainder in r2, quotient in r3 |
| 217 lgr %r2,%r3 // return the quotient |
| 218 br %r14 |
| 219 .size bn_div_words,.-bn_div_words |
| 220 // bn_add_words: rp[i] = ap[i] + bp[i] with carry propagation over len words; returns the final carry (0 or 1). |
| 221 // BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); r2=rp, r3=ap, r4=bp, r5=len |
| 222 .globl bn_add_words |
| 223 .type bn_add_words,@function |
| 224 .align 4 |
| 225 bn_add_words: |
| 226 la %r1,0(%r2) // put rp aside; r2 becomes the index and the return value |
| 227 lghi %r2,0 // i=0 (byte offset, also the 0 returned on early exit) |
| 228 ltgfr %r5,%r5 // sign-extend the int len and set CC from it |
| 229 bler %r14 // if (len<=0) return 0; |
| 230 |
| 231 stg %r6,48(%r15) // save r6; reloaded at .Lexit_add |
| 232 lghi %r6,3 |
| 233 nr %r6,%r5 // len%4 |
| 234 sra %r5,2 // len/4, use sra because it sets condition code |
| 235 jz .Loop1_add // carry is incidentally cleared if branch taken |
| 236 algr %r2,%r2 // clear carry (r2 is 0 here, so 0+0 leaves no carry) |
| 237 // 4x unrolled: %r0 is the data temporary; the carry rides in CC from one alcg to the next |
| 238 .Loop4_add: |
| 239 lg %r0,0(%r2,%r3) // ap[i] |
| 240 alcg %r0,0(%r2,%r4) // += bp[i] + carry |
| 241 stg %r0,0(%r2,%r1) // rp[i] = |
| 242 lg %r0,8(%r2,%r3) |
| 243 alcg %r0,8(%r2,%r4) |
| 244 stg %r0,8(%r2,%r1) |
| 245 lg %r0,16(%r2,%r3) |
| 246 alcg %r0,16(%r2,%r4) |
| 247 stg %r0,16(%r2,%r1) |
| 248 lg %r0,24(%r2,%r3) |
| 249 alcg %r0,24(%r2,%r4) |
| 250 stg %r0,24(%r2,%r1) |
| 251 |
| 252 la %r2,32(%r2) // i+=4 |
| 253 brct %r5,.Loop4_add |
| 254 |
| 255 la %r6,1(%r6) // see if len%4 is zero ... (r6+1, so brct falls through only when len%4==0) |
| 256 brct %r6,.Loop1_add // without touching condition code:-) |
| 257 |
| 258 .Lexit_add: |
| 259 lghi %r2,0 |
| 260 alcgr %r2,%r2 // r2 = 0+0+carry, i.e. the carry bit becomes the return value |
| 261 lg %r6,48(%r15) // restore r6 |
| 262 br %r14 |
| 263 // tail loop: one word per iteration, r6 = words left (len%4) |
| 264 .Loop1_add: |
| 265 lg %r0,0(%r2,%r3) |
| 266 alcg %r0,0(%r2,%r4) |
| 267 stg %r0,0(%r2,%r1) |
| 268 |
| 269 la %r2,8(%r2) // i++ |
| 270 brct %r6,.Loop1_add |
| 271 |
| 272 j .Lexit_add |
| 273 .size bn_add_words,.-bn_add_words |
| 274 // bn_sub_words: rp[i] = ap[i] - bp[i] with borrow propagation over len words; returns the final borrow (0 or 1). |
| 275 // BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); r2=rp, r3=ap, r4=bp, r5=len |
| 276 .globl bn_sub_words |
| 277 .type bn_sub_words,@function |
| 278 .align 4 |
| 279 bn_sub_words: |
| 280 la %r1,0(%r2) // put rp aside; r2 becomes the index and the return value |
| 281 lghi %r2,0 // i=0 (byte offset, also the 0 returned on early exit) |
| 282 ltgfr %r5,%r5 // sign-extend the int len and set CC from it |
| 283 bler %r14 // if (len<=0) return 0; |
| 284 |
| 285 stg %r6,48(%r15) // save r6; reloaded at .Lexit_sub |
| 286 lghi %r6,3 |
| 287 nr %r6,%r5 // len%4 |
| 288 sra %r5,2 // len/4, use sra because it sets condition code |
| 289 jnz .Loop4_sub // borrow is incidentally cleared if branch taken |
| 290 slgr %r2,%r2 // clear borrow (r2 is 0 here, so 0-0 leaves no borrow) |
| 291 // tail loop: one word per iteration, r6 = words left (len%4); placed first so len<4 falls straight into it |
| 292 .Loop1_sub: |
| 293 lg %r0,0(%r2,%r3) // ap[i] |
| 294 slbg %r0,0(%r2,%r4) // -= bp[i] + borrow |
| 295 stg %r0,0(%r2,%r1) // rp[i] = |
| 296 |
| 297 la %r2,8(%r2) // i++ |
| 298 brct %r6,.Loop1_sub |
| 299 j .Lexit_sub |
| 300 // 4x unrolled: %r0 is the data temporary; the borrow rides in CC from one slbg to the next |
| 301 .Loop4_sub: |
| 302 lg %r0,0(%r2,%r3) |
| 303 slbg %r0,0(%r2,%r4) |
| 304 stg %r0,0(%r2,%r1) |
| 305 lg %r0,8(%r2,%r3) |
| 306 slbg %r0,8(%r2,%r4) |
| 307 stg %r0,8(%r2,%r1) |
| 308 lg %r0,16(%r2,%r3) |
| 309 slbg %r0,16(%r2,%r4) |
| 310 stg %r0,16(%r2,%r1) |
| 311 lg %r0,24(%r2,%r3) |
| 312 slbg %r0,24(%r2,%r4) |
| 313 stg %r0,24(%r2,%r1) |
| 314 |
| 315 la %r2,32(%r2) // i+=4 |
| 316 brct %r5,.Loop4_sub |
| 317 |
| 318 la %r6,1(%r6) // see if len%4 is zero ... (r6+1, so brct falls through only when len%4==0) |
| 319 brct %r6,.Loop1_sub // without touching condition code:-) |
| 320 |
| 321 .Lexit_sub: |
| 322 lghi %r2,0 |
| 323 slbgr %r2,%r2 // r2 = 0-0-borrow, i.e. 0 or -1 |
| 324 lcgr %r2,%r2 // negate so the return value is 0 or 1 |
| 325 lg %r6,48(%r15) // restore r6 |
| 326 br %r14 |
| 327 .size bn_sub_words,.-bn_sub_words |
| 328 // c1/c2/c3: the three 64-bit accumulator registers of the comba routines; call sites rotate which one is passed in which position |
| 329 #define c1 %r1 |
| 330 #define c2 %r5 |
| 331 #define c3 %r8 |
| 332 // mul_add_c(ai,bi,c1,c2,c3): (c3:c2:c1) += a[ai]*b[bi] with a at %r3 and b at %r4; overwrites %r6,%r7 and CC; needs zero (%r0) == 0. The macro parameters shadow the c1/c2/c3 defines. |
| 333 #define mul_add_c(ai,bi,c1,c2,c3) \ |
| 334 lg %r7,ai*8(%r3); \ |
| 335 mlg %r6,bi*8(%r4); \ |
| 336 algr c1,%r7; \ |
| 337 alcgr c2,%r6; \ |
| 338 alcgr c3,zero |
| 339 // bn_mul_comba8: 8x8-word product, 16 result words written through r2; each output column k sums a[i]*b[j] with i+j == k. |
| 340 // void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); r2=result, r3=a, r4=b |
| 341 .globl bn_mul_comba8 |
| 342 .type bn_mul_comba8,@function |
| 343 .align 4 |
| 344 bn_mul_comba8: |
| 345 stmg %r6,%r8,48(%r15) // save r6-r8; the lmg before the return restores them |
| 346 // (c3:c2:c1) is the running column sum; after each column the low word is stored, cleared, and the roles rotate |
| 347 lghi c1,0 |
| 348 lghi c2,0 |
| 349 lghi c3,0 |
| 350 lghi zero,0 // mul_add_c adds carries through zero |
| 351 // column 0 |
| 352 mul_add_c(0,0,c1,c2,c3); |
| 353 stg c1,0*8(%r2) |
| 354 lghi c1,0 |
| 355 // column 1 |
| 356 mul_add_c(0,1,c2,c3,c1); |
| 357 mul_add_c(1,0,c2,c3,c1); |
| 358 stg c2,1*8(%r2) |
| 359 lghi c2,0 |
| 360 // column 2 |
| 361 mul_add_c(2,0,c3,c1,c2); |
| 362 mul_add_c(1,1,c3,c1,c2); |
| 363 mul_add_c(0,2,c3,c1,c2); |
| 364 stg c3,2*8(%r2) |
| 365 lghi c3,0 |
| 366 // column 3 |
| 367 mul_add_c(0,3,c1,c2,c3); |
| 368 mul_add_c(1,2,c1,c2,c3); |
| 369 mul_add_c(2,1,c1,c2,c3); |
| 370 mul_add_c(3,0,c1,c2,c3); |
| 371 stg c1,3*8(%r2) |
| 372 lghi c1,0 |
| 373 // column 4 |
| 374 mul_add_c(4,0,c2,c3,c1); |
| 375 mul_add_c(3,1,c2,c3,c1); |
| 376 mul_add_c(2,2,c2,c3,c1); |
| 377 mul_add_c(1,3,c2,c3,c1); |
| 378 mul_add_c(0,4,c2,c3,c1); |
| 379 stg c2,4*8(%r2) |
| 380 lghi c2,0 |
| 381 // column 5 |
| 382 mul_add_c(0,5,c3,c1,c2); |
| 383 mul_add_c(1,4,c3,c1,c2); |
| 384 mul_add_c(2,3,c3,c1,c2); |
| 385 mul_add_c(3,2,c3,c1,c2); |
| 386 mul_add_c(4,1,c3,c1,c2); |
| 387 mul_add_c(5,0,c3,c1,c2); |
| 388 stg c3,5*8(%r2) |
| 389 lghi c3,0 |
| 390 // column 6 |
| 391 mul_add_c(6,0,c1,c2,c3); |
| 392 mul_add_c(5,1,c1,c2,c3); |
| 393 mul_add_c(4,2,c1,c2,c3); |
| 394 mul_add_c(3,3,c1,c2,c3); |
| 395 mul_add_c(2,4,c1,c2,c3); |
| 396 mul_add_c(1,5,c1,c2,c3); |
| 397 mul_add_c(0,6,c1,c2,c3); |
| 398 stg c1,6*8(%r2) |
| 399 lghi c1,0 |
| 400 // column 7 (the widest: all eight index pairs) |
| 401 mul_add_c(0,7,c2,c3,c1); |
| 402 mul_add_c(1,6,c2,c3,c1); |
| 403 mul_add_c(2,5,c2,c3,c1); |
| 404 mul_add_c(3,4,c2,c3,c1); |
| 405 mul_add_c(4,3,c2,c3,c1); |
| 406 mul_add_c(5,2,c2,c3,c1); |
| 407 mul_add_c(6,1,c2,c3,c1); |
| 408 mul_add_c(7,0,c2,c3,c1); |
| 409 stg c2,7*8(%r2) |
| 410 lghi c2,0 |
| 411 // column 8 |
| 412 mul_add_c(7,1,c3,c1,c2); |
| 413 mul_add_c(6,2,c3,c1,c2); |
| 414 mul_add_c(5,3,c3,c1,c2); |
| 415 mul_add_c(4,4,c3,c1,c2); |
| 416 mul_add_c(3,5,c3,c1,c2); |
| 417 mul_add_c(2,6,c3,c1,c2); |
| 418 mul_add_c(1,7,c3,c1,c2); |
| 419 stg c3,8*8(%r2) |
| 420 lghi c3,0 |
| 421 // column 9 |
| 422 mul_add_c(2,7,c1,c2,c3); |
| 423 mul_add_c(3,6,c1,c2,c3); |
| 424 mul_add_c(4,5,c1,c2,c3); |
| 425 mul_add_c(5,4,c1,c2,c3); |
| 426 mul_add_c(6,3,c1,c2,c3); |
| 427 mul_add_c(7,2,c1,c2,c3); |
| 428 stg c1,9*8(%r2) |
| 429 lghi c1,0 |
| 430 // column 10 |
| 431 mul_add_c(7,3,c2,c3,c1); |
| 432 mul_add_c(6,4,c2,c3,c1); |
| 433 mul_add_c(5,5,c2,c3,c1); |
| 434 mul_add_c(4,6,c2,c3,c1); |
| 435 mul_add_c(3,7,c2,c3,c1); |
| 436 stg c2,10*8(%r2) |
| 437 lghi c2,0 |
| 438 // column 11 |
| 439 mul_add_c(4,7,c3,c1,c2); |
| 440 mul_add_c(5,6,c3,c1,c2); |
| 441 mul_add_c(6,5,c3,c1,c2); |
| 442 mul_add_c(7,4,c3,c1,c2); |
| 443 stg c3,11*8(%r2) |
| 444 lghi c3,0 |
| 445 // column 12 |
| 446 mul_add_c(7,5,c1,c2,c3); |
| 447 mul_add_c(6,6,c1,c2,c3); |
| 448 mul_add_c(5,7,c1,c2,c3); |
| 449 stg c1,12*8(%r2) |
| 450 lghi c1,0 |
| 451 |
| 452 // column 13 |
| 453 mul_add_c(6,7,c2,c3,c1); |
| 454 mul_add_c(7,6,c2,c3,c1); |
| 455 stg c2,13*8(%r2) |
| 456 lghi c2,0 |
| 457 // column 14, then the remaining middle word goes out as word 15 |
| 458 mul_add_c(7,7,c3,c1,c2); |
| 459 stg c3,14*8(%r2) |
| 460 stg c1,15*8(%r2) |
| 461 |
| 462 lmg %r6,%r8,48(%r15) // restore r6-r8 |
| 463 br %r14 |
| 464 .size bn_mul_comba8,.-bn_mul_comba8 |
| 465 // bn_mul_comba4: 4x4-word product, 8 result words written through r2; each output column k sums a[i]*b[j] with i+j == k. a and b are only read. |
| 466 // void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); r2=result, r3=a, r4=b |
| 467 .globl bn_mul_comba4 |
| 468 .type bn_mul_comba4,@function |
| 469 .align 4 |
| 470 bn_mul_comba4: |
| 471 stmg %r6,%r8,48(%r15) // save r6-r8; the lmg before the return restores them |
| 472 // (c3:c2:c1) is the running column sum; after each column the low word is stored, cleared, and the roles rotate |
| 473 lghi c1,0 |
| 474 lghi c2,0 |
| 475 lghi c3,0 |
| 476 lghi zero,0 // mul_add_c adds carries through zero |
| 477 // column 0 |
| 478 mul_add_c(0,0,c1,c2,c3); |
| 479 stg c1,0*8(%r2) // result word 0 goes to the result array, never into a (%r3), which later columns still read |
| 480 lghi c1,0 |
| 481 // column 1 |
| 482 mul_add_c(0,1,c2,c3,c1); |
| 483 mul_add_c(1,0,c2,c3,c1); |
| 484 stg c2,1*8(%r2) |
| 485 lghi c2,0 |
| 486 // column 2 |
| 487 mul_add_c(2,0,c3,c1,c2); |
| 488 mul_add_c(1,1,c3,c1,c2); |
| 489 mul_add_c(0,2,c3,c1,c2); |
| 490 stg c3,2*8(%r2) |
| 491 lghi c3,0 |
| 492 // column 3 (the widest: all four index pairs) |
| 493 mul_add_c(0,3,c1,c2,c3); |
| 494 mul_add_c(1,2,c1,c2,c3); |
| 495 mul_add_c(2,1,c1,c2,c3); |
| 496 mul_add_c(3,0,c1,c2,c3); |
| 497 stg c1,3*8(%r2) |
| 498 lghi c1,0 |
| 499 // column 4 |
| 500 mul_add_c(3,1,c2,c3,c1); |
| 501 mul_add_c(2,2,c2,c3,c1); |
| 502 mul_add_c(1,3,c2,c3,c1); |
| 503 stg c2,4*8(%r2) |
| 504 lghi c2,0 |
| 505 // column 5 |
| 506 mul_add_c(2,3,c3,c1,c2); |
| 507 mul_add_c(3,2,c3,c1,c2); |
| 508 stg c3,5*8(%r2) |
| 509 lghi c3,0 |
| 510 // column 6, then the remaining middle word goes out as word 7 |
| 511 mul_add_c(3,3,c1,c2,c3); |
| 512 stg c1,6*8(%r2) |
| 513 stg c2,7*8(%r2) |
| 514 |
| 515 lmg %r6,%r8,48(%r15) // restore r6-r8 (load, not store: the caller must get its values back) |
| 516 br %r14 |
| 517 .size bn_mul_comba4,.-bn_mul_comba4 |
| 518 // sqr_add_c(ai,c1,c2,c3): (c3:c2:c1) += a[ai]^2 with a at %r3; overwrites %r6,%r7 and CC; needs zero (%r0) == 0 |
| 519 #define sqr_add_c(ai,c1,c2,c3) \ |
| 520 lg %r7,ai*8(%r3); \ |
| 521 mlgr %r6,%r7; \ |
| 522 algr c1,%r7; \ |
| 523 alcgr c2,%r6; \ |
| 524 alcgr c3,zero |
| 525 // sqr_add_c2(ai,aj,c1,c2,c3): (c3:c2:c1) += 2*a[ai]*a[aj], done by adding the 128-bit product in %r6:%r7 twice; same clobbers as sqr_add_c |
| 526 #define sqr_add_c2(ai,aj,c1,c2,c3) \ |
| 527 lg %r7,ai*8(%r3); \ |
| 528 mlg %r6,aj*8(%r3); \ |
| 529 algr c1,%r7; \ |
| 530 alcgr c2,%r6; \ |
| 531 alcgr c3,zero; \ |
| 532 algr c1,%r7; \ |
| 533 alcgr c2,%r6; \ |
| 534 alcgr c3,zero |
| 535 // bn_sqr_comba8: square of an 8-word number, 16 result words written through r2; each column k sums a[i]*a[j] with i+j == k, cross terms counted twice. |
| 536 // void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3); r2=result, r3=a |
| 537 .globl bn_sqr_comba8 |
| 538 .type bn_sqr_comba8,@function |
| 539 .align 4 |
| 540 bn_sqr_comba8: |
| 541 stmg %r6,%r8,48(%r15) // save r6-r8; the lmg before the return restores them |
| 542 // (c3:c2:c1) is the running column sum; after each column the low word is stored, cleared, and the roles rotate |
| 543 lghi c1,0 |
| 544 lghi c2,0 |
| 545 lghi c3,0 |
| 546 lghi zero,0 // sqr_add_c / sqr_add_c2 add carries through zero |
| 547 // column 0 |
| 548 sqr_add_c(0,c1,c2,c3); |
| 549 stg c1,0*8(%r2) |
| 550 lghi c1,0 |
| 551 // column 1 |
| 552 sqr_add_c2(1,0,c2,c3,c1); |
| 553 stg c2,1*8(%r2) |
| 554 lghi c2,0 |
| 555 // column 2 |
| 556 sqr_add_c(1,c3,c1,c2); |
| 557 sqr_add_c2(2,0,c3,c1,c2); |
| 558 stg c3,2*8(%r2) |
| 559 lghi c3,0 |
| 560 // column 3 |
| 561 sqr_add_c2(3,0,c1,c2,c3); |
| 562 sqr_add_c2(2,1,c1,c2,c3); |
| 563 stg c1,3*8(%r2) |
| 564 lghi c1,0 |
| 565 // column 4 |
| 566 sqr_add_c(2,c2,c3,c1); |
| 567 sqr_add_c2(3,1,c2,c3,c1); |
| 568 sqr_add_c2(4,0,c2,c3,c1); |
| 569 stg c2,4*8(%r2) |
| 570 lghi c2,0 |
| 571 // column 5 |
| 572 sqr_add_c2(5,0,c3,c1,c2); |
| 573 sqr_add_c2(4,1,c3,c1,c2); |
| 574 sqr_add_c2(3,2,c3,c1,c2); |
| 575 stg c3,5*8(%r2) |
| 576 lghi c3,0 |
| 577 // column 6 |
| 578 sqr_add_c(3,c1,c2,c3); |
| 579 sqr_add_c2(4,2,c1,c2,c3); |
| 580 sqr_add_c2(5,1,c1,c2,c3); |
| 581 sqr_add_c2(6,0,c1,c2,c3); |
| 582 stg c1,6*8(%r2) |
| 583 lghi c1,0 |
| 584 // column 7 |
| 585 sqr_add_c2(7,0,c2,c3,c1); |
| 586 sqr_add_c2(6,1,c2,c3,c1); |
| 587 sqr_add_c2(5,2,c2,c3,c1); |
| 588 sqr_add_c2(4,3,c2,c3,c1); |
| 589 stg c2,7*8(%r2) |
| 590 lghi c2,0 |
| 591 // column 8 |
| 592 sqr_add_c(4,c3,c1,c2); |
| 593 sqr_add_c2(5,3,c3,c1,c2); |
| 594 sqr_add_c2(6,2,c3,c1,c2); |
| 595 sqr_add_c2(7,1,c3,c1,c2); |
| 596 stg c3,8*8(%r2) |
| 597 lghi c3,0 |
| 598 // column 9 |
| 599 sqr_add_c2(7,2,c1,c2,c3); |
| 600 sqr_add_c2(6,3,c1,c2,c3); |
| 601 sqr_add_c2(5,4,c1,c2,c3); |
| 602 stg c1,9*8(%r2) |
| 603 lghi c1,0 |
| 604 // column 10 |
| 605 sqr_add_c(5,c2,c3,c1); |
| 606 sqr_add_c2(6,4,c2,c3,c1); |
| 607 sqr_add_c2(7,3,c2,c3,c1); |
| 608 stg c2,10*8(%r2) |
| 609 lghi c2,0 |
| 610 // column 11 |
| 611 sqr_add_c2(7,4,c3,c1,c2); |
| 612 sqr_add_c2(6,5,c3,c1,c2); |
| 613 stg c3,11*8(%r2) |
| 614 lghi c3,0 |
| 615 // column 12 |
| 616 sqr_add_c(6,c1,c2,c3); |
| 617 sqr_add_c2(7,5,c1,c2,c3); |
| 618 stg c1,12*8(%r2) |
| 619 lghi c1,0 |
| 620 // column 13 |
| 621 sqr_add_c2(7,6,c2,c3,c1); |
| 622 stg c2,13*8(%r2) |
| 623 lghi c2,0 |
| 624 // column 14, then the remaining middle word goes out as word 15 |
| 625 sqr_add_c(7,c3,c1,c2); |
| 626 stg c3,14*8(%r2) |
| 627 stg c1,15*8(%r2) |
| 628 |
| 629 lmg %r6,%r8,48(%r15) // restore r6-r8 |
| 630 br %r14 |
| 631 .size bn_sqr_comba8,.-bn_sqr_comba8 |
| 632 // bn_sqr_comba4: square of a 4-word number, 8 result words written through r2; each column k sums a[i]*a[j] with i+j == k, cross terms counted twice. |
| 633 // void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3); r2=result, r3=a |
| 634 .globl bn_sqr_comba4 |
| 635 .type bn_sqr_comba4,@function |
| 636 .align 4 |
| 637 bn_sqr_comba4: |
| 638 stmg %r6,%r8,48(%r15) // save r6-r8; the lmg before the return restores them |
| 639 // (c3:c2:c1) is the running column sum; after each column the low word is stored, cleared, and the roles rotate |
| 640 lghi c1,0 |
| 641 lghi c2,0 |
| 642 lghi c3,0 |
| 643 lghi zero,0 // sqr_add_c / sqr_add_c2 add carries through zero |
| 644 // column 0 |
| 645 sqr_add_c(0,c1,c2,c3); |
| 646 stg c1,0*8(%r2) |
| 647 lghi c1,0 |
| 648 // column 1 |
| 649 sqr_add_c2(1,0,c2,c3,c1); |
| 650 stg c2,1*8(%r2) |
| 651 lghi c2,0 |
| 652 // column 2 |
| 653 sqr_add_c(1,c3,c1,c2); |
| 654 sqr_add_c2(2,0,c3,c1,c2); |
| 655 stg c3,2*8(%r2) |
| 656 lghi c3,0 |
| 657 // column 3 |
| 658 sqr_add_c2(3,0,c1,c2,c3); |
| 659 sqr_add_c2(2,1,c1,c2,c3); |
| 660 stg c1,3*8(%r2) |
| 661 lghi c1,0 |
| 662 // column 4 |
| 663 sqr_add_c(2,c2,c3,c1); |
| 664 sqr_add_c2(3,1,c2,c3,c1); |
| 665 stg c2,4*8(%r2) |
| 666 lghi c2,0 |
| 667 // column 5 |
| 668 sqr_add_c2(3,2,c3,c1,c2); |
| 669 stg c3,5*8(%r2) |
| 670 lghi c3,0 |
| 671 // column 6, then the remaining middle word goes out as word 7 |
| 672 sqr_add_c(3,c1,c2,c3); |
| 673 stg c1,6*8(%r2) |
| 674 stg c2,7*8(%r2) |
| 675 |
| 676 lmg %r6,%r8,48(%r15) // restore r6-r8 |
| 677 br %r14 |
| 678 .size bn_sqr_comba4,.-bn_sqr_comba4 |
OLD | NEW |