| OLD | NEW |
| (Empty) |
| 1 .ident "sparcv8.s, Version 1.4" | |
| 2 .ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | |
| 3 | |
| 4 /* | |
| 5 * ==================================================================== | |
| 6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
| 7 * project. | |
| 8 * | |
| 9 * Rights for redistribution and usage in source and binary forms are | |
| 10 * granted according to the OpenSSL license. Warranty of any kind is | |
| 11 * disclaimed. | |
| 12 * ==================================================================== | |
| 13 */ | |
| 14 | |
| 15 /* | |
| 16 * This is my modest contribution to the OpenSSL project (see | |
| 17 * http://www.openssl.org/ for more information about it) and is | |
| 18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c | |
| 19 * module. For updates see http://fy.chalmers.se/~appro/hpe/. | |
| 20 * | |
| 21 * See bn_asm.sparc.v8plus.S for more details. | |
| 22 */ | |
| 23 | |
| 24 /* | |
| 25 * Revision history. | |
| 26 * | |
| 27 * 1.1 - new loop unrolling model(*); | |
| 28 * 1.2 - made gas friendly; | |
| 29 * 1.3 - fixed problem with /usr/ccs/lib/cpp; | |
| 30 * 1.4 - some retunes; | |
| 31 * | |
| 32 * (*) see bn_asm.sparc.v8plus.S for details | |
| 33 */ | |
| 34 | |
| 35 .section ".text",#alloc,#execinstr | |
| 36 .file "bn_asm.sparc.v8.S" | |
| 37 | |
| 38 .align 32 | |
| 39 | |
| 40 .global bn_mul_add_words | |
| 41 /* | |
| 42 * BN_ULONG bn_mul_add_words(rp,ap,num,w) | |
| 43 * BN_ULONG *rp,*ap; | |
| 44 * int num; | |
| 45 * BN_ULONG w; | |
| * | |
| * Multiply-accumulate: for each word i in 0..num-1, | |
| * rp[i] = low32(rp[i] + ap[i]*w + carry), with the overflow kept as | |
| * the carry into word i+1; the last carry is the return value. | |
| * Returns 0 immediately, without loading anything, when num <= 0. | |
| * | |
| * Leaf routine (no register window): rp=%o0, ap=%o1, num=%o2, w=%o3. | |
| * %o5 - running carry, %o4 - rp word being updated, | |
| * %g2,%g3 - ap words, then the low product halves, | |
| * %g1 - high product half read back from %y after each umul. | |
| * | |
| * The loop consumes four words per pass for as long as (num & -4) | |
| * is nonzero; the tail then finishes the remaining one to three. | |
| * The instruction following each branch or retl is its delay slot; | |
| * on the ,a (annulled) branches it runs only when the branch is taken. | |
| 46 */ | |
| 47 bn_mul_add_words: | |
| 48 cmp %o2,0 | |
| 49 bg,a .L_bn_mul_add_words_proceed | |
| 50 ld [%o1],%g2 | |
| 51 retl | |
| 52 clr %o0 | |
| 53 | |
| 54 .L_bn_mul_add_words_proceed: | |
| 55 andcc %o2,-4,%g0 | |
| 56 bz .L_bn_mul_add_words_tail | |
| 57 clr %o5 | |
| 58 | |
| 59 .L_bn_mul_add_words_loop: | |
| 60 ld [%o0],%o4 | |
| 61 ld [%o1+4],%g3 | |
| 62 umul %o3,%g2,%g2 | |
| 63 rd %y,%g1 | |
| 64 addcc %o4,%o5,%o4 | |
| 65 addx %g1,0,%g1 | |
| 66 addcc %o4,%g2,%o4 | |
| 67 st %o4,[%o0] | |
| 68 addx %g1,0,%o5 | |
| 69 | |
| 70 ld [%o0+4],%o4 | |
| 71 ld [%o1+8],%g2 | |
| 72 umul %o3,%g3,%g3 | |
| 73 dec 4,%o2 | |
| 74 rd %y,%g1 | |
| 75 addcc %o4,%o5,%o4 | |
| 76 addx %g1,0,%g1 | |
| 77 addcc %o4,%g3,%o4 | |
| 78 st %o4,[%o0+4] | |
| 79 addx %g1,0,%o5 | |
| 80 | |
| 81 ld [%o0+8],%o4 | |
| 82 ld [%o1+12],%g3 | |
| 83 umul %o3,%g2,%g2 | |
| 84 inc 16,%o1 | |
| 85 rd %y,%g1 | |
| 86 addcc %o4,%o5,%o4 | |
| 87 addx %g1,0,%g1 | |
| 88 addcc %o4,%g2,%o4 | |
| 89 st %o4,[%o0+8] | |
| 90 addx %g1,0,%o5 | |
| 91 | |
| 92 ld [%o0+12],%o4 | |
| 93 umul %o3,%g3,%g3 | |
| 94 inc 16,%o0 | |
| 95 rd %y,%g1 | |
| 96 addcc %o4,%o5,%o4 | |
| 97 addx %g1,0,%g1 | |
| 98 addcc %o4,%g3,%o4 | |
| 99 st %o4,[%o0-4] | |
| 100 addx %g1,0,%o5 | |
| 101 andcc %o2,-4,%g0 | |
| 102 bnz,a .L_bn_mul_add_words_loop | |
| 103 ld [%o1],%g2 | |
| 104 | |
| 105 tst %o2 | |
| 106 bnz,a .L_bn_mul_add_words_tail | |
| 107 ld [%o1],%g2 | |
| 108 .L_bn_mul_add_words_return: | |
| 109 retl | |
| 110 mov %o5,%o0 | |
| 111 nop | |
| 112 | |
| 113 .L_bn_mul_add_words_tail: | |
| 114 ld [%o0],%o4 | |
| 115 umul %o3,%g2,%g2 | |
| 116 addcc %o4,%o5,%o4 | |
| 117 rd %y,%g1 | |
| 118 addx %g1,0,%g1 | |
| 119 addcc %o4,%g2,%o4 | |
| 120 addx %g1,0,%o5 | |
| 121 deccc %o2 | |
| 122 bz .L_bn_mul_add_words_return | |
| 123 st %o4,[%o0] | |
| 124 | |
| 125 ld [%o1+4],%g2 | |
| 126 ld [%o0+4],%o4 | |
| 127 umul %o3,%g2,%g2 | |
| 128 rd %y,%g1 | |
| 129 addcc %o4,%o5,%o4 | |
| 130 addx %g1,0,%g1 | |
| 131 addcc %o4,%g2,%o4 | |
| 132 addx %g1,0,%o5 | |
| 133 deccc %o2 | |
| 134 bz .L_bn_mul_add_words_return | |
| 135 st %o4,[%o0+4] | |
| 136 | |
| 137 ld [%o1+8],%g2 | |
| 138 ld [%o0+8],%o4 | |
| 139 umul %o3,%g2,%g2 | |
| 140 rd %y,%g1 | |
| 141 addcc %o4,%o5,%o4 | |
| 142 addx %g1,0,%g1 | |
| 143 addcc %o4,%g2,%o4 | |
| 144 st %o4,[%o0+8] | |
| 145 retl | |
| 146 addx %g1,0,%o0 | |
| 147 | |
| 148 .type bn_mul_add_words,#function | |
| 149 .size bn_mul_add_words,(.-bn_mul_add_words) | |
| 150 | |
| 151 .align 32 | |
| 152 | |
| 153 .global bn_mul_words | |
| 154 /* | |
| 155 * BN_ULONG bn_mul_words(rp,ap,num,w) | |
| 156 * BN_ULONG *rp,*ap; | |
| 157 * int num; | |
| 158 * BN_ULONG w; | |
| * | |
| * Multiply by a single word: for each word i in 0..num-1, | |
| * rp[i] = low32(ap[i]*w + carry); the high product half plus the | |
| * addition overflow becomes the carry into word i+1, and the last | |
| * carry is the return value. Returns 0 immediately when num <= 0. | |
| * | |
| * Leaf routine (no register window): rp=%o0, ap=%o1, num=%o2, w=%o3. | |
| * %o5 - running carry, | |
| * %g2,%g3 - ap words, then the low result words that get stored, | |
| * %g1 - high product half read back from %y after each umul. | |
| * | |
| * The loop consumes four words per pass for as long as (num & -4) | |
| * is nonzero; the tail then finishes the remaining one to three. | |
| * The instruction following each branch or retl is its delay slot; | |
| * on the ,a (annulled) branches it runs only when the branch is taken. | |
| 159 */ | |
| 160 bn_mul_words: | |
| 161 cmp %o2,0 | |
| 162 bg,a .L_bn_mul_words_proceeed | |
| 163 ld [%o1],%g2 | |
| 164 retl | |
| 165 clr %o0 | |
| 166 | |
| 167 .L_bn_mul_words_proceeed: | |
| 168 andcc %o2,-4,%g0 | |
| 169 bz .L_bn_mul_words_tail | |
| 170 clr %o5 | |
| 171 | |
| 172 .L_bn_mul_words_loop: | |
| 173 ld [%o1+4],%g3 | |
| 174 umul %o3,%g2,%g2 | |
| 175 addcc %g2,%o5,%g2 | |
| 176 rd %y,%g1 | |
| 177 addx %g1,0,%o5 | |
| 178 st %g2,[%o0] | |
| 179 | |
| 180 ld [%o1+8],%g2 | |
| 181 umul %o3,%g3,%g3 | |
| 182 addcc %g3,%o5,%g3 | |
| 183 rd %y,%g1 | |
| 184 dec 4,%o2 | |
| 185 addx %g1,0,%o5 | |
| 186 st %g3,[%o0+4] | |
| 187 | |
| 188 ld [%o1+12],%g3 | |
| 189 umul %o3,%g2,%g2 | |
| 190 addcc %g2,%o5,%g2 | |
| 191 rd %y,%g1 | |
| 192 inc 16,%o1 | |
| 193 st %g2,[%o0+8] | |
| 194 addx %g1,0,%o5 | |
| 195 | |
| 196 umul %o3,%g3,%g3 | |
| 197 addcc %g3,%o5,%g3 | |
| 198 rd %y,%g1 | |
| 199 inc 16,%o0 | |
| 200 addx %g1,0,%o5 | |
| 201 st %g3,[%o0-4] | |
| 202 andcc %o2,-4,%g0 | |
| 203 nop | |
| 204 bnz,a .L_bn_mul_words_loop | |
| 205 ld [%o1],%g2 | |
| 206 | |
| 207 tst %o2 | |
| 208 bnz,a .L_bn_mul_words_tail | |
| 209 ld [%o1],%g2 | |
| 210 .L_bn_mul_words_return: | |
| 211 retl | |
| 212 mov %o5,%o0 | |
| 213 nop | |
| 214 | |
| 215 .L_bn_mul_words_tail: | |
| 216 umul %o3,%g2,%g2 | |
| 217 addcc %g2,%o5,%g2 | |
| 218 rd %y,%g1 | |
| 219 addx %g1,0,%o5 | |
| 220 deccc %o2 | |
| 221 bz .L_bn_mul_words_return | |
| 222 st %g2,[%o0] | |
| 223 nop | |
| 224 | |
| 225 ld [%o1+4],%g2 | |
| 226 umul %o3,%g2,%g2 | |
| 227 addcc %g2,%o5,%g2 | |
| 228 rd %y,%g1 | |
| 229 addx %g1,0,%o5 | |
| 230 deccc %o2 | |
| 231 bz .L_bn_mul_words_return | |
| 232 st %g2,[%o0+4] | |
| 233 | |
| 234 ld [%o1+8],%g2 | |
| 235 umul %o3,%g2,%g2 | |
| 236 addcc %g2,%o5,%g2 | |
| 237 rd %y,%g1 | |
| 238 st %g2,[%o0+8] | |
| 239 retl | |
| 240 addx %g1,0,%o0 | |
| 241 | |
| 242 .type bn_mul_words,#function | |
| 243 .size bn_mul_words,(.-bn_mul_words) | |
| 244 | |
| 245 .align 32 | |
| 246 .global bn_sqr_words | |
| 247 /* | |
| 248 * void bn_sqr_words(r,a,n) | |
| 249 * BN_ULONG *r,*a; | |
| 250 * int n; | |
| * | |
| * For each word i in 0..n-1 stores the 64-bit square of a[i] as two | |
| * words: r[2*i] = low half, r[2*i+1] = high half (read from %y). | |
| * Nothing is stored when n <= 0. %o0 is cleared on every return path. | |
| * | |
| * Leaf routine (no register window): r=%o0, a=%o1, n=%o2. | |
| * %g2,%g3 - a words, %o4 - low half, %o5 - high half of the square. | |
| * | |
| * The loop squares four words per pass for as long as (n & -4) is | |
| * nonzero; the tail then finishes the remaining one to three. | |
| * The instruction following each branch or retl is its delay slot; | |
| * on the ,a (annulled) branches it runs only when the branch is taken. | |
| * | |
| * NOTE(review): the andcc that closes the loop writes its result to | |
| * %g2 rather than %g0. %g2 is reloaded from [%o1] in a delay slot | |
| * before it is used again, so this looks harmless. | |
| 251 */ | |
| 252 bn_sqr_words: | |
| 253 cmp %o2,0 | |
| 254 bg,a .L_bn_sqr_words_proceeed | |
| 255 ld [%o1],%g2 | |
| 256 retl | |
| 257 clr %o0 | |
| 258 | |
| 259 .L_bn_sqr_words_proceeed: | |
| 260 andcc %o2,-4,%g0 | |
| 261 bz .L_bn_sqr_words_tail | |
| 262 clr %o5 | |
| 263 | |
| 264 .L_bn_sqr_words_loop: | |
| 265 ld [%o1+4],%g3 | |
| 266 umul %g2,%g2,%o4 | |
| 267 st %o4,[%o0] | |
| 268 rd %y,%o5 | |
| 269 st %o5,[%o0+4] | |
| 270 | |
| 271 ld [%o1+8],%g2 | |
| 272 umul %g3,%g3,%o4 | |
| 273 dec 4,%o2 | |
| 274 st %o4,[%o0+8] | |
| 275 rd %y,%o5 | |
| 276 st %o5,[%o0+12] | |
| 277 nop | |
| 278 | |
| 279 ld [%o1+12],%g3 | |
| 280 umul %g2,%g2,%o4 | |
| 281 st %o4,[%o0+16] | |
| 282 rd %y,%o5 | |
| 283 inc 16,%o1 | |
| 284 st %o5,[%o0+20] | |
| 285 | |
| 286 umul %g3,%g3,%o4 | |
| 287 inc 32,%o0 | |
| 288 st %o4,[%o0-8] | |
| 289 rd %y,%o5 | |
| 290 st %o5,[%o0-4] | |
| 291 andcc %o2,-4,%g2 | |
| 292 bnz,a .L_bn_sqr_words_loop | |
| 293 ld [%o1],%g2 | |
| 294 | |
| 295 tst %o2 | |
| 296 nop | |
| 297 bnz,a .L_bn_sqr_words_tail | |
| 298 ld [%o1],%g2 | |
| 299 .L_bn_sqr_words_return: | |
| 300 retl | |
| 301 clr %o0 | |
| 302 | |
| 303 .L_bn_sqr_words_tail: | |
| 304 umul %g2,%g2,%o4 | |
| 305 st %o4,[%o0] | |
| 306 deccc %o2 | |
| 307 rd %y,%o5 | |
| 308 bz .L_bn_sqr_words_return | |
| 309 st %o5,[%o0+4] | |
| 310 | |
| 311 ld [%o1+4],%g2 | |
| 312 umul %g2,%g2,%o4 | |
| 313 st %o4,[%o0+8] | |
| 314 deccc %o2 | |
| 315 rd %y,%o5 | |
| 316 nop | |
| 317 bz .L_bn_sqr_words_return | |
| 318 st %o5,[%o0+12] | |
| 319 | |
| 320 ld [%o1+8],%g2 | |
| 321 umul %g2,%g2,%o4 | |
| 322 st %o4,[%o0+16] | |
| 323 rd %y,%o5 | |
| 324 st %o5,[%o0+20] | |
| 325 retl | |
| 326 clr %o0 | |
| 327 | |
| 328 .type bn_sqr_words,#function | |
| 329 .size bn_sqr_words,(.-bn_sqr_words) | |
| 330 | |
| 331 .align 32 | |
| 332 | |
| 333 .global bn_div_words | |
| 334 /* | |
| 335 * BN_ULONG bn_div_words(h,l,d) | |
| 336 * BN_ULONG h,l,d; | |
| * | |
| * Returns the 32-bit quotient of the 64-bit value h:l divided by d. | |
| * Leaf routine: h=%o0, l=%o1, d=%o2, quotient returned in %o0. | |
| * udiv takes the high word of its dividend from %y, so h is written | |
| * there first. | |
| * | |
| * On SPARC V8 a write to %y is a delayed write: an instruction that | |
| * reads %y (udiv does so implicitly) within the three instructions | |
| * following the wr may still see the old value. The three nops keep | |
| * udiv outside that window. | |
| 337 */ | |
| 338 bn_div_words: | |
| 339 wr %o0,%y | |
| nop | |
| nop | |
| nop | |
| 340 udiv %o1,%o2,%o0 | |
| 341 retl | |
| 342 nop | |
| 343 | |
| 344 .type bn_div_words,#function | |
| 345 .size bn_div_words,(.-bn_div_words) | |
| 346 | |
| 347 .align 32 | |
| 348 | |
| 349 .global bn_add_words | |
| 350 /* | |
| 351 * BN_ULONG bn_add_words(rp,ap,bp,n) | |
| 352 * BN_ULONG *rp,*ap,*bp; | |
| 353 * int n; | |
| * | |
| * Multi-word addition: rp[i] = ap[i] + bp[i] + carry for i in | |
| * 0..n-1; returns the carry out of the last word (0 or 1). | |
| * Returns 0 immediately when n <= 0. | |
| * | |
| * Leaf routine (no register window): rp=%o0, ap=%o1, bp=%o2, n=%o3. | |
| * %o4,%g3 - ap words, %o5,%g4 - bp words; sums land in %o5 and %g3. | |
| * | |
| * Within a group of words the carry travels in the carry flag through | |
| * the addxcc chain. andcc, tst and deccc overwrite the flags, so the | |
| * carry is first parked in %g1 (addx %g0,0,%g1) and later turned back | |
| * into a flag by addcc %g1,-1,%g0, which carries out exactly when | |
| * %g1 is 1. On first entry the loop is joined at the second label | |
| * because the first ap word was already loaded in a delay slot and | |
| * the carry flag is cleared explicitly. | |
| * | |
| * The loop adds four words per pass for as long as (n & -4) is | |
| * nonzero; the tail then finishes the remaining one to three. | |
| * The instruction following each branch or retl is its delay slot; | |
| * on the ,a (annulled) branches it runs only when the branch is taken. | |
| 354 */ | |
| 355 bn_add_words: | |
| 356 cmp %o3,0 | |
| 357 bg,a .L_bn_add_words_proceed | |
| 358 ld [%o1],%o4 | |
| 359 retl | |
| 360 clr %o0 | |
| 361 | |
| 362 .L_bn_add_words_proceed: | |
| 363 andcc %o3,-4,%g0 | |
| 364 bz .L_bn_add_words_tail | |
| 365 clr %g1 | |
| 366 ba .L_bn_add_words_warn_loop | |
| 367 addcc %g0,0,%g0 ! clear carry flag | |
| 368 | |
| 369 .L_bn_add_words_loop: | |
| 370 ld [%o1],%o4 | |
| 371 .L_bn_add_words_warn_loop: | |
| 372 ld [%o2],%o5 | |
| 373 ld [%o1+4],%g3 | |
| 374 ld [%o2+4],%g4 | |
| 375 dec 4,%o3 | |
| 376 addxcc %o5,%o4,%o5 | |
| 377 st %o5,[%o0] | |
| 378 | |
| 379 ld [%o1+8],%o4 | |
| 380 ld [%o2+8],%o5 | |
| 381 inc 16,%o1 | |
| 382 addxcc %g3,%g4,%g3 | |
| 383 st %g3,[%o0+4] | |
| 384 | |
| 385 ld [%o1-4],%g3 | |
| 386 ld [%o2+12],%g4 | |
| 387 inc 16,%o2 | |
| 388 addxcc %o5,%o4,%o5 | |
| 389 st %o5,[%o0+8] | |
| 390 | |
| 391 inc 16,%o0 | |
| 392 addxcc %g3,%g4,%g3 | |
| 393 st %g3,[%o0-4] | |
| 394 addx %g0,0,%g1 | |
| 395 andcc %o3,-4,%g0 | |
| 396 bnz,a .L_bn_add_words_loop | |
| 397 addcc %g1,-1,%g0 | |
| 398 | |
| 399 tst %o3 | |
| 400 bnz,a .L_bn_add_words_tail | |
| 401 ld [%o1],%o4 | |
| 402 .L_bn_add_words_return: | |
| 403 retl | |
| 404 mov %g1,%o0 | |
| 405 | |
| 406 .L_bn_add_words_tail: | |
| 407 addcc %g1,-1,%g0 | |
| 408 ld [%o2],%o5 | |
| 409 addxcc %o5,%o4,%o5 | |
| 410 addx %g0,0,%g1 | |
| 411 deccc %o3 | |
| 412 bz .L_bn_add_words_return | |
| 413 st %o5,[%o0] | |
| 414 | |
| 415 ld [%o1+4],%o4 | |
| 416 addcc %g1,-1,%g0 | |
| 417 ld [%o2+4],%o5 | |
| 418 addxcc %o5,%o4,%o5 | |
| 419 addx %g0,0,%g1 | |
| 420 deccc %o3 | |
| 421 bz .L_bn_add_words_return | |
| 422 st %o5,[%o0+4] | |
| 423 | |
| 424 ld [%o1+8],%o4 | |
| 425 addcc %g1,-1,%g0 | |
| 426 ld [%o2+8],%o5 | |
| 427 addxcc %o5,%o4,%o5 | |
| 428 st %o5,[%o0+8] | |
| 429 retl | |
| 430 addx %g0,0,%o0 | |
| 431 | |
| 432 .type bn_add_words,#function | |
| 433 .size bn_add_words,(.-bn_add_words) | |
| 434 | |
| 435 .align 32 | |
| 436 | |
| 437 .global bn_sub_words | |
| 438 /* | |
| 439 * BN_ULONG bn_sub_words(rp,ap,bp,n) | |
| 440 * BN_ULONG *rp,*ap,*bp; | |
| 441 * int n; | |
| * | |
| * Multi-word subtraction: rp[i] = ap[i] - bp[i] - borrow for i in | |
| * 0..n-1; returns the borrow out of the last word (0 or 1). | |
| * Returns 0 immediately when n <= 0. | |
| * | |
| * Leaf routine (no register window): rp=%o0, ap=%o1, bp=%o2, n=%o3. | |
| * %o4,%g3 - ap words, %o5,%g4 - bp words; differences land in %o5 | |
| * and %g4. | |
| * | |
| * Within a group of words the borrow travels in the carry flag | |
| * through the subxcc chain. andcc, tst and deccc overwrite the | |
| * flags, so the borrow is first parked in %g1 (addx %g0,0,%g1) and | |
| * later turned back into a flag by addcc %g1,-1,%g0, which carries | |
| * out exactly when %g1 is 1. On first entry the loop is joined at the | |
| * warm_loop label because the first ap word was already loaded in a | |
| * delay slot and the carry flag is cleared explicitly. | |
| * | |
| * The loop subtracts four words per pass for as long as (n & -4) is | |
| * nonzero; the tail then finishes the remaining one to three. | |
| * The instruction following each branch or retl is its delay slot; | |
| * on the ,a (annulled) branches it runs only when the branch is taken. | |
| 442 */ | |
| 443 bn_sub_words: | |
| 444 cmp %o3,0 | |
| 445 bg,a .L_bn_sub_words_proceed | |
| 446 ld [%o1],%o4 | |
| 447 retl | |
| 448 clr %o0 | |
| 449 | |
| 450 .L_bn_sub_words_proceed: | |
| 451 andcc %o3,-4,%g0 | |
| 452 bz .L_bn_sub_words_tail | |
| 453 clr %g1 | |
| 454 ba .L_bn_sub_words_warm_loop | |
| 455 addcc %g0,0,%g0 ! clear carry flag | |
| 456 | |
| 457 .L_bn_sub_words_loop: | |
| 458 ld [%o1],%o4 | |
| 459 .L_bn_sub_words_warm_loop: | |
| 460 ld [%o2],%o5 | |
| 461 ld [%o1+4],%g3 | |
| 462 ld [%o2+4],%g4 | |
| 463 dec 4,%o3 | |
| 464 subxcc %o4,%o5,%o5 | |
| 465 st %o5,[%o0] | |
| 466 | |
| 467 ld [%o1+8],%o4 | |
| 468 ld [%o2+8],%o5 | |
| 469 inc 16,%o1 | |
| 470 subxcc %g3,%g4,%g4 | |
| 471 st %g4,[%o0+4] | |
| 472 | |
| 473 ld [%o1-4],%g3 | |
| 474 ld [%o2+12],%g4 | |
| 475 inc 16,%o2 | |
| 476 subxcc %o4,%o5,%o5 | |
| 477 st %o5,[%o0+8] | |
| 478 | |
| 479 inc 16,%o0 | |
| 480 subxcc %g3,%g4,%g4 | |
| 481 st %g4,[%o0-4] | |
| 482 addx %g0,0,%g1 | |
| 483 andcc %o3,-4,%g0 | |
| 484 bnz,a .L_bn_sub_words_loop | |
| 485 addcc %g1,-1,%g0 | |
| 486 | |
| 487 tst %o3 | |
| 488 nop | |
| 489 bnz,a .L_bn_sub_words_tail | |
| 490 ld [%o1],%o4 | |
| 491 .L_bn_sub_words_return: | |
| 492 retl | |
| 493 mov %g1,%o0 | |
| 494 | |
| 495 .L_bn_sub_words_tail: | |
| 496 addcc %g1,-1,%g0 | |
| 497 ld [%o2],%o5 | |
| 498 subxcc %o4,%o5,%o5 | |
| 499 addx %g0,0,%g1 | |
| 500 deccc %o3 | |
| 501 bz .L_bn_sub_words_return | |
| 502 st %o5,[%o0] | |
| 503 nop | |
| 504 | |
| 505 ld [%o1+4],%o4 | |
| 506 addcc %g1,-1,%g0 | |
| 507 ld [%o2+4],%o5 | |
| 508 subxcc %o4,%o5,%o5 | |
| 509 addx %g0,0,%g1 | |
| 510 deccc %o3 | |
| 511 bz .L_bn_sub_words_return | |
| 512 st %o5,[%o0+4] | |
| 513 | |
| 514 ld [%o1+8],%o4 | |
| 515 addcc %g1,-1,%g0 | |
| 516 ld [%o2+8],%o5 | |
| 517 subxcc %o4,%o5,%o5 | |
| 518 st %o5,[%o0+8] | |
| 519 retl | |
| 520 addx %g0,0,%o0 | |
| 521 | |
| 522 .type bn_sub_words,#function | |
| 523 .size bn_sub_words,(.-bn_sub_words) | |
| 524 | |
| 525 #define FRAME_SIZE -96 | |
| 526 | |
| 527 /* | |
| 528 * Here is register usage map for *all* routines below. | |
| * | |
| * FRAME_SIZE (defined just above) is the %sp displacement handed to | |
| * the save instruction in each routine prologue. | |
| * NOTE(review): 96 bytes looks like the minimum SPARC V8 ABI frame; | |
| * confirm against the ABI before changing it. | |
| * | |
| * t_1, t_2 - low and high halves of the product being accumulated | |
| * (umul result and the value read back from %y); | |
| * c_1..c_3 - three-word column accumulator whose roles rotate from | |
| * one output word to the next; | |
| * ap(I), bp(I), rp(I) - memory operand for word I of the a, b and r | |
| * arrays, addressed off %i1, %i2 and %i0; | |
| * a_N, b_N - registers caching input words a[N] and b[N]. | |
| 529 */ | |
| 530 #define t_1 %o0 | |
| 531 #define t_2 %o1 | |
| 532 #define c_1 %o2 | |
| 533 #define c_2 %o3 | |
| 534 #define c_3 %o4 | |
| 535 | |
| 536 #define ap(I) [%i1+4*I] | |
| 537 #define bp(I) [%i2+4*I] | |
| 538 #define rp(I) [%i0+4*I] | |
| 539 | |
| 540 #define a_0 %l0 | |
| 541 #define a_1 %l1 | |
| 542 #define a_2 %l2 | |
| 543 #define a_3 %l3 | |
| 544 #define a_4 %l4 | |
| 545 #define a_5 %l5 | |
| 546 #define a_6 %l6 | |
| 547 #define a_7 %l7 | |
| 548 | |
| 549 #define b_0 %i3 | |
| 550 #define b_1 %i4 | |
| 551 #define b_2 %i5 | |
| 552 #define b_3 %o5 | |
| 553 #define b_4 %g1 | |
| 554 #define b_5 %g2 | |
| 555 #define b_6 %g3 | |
| 556 #define b_7 %g4 | |
| 557 | |
| 558 .align 32 | |
| 559 .global bn_mul_comba8 | |
| 560 /* | |
| 561 * void bn_mul_comba8(r,a,b) | |
| 562 * BN_ULONG *r,*a,*b; | |
| * | |
| * Column-wise (comba) multiplication of two 8-word numbers: | |
| * r[0..15] = a[0..7] * b[0..7]. | |
| * For output word k every product a[i]*b[j] with i+j == k is added | |
| * into the three-word accumulator c_1,c_2,c_3 (umul low half in t_1, | |
| * high half read from %y into t_2, carries rippled with addxcc and | |
| * addx). The lowest accumulator word is then stored to r[k] and the | |
| * accumulator roles rotate for the next column. | |
| * The trailing !mul_add_c(...) comments name the equivalent step of | |
| * the C macro formulation; the bare != marks appear on roughly every | |
| * fourth instruction and are presumably the author's grouping marks. | |
| * | |
| * Input words are loaded into a_N and b_N shortly before first use. | |
| * The routine opens a register window with save; the closing restore | |
| * also writes zero into the caller's %o0. | |
| 563 */ | |
| 564 bn_mul_comba8: | |
| 565 save %sp,FRAME_SIZE,%sp | |
| 566 ld ap(0),a_0 | |
| 567 ld bp(0),b_0 | |
| 568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3); | |
| 569 ld bp(1),b_1 | |
| 570 rd %y,c_2 | |
| 571 st c_1,rp(0) !r[0]=c1; | |
| 572 | |
| 573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1); | |
| 574 ld ap(1),a_1 | |
| 575 addcc c_2,t_1,c_2 | |
| 576 rd %y,t_2 | |
| 577 addxcc %g0,t_2,c_3 != | |
| 578 addx %g0,%g0,c_1 | |
| 579 ld ap(2),a_2 | |
| 580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1); | |
| 581 addcc c_2,t_1,c_2 != | |
| 582 rd %y,t_2 | |
| 583 addxcc c_3,t_2,c_3 | |
| 584 st c_2,rp(1) !r[1]=c2; | |
| 585 addx c_1,%g0,c_1 != | |
| 586 | |
| 587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); | |
| 588 addcc c_3,t_1,c_3 | |
| 589 rd %y,t_2 | |
| 590 addxcc c_1,t_2,c_1 != | |
| 591 addx %g0,%g0,c_2 | |
| 592 ld bp(2),b_2 | |
| 593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); | |
| 594 addcc c_3,t_1,c_3 != | |
| 595 rd %y,t_2 | |
| 596 addxcc c_1,t_2,c_1 | |
| 597 ld bp(3),b_3 | |
| 598 addx c_2,%g0,c_2 != | |
| 599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); | |
| 600 addcc c_3,t_1,c_3 | |
| 601 rd %y,t_2 | |
| 602 addxcc c_1,t_2,c_1 != | |
| 603 addx c_2,%g0,c_2 | |
| 604 st c_3,rp(2) !r[2]=c3; | |
| 605 | |
| 606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); | |
| 607 addcc c_1,t_1,c_1 != | |
| 608 rd %y,t_2 | |
| 609 addxcc c_2,t_2,c_2 | |
| 610 addx %g0,%g0,c_3 | |
| 611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3); | |
| 612 addcc c_1,t_1,c_1 | |
| 613 rd %y,t_2 | |
| 614 addxcc c_2,t_2,c_2 | |
| 615 addx c_3,%g0,c_3 != | |
| 616 ld ap(3),a_3 | |
| 617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); | |
| 618 addcc c_1,t_1,c_1 | |
| 619 rd %y,t_2 != | |
| 620 addxcc c_2,t_2,c_2 | |
| 621 addx c_3,%g0,c_3 | |
| 622 ld ap(4),a_4 | |
| 623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!= | |
| 624 addcc c_1,t_1,c_1 | |
| 625 rd %y,t_2 | |
| 626 addxcc c_2,t_2,c_2 | |
| 627 addx c_3,%g0,c_3 != | |
| 628 st c_1,rp(3) !r[3]=c1; | |
| 629 | |
| 630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1); | |
| 631 addcc c_2,t_1,c_2 | |
| 632 rd %y,t_2 != | |
| 633 addxcc c_3,t_2,c_3 | |
| 634 addx %g0,%g0,c_1 | |
| 635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); | |
| 636 addcc c_2,t_1,c_2 != | |
| 637 rd %y,t_2 | |
| 638 addxcc c_3,t_2,c_3 | |
| 639 addx c_1,%g0,c_1 | |
| 640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1); | |
| 641 addcc c_2,t_1,c_2 | |
| 642 rd %y,t_2 | |
| 643 addxcc c_3,t_2,c_3 | |
| 644 addx c_1,%g0,c_1 != | |
| 645 ld bp(4),b_4 | |
| 646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); | |
| 647 addcc c_2,t_1,c_2 | |
| 648 rd %y,t_2 != | |
| 649 addxcc c_3,t_2,c_3 | |
| 650 addx c_1,%g0,c_1 | |
| 651 ld bp(5),b_5 | |
| 652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1); | |
| 653 addcc c_2,t_1,c_2 | |
| 654 rd %y,t_2 | |
| 655 addxcc c_3,t_2,c_3 | |
| 656 addx c_1,%g0,c_1 != | |
| 657 st c_2,rp(4) !r[4]=c2; | |
| 658 | |
| 659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2); | |
| 660 addcc c_3,t_1,c_3 | |
| 661 rd %y,t_2 != | |
| 662 addxcc c_1,t_2,c_1 | |
| 663 addx %g0,%g0,c_2 | |
| 664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2); | |
| 665 addcc c_3,t_1,c_3 != | |
| 666 rd %y,t_2 | |
| 667 addxcc c_1,t_2,c_1 | |
| 668 addx c_2,%g0,c_2 | |
| 669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2); | |
| 670 addcc c_3,t_1,c_3 | |
| 671 rd %y,t_2 | |
| 672 addxcc c_1,t_2,c_1 | |
| 673 addx c_2,%g0,c_2 != | |
| 674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); | |
| 675 addcc c_3,t_1,c_3 | |
| 676 rd %y,t_2 | |
| 677 addxcc c_1,t_2,c_1 != | |
| 678 addx c_2,%g0,c_2 | |
| 679 ld ap(5),a_5 | |
| 680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2); | |
| 681 addcc c_3,t_1,c_3 != | |
| 682 rd %y,t_2 | |
| 683 addxcc c_1,t_2,c_1 | |
| 684 ld ap(6),a_6 | |
| 685 addx c_2,%g0,c_2 != | |
| 686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2); | |
| 687 addcc c_3,t_1,c_3 | |
| 688 rd %y,t_2 | |
| 689 addxcc c_1,t_2,c_1 != | |
| 690 addx c_2,%g0,c_2 | |
| 691 st c_3,rp(5) !r[5]=c3; | |
| 692 | |
| 693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3); | |
| 694 addcc c_1,t_1,c_1 != | |
| 695 rd %y,t_2 | |
| 696 addxcc c_2,t_2,c_2 | |
| 697 addx %g0,%g0,c_3 | |
| 698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3); | |
| 699 addcc c_1,t_1,c_1 | |
| 700 rd %y,t_2 | |
| 701 addxcc c_2,t_2,c_2 | |
| 702 addx c_3,%g0,c_3 != | |
| 703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3); | |
| 704 addcc c_1,t_1,c_1 | |
| 705 rd %y,t_2 | |
| 706 addxcc c_2,t_2,c_2 != | |
| 707 addx c_3,%g0,c_3 | |
| 708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3); | |
| 709 addcc c_1,t_1,c_1 | |
| 710 rd %y,t_2 != | |
| 711 addxcc c_2,t_2,c_2 | |
| 712 addx c_3,%g0,c_3 | |
| 713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3); | |
| 714 addcc c_1,t_1,c_1 != | |
| 715 rd %y,t_2 | |
| 716 addxcc c_2,t_2,c_2 | |
| 717 ld bp(6),b_6 | |
| 718 addx c_3,%g0,c_3 != | |
| 719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3); | |
| 720 addcc c_1,t_1,c_1 | |
| 721 rd %y,t_2 | |
| 722 addxcc c_2,t_2,c_2 != | |
| 723 addx c_3,%g0,c_3 | |
| 724 ld bp(7),b_7 | |
| 725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3); | |
| 726 addcc c_1,t_1,c_1 != | |
| 727 rd %y,t_2 | |
| 728 addxcc c_2,t_2,c_2 | |
| 729 st c_1,rp(6) !r[6]=c1; | |
| 730 addx c_3,%g0,c_3 != | |
| 731 | |
| 732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1); | |
| 733 addcc c_2,t_1,c_2 | |
| 734 rd %y,t_2 | |
| 735 addxcc c_3,t_2,c_3 != | |
| 736 addx %g0,%g0,c_1 | |
| 737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1); | |
| 738 addcc c_2,t_1,c_2 | |
| 739 rd %y,t_2 != | |
| 740 addxcc c_3,t_2,c_3 | |
| 741 addx c_1,%g0,c_1 | |
| 742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1); | |
| 743 addcc c_2,t_1,c_2 != | |
| 744 rd %y,t_2 | |
| 745 addxcc c_3,t_2,c_3 | |
| 746 addx c_1,%g0,c_1 | |
| 747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1); | |
| 748 addcc c_2,t_1,c_2 | |
| 749 rd %y,t_2 | |
| 750 addxcc c_3,t_2,c_3 | |
| 751 addx c_1,%g0,c_1 != | |
| 752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1); | |
| 753 addcc c_2,t_1,c_2 | |
| 754 rd %y,t_2 | |
| 755 addxcc c_3,t_2,c_3 != | |
| 756 addx c_1,%g0,c_1 | |
| 757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1); | |
| 758 addcc c_2,t_1,c_2 | |
| 759 rd %y,t_2 != | |
| 760 addxcc c_3,t_2,c_3 | |
| 761 addx c_1,%g0,c_1 | |
| 762 ld ap(7),a_7 | |
| 763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1); | |
| 764 addcc c_2,t_1,c_2 | |
| 765 rd %y,t_2 | |
| 766 addxcc c_3,t_2,c_3 | |
| 767 addx c_1,%g0,c_1 != | |
| 768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1); | |
| 769 addcc c_2,t_1,c_2 | |
| 770 rd %y,t_2 | |
| 771 addxcc c_3,t_2,c_3 != | |
| 772 addx c_1,%g0,c_1 | |
| 773 st c_2,rp(7) !r[7]=c2; | |
| 774 | |
| 775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2); | |
| 776 addcc c_3,t_1,c_3 != | |
| 777 rd %y,t_2 | |
| 778 addxcc c_1,t_2,c_1 | |
| 779 addx %g0,%g0,c_2 | |
| 780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2); | |
| 781 addcc c_3,t_1,c_3 | |
| 782 rd %y,t_2 | |
| 783 addxcc c_1,t_2,c_1 | |
| 784 addx c_2,%g0,c_2 != | |
| 785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2); | |
| 786 addcc c_3,t_1,c_3 | |
| 787 rd %y,t_2 | |
| 788 addxcc c_1,t_2,c_1 != | |
| 789 addx c_2,%g0,c_2 | |
| 790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2); | |
| 791 addcc c_3,t_1,c_3 | |
| 792 rd %y,t_2 != | |
| 793 addxcc c_1,t_2,c_1 | |
| 794 addx c_2,%g0,c_2 | |
| 795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2); | |
| 796 addcc c_3,t_1,c_3 != | |
| 797 rd %y,t_2 | |
| 798 addxcc c_1,t_2,c_1 | |
| 799 addx c_2,%g0,c_2 | |
| 800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2); | |
| 801 addcc c_3,t_1,c_3 | |
| 802 rd %y,t_2 | |
| 803 addxcc c_1,t_2,c_1 | |
| 804 addx c_2,%g0,c_2 != | |
| 805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2); | |
| 806 addcc c_3,t_1,c_3 | |
| 807 rd %y,t_2 | |
| 808 addxcc c_1,t_2,c_1 ! | |
| 809 addx c_2,%g0,c_2 | |
| 810 st c_3,rp(8) !r[8]=c3; | |
| 811 | |
| 812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3); | |
| 813 addcc c_1,t_1,c_1 != | |
| 814 rd %y,t_2 | |
| 815 addxcc c_2,t_2,c_2 | |
| 816 addx %g0,%g0,c_3 | |
| 817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3); | |
| 818 addcc c_1,t_1,c_1 | |
| 819 rd %y,t_2 | |
| 820 addxcc c_2,t_2,c_2 | |
| 821 addx c_3,%g0,c_3 != | |
| 822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3); | |
| 823 addcc c_1,t_1,c_1 | |
| 824 rd %y,t_2 | |
| 825 addxcc c_2,t_2,c_2 != | |
| 826 addx c_3,%g0,c_3 | |
| 827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3); | |
| 828 addcc c_1,t_1,c_1 | |
| 829 rd %y,t_2 != | |
| 830 addxcc c_2,t_2,c_2 | |
| 831 addx c_3,%g0,c_3 | |
| 832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3); | |
| 833 addcc c_1,t_1,c_1 != | |
| 834 rd %y,t_2 | |
| 835 addxcc c_2,t_2,c_2 | |
| 836 addx c_3,%g0,c_3 | |
| 837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3); | |
| 838 addcc c_1,t_1,c_1 | |
| 839 rd %y,t_2 | |
| 840 addxcc c_2,t_2,c_2 | |
| 841 addx c_3,%g0,c_3 != | |
| 842 st c_1,rp(9) !r[9]=c1; | |
| 843 | |
| 844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1); | |
| 845 addcc c_2,t_1,c_2 | |
| 846 rd %y,t_2 != | |
| 847 addxcc c_3,t_2,c_3 | |
| 848 addx %g0,%g0,c_1 | |
| 849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1); | |
| 850 addcc c_2,t_1,c_2 != | |
| 851 rd %y,t_2 | |
| 852 addxcc c_3,t_2,c_3 | |
| 853 addx c_1,%g0,c_1 | |
| 854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1); | |
| 855 addcc c_2,t_1,c_2 | |
| 856 rd %y,t_2 | |
| 857 addxcc c_3,t_2,c_3 | |
| 858 addx c_1,%g0,c_1 != | |
| 859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1); | |
| 860 addcc c_2,t_1,c_2 | |
| 861 rd %y,t_2 | |
| 862 addxcc c_3,t_2,c_3 != | |
| 863 addx c_1,%g0,c_1 | |
| 864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1); | |
| 865 addcc c_2,t_1,c_2 | |
| 866 rd %y,t_2 != | |
| 867 addxcc c_3,t_2,c_3 | |
| 868 addx c_1,%g0,c_1 | |
| 869 st c_2,rp(10) !r[10]=c2; | |
| 870 | |
| 871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2); | |
| 872 addcc c_3,t_1,c_3 | |
| 873 rd %y,t_2 | |
| 874 addxcc c_1,t_2,c_1 | |
| 875 addx %g0,%g0,c_2 != | |
| 876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2); | |
| 877 addcc c_3,t_1,c_3 | |
| 878 rd %y,t_2 | |
| 879 addxcc c_1,t_2,c_1 != | |
| 880 addx c_2,%g0,c_2 | |
| 881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2); | |
| 882 addcc c_3,t_1,c_3 | |
| 883 rd %y,t_2 != | |
| 884 addxcc c_1,t_2,c_1 | |
| 885 addx c_2,%g0,c_2 | |
| 886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2); | |
| 887 addcc c_3,t_1,c_3 != | |
| 888 rd %y,t_2 | |
| 889 addxcc c_1,t_2,c_1 | |
| 890 st c_3,rp(11) !r[11]=c3; | |
| 891 addx c_2,%g0,c_2 != | |
| 892 | |
| 893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3); | |
| 894 addcc c_1,t_1,c_1 | |
| 895 rd %y,t_2 | |
| 896 addxcc c_2,t_2,c_2 != | |
| 897 addx %g0,%g0,c_3 | |
| 898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3); | |
| 899 addcc c_1,t_1,c_1 | |
| 900 rd %y,t_2 != | |
| 901 addxcc c_2,t_2,c_2 | |
| 902 addx c_3,%g0,c_3 | |
| 903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3); | |
| 904 addcc c_1,t_1,c_1 != | |
| 905 rd %y,t_2 | |
| 906 addxcc c_2,t_2,c_2 | |
| 907 st c_1,rp(12) !r[12]=c1; | |
| 908 addx c_3,%g0,c_3 != | |
| 909 | |
| 910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1); | |
| 911 addcc c_2,t_1,c_2 | |
| 912 rd %y,t_2 | |
| 913 addxcc c_3,t_2,c_3 != | |
| 914 addx %g0,%g0,c_1 | |
| 915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1); | |
| 916 addcc c_2,t_1,c_2 | |
| 917 rd %y,t_2 != | |
| 918 addxcc c_3,t_2,c_3 | |
| 919 addx c_1,%g0,c_1 | |
| 920 st c_2,rp(13) !r[13]=c2; | |
| 921 | |
| 922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2); | |
| 923 addcc c_3,t_1,c_3 | |
| 924 rd %y,t_2 | |
| 925 addxcc c_1,t_2,c_1 | |
| 926 nop != | |
| 927 st c_3,rp(14) !r[14]=c3; | |
| 928 st c_1,rp(15) !r[15]=c1; | |
| 929 | |
| 930 ret | |
| 931 restore %g0,%g0,%o0 | |
| 932 | |
| 933 .type bn_mul_comba8,#function | |
| 934 .size bn_mul_comba8,(.-bn_mul_comba8) | |
| 935 | |
| 936 .align 32 | |
| 937 | |
| 938 .global bn_mul_comba4 | |
| 939 /* | |
| 940 * void bn_mul_comba4(r,a,b) | |
| 941 * BN_ULONG *r,*a,*b; | |
| * | |
| * Column-wise (comba) multiplication of two 4-word numbers: | |
| * r[0..7] = a[0..3] * b[0..3]. | |
| * For output word k every product a[i]*b[j] with i+j == k is added | |
| * into the three-word accumulator c_1,c_2,c_3 (umul low half in t_1, | |
| * high half read from %y into t_2, carries rippled with addxcc and | |
| * addx). The lowest accumulator word is then stored to r[k] and the | |
| * accumulator roles rotate for the next column. | |
| * The trailing !mul_add_c(...) comments name the equivalent step of | |
| * the C macro formulation; the bare != marks appear on roughly every | |
| * fourth instruction and are presumably the author's grouping marks. | |
| * | |
| * Input words are loaded into a_N and b_N shortly before first use. | |
| * The routine opens a register window with save; the closing restore | |
| * also writes zero into the caller's %o0. | |
| 942 */ | |
| 943 bn_mul_comba4: | |
| 944 save %sp,FRAME_SIZE,%sp | |
| 945 ld ap(0),a_0 | |
| 946 ld bp(0),b_0 | |
| 947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3); | |
| 948 ld bp(1),b_1 | |
| 949 rd %y,c_2 | |
| 950 st c_1,rp(0) !r[0]=c1; | |
| 951 | |
| 952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1); | |
| 953 ld ap(1),a_1 | |
| 954 addcc c_2,t_1,c_2 | |
| 955 rd %y,t_2 != | |
| 956 addxcc %g0,t_2,c_3 | |
| 957 addx %g0,%g0,c_1 | |
| 958 ld ap(2),a_2 | |
| 959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); | |
| 960 addcc c_2,t_1,c_2 | |
| 961 rd %y,t_2 | |
| 962 addxcc c_3,t_2,c_3 | |
| 963 addx c_1,%g0,c_1 != | |
| 964 st c_2,rp(1) !r[1]=c2; | |
| 965 | |
| 966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); | |
| 967 addcc c_3,t_1,c_3 | |
| 968 rd %y,t_2 != | |
| 969 addxcc c_1,t_2,c_1 | |
| 970 addx %g0,%g0,c_2 | |
| 971 ld bp(2),b_2 | |
| 972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2); | |
| 973 addcc c_3,t_1,c_3 | |
| 974 rd %y,t_2 | |
| 975 addxcc c_1,t_2,c_1 | |
| 976 addx c_2,%g0,c_2 != | |
| 977 ld bp(3),b_3 | |
| 978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); | |
| 979 addcc c_3,t_1,c_3 | |
| 980 rd %y,t_2 != | |
| 981 addxcc c_1,t_2,c_1 | |
| 982 addx c_2,%g0,c_2 | |
| 983 st c_3,rp(2) !r[2]=c3; | |
| 984 | |
| 985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3); | |
| 986 addcc c_1,t_1,c_1 | |
| 987 rd %y,t_2 | |
| 988 addxcc c_2,t_2,c_2 | |
| 989 addx %g0,%g0,c_3 != | |
| 990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3); | |
| 991 addcc c_1,t_1,c_1 | |
| 992 rd %y,t_2 | |
| 993 addxcc c_2,t_2,c_2 != | |
| 994 addx c_3,%g0,c_3 | |
| 995 ld ap(3),a_3 | |
| 996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); | |
| 997 addcc c_1,t_1,c_1 != | |
| 998 rd %y,t_2 | |
| 999 addxcc c_2,t_2,c_2 | |
| 1000 addx c_3,%g0,c_3 | |
| 1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3); | |
| 1002 addcc c_1,t_1,c_1 | |
| 1003 rd %y,t_2 | |
| 1004 addxcc c_2,t_2,c_2 | |
| 1005 addx c_3,%g0,c_3 != | |
| 1006 st c_1,rp(3) !r[3]=c1; | |
| 1007 | |
| 1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); | |
| 1009 addcc c_2,t_1,c_2 | |
| 1010 rd %y,t_2 != | |
| 1011 addxcc c_3,t_2,c_3 | |
| 1012 addx %g0,%g0,c_1 | |
| 1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1); | |
| 1014 addcc c_2,t_1,c_2 != | |
| 1015 rd %y,t_2 | |
| 1016 addxcc c_3,t_2,c_3 | |
| 1017 addx c_1,%g0,c_1 | |
| 1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1); | |
| 1019 addcc c_2,t_1,c_2 | |
| 1020 rd %y,t_2 | |
| 1021 addxcc c_3,t_2,c_3 | |
| 1022 addx c_1,%g0,c_1 != | |
| 1023 st c_2,rp(4) !r[4]=c2; | |
| 1024 | |
| 1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); | |
| 1026 addcc c_3,t_1,c_3 | |
| 1027 rd %y,t_2 != | |
| 1028 addxcc c_1,t_2,c_1 | |
| 1029 addx %g0,%g0,c_2 | |
| 1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); | |
| 1031 addcc c_3,t_1,c_3 != | |
| 1032 rd %y,t_2 | |
| 1033 addxcc c_1,t_2,c_1 | |
| 1034 st c_3,rp(5) !r[5]=c3; | |
| 1035 addx c_2,%g0,c_2 != | |
| 1036 | |
| 1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3); | |
| 1038 addcc c_1,t_1,c_1 | |
| 1039 rd %y,t_2 | |
| 1040 addxcc c_2,t_2,c_2 != | |
| 1041 st c_1,rp(6) !r[6]=c1; | |
| 1042 st c_2,rp(7) !r[7]=c2; | |
| 1043 | |
| 1044 ret | |
| 1045 restore %g0,%g0,%o0 | |
| 1046 | |
| 1047 .type bn_mul_comba4,#function | |
| 1048 .size bn_mul_comba4,(.-bn_mul_comba4) | |
| 1049 | |
| 1050 .align 32 | |
| 1051 | |
| 1052 .global bn_sqr_comba8 | |
| 1053 bn_sqr_comba8: | |
| 1054 save %sp,FRAME_SIZE,%sp | |
| 1055 ld ap(0),a_0 | |
| 1056 ld ap(1),a_1 | |
| 1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3); | |
| 1058 rd %y,c_2 | |
| 1059 st c_1,rp(0) !r[0]=c1; | |
| 1060 | |
| 1061 ld ap(2),a_2 | |
| 1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); | |
| 1063 addcc c_2,t_1,c_2 | |
| 1064 rd %y,t_2 | |
| 1065 addxcc %g0,t_2,c_3 | |
| 1066 addx %g0,%g0,c_1 != | |
| 1067 addcc c_2,t_1,c_2 | |
| 1068 addxcc c_3,t_2,c_3 | |
| 1069 st c_2,rp(1) !r[1]=c2; | |
| 1070 addx c_1,%g0,c_1 != | |
| 1071 | |
| 1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); | |
| 1073 addcc c_3,t_1,c_3 | |
| 1074 rd %y,t_2 | |
| 1075 addxcc c_1,t_2,c_1 != | |
| 1076 addx %g0,%g0,c_2 | |
| 1077 addcc c_3,t_1,c_3 | |
| 1078 addxcc c_1,t_2,c_1 | |
| 1079 addx c_2,%g0,c_2 != | |
| 1080 ld ap(3),a_3 | |
| 1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); | |
| 1082 addcc c_3,t_1,c_3 | |
| 1083 rd %y,t_2 != | |
| 1084 addxcc c_1,t_2,c_1 | |
| 1085 addx c_2,%g0,c_2 | |
| 1086 st c_3,rp(2) !r[2]=c3; | |
| 1087 | |
| 1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3); | |
| 1089 addcc c_1,t_1,c_1 | |
| 1090 rd %y,t_2 | |
| 1091 addxcc c_2,t_2,c_2 | |
| 1092 addx %g0,%g0,c_3 != | |
| 1093 addcc c_1,t_1,c_1 | |
| 1094 addxcc c_2,t_2,c_2 | |
| 1095 ld ap(4),a_4 | |
| 1096 addx c_3,%g0,c_3 != | |
| 1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); | |
| 1098 addcc c_1,t_1,c_1 | |
| 1099 rd %y,t_2 | |
| 1100 addxcc c_2,t_2,c_2 != | |
| 1101 addx c_3,%g0,c_3 | |
| 1102 addcc c_1,t_1,c_1 | |
| 1103 addxcc c_2,t_2,c_2 | |
| 1104 addx c_3,%g0,c_3 != | |
| 1105 st c_1,rp(3) !r[3]=c1; | |
| 1106 | |
| 1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1); | |
| 1108 addcc c_2,t_1,c_2 | |
| 1109 rd %y,t_2 != | |
| 1110 addxcc c_3,t_2,c_3 | |
| 1111 addx %g0,%g0,c_1 | |
| 1112 addcc c_2,t_1,c_2 | |
| 1113 addxcc c_3,t_2,c_3 != | |
| 1114 addx c_1,%g0,c_1 | |
| 1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); | |
| 1116 addcc c_2,t_1,c_2 | |
| 1117 rd %y,t_2 != | |
| 1118 addxcc c_3,t_2,c_3 | |
| 1119 addx c_1,%g0,c_1 | |
| 1120 addcc c_2,t_1,c_2 | |
| 1121 addxcc c_3,t_2,c_3 != | |
| 1122 addx c_1,%g0,c_1 | |
| 1123 ld ap(5),a_5 | |
| 1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); | |
| 1125 addcc c_2,t_1,c_2 != | |
| 1126 rd %y,t_2 | |
| 1127 addxcc c_3,t_2,c_3 | |
| 1128 st c_2,rp(4) !r[4]=c2; | |
| 1129 addx c_1,%g0,c_1 != | |
| 1130 | |
| 1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2); | |
| 1132 addcc c_3,t_1,c_3 | |
| 1133 rd %y,t_2 | |
| 1134 addxcc c_1,t_2,c_1 != | |
| 1135 addx %g0,%g0,c_2 | |
| 1136 addcc c_3,t_1,c_3 | |
| 1137 addxcc c_1,t_2,c_1 | |
| 1138 addx c_2,%g0,c_2 != | |
| 1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2); | |
| 1140 addcc c_3,t_1,c_3 | |
| 1141 rd %y,t_2 | |
| 1142 addxcc c_1,t_2,c_1 != | |
| 1143 addx c_2,%g0,c_2 | |
| 1144 addcc c_3,t_1,c_3 | |
| 1145 addxcc c_1,t_2,c_1 | |
| 1146 addx c_2,%g0,c_2 != | |
| 1147 ld ap(6),a_6 | |
| 1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); | |
| 1149 addcc c_3,t_1,c_3 | |
| 1150 rd %y,t_2 != | |
| 1151 addxcc c_1,t_2,c_1 | |
| 1152 addx c_2,%g0,c_2 | |
| 1153 addcc c_3,t_1,c_3 | |
| 1154 addxcc c_1,t_2,c_1 != | |
| 1155 addx c_2,%g0,c_2 | |
| 1156 st c_3,rp(5) !r[5]=c3; | |
| 1157 | |
| 1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3); | |
| 1159 addcc c_1,t_1,c_1 != | |
| 1160 rd %y,t_2 | |
| 1161 addxcc c_2,t_2,c_2 | |
| 1162 addx %g0,%g0,c_3 | |
| 1163 addcc c_1,t_1,c_1 != | |
| 1164 addxcc c_2,t_2,c_2 | |
| 1165 addx c_3,%g0,c_3 | |
| 1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3); | |
| 1167 addcc c_1,t_1,c_1 != | |
| 1168 rd %y,t_2 | |
| 1169 addxcc c_2,t_2,c_2 | |
| 1170 addx c_3,%g0,c_3 | |
| 1171 addcc c_1,t_1,c_1 != | |
| 1172 addxcc c_2,t_2,c_2 | |
| 1173 addx c_3,%g0,c_3 | |
| 1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3); | |
| 1175 addcc c_1,t_1,c_1 != | |
| 1176 rd %y,t_2 | |
| 1177 addxcc c_2,t_2,c_2 | |
| 1178 addx c_3,%g0,c_3 | |
| 1179 addcc c_1,t_1,c_1 != | |
| 1180 addxcc c_2,t_2,c_2 | |
| 1181 addx c_3,%g0,c_3 | |
| 1182 ld ap(7),a_7 | |
| 1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3); | |
| 1184 addcc c_1,t_1,c_1 | |
| 1185 rd %y,t_2 | |
| 1186 addxcc c_2,t_2,c_2 | |
| 1187 addx c_3,%g0,c_3 != | |
| 1188 st c_1,rp(6) !r[6]=c1; | |
| 1189 | |
| 1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1); | |
| 1191 addcc c_2,t_1,c_2 | |
| 1192 rd %y,t_2 != | |
| 1193 addxcc c_3,t_2,c_3 | |
| 1194 addx %g0,%g0,c_1 | |
| 1195 addcc c_2,t_1,c_2 | |
| 1196 addxcc c_3,t_2,c_3 != | |
| 1197 addx c_1,%g0,c_1 | |
| 1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1); | |
| 1199 addcc c_2,t_1,c_2 | |
| 1200 rd %y,t_2 != | |
| 1201 addxcc c_3,t_2,c_3 | |
| 1202 addx c_1,%g0,c_1 | |
| 1203 addcc c_2,t_1,c_2 | |
| 1204 addxcc c_3,t_2,c_3 != | |
| 1205 addx c_1,%g0,c_1 | |
| 1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1); | |
| 1207 addcc c_2,t_1,c_2 | |
| 1208 rd %y,t_2 != | |
| 1209 addxcc c_3,t_2,c_3 | |
| 1210 addx c_1,%g0,c_1 | |
| 1211 addcc c_2,t_1,c_2 | |
| 1212 addxcc c_3,t_2,c_3 != | |
| 1213 addx c_1,%g0,c_1 | |
| 1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1); | |
| 1215 addcc c_2,t_1,c_2 | |
| 1216 rd %y,t_2 != | |
| 1217 addxcc c_3,t_2,c_3 | |
| 1218 addx c_1,%g0,c_1 | |
| 1219 addcc c_2,t_1,c_2 | |
| 1220 addxcc c_3,t_2,c_3 != | |
| 1221 addx c_1,%g0,c_1 | |
| 1222 st c_2,rp(7) !r[7]=c2; | |
| 1223 | |
| 1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2); | |
| 1225 addcc c_3,t_1,c_3 != | |
| 1226 rd %y,t_2 | |
| 1227 addxcc c_1,t_2,c_1 | |
| 1228 addx %g0,%g0,c_2 | |
| 1229 addcc c_3,t_1,c_3 != | |
| 1230 addxcc c_1,t_2,c_1 | |
| 1231 addx c_2,%g0,c_2 | |
| 1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2); | |
| 1233 addcc c_3,t_1,c_3 != | |
| 1234 rd %y,t_2 | |
| 1235 addxcc c_1,t_2,c_1 | |
| 1236 addx c_2,%g0,c_2 | |
| 1237 addcc c_3,t_1,c_3 != | |
| 1238 addxcc c_1,t_2,c_1 | |
| 1239 addx c_2,%g0,c_2 | |
| 1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2); | |
| 1241 addcc c_3,t_1,c_3 != | |
| 1242 rd %y,t_2 | |
| 1243 addxcc c_1,t_2,c_1 | |
| 1244 addx c_2,%g0,c_2 | |
| 1245 addcc c_3,t_1,c_3 != | |
| 1246 addxcc c_1,t_2,c_1 | |
| 1247 addx c_2,%g0,c_2 | |
| 1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2); | |
| 1249 addcc c_3,t_1,c_3 != | |
| 1250 rd %y,t_2 | |
| 1251 addxcc c_1,t_2,c_1 | |
| 1252 st c_3,rp(8) !r[8]=c3; | |
| 1253 addx c_2,%g0,c_2 != | |
| 1254 | |
| 1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3); | |
| 1256 addcc c_1,t_1,c_1 | |
| 1257 rd %y,t_2 | |
| 1258 addxcc c_2,t_2,c_2 != | |
| 1259 addx %g0,%g0,c_3 | |
| 1260 addcc c_1,t_1,c_1 | |
| 1261 addxcc c_2,t_2,c_2 | |
| 1262 addx c_3,%g0,c_3 != | |
| 1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3); | |
| 1264 addcc c_1,t_1,c_1 | |
| 1265 rd %y,t_2 | |
| 1266 addxcc c_2,t_2,c_2 != | |
| 1267 addx c_3,%g0,c_3 | |
| 1268 addcc c_1,t_1,c_1 | |
| 1269 addxcc c_2,t_2,c_2 | |
| 1270 addx c_3,%g0,c_3 != | |
| 1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3); | |
| 1272 addcc c_1,t_1,c_1 | |
| 1273 rd %y,t_2 | |
| 1274 addxcc c_2,t_2,c_2 != | |
| 1275 addx c_3,%g0,c_3 | |
| 1276 addcc c_1,t_1,c_1 | |
| 1277 addxcc c_2,t_2,c_2 | |
| 1278 addx c_3,%g0,c_3 != | |
| 1279 st c_1,rp(9) !r[9]=c1; | |
| 1280 | |
| 1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1); | |
| 1282 addcc c_2,t_1,c_2 | |
| 1283 rd %y,t_2 != | |
| 1284 addxcc c_3,t_2,c_3 | |
| 1285 addx %g0,%g0,c_1 | |
| 1286 addcc c_2,t_1,c_2 | |
| 1287 addxcc c_3,t_2,c_3 != | |
| 1288 addx c_1,%g0,c_1 | |
| 1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1); | |
| 1290 addcc c_2,t_1,c_2 | |
| 1291 rd %y,t_2 != | |
| 1292 addxcc c_3,t_2,c_3 | |
| 1293 addx c_1,%g0,c_1 | |
| 1294 addcc c_2,t_1,c_2 | |
| 1295 addxcc c_3,t_2,c_3 != | |
| 1296 addx c_1,%g0,c_1 | |
| 1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1); | |
| 1298 addcc c_2,t_1,c_2 | |
| 1299 rd %y,t_2 != | |
| 1300 addxcc c_3,t_2,c_3 | |
| 1301 addx c_1,%g0,c_1 | |
| 1302 st c_2,rp(10) !r[10]=c2; | |
| 1303 | |
| 1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2); | |
| 1305 addcc c_3,t_1,c_3 | |
| 1306 rd %y,t_2 | |
| 1307 addxcc c_1,t_2,c_1 | |
| 1308 addx %g0,%g0,c_2 != | |
| 1309 addcc c_3,t_1,c_3 | |
| 1310 addxcc c_1,t_2,c_1 | |
| 1311 addx c_2,%g0,c_2 | |
| 1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2); | |
| 1313 addcc c_3,t_1,c_3 | |
| 1314 rd %y,t_2 | |
| 1315 addxcc c_1,t_2,c_1 | |
| 1316 addx c_2,%g0,c_2 != | |
| 1317 addcc c_3,t_1,c_3 | |
| 1318 addxcc c_1,t_2,c_1 | |
| 1319 st c_3,rp(11) !r[11]=c3; | |
| 1320 addx c_2,%g0,c_2 != | |
| 1321 | |
| 1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3); | |
| 1323 addcc c_1,t_1,c_1 | |
| 1324 rd %y,t_2 | |
| 1325 addxcc c_2,t_2,c_2 != | |
| 1326 addx %g0,%g0,c_3 | |
| 1327 addcc c_1,t_1,c_1 | |
| 1328 addxcc c_2,t_2,c_2 | |
| 1329 addx c_3,%g0,c_3 != | |
| 1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3); | |
| 1331 addcc c_1,t_1,c_1 | |
| 1332 rd %y,t_2 | |
| 1333 addxcc c_2,t_2,c_2 != | |
| 1334 addx c_3,%g0,c_3 | |
| 1335 st c_1,rp(12) !r[12]=c1; | |
| 1336 | |
| 1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1); | |
| 1338 addcc c_2,t_1,c_2 != | |
| 1339 rd %y,t_2 | |
| 1340 addxcc c_3,t_2,c_3 | |
| 1341 addx %g0,%g0,c_1 | |
| 1342 addcc c_2,t_1,c_2 != | |
| 1343 addxcc c_3,t_2,c_3 | |
| 1344 st c_2,rp(13) !r[13]=c2; | |
| 1345 addx c_1,%g0,c_1 != | |
| 1346 | |
| 1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2); | |
| 1348 addcc c_3,t_1,c_3 | |
| 1349 rd %y,t_2 | |
| 1350 addxcc c_1,t_2,c_1 != | |
| 1351 st c_3,rp(14) !r[14]=c3; | |
| 1352 st c_1,rp(15) !r[15]=c1; | |
| 1353 | |
| 1354 ret | |
| 1355 restore %g0,%g0,%o0 | |
| 1356 | |
| 1357 .type bn_sqr_comba8,#function | |
| 1358 .size bn_sqr_comba8,(.-bn_sqr_comba8) | |
| 1359 | |
| 1360 .align 32 | |
| 1361 | |
| 1362 .global bn_sqr_comba4 | |
| 1363 /* | |
| 1364 * void bn_sqr_comba4(r,a) | |
| 1365 * BN_ULONG *r,*a; | |
| * | |
| * Squares the four-word number a[0..3] and stores the eight-word | |
| * result in r[0..7], least significant word first. Words are 32 bits: | |
| * every load, store and umul below is a 32-bit operation. | |
| * | |
| * The result is produced one word at a time. For r[k], each product | |
| * a[i] x a[j] with i+j==k is added into a three-word accumulator kept | |
| * in c_1, c_2, c_3; after a word is stored the three roles rotate and | |
| * the register that was just stored is restarted from the carry. | |
| * The trailing sqr_add_c / sqr_add_c2 notes name the step in progress: | |
| * - sqr_add_c : umul, then one addcc+addxcc+addx pass (a[i] squared) | |
| * - sqr_add_c2 : umul, then the same pass twice (2 x a[i] x a[j]) | |
| * umul leaves the low product word in its destination and the high | |
| * word in %y, which is why each umul is followed by rd %y. | |
| * | |
| * Loads and stores are interleaved with the add chains; they do not | |
| * change the condition codes, so the carry from an addxcc still | |
| * reaches the addx that follows a store. | |
| * | |
| * The tag != is present on every fourth instruction of the body. | |
| * | |
| * NOTE(review): ap(), rp(), a_N, c_N, t_N and FRAME_SIZE are defined | |
| * earlier in the file, outside this excerpt. Presumably ap(i) and | |
| * rp(i) address a[i] and r[i] through the incoming arguments, as the | |
| * r[i]=... notes suggest - confirm against those definitions. | |
| * | |
| * Return: the C prototype is void; the restore in the ret delay slot | |
| * additionally writes %g0+%g0 (zero) to %o0 of the caller. | |
| 1366 */ | |
| 1367 bn_sqr_comba4: | |
| 1368 save %sp,FRAME_SIZE,%sp | |
| 1369 ld ap(0),a_0 | |
| 1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3); | |
| 1371 ld ap(1),a_1 != | |
| 1372 rd %y,c_2 | |
| 1373 st c_1,rp(0) !r[0]=c1; | |
| 1374 /* r[1]: add a[0] x a[1] twice; c_3 and c_1 are first written here (from %g0), not accumulated */ | |
| 1375 ld ap(2),a_2 | |
| 1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); | |
| 1377 addcc c_2,t_1,c_2 | |
| 1378 rd %y,t_2 | |
| 1379 addxcc %g0,t_2,c_3 | |
| 1380 addx %g0,%g0,c_1 != | |
| 1381 addcc c_2,t_1,c_2 | |
| 1382 addxcc c_3,t_2,c_3 | |
| 1383 addx c_1,%g0,c_1 != | |
| 1384 st c_2,rp(1) !r[1]=c2; | |
| 1385 /* r[2]: add a[2] x a[0] twice and a[1] x a[1] once; c_2 is restarted from the carry */ | |
| 1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); | |
| 1387 addcc c_3,t_1,c_3 | |
| 1388 rd %y,t_2 != | |
| 1389 addxcc c_1,t_2,c_1 | |
| 1390 addx %g0,%g0,c_2 | |
| 1391 addcc c_3,t_1,c_3 | |
| 1392 addxcc c_1,t_2,c_1 != | |
| 1393 addx c_2,%g0,c_2 | |
| 1394 ld ap(3),a_3 | |
| 1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); | |
| 1396 addcc c_3,t_1,c_3 != | |
| 1397 rd %y,t_2 | |
| 1398 addxcc c_1,t_2,c_1 | |
| 1399 st c_3,rp(2) !r[2]=c3; | |
| 1400 addx c_2,%g0,c_2 != | |
| 1401 /* r[3]: add a[0] x a[3] twice and a[1] x a[2] twice; c_3 is restarted from the carry */ | |
| 1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); | |
| 1403 addcc c_1,t_1,c_1 | |
| 1404 rd %y,t_2 | |
| 1405 addxcc c_2,t_2,c_2 != | |
| 1406 addx %g0,%g0,c_3 | |
| 1407 addcc c_1,t_1,c_1 | |
| 1408 addxcc c_2,t_2,c_2 | |
| 1409 addx c_3,%g0,c_3 != | |
| 1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); | |
| 1411 addcc c_1,t_1,c_1 | |
| 1412 rd %y,t_2 | |
| 1413 addxcc c_2,t_2,c_2 != | |
| 1414 addx c_3,%g0,c_3 | |
| 1415 addcc c_1,t_1,c_1 | |
| 1416 addxcc c_2,t_2,c_2 | |
| 1417 addx c_3,%g0,c_3 != | |
| 1418 st c_1,rp(3) !r[3]=c1; | |
| 1419 /* r[4]: add a[3] x a[1] twice and a[2] x a[2] once; c_1 is restarted from the carry */ | |
| 1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); | |
| 1421 addcc c_2,t_1,c_2 | |
| 1422 rd %y,t_2 != | |
| 1423 addxcc c_3,t_2,c_3 | |
| 1424 addx %g0,%g0,c_1 | |
| 1425 addcc c_2,t_1,c_2 | |
| 1426 addxcc c_3,t_2,c_3 != | |
| 1427 addx c_1,%g0,c_1 | |
| 1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); | |
| 1429 addcc c_2,t_1,c_2 | |
| 1430 rd %y,t_2 != | |
| 1431 addxcc c_3,t_2,c_3 | |
| 1432 addx c_1,%g0,c_1 | |
| 1433 st c_2,rp(4) !r[4]=c2; | |
| 1434 /* r[5]: add a[2] x a[3] twice; c_2 is restarted from the carry */ | |
| 1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2); | |
| 1436 addcc c_3,t_1,c_3 | |
| 1437 rd %y,t_2 | |
| 1438 addxcc c_1,t_2,c_1 | |
| 1439 addx %g0,%g0,c_2 != | |
| 1440 addcc c_3,t_1,c_3 | |
| 1441 addxcc c_1,t_2,c_1 | |
| 1442 st c_3,rp(5) !r[5]=c3; | |
| 1443 addx c_2,%g0,c_2 != | |
| 1444 /* r[6], r[7]: add a[3] x a[3] once; no third carry word is collected after the last addxcc */ | |
| 1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3); | |
| 1446 addcc c_1,t_1,c_1 | |
| 1447 rd %y,t_2 | |
| 1448 addxcc c_2,t_2,c_2 != | |
| 1449 st c_1,rp(6) !r[6]=c1; | |
| 1450 st c_2,rp(7) !r[7]=c2; | |
| 1451 /* restore runs in the ret delay slot and puts %g0+%g0 (zero) into %o0 of the caller */ | |
| 1452 ret | |
| 1453 restore %g0,%g0,%o0 | |
| 1454 | |
| 1455 .type bn_sqr_comba4,#function | |
| 1456 .size bn_sqr_comba4,(.-bn_sqr_comba4) | |
| 1457 | |
| 1458 .align 32 | |
| OLD | NEW |