openssl/crypto/md5/asm/md5-ia64.S - Issue 2072073002: Delete bundled copy of OpenSSL and replace with README.

Side by Side Diff: openssl/crypto/md5/asm/md5-ia64.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master

Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /* Copyright (c) 2005 Hewlett-Packard Development Company, L.P.

2

3 Permission is hereby granted, free of charge, to any person obtaining

4 a copy of this software and associated documentation files (the

5 "Software"), to deal in the Software without restriction, including

6 without limitation the rights to use, copy, modify, merge, publish,

7 distribute, sublicense, and/or sell copies of the Software, and to

8 permit persons to whom the Software is furnished to do so, subject to

9 the following conditions:

10

11 The above copyright notice and this permission notice shall be

12 included in all copies or substantial portions of the Software.

13

14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

18 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

19 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

21

22 // Common registers are assigned as follows:

23 //

24 // COMMON

25 //

26 // t0 Const Tbl Ptr TPtr

27 // t1 Round Constant TRound

28 // t4 Block residual LenResid

29 // t5 Residual Data DTmp

30 //

31 // {in,out}0 Block 0 Cycle RotateM0

32 // {in,out}1 Block Value 12 M12

33 // {in,out}2 Block Value 8 M8

34 // {in,out}3 Block Value 4 M4

35 // {in,out}4 Block Value 0 M0

36 // {in,out}5 Block 1 Cycle RotateM1

37 // {in,out}6 Block Value 13 M13

38 // {in,out}7 Block Value 9 M9

39 // {in,out}8 Block Value 5 M5

40 // {in,out}9 Block Value 1 M1

41 // {in,out}10 Block 2 Cycle RotateM2

42 // {in,out}11 Block Value 14 M14

43 // {in,out}12 Block Value 10 M10

44 // {in,out}13 Block Value 6 M6

45 // {in,out}14 Block Value 2 M2

46 // {in,out}15 Block 3 Cycle RotateM3

47 // {in,out}16 Block Value 15 M15

48 // {in,out}17 Block Value 11 M11

49 // {in,out}18 Block Value 7 M7

50 // {in,out}19 Block Value 3 M3

51 // {in,out}20 Scratch Z

52 // {in,out}21 Scratch Y

53 // {in,out}22 Scratch X

54 // {in,out}23 Scratch W

55 // {in,out}24 Digest A A

56 // {in,out}25 Digest B B

57 // {in,out}26 Digest C C

58 // {in,out}27 Digest D D

59 // {in,out}28 Active Data Ptr DPtr

60 // in28 Dummy Value -

61 // out28 Dummy Value -

62 // bt0 Coroutine Link QUICK_RTN

63 //

64 /// These predicates are used for computing the padding block(s) and

65 /// are shared between the driver and digest co-routines

66 //

67 // pt0 Extra Pad Block pExtra

68 // pt1 Load next word pLoad

69 // pt2 Skip next word pSkip

70 // pt3 Search for Pad pNoPad

71 // pt4 Pad Word 0 pPad0

72 // pt5 Pad Word 1 pPad1

73 // pt6 Pad Word 2 pPad2

74 // pt7 Pad Word 3 pPad3

75

76 #define DTmp r19

77 #define LenResid r18

78 #define QUICK_RTN b6

79 #define TPtr r14

80 #define TRound r15

81 #define pExtra p6

82 #define pLoad p7

83 #define pNoPad p9

84 #define pPad0 p10

85 #define pPad1 p11

86 #define pPad2 p12

87 #define pPad3 p13

88 #define pSkip p8

89

90 #define A_ out24

91 #define B_ out25

92 #define C_ out26

93 #define D_ out27

94 #define DPtr_ out28

95 #define M0_ out4

96 #define M1_ out9

97 #define M10_ out12

98 #define M11_ out17

99 #define M12_ out1

100 #define M13_ out6

101 #define M14_ out11

102 #define M15_ out16

103 #define M2_ out14

104 #define M3_ out19

105 #define M4_ out3

106 #define M5_ out8

107 #define M6_ out13

108 #define M7_ out18

109 #define M8_ out2

110 #define M9_ out7

111 #define RotateM0_ out0

112 #define RotateM1_ out5

113 #define RotateM2_ out10

114 #define RotateM3_ out15

115 #define W_ out23

116 #define X_ out22

117 #define Y_ out21

118 #define Z_ out20

119

120 #define A in24

121 #define B in25

122 #define C in26

123 #define D in27

124 #define DPtr in28

125 #define M0 in4

126 #define M1 in9

127 #define M10 in12

128 #define M11 in17

129 #define M12 in1

130 #define M13 in6

131 #define M14 in11

132 #define M15 in16

133 #define M2 in14

134 #define M3 in19

135 #define M4 in3

136 #define M5 in8

137 #define M6 in13

138 #define M7 in18

139 #define M8 in2

140 #define M9 in7

141 #define RotateM0 in0

142 #define RotateM1 in5

143 #define RotateM2 in10

144 #define RotateM3 in15

145 #define W in23

146 #define X in22

147 #define Y in21

148 #define Z in20

149

150 /* register stack configuration for md5_block_asm_data_order(): */

151 #define MD5_NINP 3

152 #define MD5_NLOC 0

153 #define MD5_NOUT 29

154 #define MD5_NROT 0

155

156 /* register stack configuration for helpers: */

157 #define _NINPUTS MD5_NOUT

158 #define _NLOCALS 0

159 #define _NOUTPUT 0

160 #define _NROTATE 24 /* this must be <= _NINPUTS */

161

162 #if defined(_HPUX_SOURCE) && !defined(_LP64)

163 #define ADDP addp4

164 #else

165 #define ADDP add

166 #endif

167

168 #if defined(_HPUX_SOURCE) || defined(B_ENDIAN)

169 #define HOST_IS_BIG_ENDIAN /* enables the rum/sum psr.be switches in the driver */

170 #endif

171

172 // Macros for getting the left and right portions of little-endian words

173

174 #define GETLW(dst, src, align) dep.z dst = src, 32 - 8 * align, 8 * align

175 #define GETRW(dst, src, align) extr.u dst = src, 8 * align, 32 - 8 * align

176

177 // MD5 driver

178 //

179 // Reads an input block, then calls the digest block

180 // subroutine and adds the results to the accumulated

181 // digest. It allocates MD5_NOUT (29) outs which the subroutine

182 // uses as its inputs and rotating

183 // registers. Initializes the round constant pointer and

184 // takes care of saving/restoring ar.lc

185 //

186 /// INPUT

187 //

188 // in0 Context Ptr CtxPtr0

189 // in1 Input Data Ptr DPtrIn

190 // in2 Integral Blocks BlockCount

191 // rp Return Address -

192 //

193 /// CODE

194 //

195 // v2 Input Align InAlign

196 // t0 Shared w/digest -

197 // t1 Shared w/digest -

198 // t2 Shared w/digest -

199 // t3 Shared w/digest -

200 // t4 Shared w/digest -

201 // t5 Shared w/digest -

202 // t6 PFS Save PFSSave

203 // t7 ar.lc Save LCSave

204 // t8 Saved PR PRSave

205 // t9 2nd CtxPtr CtxPtr1

206 // t10 Table Base CTable

207 // t11 Table[0] CTable0

208 // t13 Accumulator A AccumA

209 // t14 Accumulator B AccumB

210 // t15 Accumulator C AccumC

211 // t16 Accumulator D AccumD

212 // pt0 Shared w/digest -

213 // pt1 Shared w/digest -

214 // pt2 Shared w/digest -

215 // pt3 Shared w/digest -

216 // pt4 Shared w/digest -

217 // pt5 Shared w/digest -

218 // pt6 Shared w/digest -

219 // pt7 Shared w/digest -

220 // pt8 Not Aligned pOff

221 // pt8 Blocks Left pAgain

222

223 #define AccumA r27

224 #define AccumB r28

225 #define AccumC r29

226 #define AccumD r30

227 #define CTable r24

228 #define CTable0 r25

229 #define CtxPtr0 in0

230 #define CtxPtr1 r23

231 #define DPtrIn in1

232 #define BlockCount in2

233 #define InAlign r10

234 #define LCSave r21

235 #define PFSSave r20

236 #define PRSave r22

237 #define pAgain p63

238 #define pOff p63

239

240 .text

241

242 /* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num)

243

244 where:

245 c: a pointer to a structure of this type:

246

247 typedef struct MD5state_st

248 {

249 MD5_LONG A,B,C,D;

250 MD5_LONG Nl,Nh;

251 MD5_LONG data[MD5_LBLOCK];

252 unsigned int num;

253 }

254 MD5_CTX;

255

256 data: a pointer to the input data (may be misaligned)

257 num: the number of 64-byte (16-word) blocks to hash (i.e., the length

258 of DATA is 64*NUM).

259

260 */

261

262 .type md5_block_asm_data_order, @function

263 .global md5_block_asm_data_order

264 .align 32

265 .proc md5_block_asm_data_order // driver: feeds BlockCount input blocks to the digest co-routines and accumulates A..D

266 md5_block_asm_data_order:

267 .md5_block:

268 .prologue

269 { .mmi

270 .save ar.pfs, PFSSave

271 alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT

272 ADDP CtxPtr1 = 8, CtxPtr0 // CtxPtr1 = ctx + 8 (digest word C)

273 mov CTable = ip // address of this bundle (.md5_block)

274 }

275 { .mmi

276 ADDP DPtrIn = 0, DPtrIn

277 ADDP CtxPtr0 = 0, CtxPtr0

278 .save ar.lc, LCSave

279 mov LCSave = ar.lc

280 }

281 ;;

282 { .mmi

283 add CTable = .md5_tbl_data_order#-.md5_block#, CTable // CTable = address of .md5_tbl_data_order

284 and InAlign = 0x3, DPtrIn // InAlign = low two bits of the data pointer

285 }

286

287 { .mmi

288 ld4 AccumA = [CtxPtr0], 4

289 ld4 AccumC = [CtxPtr1], 4

290 .save pr, PRSave

291 mov PRSave = pr

292 .body

293 }

294 ;;

295 { .mmi

296 ld4 AccumB = [CtxPtr0]

297 ld4 AccumD = [CtxPtr1]

298 dep DPtr_ = 0, DPtrIn, 0, 2 // DPtr_ = data pointer with the low two bits cleared

299 } ;;

300 #ifdef HOST_IS_BIG_ENDIAN

301 rum psr.be;; // switch to little-endian

302 #endif

303 { .mmb

304 ld4 CTable0 = [CTable], 4 // CTable0 = first table word; CTable now points at the second

305 cmp.ne pOff, p0 = 0, InAlign

306 (pOff) br.cond.spnt.many .md5_unaligned // misaligned input takes the realigning path

307 } ;;

308

309 // The FF load/compute loop rotates values three times, so that

310 // loading into M12 here produces the M0 value, M13 -> M1, etc.

311

312 .md5_block_loop0:

313 { .mmi

314 ld4 M12_ = [DPtr_], 4

315 mov TPtr = CTable

316 mov TRound = CTable0

317 } ;;

318 { .mmi

319 ld4 M13_ = [DPtr_], 4

320 mov A_ = AccumA

321 mov B_ = AccumB

322 } ;;

323 { .mmi

324 ld4 M14_ = [DPtr_], 4

325 mov C_ = AccumC

326 mov D_ = AccumD

327 } ;;

328 { .mmb

329 ld4 M15_ = [DPtr_], 4

330 add BlockCount = -1, BlockCount // one block consumed

331 br.call.sptk.many QUICK_RTN = md5_digest_block0

332 } ;;

333

334 // Now, we add the new digest values and do some clean-up

335 // before checking if there's another full block to process

336

337 { .mmi

338 add AccumA = AccumA, A_

339 add AccumB = AccumB, B_

340 cmp.ne pAgain, p0 = 0, BlockCount // more blocks left?

341 }

342 { .mib

343 add AccumC = AccumC, C_

344 add AccumD = AccumD, D_

345 (pAgain) br.cond.dptk.many .md5_block_loop0

346 } ;;

347

348 .md5_exit:

349 #ifdef HOST_IS_BIG_ENDIAN

350 sum psr.be;; // switch back to big-endian mode

351 #endif

352 { .mmi

353 st4 [CtxPtr0] = AccumB, -4 // store B, then step back to A

354 st4 [CtxPtr1] = AccumD, -4 // store D, then step back to C

355 mov pr = PRSave, 0x1ffff ;;

356 }

357 { .mmi

358 st4 [CtxPtr0] = AccumA

359 st4 [CtxPtr1] = AccumC

360 mov ar.lc = LCSave

361 } ;;

362 { .mib

363 mov ar.pfs = PFSSave

364 br.ret.sptk.few rp

365 } ;;

366

367 #define MD5UNALIGNED(offset) /* block loop for input whose address is offset bytes past a 4-byte boundary */ \

368 .md5_process##offset: \

369 { .mib ; \

370 nop 0x0 ; \

371 GETRW(DTmp, DTmp, offset) ; \

372 } ;; \

373 .md5_block_loop##offset: \

374 { .mmi ; \

375 ld4 Y_ = [DPtr_], 4 ; \

376 mov TPtr = CTable ; \

377 mov TRound = CTable0 ; \

378 } ;; \

379 { .mmi ; \

380 ld4 M13_ = [DPtr_], 4 ; \

381 mov A_ = AccumA ; \

382 mov B_ = AccumB ; \

383 } ;; \

384 { .mii ; \

385 ld4 M14_ = [DPtr_], 4 ; \

386 GETLW(W_, Y_, offset) ; \

387 mov C_ = AccumC ; \

388 } \

389 { .mmi ; \

390 mov D_ = AccumD ;; \

391 or M12_ = W_, DTmp ; \

392 GETRW(DTmp, Y_, offset) ; \

393 } \

394 { .mib ; \

395 ld4 M15_ = [DPtr_], 4 ; \

396 add BlockCount = -1, BlockCount ; \

397 br.call.sptk.many QUICK_RTN = md5_digest_block##offset; \

398 } ;; \

399 { .mmi ; \

400 add AccumA = AccumA, A_ ; \

401 add AccumB = AccumB, B_ ; \

402 cmp.ne pAgain, p0 = 0, BlockCount ; \

403 } \

404 { .mib ; \

405 add AccumC = AccumC, C_ ; \

406 add AccumD = AccumD, D_ ; \

407 (pAgain) br.cond.dptk.many .md5_block_loop##offset ; \

408 } ;; \

409 { .mib ; \

410 nop 0x0 ; \

411 nop 0x0 ; \

412 br.cond.sptk.many .md5_exit ; \

413 } ;;

414

415 .align 32

416 .md5_unaligned:

417 //

418 // Because variable shifts are expensive, we special case each of

419 // the four alignments. In practice, this won't hurt too much

420 // since only one working set of code will be loaded.

421 //

422 { .mib

423 ld4 DTmp = [DPtr_], 4 // prime DTmp with the first aligned word

424 cmp.eq pOff, p0 = 1, InAlign

425 (pOff) br.cond.dpnt.many .md5_process1

426 } ;;

427 { .mib

428 cmp.eq pOff, p0 = 2, InAlign

429 nop 0x0

430 (pOff) br.cond.dpnt.many .md5_process2

431 } ;;

432 MD5UNALIGNED(3) // fall-through case: InAlign is neither 1 nor 2

433 MD5UNALIGNED(1)

434 MD5UNALIGNED(2)

435

436 .endp md5_block_asm_data_order

437

438

439 // MD5 Perform the F function and load

440 //

441 // Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values,

442 // computes the FF() round of functions, then branches to the common

443 // digest code to finish up with GG(), HH, and II().

444 //

445 // INPUT

446 //

447 // rp Return Address -

448 //

449 // CODE

450 //

451 // v0 PFS bit bucket PFS

452 // v1 Loop Trip Count LTrip

453 // pt0 Load next word pMore

454

455 /* For F round: */

456 #define LTrip r9

457 #define PFS r8

458 #define pMore p6

459

460 /* For GHI rounds: */

461 #define T r9

462 #define U r10

463 #define V r11

464

465 #define COMPUTE(a, b, s, M, R) /* a = b + (Z rotated left by s, valid in the low 32 bits); R = M */ \

466 { \

467 .mii ; \

468 ld4 TRound = [TPtr], 4 ; /* fetch the next table word and advance TPtr */ \

469 dep.z Y = Z, 32, 32 ;; /* Y = low 32 bits of Z moved to the high half */ \

470 shrp Z = Z, Y, 64 - s ; /* funnel shift of Z:Y implements the 32-bit rotate */ \

471 } ;; \

472 { \

473 .mmi ; \

474 add a = Z, b ; \

475 mov R = M ; \

476 nop 0x0 ; \

477 } ;;

478

479 #define LOOP(a, b, s, M, R, label) /* same work as COMPUTE, then br.ctop back to label */ \

480 { .mii ; \

481 ld4 TRound = [TPtr], 4 ; \

482 dep.z Y = Z, 32, 32 ;; \

483 shrp Z = Z, Y, 64 - s ; \

484 } ;; \

485 { .mib ; \

486 add a = Z, b ; \

487 mov R = M ; \

488 br.ctop.sptk.many label ; \

489 } ;;

490

491 // G(B, C, D) = (B & D) | (C & ~D)

492

493 #define G(a, b, c, d, M) /* Z = M + TRound + a + G(b, c, d); clobbers X and Y */ \

494 { .mmi ; \

495 add Z = M, TRound ; \

496 and Y = b, d ; \

497 andcm X = c, d ; \

498 } ;; \

499 { .mii ; \

500 add Z = Z, a ; \

501 or Y = Y, X ;; \

502 add Z = Z, Y ; \

503 } ;;

504

505 // H(B, C, D) = B ^ C ^ D

506

507 #define H(a, b, c, d, M) /* Z = M + TRound + a + H(b, c, d); clobbers Y */ \

508 { .mmi ; \

509 add Z = M, TRound ; \

510 xor Y = b, c ; \

511 nop 0x0 ; \

512 } ;; \

513 { .mii ; \

514 add Z = Z, a ; \

515 xor Y = Y, d ;; \

516 add Z = Z, Y ; \

517 } ;;

518

519 // I(B, C, D) = C ^ (B | ~D)

520 //

521 // However, since we have an andcm operator, we use the fact that

522 //

523 // Y ^ Z == ~Y ^ ~Z

524 //

525 // to rewrite the expression as

526 //

527 // I(B, C, D) = ~C ^ (~B & D)

528

529 #define I(a, b, c, d, M) /* Z = M + TRound + a + I(b, c, d); clobbers X and Y */ \

530 { .mmi ; \

531 add Z = M, TRound ; \

532 andcm Y = d, b ; /* Y = d & ~b */ \

533 andcm X = -1, c ; /* X = ~c */ \

534 } ;; \

535 { .mii ; \

536 add Z = Z, a ; \

537 xor Y = Y, X ;; \

538 add Z = Z, Y ; \

539 } ;;

540

541 #define GG4(label) /* four G steps (rotates 5, 9, 14, 20); the last one branches back to label */ \

542 G(A, B, C, D, M0) \

543 COMPUTE(A, B, 5, M0, RotateM0) \

544 G(D, A, B, C, M1) \

545 COMPUTE(D, A, 9, M1, RotateM1) \

546 G(C, D, A, B, M2) \

547 COMPUTE(C, D, 14, M2, RotateM2) \

548 G(B, C, D, A, M3) \

549 LOOP(B, C, 20, M3, RotateM3, label)

550

551 #define HH4(label) /* four H steps (rotates 4, 11, 16, 23); the last one branches back to label */ \

552 H(A, B, C, D, M0) \

553 COMPUTE(A, B, 4, M0, RotateM0) \

554 H(D, A, B, C, M1) \

555 COMPUTE(D, A, 11, M1, RotateM1) \

556 H(C, D, A, B, M2) \

557 COMPUTE(C, D, 16, M2, RotateM2) \

558 H(B, C, D, A, M3) \

559 LOOP(B, C, 23, M3, RotateM3, label)

560

561 #define II4(label) /* four I steps (rotates 6, 10, 15, 21); the last one branches back to label */ \

562 I(A, B, C, D, M0) \

563 COMPUTE(A, B, 6, M0, RotateM0) \

564 I(D, A, B, C, M1) \

565 COMPUTE(D, A, 10, M1, RotateM1) \

566 I(C, D, A, B, M2) \

567 COMPUTE(C, D, 15, M2, RotateM2) \

568 I(B, C, D, A, M3) \

569 LOOP(B, C, 21, M3, RotateM3, label)

570

571 #define FFLOAD(a, b, c, d, M, N, s) /* F step: a = b + rotate(a + ((c & b) | (d & ~b)) + M + TRound, s); loads N when pMore */ \

572 { .mii ; \

573 (pMore) ld4 N = [DPtr], 4 ; /* next input word, only while pMore is set */ \

574 add Z = M, TRound ; \

575 and Y = c, b ; \

576 } \

577 { .mmi ; \

578 andcm X = d, b ;; /* X = d & ~b */ \

579 add Z = Z, a ; \

580 or Y = Y, X ; \

581 } ;; \

582 { .mii ; \

583 ld4 TRound = [TPtr], 4 ; /* fetch the next table word and advance TPtr */ \

584 add Z = Z, Y ;; \

585 dep.z Y = Z, 32, 32 ; \

586 } ;; \

587 { .mii ; \

588 nop 0x0 ; \

589 shrp Z = Z, Y, 64 - s ;; /* rotate left by s, valid in the low 32 bits */ \

590 add a = Z, b ; \

591 } ;;

592

593 #define FFLOOP(a, b, c, d, M, N, s, dest) /* FFLOAD step plus loop control: refreshes pMore from LTrip and br.ctop to dest */ \

594 { .mii ; \

595 (pMore) ld4 N = [DPtr], 4 ; \

596 add Z = M, TRound ; \

597 and Y = c, b ; \

598 } \

599 { .mmi ; \

600 andcm X = d, b ;; \

601 add Z = Z, a ; \

602 or Y = Y, X ; \

603 } ;; \

604 { .mii ; \

605 ld4 TRound = [TPtr], 4 ; \

606 add Z = Z, Y ;; \

607 dep.z Y = Z, 32, 32 ; \

608 } ;; \

609 { .mii ; \

610 nop 0x0 ; \

611 shrp Z = Z, Y, 64 - s ;; \

612 add a = Z, b ; \

613 } \

614 { .mib ; \

615 cmp.ne pMore, p0 = 0, LTrip ; /* pMore = (LTrip != 0), tested before the decrement below */ \

616 add LTrip = -1, LTrip ; \

617 br.ctop.dptk.many dest ; \

618 } ;;

619

620 .type md5_digest_block0, @function

621 .align 32

622

623 .proc md5_digest_block0 // F round for word-aligned input; entered by br.call from the driver

624 .prologue

625 md5_digest_block0:

626 .altrp QUICK_RTN

627 .body

628 { .mmi

629 alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE

630 mov LTrip = 2 // pMore stays set for the first three trips

631 mov ar.lc = 3 // br.ctop runs the FF body four times

632 } ;;

633 { .mii

634 cmp.eq pMore, p0 = r0, r0 // pMore starts out true

635 mov ar.ec = 0

636 nop 0x0

637 } ;;

638

639 .md5_FF_round0:

640 FFLOAD(A, B, C, D, M12, RotateM0, 7)

641 FFLOAD(D, A, B, C, M13, RotateM1, 12)

642 FFLOAD(C, D, A, B, M14, RotateM2, 17)

643 FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0)

644 //

645 // !!! Fall through to md5_digest_GHI

646 //

647 .endp md5_digest_block0

648

649 .type md5_digest_GHI, @function

650 .align 32

651

652 .proc md5_digest_GHI // G, H and I rounds of one block; returns through QUICK_RTN

653 .prologue

654 .regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE

655 md5_digest_GHI:

656 .altrp QUICK_RTN

657 .body

658 //

659 // The following sequence shuffles the block constants round for the

660 // next round:

661 //

662 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

663 // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12

664 //

665 { .mmi

666 mov Z = M0 // park the values that are overwritten before they are read

667 mov Y = M15

668 mov ar.lc = 3 // four trips through the GG4 loop

669 }

670 { .mmi

671 mov X = M2

672 mov W = M9

673 mov V = M4

674 } ;;

675

676 { .mmi

677 mov M0 = M1

678 mov M15 = M12

679 mov ar.ec = 1

680 }

681 { .mmi

682 mov M2 = M11

683 mov M9 = M14

684 mov M4 = M5

685 } ;;

686

687 { .mmi

688 mov M1 = M6

689 mov M12 = M13

690 mov U = M3

691 }

692 { .mmi

693 mov M11 = M8

694 mov M14 = M7

695 mov M5 = M10

696 } ;;

697

698 { .mmi

699 mov M6 = Y

700 mov M13 = X

701 mov M3 = Z

702 }

703 { .mmi

704 mov M8 = W

705 mov M7 = V

706 mov M10 = U

707 } ;;

708

709 .md5_GG_round:

710 GG4(.md5_GG_round)

711

712 // The following sequence shuffles the block constants round for the

713 // next round:

714 //

715 // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12

716 // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2

717

718 { .mmi

719 mov Z = M0

720 mov Y = M1

721 mov ar.lc = 3 // four trips through the HH4 loop

722 }

723 { .mmi

724 mov X = M3

725 mov W = M5

726 mov V = M6

727 } ;;

728

729 { .mmi

730 mov M0 = M4

731 mov M1 = M11

732 mov ar.ec = 1

733 }

734 { .mmi

735 mov M3 = M9

736 mov U = M8

737 mov T = M13

738 } ;;

739

740 { .mmi

741 mov M4 = Z

742 mov M11 = Y

743 mov M5 = M7

744 }

745 { .mmi

746 mov M6 = M14

747 mov M8 = M12

748 mov M13 = M15

749 } ;;

750

751 { .mmi

752 mov M7 = W

753 mov M14 = V

754 nop 0x0

755 }

756 { .mmi

757 mov M9 = X

758 mov M12 = U

759 mov M15 = T

760 } ;;

761

762 .md5_HH_round:

763 HH4(.md5_HH_round)

764

765 // The following sequence shuffles the block constants round for the

766 // next round:

767 //

768 // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2

769 // 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9

770

771 { .mmi

772 mov Z = M0

773 mov Y = M15

774 mov ar.lc = 3 // four trips through the II4 loop

775 }

776 { .mmi

777 mov X = M10

778 mov W = M1

779 mov V = M4

780 } ;;

781

782 { .mmi

783 mov M0 = M9

784 mov M15 = M12

785 mov ar.ec = 1

786 }

787 { .mmi

788 mov M10 = M11

789 mov M1 = M6

790 mov M4 = M13

791 } ;;

792

793 { .mmi

794 mov M9 = M14

795 mov M12 = M5

796 mov U = M3

797 }

798 { .mmi

799 mov M11 = M8

800 mov M6 = M7

801 mov M13 = M2

802 } ;;

803

804 { .mmi

805 mov M14 = Y

806 mov M5 = X

807 mov M3 = Z

808 }

809 { .mmi

810 mov M8 = W

811 mov M7 = V

812 mov M2 = U

813 } ;;

814

815 .md5_II_round:

816 II4(.md5_II_round)

817

818 { .mib

819 nop 0x0

820 nop 0x0

821 br.ret.sptk.many QUICK_RTN // back to the br.call site in the driver

822 } ;;

823

824 .endp md5_digest_GHI

825

826 #define FFLOADU(a, b, c, d, M, P, N, s, offset) /* FFLOAD step that also realigns word P using the carry in DTmp */ \

827 { .mii ; \

828 (pMore) ld4 N = [DPtr], 4 ; \

829 add Z = M, TRound ; \

830 and Y = c, b ; \

831 } \

832 { .mmi ; \

833 andcm X = d, b ;; \

834 add Z = Z, a ; \

835 or Y = Y, X ; \

836 } ;; \

837 { .mii ; \

838 ld4 TRound = [TPtr], 4 ; \

839 GETLW(W, P, offset) ; /* W = left portion of P */ \

840 add Z = Z, Y ; \

841 } ;; \

842 { .mii ; \

843 or W = W, DTmp ; /* merge with the carry from the previous word */ \

844 dep.z Y = Z, 32, 32 ;; \

845 shrp Z = Z, Y, 64 - s ; \

846 } ;; \

847 { .mii ; \

848 add a = Z, b ; \

849 GETRW(DTmp, P, offset) ; /* new carry = right portion of P */ \

850 mov P = W ; /* P now holds the realigned word */ \

851 } ;;

852

853 #define FFLOOPU(a, b, c, d, M, P, N, s, offset) /* FFLOADU step with the realignment gated by pMore, plus loop control */ \

854 { .mii ; \

855 (pMore) ld4 N = [DPtr], 4 ; \

856 add Z = M, TRound ; \

857 and Y = c, b ; \

858 } \

859 { .mmi ; \

860 andcm X = d, b ;; \

861 add Z = Z, a ; \

862 or Y = Y, X ; \

863 } ;; \

864 { .mii ; \

865 ld4 TRound = [TPtr], 4 ; \

866 (pMore) GETLW(W, P, offset) ; \

867 add Z = Z, Y ; \

868 } ;; \

869 { .mii ; \

870 (pMore) or W = W, DTmp ; \

871 dep.z Y = Z, 32, 32 ;; \

872 shrp Z = Z, Y, 64 - s ; \

873 } ;; \

874 { .mii ; \

875 add a = Z, b ; \

876 (pMore) GETRW(DTmp, P, offset) ; \

877 (pMore) mov P = W ; \

878 } \

879 { .mib ; \

880 cmp.ne pMore, p0 = 0, LTrip ; /* pMore = (LTrip != 0), tested before the decrement below */ \

881 add LTrip = -1, LTrip ; \

882 br.ctop.sptk.many .md5_FF_round##offset ; \

883 } ;;

884

885 #define MD5FBLOCK(offset) /* emits md5_digest_block<offset>: F round with realignment, then branches to md5_digest_GHI */ \

886 .type md5_digest_block##offset, @function ; \

887 \

888 .align 32 ; \

889 .proc md5_digest_block##offset ; \

890 .prologue ; \

891 .altrp QUICK_RTN ; \

892 .body ; \

893 md5_digest_block##offset: \

894 { .mmi ; \

895 alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ; \

896 mov LTrip = 2 ; \

897 mov ar.lc = 3 ; \

898 } ;; \

899 { .mii ; \

900 cmp.eq pMore, p0 = r0, r0 ; /* pMore starts out true */ \

901 mov ar.ec = 0 ; \

902 nop 0x0 ; \

903 } ;; \

904 \

905 .pred.rel "mutex", pLoad, pSkip ; \

906 .md5_FF_round##offset: \

907 FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset) \

908 FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset) \

909 FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset) \

910 FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset) \

911 \

912 { .mib ; \

913 nop 0x0 ; \

914 nop 0x0 ; \

915 br.cond.sptk.many md5_digest_GHI ; \

916 } ;; \

917 .endp md5_digest_block##offset

918

919 MD5FBLOCK(1)

920 MD5FBLOCK(2)

921 MD5FBLOCK(3)

922

923 .align 64

924 .type md5_constants, @object

925 md5_constants:

926 .md5_tbl_data_order: // To ensure little-endian data

927 // order, code as bytes. 64 entries of 4 bytes, one per step.

928 data1 0x78, 0xa4, 0x6a, 0xd7 // 0

929 data1 0x56, 0xb7, 0xc7, 0xe8 // 1

930 data1 0xdb, 0x70, 0x20, 0x24 // 2

931 data1 0xee, 0xce, 0xbd, 0xc1 // 3

932 data1 0xaf, 0x0f, 0x7c, 0xf5 // 4

933 data1 0x2a, 0xc6, 0x87, 0x47 // 5

934 data1 0x13, 0x46, 0x30, 0xa8 // 6

935 data1 0x01, 0x95, 0x46, 0xfd // 7

936 data1 0xd8, 0x98, 0x80, 0x69 // 8

937 data1 0xaf, 0xf7, 0x44, 0x8b // 9

938 data1 0xb1, 0x5b, 0xff, 0xff // 10

939 data1 0xbe, 0xd7, 0x5c, 0x89 // 11

940 data1 0x22, 0x11, 0x90, 0x6b // 12

941 data1 0x93, 0x71, 0x98, 0xfd // 13

942 data1 0x8e, 0x43, 0x79, 0xa6 // 14

943 data1 0x21, 0x08, 0xb4, 0x49 // 15

944 data1 0x62, 0x25, 0x1e, 0xf6 // 16

945 data1 0x40, 0xb3, 0x40, 0xc0 // 17

946 data1 0x51, 0x5a, 0x5e, 0x26 // 18

947 data1 0xaa, 0xc7, 0xb6, 0xe9 // 19

948 data1 0x5d, 0x10, 0x2f, 0xd6 // 20

949 data1 0x53, 0x14, 0x44, 0x02 // 21

950 data1 0x81, 0xe6, 0xa1, 0xd8 // 22

951 data1 0xc8, 0xfb, 0xd3, 0xe7 // 23

952 data1 0xe6, 0xcd, 0xe1, 0x21 // 24

953 data1 0xd6, 0x07, 0x37, 0xc3 // 25

954 data1 0x87, 0x0d, 0xd5, 0xf4 // 26

955 data1 0xed, 0x14, 0x5a, 0x45 // 27

956 data1 0x05, 0xe9, 0xe3, 0xa9 // 28

957 data1 0xf8, 0xa3, 0xef, 0xfc // 29

958 data1 0xd9, 0x02, 0x6f, 0x67 // 30

959 data1 0x8a, 0x4c, 0x2a, 0x8d // 31

960 data1 0x42, 0x39, 0xfa, 0xff // 32

961 data1 0x81, 0xf6, 0x71, 0x87 // 33

962 data1 0x22, 0x61, 0x9d, 0x6d // 34

963 data1 0x0c, 0x38, 0xe5, 0xfd // 35

964 data1 0x44, 0xea, 0xbe, 0xa4 // 36

965 data1 0xa9, 0xcf, 0xde, 0x4b // 37

966 data1 0x60, 0x4b, 0xbb, 0xf6 // 38

967 data1 0x70, 0xbc, 0xbf, 0xbe // 39

968 data1 0xc6, 0x7e, 0x9b, 0x28 // 40

969 data1 0xfa, 0x27, 0xa1, 0xea // 41

970 data1 0x85, 0x30, 0xef, 0xd4 // 42

971 data1 0x05, 0x1d, 0x88, 0x04 // 43

972 data1 0x39, 0xd0, 0xd4, 0xd9 // 44

973 data1 0xe5, 0x99, 0xdb, 0xe6 // 45

974 data1 0xf8, 0x7c, 0xa2, 0x1f // 46

975 data1 0x65, 0x56, 0xac, 0xc4 // 47

976 data1 0x44, 0x22, 0x29, 0xf4 // 48

977 data1 0x97, 0xff, 0x2a, 0x43 // 49

978 data1 0xa7, 0x23, 0x94, 0xab // 50

979 data1 0x39, 0xa0, 0x93, 0xfc // 51

980 data1 0xc3, 0x59, 0x5b, 0x65 // 52

981 data1 0x92, 0xcc, 0x0c, 0x8f // 53

982 data1 0x7d, 0xf4, 0xef, 0xff // 54

983 data1 0xd1, 0x5d, 0x84, 0x85 // 55

984 data1 0x4f, 0x7e, 0xa8, 0x6f // 56

985 data1 0xe0, 0xe6, 0x2c, 0xfe // 57

986 data1 0x14, 0x43, 0x01, 0xa3 // 58

987 data1 0xa1, 0x11, 0x08, 0x4e // 59

988 data1 0x82, 0x7e, 0x53, 0xf7 // 60

989 data1 0x35, 0xf2, 0x3a, 0xbd // 61

990 data1 0xbb, 0xd2, 0xd7, 0x2a // 62

991 data1 0x91, 0xd3, 0x86, 0xeb // 63

992 .size md5_constants#,64*4

OLD	NEW

« no previous file with comments | « openssl/crypto/md5/asm/md5-586-mac.S ('k') | openssl/crypto/md5/asm/md5-x86_64.S » ('j') | no next file with comments »