Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Side by Side Diff: openssl/crypto/md5/asm/md5-ia64.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/md5/asm/md5-586-mac.S ('k') | openssl/crypto/md5/asm/md5-x86_64.S » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
2
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 "Software"), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
21
22 // Common registers are assigned as follows:
23 //
24 // COMMON
25 //
26 // t0 Const Tbl Ptr TPtr
27 // t1 Round Constant TRound
28 // t4 Block residual LenResid
29 // t5 Residual Data DTmp
30 //
31 // {in,out}0 Block 0 Cycle RotateM0
32 // {in,out}1 Block Value 12 M12
33 // {in,out}2 Block Value 8 M8
34 // {in,out}3 Block Value 4 M4
35 // {in,out}4 Block Value 0 M0
36 // {in,out}5 Block 1 Cycle RotateM1
37 // {in,out}6 Block Value 13 M13
38 // {in,out}7 Block Value 9 M9
39 // {in,out}8 Block Value 5 M5
40 // {in,out}9 Block Value 1 M1
41 // {in,out}10 Block 2 Cycle RotateM2
42 // {in,out}11 Block Value 14 M14
43 // {in,out}12 Block Value 10 M10
44 // {in,out}13 Block Value 6 M6
45 // {in,out}14 Block Value 2 M2
46 // {in,out}15 Block 3 Cycle RotateM3
47 // {in,out}16 Block Value 15 M15
48 // {in,out}17 Block Value 11 M11
49 // {in,out}18 Block Value 7 M7
50 // {in,out}19 Block Value 3 M3
51 // {in,out}20 Scratch Z
52 // {in,out}21 Scratch Y
53 // {in,out}22 Scratch X
54 // {in,out}23 Scratch W
55 // {in,out}24 Digest A A
56 // {in,out}25 Digest B B
57 // {in,out}26 Digest C C
58 // {in,out}27 Digest D D
59 // {in,out}28 Active Data Ptr DPtr
60 // in28 Dummy Value -
61 // out28 Dummy Value -
62 // bt0 Coroutine Link QUICK_RTN
63 //
64 /// These predicates are used for computing the padding block(s) and
65 /// are shared between the driver and digest co-routines
66 //
67 // pt0 Extra Pad Block pExtra
68 // pt1 Load next word pLoad
69 // pt2 Skip next word pSkip
70 // pt3 Search for Pad pNoPad
71 // pt4 Pad Word 0 pPad0
72 // pt5 Pad Word 1 pPad1
73 // pt6 Pad Word 2 pPad2
74 // pt7 Pad Word 3 pPad3
75
76 #define DTmp r19 /* unaligned path: leftover bytes carried from the previously loaded word */
77 #define LenResid r18 /* not referenced by any instruction visible in this file */
78 #define QUICK_RTN b6 /* branch register holding the return link for the digest helpers */
79 #define TPtr r14 /* post-incremented pointer into the round-constant table */
80 #define TRound r15 /* round constant for the step currently being computed */
81 #define pExtra p6 /* pExtra, pNoPad and pPad0-pPad3 are not used by any visible instruction */
82 #define pLoad p7 /* pLoad and pSkip appear only in a .pred.rel directive */
83 #define pNoPad p9
84 #define pPad0 p10
85 #define pPad1 p11
86 #define pPad2 p12
87 #define pPad3 p13
88 #define pSkip p8
89
90 #define A_ out24 /* names ending in '_' are the driver's "out" view of each shared register */
91 #define B_ out25
92 #define C_ out26
93 #define D_ out27
94 #define DPtr_ out28
95 #define M0_ out4
96 #define M1_ out9
97 #define M10_ out12
98 #define M11_ out17
99 #define M12_ out1
100 #define M13_ out6
101 #define M14_ out11
102 #define M15_ out16
103 #define M2_ out14
104 #define M3_ out19
105 #define M4_ out3
106 #define M5_ out8
107 #define M6_ out13
108 #define M7_ out18
109 #define M8_ out2
110 #define M9_ out7
111 #define RotateM0_ out0
112 #define RotateM1_ out5
113 #define RotateM2_ out10
114 #define RotateM3_ out15
115 #define W_ out23
116 #define X_ out22
117 #define Y_ out21
118 #define Z_ out20
119 /* Unsuffixed names: the same register numbers, seen as "in" registers by the digest helpers. */
120 #define A in24
121 #define B in25
122 #define C in26
123 #define D in27
124 #define DPtr in28
125 #define M0 in4
126 #define M1 in9
127 #define M10 in12
128 #define M11 in17
129 #define M12 in1
130 #define M13 in6
131 #define M14 in11
132 #define M15 in16
133 #define M2 in14
134 #define M3 in19
135 #define M4 in3
136 #define M5 in8
137 #define M6 in13
138 #define M7 in18
139 #define M8 in2
140 #define M9 in7
141 #define RotateM0 in0
142 #define RotateM1 in5
143 #define RotateM2 in10
144 #define RotateM3 in15
145 #define W in23
146 #define X in22
147 #define Y in21
148 #define Z in20
149
150 /* register stack configuration for md5_block_asm_data_order(): */
151 #define MD5_NINP 3 /* in0-in2: CtxPtr0, DPtrIn, BlockCount */
152 #define MD5_NLOC 0
153 #define MD5_NOUT 29 /* out0-out28; out28 is DPtr_ */
154 #define MD5_NROT 0
155
156 /* register stack configuration for helpers: */
157 #define _NINPUTS MD5_NOUT /* the driver's outs are the helpers' ins */
158 #define _NLOCALS 0
159 #define _NOUTPUT 0
160 #define _NROTATE 24 /* this must be <= _NINPUTS; covers in0-in23 (message slots and Z/Y/X/W), leaving A-D and DPtr outside the rotating region */
161
162 #if defined(_HPUX_SOURCE) && !defined(_LP64)
163 #define ADDP addp4 /* HP-UX without _LP64: pointer arguments are adjusted with addp4 */
164 #else
165 #define ADDP add
166 #endif
167
168 #if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
169 #define HOST_IS_BIG_ENDIAN /* makes the driver clear psr.be on entry and set it again on exit */
170 #endif
171
172 // Macros for getting the left and right portions of little-endian words
173 // GETLW: dst = low (8 * align) bits of src, placed at bit (32 - 8 * align); GETRW: dst = the (32 - 8 * align) bits of src starting at bit (8 * align)
174 #define GETLW(dst, src, align) dep.z dst = src, 32 - 8 * align, 8 * align
175 #define GETRW(dst, src, align) extr.u dst = src, 8 * align, 32 - 8 * align
176
177 // MD5 driver
178 //
179 // Reads an input block, then calls the digest block
180 // subroutine and adds the results to the accumulated
181 // digest. It allocates MD5_NOUT (29) outs which the subroutine
182 // uses as its inputs and rotating
183 // registers. Initializes the round constant pointer and
184 // takes care of saving/restoring ar.lc
185 //
186 /// INPUT
187 //
188 // in0 Context Ptr CtxPtr0
189 // in1 Input Data Ptr DPtrIn
190 // in2 Integral Blocks BlockCount
191 // rp Return Address -
192 //
193 /// CODE
194 //
195 // v2 Input Align InAlign
196 // t0 Shared w/digest -
197 // t1 Shared w/digest -
198 // t2 Shared w/digest -
199 // t3 Shared w/digest -
200 // t4 Shared w/digest -
201 // t5 Shared w/digest -
202 // t6 PFS Save PFSSave
203 // t7 ar.lc Save LCSave
204 // t8 Saved PR PRSave
205 // t9 2nd CtxPtr CtxPtr1
206 // t10 Table Base CTable
207 // t11 Table[0] CTable0
208 // t13 Accumulator A AccumA
209 // t14 Accumulator B AccumB
210 // t15 Accumulator C AccumC
211 // t16 Accumulator D AccumD
212 // pt0 Shared w/digest -
213 // pt1 Shared w/digest -
214 // pt2 Shared w/digest -
215 // pt3 Shared w/digest -
216 // pt4 Shared w/digest -
217 // pt5 Shared w/digest -
218 // pt6 Shared w/digest -
219 // pt7 Shared w/digest -
220 // pt8 Not Aligned pOff
221 // pt8 Blocks Left pAgain
222
223 #define AccumA r27 /* AccumA-AccumD: running digest, loaded from and stored back to the context */
224 #define AccumB r28
225 #define AccumC r29
226 #define AccumD r30
227 #define CTable r24 /* after setup: address of round constant 1 */
228 #define CTable0 r25 /* cached copy of round constant 0 */
229 #define CtxPtr0 in0
230 #define CtxPtr1 r23
231 #define DPtrIn in1
232 #define BlockCount in2
233 #define InAlign r10 /* low two bits of the data pointer; r10 is also U in the GHI shuffles */
234 #define LCSave r21
235 #define PFSSave r20
236 #define PRSave r22
237 #define pAgain p63 /* shares p63 with pOff: pOff is used for alignment dispatch, pAgain at the loop tail */
238 #define pOff p63
239
240 .text
241
242 /* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num)
243
244 where:
245 c: a pointer to a structure of this type:
246
247 typedef struct MD5state_st
248 {
249 MD5_LONG A,B,C,D;
250 MD5_LONG Nl,Nh;
251 MD5_LONG data[MD5_LBLOCK];
252 unsigned int num;
253 }
254 MD5_CTX;
255
256 data: a pointer to the input data (may be misaligned)
257 num: the number of 64-byte blocks to hash (i.e., the length
258 of DATA is 64*NUM).
259
260 */
261
262 .type md5_block_asm_data_order, @function
263 .global md5_block_asm_data_order
264 .align 32
265 .proc md5_block_asm_data_order
266 md5_block_asm_data_order:
267 .md5_block:
268 .prologue
269 { .mmi
270 .save ar.pfs, PFSSave
271 alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
272 ADDP CtxPtr1 = 8, CtxPtr0 // CtxPtr1 -> context + 8 (digest word C)
273 mov CTable = ip // address of this bundle, i.e. of .md5_block
274 }
275 { .mmi
276 ADDP DPtrIn = 0, DPtrIn
277 ADDP CtxPtr0 = 0, CtxPtr0
278 .save ar.lc, LCSave
279 mov LCSave = ar.lc // the digest helpers overwrite ar.lc
280 }
281 ;;
282 { .mmi
283 add CTable = .md5_tbl_data_order#-.md5_block#, CTable // CTable -> round-constant table
284 and InAlign = 0x3, DPtrIn // byte offset of the data within a 4-byte word
285 }
286
287 { .mmi
288 ld4 AccumA = [CtxPtr0], 4
289 ld4 AccumC = [CtxPtr1], 4
290 .save pr, PRSave
291 mov PRSave = pr
292 .body
293 }
294 ;;
295 { .mmi
296 ld4 AccumB = [CtxPtr0]
297 ld4 AccumD = [CtxPtr1]
298 dep DPtr_ = 0, DPtrIn, 0, 2 // DPtr_ = data pointer with its low two bits cleared
299 } ;;
300 #ifdef HOST_IS_BIG_ENDIAN
301 rum psr.be;; // switch to little-endian
302 #endif
303 { .mmb
304 ld4 CTable0 = [CTable], 4 // cache constant 0; CTable now points at constant 1
305 cmp.ne pOff, p0 = 0, InAlign
306 (pOff) br.cond.spnt.many .md5_unaligned
307 } ;;
308
309 // The FF load/compute loop rotates values three times, so that
310 // loading into M12 here produces the M0 value, M13 -> M1, etc.
311 // Note: the loop body runs before BlockCount is tested, so one block is always processed.
312 .md5_block_loop0:
313 { .mmi
314 ld4 M12_ = [DPtr_], 4
315 mov TPtr = CTable
316 mov TRound = CTable0
317 } ;;
318 { .mmi
319 ld4 M13_ = [DPtr_], 4
320 mov A_ = AccumA
321 mov B_ = AccumB
322 } ;;
323 { .mmi
324 ld4 M14_ = [DPtr_], 4
325 mov C_ = AccumC
326 mov D_ = AccumD
327 } ;;
328 { .mmb
329 ld4 M15_ = [DPtr_], 4
330 add BlockCount = -1, BlockCount
331 br.call.sptk.many QUICK_RTN = md5_digest_block0 // falls through into md5_digest_GHI, which returns via QUICK_RTN
332 } ;;
333
334 // Now, we add the new digest values and do some clean-up
335 // before checking if there's another full block to process
336
337 { .mmi
338 add AccumA = AccumA, A_
339 add AccumB = AccumB, B_
340 cmp.ne pAgain, p0 = 0, BlockCount
341 }
342 { .mib
343 add AccumC = AccumC, C_
344 add AccumD = AccumD, D_
345 (pAgain) br.cond.dptk.many .md5_block_loop0
346 } ;;
347
348 .md5_exit:
349 #ifdef HOST_IS_BIG_ENDIAN
350 sum psr.be;; // switch back to big-endian mode
351 #endif
352 { .mmi
353 st4 [CtxPtr0] = AccumB, -4 // CtxPtr0/CtxPtr1 were left at B and D by the entry loads
354 st4 [CtxPtr1] = AccumD, -4
355 mov pr = PRSave, 0x1ffff ;;
356 }
357 { .mmi
358 st4 [CtxPtr0] = AccumA
359 st4 [CtxPtr1] = AccumC
360 mov ar.lc = LCSave
361 } ;;
362 { .mib
363 mov ar.pfs = PFSSave
364 br.ret.sptk.few rp
365 } ;;
366 // MD5UNALIGNED(offset): block loop for data starting `offset` bytes past a 4-byte boundary. Only M12_ is merged here (from DTmp and the next aligned word); M13_-M15_ are loaded raw and merged later by md5_digest_block##offset. Ends by branching to .md5_exit.
367 #define MD5UNALIGNED(offset) \
368 .md5_process##offset: \
369 { .mib ; \
370 nop 0x0 ; \
371 GETRW(DTmp, DTmp, offset) ; \
372 } ;; \
373 .md5_block_loop##offset: \
374 { .mmi ; \
375 ld4 Y_ = [DPtr_], 4 ; \
376 mov TPtr = CTable ; \
377 mov TRound = CTable0 ; \
378 } ;; \
379 { .mmi ; \
380 ld4 M13_ = [DPtr_], 4 ; \
381 mov A_ = AccumA ; \
382 mov B_ = AccumB ; \
383 } ;; \
384 { .mii ; \
385 ld4 M14_ = [DPtr_], 4 ; \
386 GETLW(W_, Y_, offset) ; \
387 mov C_ = AccumC ; \
388 } \
389 { .mmi ; \
390 mov D_ = AccumD ;; \
391 or M12_ = W_, DTmp ; \
392 GETRW(DTmp, Y_, offset) ; \
393 } \
394 { .mib ; \
395 ld4 M15_ = [DPtr_], 4 ; \
396 add BlockCount = -1, BlockCount ; \
397 br.call.sptk.many QUICK_RTN = md5_digest_block##offset; \
398 } ;; \
399 { .mmi ; \
400 add AccumA = AccumA, A_ ; \
401 add AccumB = AccumB, B_ ; \
402 cmp.ne pAgain, p0 = 0, BlockCount ; \
403 } \
404 { .mib ; \
405 add AccumC = AccumC, C_ ; \
406 add AccumD = AccumD, D_ ; \
407 (pAgain) br.cond.dptk.many .md5_block_loop##offset ; \
408 } ;; \
409 { .mib ; \
410 nop 0x0 ; \
411 nop 0x0 ; \
412 br.cond.sptk.many .md5_exit ; \
413 } ;;
414
415 .align 32
416 .md5_unaligned:
417 //
418 // Because variable shifts are expensive, we special case each of
419 // the four alignments. In practice, this won't hurt too much
420 // since only one working set of code will be loaded.
421 //
422 { .mib
423 ld4 DTmp = [DPtr_], 4 // aligned word that contains the first data bytes
424 cmp.eq pOff, p0 = 1, InAlign
425 (pOff) br.cond.dpnt.many .md5_process1
426 } ;;
427 { .mib
428 cmp.eq pOff, p0 = 2, InAlign
429 nop 0x0
430 (pOff) br.cond.dpnt.many .md5_process2
431 } ;;
432 MD5UNALIGNED(3) /* reached by fall-through: InAlign is non-zero and neither 1 nor 2 */
433 MD5UNALIGNED(1)
434 MD5UNALIGNED(2)
435
436 .endp md5_block_asm_data_order
437
438
439 // MD5 Perform the F function and load
440 //
441 // Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values,
442 // computes the FF() round of functions, then branches to the common
443 // digest code to finish up with GG(), HH(), and II().
444 //
445 // INPUT
446 //
447 // rp Return Address -
448 //
449 // CODE
450 //
451 // v0 PFS bit bucket PFS
452 // v1 Loop Trip Count LTrip
453 // pt0 Load next word pMore
454
455 /* For F round: */
456 #define LTrip r9 /* set to 2 on entry; tested then decremented at the end of each F pass */
457 #define PFS r8 /* written by alloc only; no visible instruction reads it */
458 #define pMore p6 /* guards the loads of further input words during the F round */
459
460 /* For GHI rounds: */
461 #define T r9 /* same register as LTrip */
462 #define U r10 /* same register as the driver's InAlign */
463 #define V r11
464 // COMPUTE(a, b, s, M, R): loads the next TRound, rotates the low 32 bits of Z left by s (dep.z/shrp pair), sets a = Z + b and copies M into R.
465 #define COMPUTE(a, b, s, M, R) \
466 { \
467 .mii ; \
468 ld4 TRound = [TPtr], 4 ; \
469 dep.z Y = Z, 32, 32 ;; \
470 shrp Z = Z, Y, 64 - s ; \
471 } ;; \
472 { \
473 .mmi ; \
474 add a = Z, b ; \
475 mov R = M ; \
476 nop 0x0 ; \
477 } ;;
478 // LOOP(a, b, s, M, R, label): same work as COMPUTE, but the second bundle ends with br.ctop to `label` instead of a nop.
479 #define LOOP(a, b, s, M, R, label) \
480 { .mii ; \
481 ld4 TRound = [TPtr], 4 ; \
482 dep.z Y = Z, 32, 32 ;; \
483 shrp Z = Z, Y, 64 - s ; \
484 } ;; \
485 { .mib ; \
486 add a = Z, b ; \
487 mov R = M ; \
488 br.ctop.sptk.many label ; \
489 } ;;
490
491 // G(B, C, D) = (B & D) | (C & ~D)
492 // Leaves Z = a + M + TRound + G(b, c, d); X and Y are overwritten.
493 #define G(a, b, c, d, M) \
494 { .mmi ; \
495 add Z = M, TRound ; \
496 and Y = b, d ; \
497 andcm X = c, d ; \
498 } ;; \
499 { .mii ; \
500 add Z = Z, a ; \
501 or Y = Y, X ;; \
502 add Z = Z, Y ; \
503 } ;;
504
505 // H(B, C, D) = B ^ C ^ D
506 // Leaves Z = a + M + TRound + H(b, c, d); Y is overwritten.
507 #define H(a, b, c, d, M) \
508 { .mmi ; \
509 add Z = M, TRound ; \
510 xor Y = b, c ; \
511 nop 0x0 ; \
512 } ;; \
513 { .mii ; \
514 add Z = Z, a ; \
515 xor Y = Y, d ;; \
516 add Z = Z, Y ; \
517 } ;;
518
519 // I(B, C, D) = C ^ (B | ~D)
520 //
521 // However, since we have an andcm operator, we use the fact that
522 //
523 // Y ^ Z == ~Y ^ ~Z
524 //
525 // to rewrite the expression as
526 //
527 // I(B, C, D) = ~C ^ (~B & D)
528 // Leaves Z = a + M + TRound + I(b, c, d); X (= ~c) and Y are overwritten.
529 #define I(a, b, c, d, M) \
530 { .mmi ; \
531 add Z = M, TRound ; \
532 andcm Y = d, b ; \
533 andcm X = -1, c ; \
534 } ;; \
535 { .mii ; \
536 add Z = Z, a ; \
537 xor Y = Y, X ;; \
538 add Z = Z, Y ; \
539 } ;;
540 // GG4(label): four G steps on slots M0-M3 with rotations 5, 9, 14, 20; the last step branches back to `label` through LOOP.
541 #define GG4(label) \
542 G(A, B, C, D, M0) \
543 COMPUTE(A, B, 5, M0, RotateM0) \
544 G(D, A, B, C, M1) \
545 COMPUTE(D, A, 9, M1, RotateM1) \
546 G(C, D, A, B, M2) \
547 COMPUTE(C, D, 14, M2, RotateM2) \
548 G(B, C, D, A, M3) \
549 LOOP(B, C, 20, M3, RotateM3, label)
550 // HH4(label): four H steps on slots M0-M3 with rotations 4, 11, 16, 23; the last step branches back to `label` through LOOP.
551 #define HH4(label) \
552 H(A, B, C, D, M0) \
553 COMPUTE(A, B, 4, M0, RotateM0) \
554 H(D, A, B, C, M1) \
555 COMPUTE(D, A, 11, M1, RotateM1) \
556 H(C, D, A, B, M2) \
557 COMPUTE(C, D, 16, M2, RotateM2) \
558 H(B, C, D, A, M3) \
559 LOOP(B, C, 23, M3, RotateM3, label)
560 // II4(label): four I steps on slots M0-M3 with rotations 6, 10, 15, 21; the last step branches back to `label` through LOOP.
561 #define II4(label) \
562 I(A, B, C, D, M0) \
563 COMPUTE(A, B, 6, M0, RotateM0) \
564 I(D, A, B, C, M1) \
565 COMPUTE(D, A, 10, M1, RotateM1) \
566 I(C, D, A, B, M2) \
567 COMPUTE(C, D, 15, M2, RotateM2) \
568 I(B, C, D, A, M3) \
569 LOOP(B, C, 21, M3, RotateM3, label)
570 // FFLOAD(a, b, c, d, M, N, s): one F step, a = b + rotl(a + M + TRound + ((b & c) | (d & ~b)), s), plus a pMore-predicated load of the next input word into N and a load of the next TRound.
571 #define FFLOAD(a, b, c, d, M, N, s) \
572 { .mii ; \
573 (pMore) ld4 N = [DPtr], 4 ; \
574 add Z = M, TRound ; \
575 and Y = c, b ; \
576 } \
577 { .mmi ; \
578 andcm X = d, b ;; \
579 add Z = Z, a ; \
580 or Y = Y, X ; \
581 } ;; \
582 { .mii ; \
583 ld4 TRound = [TPtr], 4 ; \
584 add Z = Z, Y ;; \
585 dep.z Y = Z, 32, 32 ; \
586 } ;; \
587 { .mii ; \
588 nop 0x0 ; \
589 shrp Z = Z, Y, 64 - s ;; \
590 add a = Z, b ; \
591 } ;;
592 // FFLOOP(a, b, c, d, M, N, s, dest): FFLOAD followed by the loop tail: pMore is recomputed from LTrip, LTrip is decremented, and br.ctop branches to `dest`.
593 #define FFLOOP(a, b, c, d, M, N, s, dest) \
594 { .mii ; \
595 (pMore) ld4 N = [DPtr], 4 ; \
596 add Z = M, TRound ; \
597 and Y = c, b ; \
598 } \
599 { .mmi ; \
600 andcm X = d, b ;; \
601 add Z = Z, a ; \
602 or Y = Y, X ; \
603 } ;; \
604 { .mii ; \
605 ld4 TRound = [TPtr], 4 ; \
606 add Z = Z, Y ;; \
607 dep.z Y = Z, 32, 32 ; \
608 } ;; \
609 { .mii ; \
610 nop 0x0 ; \
611 shrp Z = Z, Y, 64 - s ;; \
612 add a = Z, b ; \
613 } \
614 { .mib ; \
615 cmp.ne pMore, p0 = 0, LTrip ; \
616 add LTrip = -1, LTrip ; \
617 br.ctop.dptk.many dest ; \
618 } ;;
619
620 .type md5_digest_block0, @function
621 .align 32
622 // F round for 4-byte-aligned input; called by the driver with the first four words already in M12-M15.
623 .proc md5_digest_block0
624 .prologue
625 md5_digest_block0:
626 .altrp QUICK_RTN
627 .body
628 { .mmi
629 alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
630 mov LTrip = 2 // keeps pMore set for the first three of the four passes
631 mov ar.lc = 3 // four passes through .md5_FF_round0
632 } ;;
633 { .mii
634 cmp.eq pMore, p0 = r0, r0 // pMore = 1
635 mov ar.ec = 0
636 nop 0x0
637 } ;;
638
639 .md5_FF_round0:
640 FFLOAD(A, B, C, D, M12, RotateM0, 7)
641 FFLOAD(D, A, B, C, M13, RotateM1, 12)
642 FFLOAD(C, D, A, B, M14, RotateM2, 17)
643 FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0)
644 //
645 // !!! Fall through to md5_digest_GHI
646 //
647 .endp md5_digest_block0
648
649 .type md5_digest_GHI, @function
650 .align 32
651 // G, H and I rounds; reached by fall-through from md5_digest_block0 or by branch from md5_digest_block1-3. Returns through QUICK_RTN.
652 .proc md5_digest_GHI
653 .prologue
654 .regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE
655 md5_digest_GHI:
656 .altrp QUICK_RTN
657 .body
658 //
659 // The following sequence shuffles the block constants round for the
660 // next round:
661 // (each row lists the message-word index held in M0..M15: top = before, bottom = after)
662 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
663 // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
664 //
665 { .mmi
666 mov Z = M0
667 mov Y = M15
668 mov ar.lc = 3
669 }
670 { .mmi
671 mov X = M2
672 mov W = M9
673 mov V = M4
674 } ;;
675
676 { .mmi
677 mov M0 = M1
678 mov M15 = M12
679 mov ar.ec = 1
680 }
681 { .mmi
682 mov M2 = M11
683 mov M9 = M14
684 mov M4 = M5
685 } ;;
686
687 { .mmi
688 mov M1 = M6
689 mov M12 = M13
690 mov U = M3
691 }
692 { .mmi
693 mov M11 = M8
694 mov M14 = M7
695 mov M5 = M10
696 } ;;
697
698 { .mmi
699 mov M6 = Y
700 mov M13 = X
701 mov M3 = Z
702 }
703 { .mmi
704 mov M8 = W
705 mov M7 = V
706 mov M10 = U
707 } ;;
708
709 .md5_GG_round:
710 GG4(.md5_GG_round)
711
712 // The following sequence shuffles the block constants round for the
713 // next round:
714 //
715 // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12
716 // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
717
718 { .mmi
719 mov Z = M0
720 mov Y = M1
721 mov ar.lc = 3
722 }
723 { .mmi
724 mov X = M3
725 mov W = M5
726 mov V = M6
727 } ;;
728
729 { .mmi
730 mov M0 = M4
731 mov M1 = M11
732 mov ar.ec = 1
733 }
734 { .mmi
735 mov M3 = M9
736 mov U = M8
737 mov T = M13
738 } ;;
739
740 { .mmi
741 mov M4 = Z
742 mov M11 = Y
743 mov M5 = M7
744 }
745 { .mmi
746 mov M6 = M14
747 mov M8 = M12
748 mov M13 = M15
749 } ;;
750
751 { .mmi
752 mov M7 = W
753 mov M14 = V
754 nop 0x0
755 }
756 { .mmi
757 mov M9 = X
758 mov M12 = U
759 mov M15 = T
760 } ;;
761
762 .md5_HH_round:
763 HH4(.md5_HH_round)
764
765 // The following sequence shuffles the block constants round for the
766 // next round:
767 //
768 // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2
769 // 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9
770
771 { .mmi
772 mov Z = M0
773 mov Y = M15
774 mov ar.lc = 3
775 }
776 { .mmi
777 mov X = M10
778 mov W = M1
779 mov V = M4
780 } ;;
781
782 { .mmi
783 mov M0 = M9
784 mov M15 = M12
785 mov ar.ec = 1
786 }
787 { .mmi
788 mov M10 = M11
789 mov M1 = M6
790 mov M4 = M13
791 } ;;
792
793 { .mmi
794 mov M9 = M14
795 mov M12 = M5
796 mov U = M3
797 }
798 { .mmi
799 mov M11 = M8
800 mov M6 = M7
801 mov M13 = M2
802 } ;;
803
804 { .mmi
805 mov M14 = Y
806 mov M5 = X
807 mov M3 = Z
808 }
809 { .mmi
810 mov M8 = W
811 mov M7 = V
812 mov M2 = U
813 } ;;
814
815 .md5_II_round:
816 II4(.md5_II_round)
817
818 { .mib
819 nop 0x0
820 nop 0x0
821 br.ret.sptk.many QUICK_RTN // back to the driver, which adds A_-D_ into AccumA-AccumD
822 } ;;
823
824 .endp md5_digest_GHI
825 // FFLOADU(a, b, c, d, M, P, N, s, offset): F step as in FFLOAD, and in addition turns the raw word in P into a complete message word: P = GETLW(P) | DTmp, with DTmp then refreshed from the old P via GETRW.
826 #define FFLOADU(a, b, c, d, M, P, N, s, offset) \
827 { .mii ; \
828 (pMore) ld4 N = [DPtr], 4 ; \
829 add Z = M, TRound ; \
830 and Y = c, b ; \
831 } \
832 { .mmi ; \
833 andcm X = d, b ;; \
834 add Z = Z, a ; \
835 or Y = Y, X ; \
836 } ;; \
837 { .mii ; \
838 ld4 TRound = [TPtr], 4 ; \
839 GETLW(W, P, offset) ; \
840 add Z = Z, Y ; \
841 } ;; \
842 { .mii ; \
843 or W = W, DTmp ; \
844 dep.z Y = Z, 32, 32 ;; \
845 shrp Z = Z, Y, 64 - s ; \
846 } ;; \
847 { .mii ; \
848 add a = Z, b ; \
849 GETRW(DTmp, P, offset) ; \
850 mov P = W ; \
851 } ;;
852 // FFLOOPU(a, b, c, d, M, P, N, s, offset): like FFLOADU, but the merge of P is predicated on pMore, and the macro ends with the pMore/LTrip update and br.ctop to .md5_FF_round##offset.
853 #define FFLOOPU(a, b, c, d, M, P, N, s, offset) \
854 { .mii ; \
855 (pMore) ld4 N = [DPtr], 4 ; \
856 add Z = M, TRound ; \
857 and Y = c, b ; \
858 } \
859 { .mmi ; \
860 andcm X = d, b ;; \
861 add Z = Z, a ; \
862 or Y = Y, X ; \
863 } ;; \
864 { .mii ; \
865 ld4 TRound = [TPtr], 4 ; \
866 (pMore) GETLW(W, P, offset) ; \
867 add Z = Z, Y ; \
868 } ;; \
869 { .mii ; \
870 (pMore) or W = W, DTmp ; \
871 dep.z Y = Z, 32, 32 ;; \
872 shrp Z = Z, Y, 64 - s ; \
873 } ;; \
874 { .mii ; \
875 add a = Z, b ; \
876 (pMore) GETRW(DTmp, P, offset) ; \
877 (pMore) mov P = W ; \
878 } \
879 { .mib ; \
880 cmp.ne pMore, p0 = 0, LTrip ; \
881 add LTrip = -1, LTrip ; \
882 br.ctop.sptk.many .md5_FF_round##offset ; \
883 } ;;
884 // MD5FBLOCK(offset): emits md5_digest_block##offset, the F round for input `offset` bytes past a 4-byte boundary; it finishes by branching to md5_digest_GHI.
885 #define MD5FBLOCK(offset) \
886 .type md5_digest_block##offset, @function ; \
887 \
888 .align 32 ; \
889 .proc md5_digest_block##offset ; \
890 .prologue ; \
891 .altrp QUICK_RTN ; \
892 .body ; \
893 md5_digest_block##offset: \
894 { .mmi ; \
895 alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ; \
896 mov LTrip = 2 ; \
897 mov ar.lc = 3 ; \
898 } ;; \
899 { .mii ; \
900 cmp.eq pMore, p0 = r0, r0 ; \
901 mov ar.ec = 0 ; \
902 nop 0x0 ; \
903 } ;; \
904 \
905 .pred.rel "mutex", pLoad, pSkip ; \
906 .md5_FF_round##offset: \
907 FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset) \
908 FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset) \
909 FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset) \
910 FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset) \
911 \
912 { .mib ; \
913 nop 0x0 ; \
914 nop 0x0 ; \
915 br.cond.sptk.many md5_digest_GHI ; \
916 } ;; \
917 .endp md5_digest_block##offset
918
919 MD5FBLOCK(1) /* md5_digest_block1, called from MD5UNALIGNED(1) */
920 MD5FBLOCK(2) /* md5_digest_block2, called from MD5UNALIGNED(2) */
921 MD5FBLOCK(3) /* md5_digest_block3, called from MD5UNALIGNED(3) */
922
923 .align 64
924 .type md5_constants, @object
925 md5_constants:
926 .md5_tbl_data_order: // To ensure little-endian data
927 // order, code as bytes.
928 data1 0x78, 0xa4, 0x6a, 0xd7 // 0
929 data1 0x56, 0xb7, 0xc7, 0xe8 // 1
930 data1 0xdb, 0x70, 0x20, 0x24 // 2
931 data1 0xee, 0xce, 0xbd, 0xc1 // 3
932 data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
933 data1 0x2a, 0xc6, 0x87, 0x47 // 5
934 data1 0x13, 0x46, 0x30, 0xa8 // 6
935 data1 0x01, 0x95, 0x46, 0xfd // 7
936 data1 0xd8, 0x98, 0x80, 0x69 // 8
937 data1 0xaf, 0xf7, 0x44, 0x8b // 9
938 data1 0xb1, 0x5b, 0xff, 0xff // 10
939 data1 0xbe, 0xd7, 0x5c, 0x89 // 11
940 data1 0x22, 0x11, 0x90, 0x6b // 12
941 data1 0x93, 0x71, 0x98, 0xfd // 13
942 data1 0x8e, 0x43, 0x79, 0xa6 // 14
943 data1 0x21, 0x08, 0xb4, 0x49 // 15
944 data1 0x62, 0x25, 0x1e, 0xf6 // 16
945 data1 0x40, 0xb3, 0x40, 0xc0 // 17
946 data1 0x51, 0x5a, 0x5e, 0x26 // 18
947 data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
948 data1 0x5d, 0x10, 0x2f, 0xd6 // 20
949 data1 0x53, 0x14, 0x44, 0x02 // 21
950 data1 0x81, 0xe6, 0xa1, 0xd8 // 22
951 data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
952 data1 0xe6, 0xcd, 0xe1, 0x21 // 24
953 data1 0xd6, 0x07, 0x37, 0xc3 // 25
954 data1 0x87, 0x0d, 0xd5, 0xf4 // 26
955 data1 0xed, 0x14, 0x5a, 0x45 // 27
956 data1 0x05, 0xe9, 0xe3, 0xa9 // 28
957 data1 0xf8, 0xa3, 0xef, 0xfc // 29
958 data1 0xd9, 0x02, 0x6f, 0x67 // 30
959 data1 0x8a, 0x4c, 0x2a, 0x8d // 31
960 data1 0x42, 0x39, 0xfa, 0xff // 32
961 data1 0x81, 0xf6, 0x71, 0x87 // 33
962 data1 0x22, 0x61, 0x9d, 0x6d // 34
963 data1 0x0c, 0x38, 0xe5, 0xfd // 35
964 data1 0x44, 0xea, 0xbe, 0xa4 // 36
965 data1 0xa9, 0xcf, 0xde, 0x4b // 37
966 data1 0x60, 0x4b, 0xbb, 0xf6 // 38
967 data1 0x70, 0xbc, 0xbf, 0xbe // 39
968 data1 0xc6, 0x7e, 0x9b, 0x28 // 40
969 data1 0xfa, 0x27, 0xa1, 0xea // 41
970 data1 0x85, 0x30, 0xef, 0xd4 // 42
971 data1 0x05, 0x1d, 0x88, 0x04 // 43
972 data1 0x39, 0xd0, 0xd4, 0xd9 // 44
973 data1 0xe5, 0x99, 0xdb, 0xe6 // 45
974 data1 0xf8, 0x7c, 0xa2, 0x1f // 46
975 data1 0x65, 0x56, 0xac, 0xc4 // 47
976 data1 0x44, 0x22, 0x29, 0xf4 // 48
977 data1 0x97, 0xff, 0x2a, 0x43 // 49
978 data1 0xa7, 0x23, 0x94, 0xab // 50
979 data1 0x39, 0xa0, 0x93, 0xfc // 51
980 data1 0xc3, 0x59, 0x5b, 0x65 // 52
981 data1 0x92, 0xcc, 0x0c, 0x8f // 53
982 data1 0x7d, 0xf4, 0xef, 0xff // 54
983 data1 0xd1, 0x5d, 0x84, 0x85 // 55
984 data1 0x4f, 0x7e, 0xa8, 0x6f // 56
985 data1 0xe0, 0xe6, 0x2c, 0xfe // 57
986 data1 0x14, 0x43, 0x01, 0xa3 // 58
987 data1 0xa1, 0x11, 0x08, 0x4e // 59
988 data1 0x82, 0x7e, 0x53, 0xf7 // 60
989 data1 0x35, 0xf2, 0x3a, 0xbd // 61
990 data1 0xbb, 0xd2, 0xd7, 0x2a // 62
991 data1 0x91, 0xd3, 0x86, 0xeb // 63
992 data1 0x00, 0x00, 0x00, 0x00 // 64: padding only. Every step, including the last, ends with "ld4 TRound = [TPtr], 4", so one word past entry 63 is read (and then discarded); this keeps that read inside the object.
993 .size md5_constants#,65*4
OLDNEW
« no previous file with comments | « openssl/crypto/md5/asm/md5-586-mac.S ('k') | openssl/crypto/md5/asm/md5-x86_64.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698