Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(146)

Side by Side Diff: nss/lib/freebl/intel-aes-x86-masm.asm

Issue 214183004: Implement AES in different modes of operation, using AES-NI and (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss.git@master
Patch Set: Remove an assertion. ctr->cipher doesn't set *outlen. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « nss/lib/freebl/intel-aes-x64-masm.asm ('k') | nss/lib/freebl/intel-gcm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ; LICENSE:
2 ; This submission to NSS is to be made available under the terms of the
3 ; Mozilla Public License, v. 2.0. You can obtain one at
4 ; http://mozilla.org/MPL/2.0/.
5 ;###############################################################################
6 ; Copyright(c) 2014, Intel Corp.
7 ; Developers and authors:
8 ; Shay Gueron and Vlad Krasnov
9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
10 ; Please send feedback directly to crypto.feedback.alias@intel.com
11
12
; Assembler setup (32-bit flat model, C naming, SSE registers enabled) and the
; constant tables read by the key-expansion routines in this file.
13 .MODEL FLAT, C
14 .XMM
15
16 .DATA
17 ALIGN 16
; pshufb control masks. Every dword carries the same four byte selectors, so a
; single source dword, rotated by one byte, is replicated into all four lanes.
; Each table below is 16 bytes, so all of them inherit the 16-byte alignment
; requested above (the 128-bit key expansion loads two of them with movdqa).
18 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh ; selects source bytes 13,14,15,12 (last dword); used by intel_aes_encrypt_init_128
19 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h ; selects source bytes 5,6,7,4 (second dword); used by intel_aes_encrypt_init_192
20 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh ; same selectors as Lmask; used by intel_aes_encrypt_init_256
21 Lcon1 dd 1,1,1,1 ; starting round constant; the expansion loops double it with pslld after each use
22 Lcon2 dd 1bh,1bh,1bh,1bh ; round constant loaded by intel_aes_encrypt_init_128 once its 8-iteration loop is done
23
24 .CODE
25
; Register aliases shared by the round macros and the ECB/CBC/CTR generator
; macros below. KEY/KS/ITR (defined later) alias the same ecx/edx/eax registers
; for the key-schedule routines.
26 ctx textequ <ecx> ; base address used for round-key loads ([i*16 + ctx])
27 output textequ <edx> ; destination pointer
28 input textequ <eax> ; source pointer; eax is zeroed before ret in every generator macro
29 inputLen textequ <edi> ; remaining byte count; edi is pushed/popped by each generator macro
30
31
; aes_rnd i: apply one AES encryption round, using the 16-byte round key at
; [i*16 + ctx], to the seven blocks held in xmm0..xmm6. Clobbers xmm7.
32 aes_rnd MACRO i
33 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
34 aesenc xmm0, xmm7
35 aesenc xmm1, xmm7
36 aesenc xmm2, xmm7
37 aesenc xmm3, xmm7
38 aesenc xmm4, xmm7
39 aesenc xmm5, xmm7
40 aesenc xmm6, xmm7
41 ENDM
42
; aes_last_rnd i: apply the final AES encryption round (aesenclast), using the
; round key at [i*16 + ctx], to the seven blocks in xmm0..xmm6. Clobbers xmm7.
43 aes_last_rnd MACRO i
44 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
45 aesenclast xmm0, xmm7
46 aesenclast xmm1, xmm7
47 aesenclast xmm2, xmm7
48 aesenclast xmm3, xmm7
49 aesenclast xmm4, xmm7
50 aesenclast xmm5, xmm7
51 aesenclast xmm6, xmm7
52 ENDM
53
; aes_dec_rnd i: apply one AES decryption round (aesdec), using the round key
; at [i*16 + ctx], to the seven blocks in xmm0..xmm6. Clobbers xmm7.
54 aes_dec_rnd MACRO i
55 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
56 aesdec xmm0, xmm7
57 aesdec xmm1, xmm7
58 aesdec xmm2, xmm7
59 aesdec xmm3, xmm7
60 aesdec xmm4, xmm7
61 aesdec xmm5, xmm7
62 aesdec xmm6, xmm7
63 ENDM
64
; aes_dec_last_rnd i: apply the final AES decryption round (aesdeclast), using
; the round key at [i*16 + ctx], to the seven blocks in xmm0..xmm6.
; Clobbers xmm7.
65 aes_dec_last_rnd MACRO i
66 movdqu xmm7, [i*16 + ctx] ; xmm7 = round key i (unaligned load)
67 aesdeclast xmm0, xmm7
68 aesdeclast xmm1, xmm7
69 aesdeclast xmm2, xmm7
70 aesdeclast xmm3, xmm7
71 aesdeclast xmm4, xmm7
72 aesdeclast xmm5, xmm7
73 aesdeclast xmm6, xmm7
74 ENDM
75
76
; gen_aes_ecb_func enc, rnds
; Emits the complete body (including ret) of an ECB routine.
;   enc  - 1 selects the aesenc/aesenclast forms, anything else the
;          aesdec/aesdeclast forms.
;   rnds - index of the last round key; rounds 1..rnds-1 use the regular
;          instruction and round rnds uses the "last" instruction.
; Stack arguments read (cdecl, 4-byte slots, numbered from 0): 0 = context,
; 1 = output, 4 = input, 5 = input length in bytes. Slots 2 and 3 are not read.
; Processes seven blocks per iteration while at least 7*16 bytes remain, then
; one block at a time; a tail shorter than 16 bytes is left untouched.
; Returns eax = 0. Preserves edi; clobbers eax, ecx, edx, xmm0-xmm7 and flags.
77 gen_aes_ecb_func MACRO enc, rnds
78
79 LOCAL loop7
80 LOCAL loop1
81 LOCAL bail
82
83 push inputLen ; save edi; stack is now [esp]=saved edi, [esp+4]=return address, [esp+8]=argument 0
84
85 mov ctx, [esp + 2*4 + 0*4] ; argument 0
86 mov output, [esp + 2*4 + 1*4] ; argument 1
87 mov input, [esp + 2*4 + 4*4] ; argument 4
88 mov inputLen, [esp + 2*4 + 5*4] ; argument 5
89
90 lea ctx, [44+ctx] ; round keys are read from context+44 onward (presumably the expanded-key field; confirm against the C struct)
91
92 loop7: ; ---- seven blocks per iteration ----
93 cmp inputLen, 7*16
94 jb loop1 ; unsigned compare: fewer than seven blocks left
95
96 movdqu xmm0, [0*16 + input]
97 movdqu xmm1, [1*16 + input]
98 movdqu xmm2, [2*16 + input]
99 movdqu xmm3, [3*16 + input]
100 movdqu xmm4, [4*16 + input]
101 movdqu xmm5, [5*16 + input]
102 movdqu xmm6, [6*16 + input]
103
104 movdqu xmm7, [0*16 + ctx] ; round key 0: initial whitening XOR
105 pxor xmm0, xmm7
106 pxor xmm1, xmm7
107 pxor xmm2, xmm7
108 pxor xmm3, xmm7
109 pxor xmm4, xmm7
110 pxor xmm5, xmm7
111 pxor xmm6, xmm7
112
; Bind the round macros/instructions for this expansion. These text equates
; are redefined by every expansion of the macro and are also used by loop1.
113 IF enc eq 1
114 rnd textequ <aes_rnd>
115 lastrnd textequ <aes_last_rnd>
116 aesinst textequ <aesenc>
117 aeslastinst textequ <aesenclast>
118 ELSE
119 rnd textequ <aes_dec_rnd>
120 lastrnd textequ <aes_dec_last_rnd>
121 aesinst textequ <aesdec>
122 aeslastinst textequ <aesdeclast>
123 ENDIF
124
125 i = 1
126 WHILE i LT rnds ; assembly-time unroll of rounds 1..rnds-1
127 rnd i
128 i = i+1
129 ENDM
130 lastrnd rnds
131
132 movdqu [0*16 + output], xmm0
133 movdqu [1*16 + output], xmm1
134 movdqu [2*16 + output], xmm2
135 movdqu [3*16 + output], xmm3
136 movdqu [4*16 + output], xmm4
137 movdqu [5*16 + output], xmm5
138 movdqu [6*16 + output], xmm6
139
140 lea input, [7*16 + input]
141 lea output, [7*16 + output]
142 sub inputLen, 7*16
143 jmp loop7
144
145 loop1: ; ---- one block per iteration ----
146 cmp inputLen, 1*16
147 jb bail ; less than one full block left
148
149 movdqu xmm0, [input]
150 movdqu xmm7, [0*16 + ctx] ; round key 0: initial whitening XOR
151 pxor xmm0, xmm7
152
153 i = 1
154 WHILE i LT rnds ; assembly-time unroll of rounds 1..rnds-1
155 movdqu xmm7, [i*16 + ctx]
156 aesinst xmm0, xmm7
157 i = i+1
158 ENDM
159 movdqu xmm7, [rnds*16 + ctx]
160 aeslastinst xmm0, xmm7
161
162 movdqu [output], xmm0
163
164 lea input, [1*16 + input]
165 lea output, [1*16 + output]
166 sub inputLen, 1*16
167 jmp loop1
168
169 bail:
170 xor eax, eax ; return value 0 (eax is also the input alias, no longer needed)
171 pop inputLen ; restore edi
172 ret
173
174 ENDM
175
; ECB entry points. Each PROC body is one expansion of gen_aes_ecb_func:
; first argument 1 = encrypt / 0 = decrypt, second argument = last round-key
; index (10, 12 or 14 for the _128, _192 and _256 variants respectively).
176 ALIGN 16
177 intel_aes_encrypt_ecb_128 PROC
178 gen_aes_ecb_func 1, 10
179 intel_aes_encrypt_ecb_128 ENDP
180
181 ALIGN 16
182 intel_aes_encrypt_ecb_192 PROC
183 gen_aes_ecb_func 1, 12
184 intel_aes_encrypt_ecb_192 ENDP
185
186 ALIGN 16
187 intel_aes_encrypt_ecb_256 PROC
188 gen_aes_ecb_func 1, 14
189 intel_aes_encrypt_ecb_256 ENDP
190
191 ALIGN 16
192 intel_aes_decrypt_ecb_128 PROC
193 gen_aes_ecb_func 0, 10
194 intel_aes_decrypt_ecb_128 ENDP
195
196 ALIGN 16
197 intel_aes_decrypt_ecb_192 PROC
198 gen_aes_ecb_func 0, 12
199 intel_aes_decrypt_ecb_192 ENDP
200
201 ALIGN 16
202 intel_aes_decrypt_ecb_256 PROC
203 gen_aes_ecb_func 0, 14
204 intel_aes_decrypt_ecb_256 ENDP
205
206
; Register aliases for the key-schedule routines (the *_init_* PROCs).
207 KEY textequ <ecx> ; pointer to the raw cipher key (stack argument 0)
208 KS textequ <edx> ; pointer to the round-key output area (stack argument 1)
209 ITR textequ <eax> ; scratch: constant-table address, then loop counter
210
; intel_aes_encrypt_init_128(KEY, KS)
; Expands the 16-byte key at KEY into 11 round keys (176 bytes) written to
; KS[0..10]: key 0 is the raw key, keys 1..8 come from the loop, keys 9 and 10
; from the two unrolled steps after it.
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm4 and flags; has no explicit return value.
211 ALIGN 16
212 intel_aes_encrypt_init_128 PROC
213
214 mov KEY, [esp + 1*4 + 0*4]
215 mov KS, [esp + 1*4 + 1*4]
216
217
218 movdqu xmm1, [KEY] ; xmm1 = current round key (starts as the raw key)
219 movdqu [KS], xmm1 ; round key 0
220 movdqa xmm2, xmm1
221
222 lea ITR, Lcon1
223 movdqa xmm0, [ITR] ; xmm0 = round constant (1 in every dword); aligned load
224 lea ITR, Lmask
225 movdqa xmm4, [ITR] ; xmm4 = pshufb mask: rotate + broadcast the last dword
226
227 mov ITR, 8 ; eight loop iterations -> round keys 1..8
228
229 Lenc_128_ks_loop:
230 lea KS, [16 + KS] ; advance to the slot for the key produced below (lea leaves flags alone)
231 dec ITR ; sets ZF for the jne at the bottom; nothing in between modifies EFLAGS
232
233 pshufb xmm2, xmm4 ; every dword = last dword of the previous key, rotated one byte
234 aesenclast xmm2, xmm0 ; byte substitution, then XOR with the round constant (all columns equal, so the row shift has no effect)
235 pslld xmm0, 1 ; double the round constant for the next step
236 movdqa xmm3, xmm1
237 pslldq xmm3, 4 ; the three shift/XOR pairs build the running XOR of the key's dwords
238 pxor xmm1, xmm3
239 pslldq xmm3, 4
240 pxor xmm1, xmm3
241 pslldq xmm3, 4
242 pxor xmm1, xmm3
243 pxor xmm1, xmm2 ; xmm1 = next round key
244 movdqu [KS], xmm1
245 movdqa xmm2, xmm1
246
247 jne Lenc_128_ks_loop ; consumes ZF from the dec above
248
; Round key 9: the doubled constant would be 100h here, so 1bh is loaded.
249 lea ITR, Lcon2
250 movdqa xmm0, [ITR]
251
252 pshufb xmm2, xmm4
253 aesenclast xmm2, xmm0
254 pslld xmm0, 1 ; 1bh -> 36h for round key 10
255 movdqa xmm3, xmm1
256 pslldq xmm3, 4
257 pxor xmm1, xmm3
258 pslldq xmm3, 4
259 pxor xmm1, xmm3
260 pslldq xmm3, 4
261 pxor xmm1, xmm3
262 pxor xmm1, xmm2
263 movdqu [16 + KS], xmm1 ; KS still points at key 8, so this is key 9
264 movdqa xmm2, xmm1
265
; Round key 10.
266 pshufb xmm2, xmm4
267 aesenclast xmm2, xmm0
268 movdqa xmm3, xmm1
269 pslldq xmm3, 4
270 pxor xmm1, xmm3
271 pslldq xmm3, 4
272 pxor xmm1, xmm3
273 pslldq xmm3, 4
274 pxor xmm1, xmm3
275 pxor xmm1, xmm2
276 movdqu [32 + KS], xmm1 ; key 10
277 movdqa xmm2, xmm1 ; xmm2 is not read again before ret
278
279 ret
280 intel_aes_encrypt_init_128 ENDP
281
282
; intel_aes_decrypt_init_128(KEY, KS)
; Builds the decryption round keys for a 16-byte key: runs the encryption key
; expansion into KS, then reverses the order of the 11 round keys in place and
; applies aesimc to keys 1..9 (the first and last keys are only swapped).
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm4 and flags.
283 ALIGN 16
284 intel_aes_decrypt_init_128 PROC
285
286 mov KEY, [esp + 1*4 + 0*4]
287 mov KS, [esp + 1*4 + 1*4]
288
289 push KS ; re-push the arguments so the callee sees [esp+4]=KEY, [esp+8]=KS
290 push KEY
291
292 call intel_aes_encrypt_init_128
293
294 pop KEY ; drop the arguments and reload KEY/KS (the callee advanced KS)
295 pop KS
296
297 movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 10 without aesimc
298 movdqu xmm1, [10*16 + KS]
299 movdqu [10*16 + KS], xmm0
300 movdqu [0*16 + KS], xmm1
301
302 i = 1
303 WHILE i LT 5 ; assembly-time unroll: swap keys i and 10-i for i = 1..4
304 movdqu xmm0, [i*16 + KS]
305 movdqu xmm1, [(10-i)*16 + KS]
306
307 aesimc xmm0, xmm0
308 aesimc xmm1, xmm1
309
310 movdqu [(10-i)*16 + KS], xmm0
311 movdqu [i*16 + KS], xmm1
312
313 i = i+1
314 ENDM
315
316 movdqu xmm0, [5*16 + KS] ; middle key stays in place, only transformed
317 aesimc xmm0, xmm0
318 movdqu [5*16 + KS], xmm0
319 ret
320 intel_aes_decrypt_init_128 ENDP
321
322
; intel_aes_encrypt_init_192(KEY, KS)
; Expands the 24-byte key at KEY into round keys written to KS. Round key 0 is
; stored before the loop; each of the 4 loop iterations derives 12 new key
; dwords and stores three 16-byte round keys, giving keys 0..12 in total.
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm7 and flags.
323 ALIGN 16
324 intel_aes_encrypt_init_192 PROC
325
326 mov KEY, [esp + 1*4 + 0*4]
327 mov KS, [esp + 1*4 + 1*4]
328
329 pxor xmm3, xmm3
330 movdqu xmm1, [KEY] ; xmm1 = key dwords 0..3
331 pinsrd xmm3, DWORD PTR [16 + KEY], 0 ; xmm3 = key dwords 4,5 in its low half, upper half zero
332 pinsrd xmm3, DWORD PTR [20 + KEY], 1
333
334 movdqu [KS], xmm1 ; round key 0
335 movdqa xmm5, xmm3 ; xmm5 carries the two dwords still waiting to be stored
336
337 lea ITR, Lcon1
338 movdqu xmm0, [ITR] ; xmm0 = round constant, starts at 1
339 lea ITR, Lmask192
340 movdqu xmm4, [ITR] ; xmm4 = pshufb mask: rotate + broadcast dword 1
341
342 mov ITR, 4 ; four iterations, three round keys each
343
344 Lenc_192_ks_loop:
; ---- first half: next six key dwords ----
345 movdqa xmm2, xmm3
346 pshufb xmm2, xmm4 ; every dword = dword 1 of xmm3, rotated one byte
347 aesenclast xmm2, xmm0 ; byte substitution + XOR with the round constant
348 pslld xmm0, 1 ; double the round constant
349
350 movdqa xmm6, xmm1
351 movdqa xmm7, xmm3
352 pslldq xmm6, 4
353 pslldq xmm7, 4
354 pxor xmm1, xmm6
355 pxor xmm3, xmm7 ; low two dwords of xmm3: running XOR of its two key dwords
356 pslldq xmm6, 4
357 pxor xmm1, xmm6
358 pslldq xmm6, 4
359 pxor xmm1, xmm6
360 pxor xmm1, xmm2 ; xmm1 = next four key dwords
361 pshufd xmm2, xmm1, 0ffh ; broadcast the last of them
362 pxor xmm3, xmm2 ; low half of xmm3 = following two key dwords (upper half is not meaningful)
363
364 movdqa xmm6, xmm1
365 shufpd xmm5, xmm1, 00h ; xmm5 = pending two dwords : low half of xmm1
366 shufpd xmm6, xmm3, 01h ; xmm6 = high half of xmm1 : low half of xmm3
367
368 movdqu [16 + KS], xmm5
369 movdqu [32 + KS], xmm6
370
; ---- second half: another six key dwords ----
371 movdqa xmm2, xmm3
372 pshufb xmm2, xmm4
373 aesenclast xmm2, xmm0
374 pslld xmm0, 1
375
376 movdqa xmm6, xmm1
377 movdqa xmm7, xmm3
378 pslldq xmm6, 4
379 pslldq xmm7, 4
380 pxor xmm1, xmm6
381 pxor xmm3, xmm7
382 pslldq xmm6, 4
383 pxor xmm1, xmm6
384 pslldq xmm6, 4
385 pxor xmm1, xmm6
386 pxor xmm1, xmm2
387 pshufd xmm2, xmm1, 0ffh
388 pxor xmm3, xmm2
389
390 movdqu [48 + KS], xmm1 ; these four dwords form a whole round key
391 movdqa xmm5, xmm3 ; keep the two trailing dwords for the next iteration
392
393 lea KS, [48 + KS] ; three round keys were produced
394
395 dec ITR
396 jnz Lenc_192_ks_loop
397
; NOTE(review): at this point KS = original KS + 192, so the store below lands
; at byte offset 208, i.e. in a 14th 16-byte slot. The ECB/CBC/CTR code in this
; file reads round keys 0..12 only for the 192-bit variants, so this slot looks
; unused, and its upper 8 bytes are not valid key material. Confirm the
; caller's buffer is at least 224 bytes.
398 movdqu [16 + KS], xmm5
399 ret
400 intel_aes_encrypt_init_192 ENDP
401
; intel_aes_decrypt_init_192(KEY, KS)
; Builds the decryption round keys for a 24-byte key: runs the encryption key
; expansion into KS, then reverses the order of round keys 0..12 in place and
; applies aesimc to keys 1..11 (the first and last keys are only swapped).
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm7 and flags.
402 ALIGN 16
403 intel_aes_decrypt_init_192 PROC
404 mov KEY, [esp + 1*4 + 0*4]
405 mov KS, [esp + 1*4 + 1*4]
406
407 push KS ; re-push the arguments so the callee sees [esp+4]=KEY, [esp+8]=KS
408 push KEY
409
410 call intel_aes_encrypt_init_192
411
412 pop KEY ; drop the arguments and reload KEY/KS (the callee advanced KS)
413 pop KS
414
415 movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 12 without aesimc
416 movdqu xmm1, [12*16 + KS]
417 movdqu [12*16 + KS], xmm0
418 movdqu [0*16 + KS], xmm1
419
420 i = 1
421 WHILE i LT 6 ; assembly-time unroll: swap keys i and 12-i for i = 1..5
422 movdqu xmm0, [i*16 + KS]
423 movdqu xmm1, [(12-i)*16 + KS]
424
425 aesimc xmm0, xmm0
426 aesimc xmm1, xmm1
427
428 movdqu [(12-i)*16 + KS], xmm0
429 movdqu [i*16 + KS], xmm1
430
431 i = i+1
432 ENDM
433
434 movdqu xmm0, [6*16 + KS] ; middle key stays in place, only transformed
435 aesimc xmm0, xmm0
436 movdqu [6*16 + KS], xmm0
437 ret
438 intel_aes_decrypt_init_192 ENDP
439
; intel_aes_encrypt_init_256(KEY, KS)
; Expands the 32-byte key at KEY into 15 round keys (240 bytes) written to
; KS[0..14]: keys 0 and 1 are the raw key halves, the 6 loop iterations
; produce keys 2..13 (two per iteration) and the tail produces key 14.
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm6 and flags.
440 ALIGN 16
441 intel_aes_encrypt_init_256 PROC
442
443 mov KEY, [esp + 1*4 + 0*4]
444 mov KS, [esp + 1*4 + 1*4]
445 movdqu xmm1, [16*0 + KEY] ; xmm1 = even-numbered round key (starts as key bytes 0..15)
446 movdqu xmm3, [16*1 + KEY] ; xmm3 = odd-numbered round key (starts as key bytes 16..31)
447
448 movdqu [16*0 + KS], xmm1
449 movdqu [16*1 + KS], xmm3
450
451 lea ITR, Lcon1
452 movdqu xmm0, [ITR] ; xmm0 = round constant, starts at 1
453 lea ITR, Lmask256
454 movdqu xmm5, [ITR] ; xmm5 = pshufb mask: rotate + broadcast the last dword
455
456 pxor xmm6, xmm6 ; all-zero "round key" for the substitution-only step below
457
458 mov ITR, 6
459
460 Lenc_256_ks_loop:
461
; ---- even key: rotate, substitute, add the round constant ----
462 movdqa xmm2, xmm3
463 pshufb xmm2, xmm5
464 aesenclast xmm2, xmm0
465 pslld xmm0, 1 ; double the round constant
466 movdqa xmm4, xmm1
467 pslldq xmm4, 4 ; the three shift/XOR pairs build the running XOR of the key's dwords
468 pxor xmm1, xmm4
469 pslldq xmm4, 4
470 pxor xmm1, xmm4
471 pslldq xmm4, 4
472 pxor xmm1, xmm4
473 pxor xmm1, xmm2
474 movdqu [16*2 + KS], xmm1
475
; ---- odd key: substitute only (no rotate, no round constant) ----
476 pshufd xmm2, xmm1, 0ffh ; broadcast the last dword of the key just produced
477 aesenclast xmm2, xmm6 ; zero key: byte substitution only (all columns equal, so the row shift has no effect)
478 movdqa xmm4, xmm3
479 pslldq xmm4, 4
480 pxor xmm3, xmm4
481 pslldq xmm4, 4
482 pxor xmm3, xmm4
483 pslldq xmm4, 4
484 pxor xmm3, xmm4
485 pxor xmm3, xmm2
486 movdqu [16*3 + KS], xmm3
487
488 lea KS, [32 + KS] ; two round keys were produced
489 dec ITR
490 jnz Lenc_256_ks_loop
491
; ---- round key 14: one more even-key step ----
492 movdqa xmm2, xmm3
493 pshufb xmm2, xmm5
494 aesenclast xmm2, xmm0
495 movdqa xmm4, xmm1
496 pslldq xmm4, 4
497 pxor xmm1, xmm4
498 pslldq xmm4, 4
499 pxor xmm1, xmm4
500 pslldq xmm4, 4
501 pxor xmm1, xmm4
502 pxor xmm1, xmm2
503 movdqu [16*2 + KS], xmm1 ; KS = original KS + 192 here, so this is byte offset 224
504
505 ret
506 intel_aes_encrypt_init_256 ENDP
507
; intel_aes_decrypt_init_256(KEY, KS)
; Builds the decryption round keys for a 32-byte key: runs the encryption key
; expansion into KS, then reverses the order of the 15 round keys in place and
; applies aesimc to keys 1..13 (the first and last keys are only swapped).
; Stack arguments (cdecl): [esp+4] = KEY, [esp+8] = KS.
; Clobbers eax, ecx, edx, xmm0-xmm6 and flags.
508 ALIGN 16
509 intel_aes_decrypt_init_256 PROC
510 mov KEY, [esp + 1*4 + 0*4]
511 mov KS, [esp + 1*4 + 1*4]
512
513 push KS ; re-push the arguments so the callee sees [esp+4]=KEY, [esp+8]=KS
514 push KEY
515
516 call intel_aes_encrypt_init_256
517
518 pop KEY ; drop the arguments and reload KEY/KS (the callee advanced KS)
519 pop KS
520
521 movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 14 without aesimc
522 movdqu xmm1, [14*16 + KS]
523 movdqu [14*16 + KS], xmm0
524 movdqu [0*16 + KS], xmm1
525
526 i = 1
527 WHILE i LT 7 ; assembly-time unroll: swap keys i and 14-i for i = 1..6
528 movdqu xmm0, [i*16 + KS]
529 movdqu xmm1, [(14-i)*16 + KS]
530
531 aesimc xmm0, xmm0
532 aesimc xmm1, xmm1
533
534 movdqu [(14-i)*16 + KS], xmm0
535 movdqu [i*16 + KS], xmm1
536
537 i = i+1
538 ENDM
539
540 movdqu xmm0, [7*16 + KS] ; middle key stays in place, only transformed
541 aesimc xmm0, xmm0
542 movdqu [7*16 + KS], xmm0
543 ret
544 intel_aes_decrypt_init_256 ENDP
545
546
547
; gen_aes_cbc_enc_func rnds
; Emits the complete body (including ret) of a CBC encryption routine.
;   rnds - index of the last round key.
; Stack arguments read (cdecl, 4-byte slots, numbered from 0): 0 = context,
; 1 = output, 4 = input, 5 = input length in bytes. Slots 2 and 3 are not read.
; The 16-byte chaining value is read from context+12 and the last ciphertext
; block is written back there before returning. Blocks are processed strictly
; one at a time; a tail shorter than 16 bytes is left untouched.
; Returns eax = 0. Preserves edi; clobbers eax, ecx, edx, xmm0-xmm7 and flags.
548 gen_aes_cbc_enc_func MACRO rnds
549
550 LOCAL loop1
551 LOCAL bail
552
553 push inputLen ; save edi; arguments now start at [esp+8]
554
555 mov ctx, [esp + 2*4 + 0*4] ; argument 0
556 mov output, [esp + 2*4 + 1*4] ; argument 1
557 mov input, [esp + 2*4 + 4*4] ; argument 4
558 mov inputLen, [esp + 2*4 + 5*4] ; argument 5
559
560 lea ctx, [44+ctx] ; round keys are read from context+44 onward
561
562 movdqu xmm0, [-32+ctx] ; xmm0 = chaining value, at context+12 (44-32)
563
564 movdqu xmm2, [0*16 + ctx] ; keep round keys 0..4 in registers across the loop
565 movdqu xmm3, [1*16 + ctx]
566 movdqu xmm4, [2*16 + ctx]
567 movdqu xmm5, [3*16 + ctx]
568 movdqu xmm6, [4*16 + ctx]
569
570 loop1:
571 cmp inputLen, 1*16
572 jb bail ; less than one full block left
573
574 movdqu xmm1, [input]
575 pxor xmm1, xmm2 ; input block XOR round key 0
576 pxor xmm0, xmm1 ; ... XOR previous ciphertext block (or the initial chaining value)
577
578 aesenc xmm0, xmm3 ; rounds 1..4 from the preloaded keys
579 aesenc xmm0, xmm4
580 aesenc xmm0, xmm5
581 aesenc xmm0, xmm6
582
583 i = 5
584 WHILE i LT rnds ; assembly-time unroll of rounds 5..rnds-1, keys loaded from memory
585 movdqu xmm7, [i*16 + ctx]
586 aesenc xmm0, xmm7
587 i = i+1
588 ENDM
589 movdqu xmm7, [rnds*16 + ctx]
590 aesenclast xmm0, xmm7
591
592 movdqu [output], xmm0 ; xmm0 stays live as the next chaining value
593
594 lea input, [1*16 + input]
595 lea output, [1*16 + output]
596 sub inputLen, 1*16
597 jmp loop1
598
599 bail:
600 movdqu [-32+ctx], xmm0 ; write the chaining value back to context+12
601
602 xor eax, eax ; return value 0
603 pop inputLen ; restore edi
604 ret
605
606 ENDM
607
; gen_aes_cbc_dec_func rnds
; Emits the complete body (including ret) of a CBC decryption routine.
;   rnds - index of the last round key.
; Stack arguments read (cdecl, 4-byte slots, numbered from 0): 0 = context,
; 1 = output, 4 = input, 5 = input length in bytes. Slots 2 and 3 are not read.
; The 16-byte chaining value lives at context+12; it is updated with the last
; ciphertext block consumed. Seven blocks are decrypted per iteration while at
; least 7*16 bytes remain, then one block at a time; a tail shorter than 16
; bytes is left untouched.
; Returns eax = 0. Preserves edi; clobbers eax, ecx, edx, xmm0-xmm7 and flags.
608 gen_aes_cbc_dec_func MACRO rnds
609
610 LOCAL loop7
611 LOCAL loop1
612 LOCAL dec1
613 LOCAL bail
614
615 push inputLen ; save edi; arguments now start at [esp+8]
616
617 mov ctx, [esp + 2*4 + 0*4] ; argument 0
618 mov output, [esp + 2*4 + 1*4] ; argument 1
619 mov input, [esp + 2*4 + 4*4] ; argument 4
620 mov inputLen, [esp + 2*4 + 5*4] ; argument 5
621
622 lea ctx, [44+ctx] ; round keys are read from context+44 onward
623
624 loop7: ; ---- seven blocks per iteration ----
625 cmp inputLen, 7*16
626 jb dec1
627
628 movdqu xmm0, [0*16 + input]
629 movdqu xmm1, [1*16 + input]
630 movdqu xmm2, [2*16 + input]
631 movdqu xmm3, [3*16 + input]
632 movdqu xmm4, [4*16 + input]
633 movdqu xmm5, [5*16 + input]
634 movdqu xmm6, [6*16 + input]
635
636 movdqu xmm7, [0*16 + ctx] ; round key 0: initial whitening XOR
637 pxor xmm0, xmm7
638 pxor xmm1, xmm7
639 pxor xmm2, xmm7
640 pxor xmm3, xmm7
641 pxor xmm4, xmm7
642 pxor xmm5, xmm7
643 pxor xmm6, xmm7
644
645 i = 1
646 WHILE i LT rnds ; assembly-time unroll of rounds 1..rnds-1
647 aes_dec_rnd i
648 i = i+1
649 ENDM
650 aes_dec_last_rnd rnds
651
; Undo the chaining: block k is XORed with ciphertext block k-1, re-read from
; the input buffer; block 0 uses the stored chaining value. All input reads
; happen before the first output store below.
652 movdqu xmm7, [-32 + ctx] ; chaining value at context+12
653 pxor xmm0, xmm7
654 movdqu xmm7, [0*16 + input]
655 pxor xmm1, xmm7
656 movdqu xmm7, [1*16 + input]
657 pxor xmm2, xmm7
658 movdqu xmm7, [2*16 + input]
659 pxor xmm3, xmm7
660 movdqu xmm7, [3*16 + input]
661 pxor xmm4, xmm7
662 movdqu xmm7, [4*16 + input]
663 pxor xmm5, xmm7
664 movdqu xmm7, [5*16 + input]
665 pxor xmm6, xmm7
666 movdqu xmm7, [6*16 + input] ; last ciphertext block of the batch becomes the new chaining value
667
668 movdqu [0*16 + output], xmm0
669 movdqu [1*16 + output], xmm1
670 movdqu [2*16 + output], xmm2
671 movdqu [3*16 + output], xmm3
672 movdqu [4*16 + output], xmm4
673 movdqu [5*16 + output], xmm5
674 movdqu [6*16 + output], xmm6
675 movdqu [-32 + ctx], xmm7 ; store the new chaining value
676
677 lea input, [7*16 + input]
678 lea output, [7*16 + output]
679 sub inputLen, 7*16
680 jmp loop7
681 dec1:
682
683 movdqu xmm3, [-32 + ctx] ; xmm3 = chaining value for the single-block loop
684
685 loop1: ; ---- one block per iteration ----
686 cmp inputLen, 1*16
687 jb bail
688
689 movdqu xmm0, [input]
690 movdqa xmm4, xmm0 ; keep the ciphertext block: it is the next chaining value
691 movdqu xmm7, [0*16 + ctx]
692 pxor xmm0, xmm7
693
694 i = 1
695 WHILE i LT rnds ; assembly-time unroll of rounds 1..rnds-1
696 movdqu xmm7, [i*16 + ctx]
697 aesdec xmm0, xmm7
698 i = i+1
699 ENDM
700 movdqu xmm7, [rnds*16 + ctx]
701 aesdeclast xmm0, xmm7
702 pxor xmm3, xmm0 ; decrypted block XOR chaining value
703
704 movdqu [output], xmm3
705 movdqa xmm3, xmm4 ; advance the chaining value
706
707 lea input, [1*16 + input]
708 lea output, [1*16 + output]
709 sub inputLen, 1*16
710 jmp loop1
711
712 bail:
713 movdqu [-32 + ctx], xmm3 ; write the chaining value back to context+12
714 xor eax, eax ; return value 0
715 pop inputLen ; restore edi
716 ret
717 ENDM
718
; CBC entry points. Each PROC body is one expansion of gen_aes_cbc_enc_func or
; gen_aes_cbc_dec_func; the argument is the last round-key index (10, 12 or 14
; for the _128, _192 and _256 variants respectively).
719 ALIGN 16
720 intel_aes_encrypt_cbc_128 PROC
721 gen_aes_cbc_enc_func 10
722 intel_aes_encrypt_cbc_128 ENDP
723
724 ALIGN 16
725 intel_aes_encrypt_cbc_192 PROC
726 gen_aes_cbc_enc_func 12
727 intel_aes_encrypt_cbc_192 ENDP
728
729 ALIGN 16
730 intel_aes_encrypt_cbc_256 PROC
731 gen_aes_cbc_enc_func 14
732 intel_aes_encrypt_cbc_256 ENDP
733
734 ALIGN 16
735 intel_aes_decrypt_cbc_128 PROC
736 gen_aes_cbc_dec_func 10
737 intel_aes_decrypt_cbc_128 ENDP
738
739 ALIGN 16
740 intel_aes_decrypt_cbc_192 PROC
741 gen_aes_cbc_dec_func 12
742 intel_aes_decrypt_cbc_192 ENDP
743
744 ALIGN 16
745 intel_aes_decrypt_cbc_256 PROC
746 gen_aes_cbc_dec_func 14
747 intel_aes_decrypt_cbc_256 ENDP
748
749
750
; Extra register aliases for gen_aes_ctr_func. Both registers are pushed on
; entry and popped on exit by that macro.
751 ctrCtx textequ <esi> ; first the counter-context pointer, then the running 32-bit counter value
752 CTR textequ <ebx> ; scratch for the byte-swapped, key-whitened counter dword
753
; gen_aes_ctr_func rnds
; Emits the complete body (including ret) of a counter-mode routine.
;   rnds - index of the last round key.
; Stack arguments read (cdecl, 4-byte slots, numbered from 0): 0 = counter
; context, 1 = output, 4 = input, 5 = input length in bytes. Slots 2 and 3 are
; not read. From the counter context the code reads a pointer at offset 4 (the
; AES context; round keys start 44 bytes into it) and a 16-byte counter block
; at offset 8, which is updated before returning.
;
; Seven counter blocks, each already XORed with round key 0, are kept in a
; 16-byte-aligned scratch area on the stack. Only the last dword of each slot
; differs; it holds the byte-swapped counter XORed with the last dword of
; round key 0, so a slot can be advanced with integer instructions alone.
;
; NOTE(review): only the last 32 bits of the counter block are incremented
; (inc on a 32-bit register, no carry into the other 12 bytes) - confirm this
; matches the counter width callers expect.
;
; A tail shorter than 16 bytes is left untouched.
; Returns eax = 0. Preserves edi, esi, ebx, ebp; clobbers eax, ecx, edx,
; xmm0-xmm7 and flags.
754 gen_aes_ctr_func MACRO rnds
755
756 LOCAL loop7
757 LOCAL loop1
758 LOCAL enc1
759 LOCAL bail
760
761 push inputLen
762 push ctrCtx
763 push CTR
764 push ebp
765
; Four saved registers plus the return address: arguments start at [esp+20].
766 mov ctrCtx, [esp + 4*5 + 0*4] ; argument 0
767 mov output, [esp + 4*5 + 1*4] ; argument 1
768 mov input, [esp + 4*5 + 4*4] ; argument 4
769 mov inputLen, [esp + 4*5 + 5*4] ; argument 5
770
771 mov ctx, [4+ctrCtx] ; AES context pointer stored at offset 4 of the counter context
772 lea ctx, [44+ctx] ; round keys are read from AES context+44 onward
773
774 mov ebp, esp ; remember esp: used to reach argument 0 in bail and to unwind the scratch area
775 sub esp, 7*16 ; seven 16-byte counter slots ...
776 and esp, -16 ; ... aligned so the movdqa stores below are legal
777
778 movdqu xmm0, [8+ctrCtx] ; current counter block
779 mov ctrCtx, [ctrCtx + 8 + 3*4] ; from here on ctrCtx holds the counter's last dword ...
780 bswap ctrCtx ; ... byte-swapped so that inc works on it
781 movdqu xmm1, [ctx + 0*16]
782
783 pxor xmm0, xmm1 ; counter block XOR round key 0
784
785 movdqa [esp + 0*16], xmm0 ; slot 0 is the current counter; slots 1..6 get their last dword patched below
786 movdqa [esp + 1*16], xmm0
787 movdqa [esp + 2*16], xmm0
788 movdqa [esp + 3*16], xmm0
789 movdqa [esp + 4*16], xmm0
790 movdqa [esp + 5*16], xmm0
791 movdqa [esp + 6*16], xmm0
792
; Slot k (k = 1..6) = counter + k: increment, swap back to memory byte order,
; XOR with the last dword of round key 0, overwrite the slot's last dword.
793 inc ctrCtx
794 mov CTR, ctrCtx
795 bswap CTR
796 xor CTR, [ctx + 3*4]
797 mov [esp + 1*16 + 3*4], CTR
798
799 inc ctrCtx
800 mov CTR, ctrCtx
801 bswap CTR
802 xor CTR, [ctx + 3*4]
803 mov [esp + 2*16 + 3*4], CTR
804
805 inc ctrCtx
806 mov CTR, ctrCtx
807 bswap CTR
808 xor CTR, [ctx + 3*4]
809 mov [esp + 3*16 + 3*4], CTR
810
811 inc ctrCtx
812 mov CTR, ctrCtx
813 bswap CTR
814 xor CTR, [ctx + 3*4]
815 mov [esp + 4*16 + 3*4], CTR
816
817 inc ctrCtx
818 mov CTR, ctrCtx
819 bswap CTR
820 xor CTR, [ctx + 3*4]
821 mov [esp + 5*16 + 3*4], CTR
822
823 inc ctrCtx
824 mov CTR, ctrCtx
825 bswap CTR
826 xor CTR, [ctx + 3*4]
827 mov [esp + 6*16 + 3*4], CTR
828
829
830 loop7: ; ---- seven blocks per iteration ----
831 cmp inputLen, 7*16
832 jb loop1
833
834 movdqu xmm0, [0*16 + esp] ; slots already include round key 0, so rounds start at 1
835 movdqu xmm1, [1*16 + esp]
836 movdqu xmm2, [2*16 + esp]
837 movdqu xmm3, [3*16 + esp]
838 movdqu xmm4, [4*16 + esp]
839 movdqu xmm5, [5*16 + esp]
840 movdqu xmm6, [6*16 + esp]
841
; Rounds 1..7, each followed by the refresh of one stack slot (slot i-1) with
; the next counter value, ready for the following batch.
842 i = 1
843 WHILE i LE 7
844 aes_rnd i
845
846 inc ctrCtx
847 mov CTR, ctrCtx
848 bswap CTR
849 xor CTR, [ctx + 3*4]
850 mov [esp + (i-1)*16 + 3*4], CTR
851
852 i = i+1
853 ENDM
854 WHILE i LT rnds ; remaining rounds 8..rnds-1 (i continues from 8)
855 aes_rnd i
856 i = i+1
857 ENDM
858 aes_last_rnd rnds
859
860 movdqu xmm7, [0*16 + input] ; XOR each encrypted counter block with its input block
861 pxor xmm0, xmm7
862 movdqu xmm7, [1*16 + input]
863 pxor xmm1, xmm7
864 movdqu xmm7, [2*16 + input]
865 pxor xmm2, xmm7
866 movdqu xmm7, [3*16 + input]
867 pxor xmm3, xmm7
868 movdqu xmm7, [4*16 + input]
869 pxor xmm4, xmm7
870 movdqu xmm7, [5*16 + input]
871 pxor xmm5, xmm7
872 movdqu xmm7, [6*16 + input]
873 pxor xmm6, xmm7
874
875 movdqu [0*16 + output], xmm0
876 movdqu [1*16 + output], xmm1
877 movdqu [2*16 + output], xmm2
878 movdqu [3*16 + output], xmm3
879 movdqu [4*16 + output], xmm4
880 movdqu [5*16 + output], xmm5
881 movdqu [6*16 + output], xmm6
882
883 lea input, [7*16 + input]
884 lea output, [7*16 + output]
885 sub inputLen, 7*16
886 jmp loop7
887
888
; ---- one block per iteration ----
; Runs at most six times (fewer than 7*16 bytes remain), so the seven prepared
; slots are enough and no slot is refreshed here.
889 loop1:
890 cmp inputLen, 1*16
891 jb bail
892
893 movdqu xmm0, [esp] ; take the next prepared counter slot ...
894 add esp, 16 ; ... and consume it, so [esp] is always the next unused counter
895
896 i = 1
897 WHILE i LT rnds ; assembly-time unroll of rounds 1..rnds-1
898 movdqu xmm7, [i*16 + ctx]
899 aesenc xmm0, xmm7
900 i = i+1
901 ENDM
902 movdqu xmm7, [rnds*16 + ctx]
903 aesenclast xmm0, xmm7
904
905 movdqu xmm7, [input]
906 pxor xmm0, xmm7
907 movdqu [output], xmm0
908
909 lea input, [1*16 + input]
910 lea output, [1*16 + output]
911 sub inputLen, 1*16
912 jmp loop1
913
914 bail:
915
916 mov ctrCtx, [ebp + 4*5 + 0*4] ; reload argument 0 (ebp = esp as it was right after the pushes)
917 movdqu xmm0, [esp] ; next unused counter slot
918 movdqu xmm1, [ctx + 0*16]
919 pxor xmm0, xmm1 ; remove the round key 0 whitening
920 movdqu [8+ctrCtx], xmm0 ; store the updated counter block back
921
922
923 xor eax, eax ; return value 0
924 mov esp, ebp ; release the scratch area
925 pop ebp
926 pop CTR
927 pop ctrCtx
928 pop inputLen
929 ret
930 ENDM
931
932
; Counter-mode entry points. Each PROC body is one expansion of
; gen_aes_ctr_func; the argument is the last round-key index (10, 12 or 14 for
; the _128, _192 and _256 variants respectively).
933 ALIGN 16
934 intel_aes_encrypt_ctr_128 PROC
935 gen_aes_ctr_func 10
936 intel_aes_encrypt_ctr_128 ENDP
937
938 ALIGN 16
939 intel_aes_encrypt_ctr_192 PROC
940 gen_aes_ctr_func 12
941 intel_aes_encrypt_ctr_192 ENDP
942
943 ALIGN 16
944 intel_aes_encrypt_ctr_256 PROC
945 gen_aes_ctr_func 14
946 intel_aes_encrypt_ctr_256 ENDP
947
948
949 END
OLDNEW
« no previous file with comments | « nss/lib/freebl/intel-aes-x64-masm.asm ('k') | nss/lib/freebl/intel-gcm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698