Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(251)

Side by Side Diff: nss/lib/freebl/intel-aes-x64-masm.asm

Issue 214183004: Implement AES in different modes of operation, using AES-NI and (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss.git@master
Patch Set: Remove an assertion. ctr->cipher doesn't set *outlen. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « nss/lib/freebl/intel-aes.h ('k') | nss/lib/freebl/intel-aes-x86-masm.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ; LICENSE:
2 ; This submission to NSS is to be made available under the terms of the
3 ; Mozilla Public License, v. 2.0. You can obtain one at
4 ; http://mozilla.org/MPL/2.0/.
5 ;###############################################################################
6 ; Copyright(c) 2014, Intel Corp.
7 ; Developers and authors:
8 ; Shay Gueron and Vlad Krasnov
9 ; Intel Corporation, Israel Development Centre, Haifa, Israel
10 ; Please send feedback directly to crypto.feedback.alias@intel.com
11
12
; Constant vectors for the key-expansion routines. Each is 16 bytes made of four
; identical dwords, and the table starts on a 16-byte boundary.
13 .DATA
14 ALIGN 16
; pshufb control: every dword of the result receives source bytes 13,14,15,12.
15 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
; pshufb control: every dword of the result receives source bytes 5,6,7,4.
16 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
; Same value as Lmask; loaded by intel_aes_encrypt_init_256.
17 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
; Starting round-constant vector (1 in each dword); the init routines shift it
; left by one bit after each use.
18 Lcon1 dd 1,1,1,1
; Round-constant vector 1bh; loaded by intel_aes_encrypt_init_128 once its
; eight-iteration loop has finished.
19 Lcon2 dd 1bh,1bh,1bh,1bh
20
21 .CODE
22
; Register aliases used by the ECB, CBC and CTR bodies.
; NOTE(review): rcx/rdx/r8/r9 coincide with the Microsoft x64 argument registers,
; but every routine overwrites `input` and `inputLen` with values loaded from the
; stack before using them - confirm against the C prototypes.
23 ctx textequ <rcx>
24 output textequ <rdx>
25 input textequ <r8>
26 inputLen textequ <r9d>
27
28
; aes_rnd i
;   Loads the 16-byte round key at [ctx + i*16] into xmm8 and applies one aesenc
;   with it to each of xmm0..xmm7. Overwrites xmm8.
29 aes_rnd MACRO i
30 movdqu xmm8, [i*16 + ctx]
31 aesenc xmm0, xmm8
32 aesenc xmm1, xmm8
33 aesenc xmm2, xmm8
34 aesenc xmm3, xmm8
35 aesenc xmm4, xmm8
36 aesenc xmm5, xmm8
37 aesenc xmm6, xmm8
38 aesenc xmm7, xmm8
39 ENDM
40
; aes_last_rnd i
;   Loads the 16-byte round key at [ctx + i*16] into xmm8 and applies aesenclast
;   with it to each of xmm0..xmm7. Overwrites xmm8.
41 aes_last_rnd MACRO i
42 movdqu xmm8, [i*16 + ctx]
43 aesenclast xmm0, xmm8
44 aesenclast xmm1, xmm8
45 aesenclast xmm2, xmm8
46 aesenclast xmm3, xmm8
47 aesenclast xmm4, xmm8
48 aesenclast xmm5, xmm8
49 aesenclast xmm6, xmm8
50 aesenclast xmm7, xmm8
51 ENDM
52
; aes_dec_rnd i
;   Loads the 16-byte round key at [ctx + i*16] into xmm8 and applies one aesdec
;   with it to each of xmm0..xmm7. Overwrites xmm8.
53 aes_dec_rnd MACRO i
54 movdqu xmm8, [i*16 + ctx]
55 aesdec xmm0, xmm8
56 aesdec xmm1, xmm8
57 aesdec xmm2, xmm8
58 aesdec xmm3, xmm8
59 aesdec xmm4, xmm8
60 aesdec xmm5, xmm8
61 aesdec xmm6, xmm8
62 aesdec xmm7, xmm8
63 ENDM
64
; aes_dec_last_rnd i
;   Loads the 16-byte round key at [ctx + i*16] into xmm8 and applies aesdeclast
;   with it to each of xmm0..xmm7. Overwrites xmm8.
65 aes_dec_last_rnd MACRO i
66 movdqu xmm8, [i*16 + ctx]
67 aesdeclast xmm0, xmm8
68 aesdeclast xmm1, xmm8
69 aesdeclast xmm2, xmm8
70 aesdeclast xmm3, xmm8
71 aesdeclast xmm4, xmm8
72 aesdeclast xmm5, xmm8
73 aesdeclast xmm6, xmm8
74 aesdeclast xmm7, xmm8
75 ENDM
76
77
; gen_aes_ecb_func enc, rnds
;   Expands to the complete body of an ECB routine, including the final ret.
;     enc  - 1 selects aesenc/aesenclast; any other value selects aesdec/aesdeclast.
;     rnds - number of AES rounds (10, 12 or 14 in this file's instantiations).
;   Inputs taken from registers: ctx (rcx) and output (rdx).
;   `input` and `inputLen` are loaded from [rsp+40] and [rsp+48]; this assumes they
;   are the fifth and sixth arguments of a Microsoft x64 call - TODO confirm
;   against the C prototype.
;   Only whole 16-byte blocks are processed; a trailing partial block is ignored.
;   rax is always 0 on return. xmm6-xmm8 are saved on the stack and restored.
78 gen_aes_ecb_func MACRO enc, rnds
79
80 LOCAL loop8
81 LOCAL loop1
82 LOCAL bail
83
; The xor result is overwritten by the load of inputLen two lines below.
84 xor inputLen, inputLen
85 mov input, [rsp + 1*8 + 8*4]
86 mov inputLen, [rsp + 1*8 + 8*5]
87
; Reserve 48 bytes and spill xmm6-xmm8, which are restored before returning.
88 sub rsp, 3*16
89
90 movdqu [rsp + 0*16], xmm6
91 movdqu [rsp + 1*16], xmm7
92 movdqu [rsp + 2*16], xmm8
93
; Round keys are addressed relative to ctx+48 from here on (presumably the
; offset of the key schedule inside the context structure - verify).
94 lea ctx, [48+ctx]
95
; Main loop: eight blocks (128 bytes) per iteration while at least 128 bytes remain.
96 loop8:
97 cmp inputLen, 8*16
98 jb loop1
99
100 movdqu xmm0, [0*16 + input]
101 movdqu xmm1, [1*16 + input]
102 movdqu xmm2, [2*16 + input]
103 movdqu xmm3, [3*16 + input]
104 movdqu xmm4, [4*16 + input]
105 movdqu xmm5, [5*16 + input]
106 movdqu xmm6, [6*16 + input]
107 movdqu xmm7, [7*16 + input]
108
; XOR round key 0 into all eight blocks.
109 movdqu xmm8, [0*16 + ctx]
110 pxor xmm0, xmm8
111 pxor xmm1, xmm8
112 pxor xmm2, xmm8
113 pxor xmm3, xmm8
114 pxor xmm4, xmm8
115 pxor xmm5, xmm8
116 pxor xmm6, xmm8
117 pxor xmm7, xmm8
118
; Bind the round macros and single-block instructions to the requested direction.
119 IF enc eq 1
120 rnd textequ <aes_rnd>
121 lastrnd textequ <aes_last_rnd>
122 aesinst textequ <aesenc>
123 aeslastinst textequ <aesenclast>
124 ELSE
125 rnd textequ <aes_dec_rnd>
126 lastrnd textequ <aes_dec_last_rnd>
127 aesinst textequ <aesdec>
128 aeslastinst textequ <aesdeclast>
129 ENDIF
130
; Assembly-time unrolled rounds 1..rnds-1, then the final round with key `rnds`.
131 i = 1
132 WHILE i LT rnds
133 rnd i
134 i = i+1
135 ENDM
136 lastrnd rnds
137
138 movdqu [0*16 + output], xmm0
139 movdqu [1*16 + output], xmm1
140 movdqu [2*16 + output], xmm2
141 movdqu [3*16 + output], xmm3
142 movdqu [4*16 + output], xmm4
143 movdqu [5*16 + output], xmm5
144 movdqu [6*16 + output], xmm6
145 movdqu [7*16 + output], xmm7
146
147 lea input, [8*16 + input]
148 lea output, [8*16 + output]
149 sub inputLen, 8*16
150 jmp loop8
151
; Tail loop: one block per iteration while at least 16 bytes remain.
; xmm7 is used as the round-key scratch register here.
152 loop1:
153 cmp inputLen, 1*16
154 jb bail
155
156 movdqu xmm0, [input]
157 movdqu xmm7, [0*16 + ctx]
158 pxor xmm0, xmm7
159
160 i = 1
161 WHILE i LT rnds
162 movdqu xmm7, [i*16 + ctx]
163 aesinst xmm0, xmm7
164 i = i+1
165 ENDM
166 movdqu xmm7, [rnds*16 + ctx]
167 aeslastinst xmm0, xmm7
168
169 movdqu [output], xmm0
170
171 lea input, [1*16 + input]
172 lea output, [1*16 + output]
173 sub inputLen, 1*16
174 jmp loop1
175
; Exit: return 0 and restore the spilled registers and stack pointer.
176 bail:
177 xor rax, rax
178
179 movdqu xmm6, [rsp + 0*16]
180 movdqu xmm7, [rsp + 1*16]
181 movdqu xmm8, [rsp + 2*16]
182 add rsp, 3*16
183 ret
184 ENDM
185
; ECB encryption, 10 rounds: body generated by gen_aes_ecb_func with enc=1.
186 intel_aes_encrypt_ecb_128 PROC
187 gen_aes_ecb_func 1, 10
188 intel_aes_encrypt_ecb_128 ENDP
189
; ECB encryption, 12 rounds: body generated by gen_aes_ecb_func with enc=1.
190 intel_aes_encrypt_ecb_192 PROC
191 gen_aes_ecb_func 1, 12
192 intel_aes_encrypt_ecb_192 ENDP
193
; ECB encryption, 14 rounds: body generated by gen_aes_ecb_func with enc=1.
194 intel_aes_encrypt_ecb_256 PROC
195 gen_aes_ecb_func 1, 14
196 intel_aes_encrypt_ecb_256 ENDP
197
; ECB decryption, 10 rounds: body generated by gen_aes_ecb_func with enc=0.
198 intel_aes_decrypt_ecb_128 PROC
199 gen_aes_ecb_func 0, 10
200 intel_aes_decrypt_ecb_128 ENDP
201
; ECB decryption, 12 rounds: body generated by gen_aes_ecb_func with enc=0.
202 intel_aes_decrypt_ecb_192 PROC
203 gen_aes_ecb_func 0, 12
204 intel_aes_decrypt_ecb_192 ENDP
205
; ECB decryption, 14 rounds: body generated by gen_aes_ecb_func with enc=0.
206 intel_aes_decrypt_ecb_256 PROC
207 gen_aes_ecb_func 0, 14
208 intel_aes_decrypt_ecb_256 ENDP
209
210
; Register aliases for the key-schedule (init) routines:
;   KEY - pointer the cipher key is read from,
;   KS  - pointer the round keys are written to,
;   ITR - scratch register (constant-table address, then loop counter).
211 KEY textequ <rcx>
212 KS textequ <rdx>
213 ITR textequ <r8>
214
; intel_aes_encrypt_init_128
;   Expands a 16-byte key into eleven 16-byte round keys.
;   In:  KEY (rcx) -> 16 key bytes; KS (rdx) -> output buffer (176 bytes are written).
;   Modifies rdx (advanced by 128), r8, xmm0-xmm4 and the flags. rax is not set.
215 intel_aes_encrypt_init_128 PROC
216
; Round key 0 is the cipher key itself; xmm1 holds the most recent round key.
217 movdqu xmm1, [KEY]
218 movdqu [KS], xmm1
219 movdqa xmm2, xmm1
220
; xmm0 = round-constant vector, xmm4 = pshufb control.
221 lea ITR, Lcon1
222 movdqa xmm0, [ITR]
223 lea ITR, Lmask
224 movdqa xmm4, [ITR]
225
226 mov ITR, 8
227
; Eight iterations produce round keys 1..8.
228 Lenc_128_ks_loop:
229 lea KS, [16 + KS]
; dec sets ZF here; the jne at the bottom of the loop consumes it, so the
; instructions in between must leave the flags untouched.
230 dec ITR
231
232 pshufb xmm2, xmm4
233 aesenclast xmm2, xmm0
; Double the round constant for the next iteration.
234 pslld xmm0, 1
; XOR the previous round key with itself shifted left by 4, 8 and 12 bytes,
; then with the transformed word in xmm2.
235 movdqa xmm3, xmm1
236 pslldq xmm3, 4
237 pxor xmm1, xmm3
238 pslldq xmm3, 4
239 pxor xmm1, xmm3
240 pslldq xmm3, 4
241 pxor xmm1, xmm3
242 pxor xmm1, xmm2
243 movdqu [KS], xmm1
244 movdqa xmm2, xmm1
245
246 jne Lenc_128_ks_loop
247
; Round keys 9 and 10 use the constant from Lcon2 (1bh), then its double.
248 lea ITR, Lcon2
249 movdqa xmm0, [ITR]
250
251 pshufb xmm2, xmm4
252 aesenclast xmm2, xmm0
253 pslld xmm0, 1
254 movdqa xmm3, xmm1
255 pslldq xmm3, 4
256 pxor xmm1, xmm3
257 pslldq xmm3, 4
258 pxor xmm1, xmm3
259 pslldq xmm3, 4
260 pxor xmm1, xmm3
261 pxor xmm1, xmm2
; KS still points at round key 8, so +16 is round key 9.
262 movdqu [16 + KS], xmm1
263 movdqa xmm2, xmm1
264
265 pshufb xmm2, xmm4
266 aesenclast xmm2, xmm0
267 movdqa xmm3, xmm1
268 pslldq xmm3, 4
269 pxor xmm1, xmm3
270 pslldq xmm3, 4
271 pxor xmm1, xmm3
272 pslldq xmm3, 4
273 pxor xmm1, xmm3
274 pxor xmm1, xmm2
; +32 from round key 8 is round key 10, the last one.
275 movdqu [32 + KS], xmm1
276 movdqa xmm2, xmm1
277
278 ret
279 intel_aes_encrypt_init_128 ENDP
280
281
; intel_aes_decrypt_init_128
;   Builds the decryption key schedule in place at KS: runs
;   intel_aes_encrypt_init_128, then reverses the order of the eleven round keys
;   and applies aesimc to every key except the two outermost ones.
;   In:  KEY (rcx), KS (rdx) - same meaning as for intel_aes_encrypt_init_128.
282 intel_aes_decrypt_init_128 PROC
283
; The callee advances KS, so both pointers are preserved across the call.
284 push KS
285 push KEY
286
287 call intel_aes_encrypt_init_128
288
289 pop KEY
290 pop KS
291
; Swap round keys 0 and 10 unchanged.
292 movdqu xmm0, [0*16 + KS]
293 movdqu xmm1, [10*16 + KS]
294 movdqu [10*16 + KS], xmm0
295 movdqu [0*16 + KS], xmm1
296
; Swap pairs (1,9), (2,8), (3,7), (4,6), applying aesimc to each key.
297 i = 1
298 WHILE i LT 5
299 movdqu xmm0, [i*16 + KS]
300 movdqu xmm1, [(10-i)*16 + KS]
301
302 aesimc xmm0, xmm0
303 aesimc xmm1, xmm1
304
305 movdqu [(10-i)*16 + KS], xmm0
306 movdqu [i*16 + KS], xmm1
307
308 i = i+1
309 ENDM
310
; The middle key (5) keeps its position and only gets aesimc applied.
311 movdqu xmm0, [5*16 + KS]
312 aesimc xmm0, xmm0
313 movdqu [5*16 + KS], xmm0
314 ret
315 intel_aes_decrypt_init_128 ENDP
316
317
; intel_aes_encrypt_init_192
;   Expands a 24-byte key into the round-key schedule for the 12-round cipher.
;   In:  KEY (rcx) -> 24 key bytes (16 read at KEY, 8 read at KEY+16);
;        KS (rdx)  -> output buffer.
;   Modifies rdx (advanced by 192), r8, xmm0-xmm5 and the flags; xmm6 and xmm7
;   are spilled to the stack and restored.
;   NOTE(review): the store after the loop lands at offset 208 of the schedule,
;   so bytes 0..223 of the buffer are written - confirm the buffer is that large.
318 intel_aes_encrypt_init_192 PROC
319
320 sub rsp, 16*2
321 movdqu [16*0 + rsp], xmm6
322 movdqu [16*1 + rsp], xmm7
323
; xmm1 = first 16 key bytes; xmm3 = remaining 8 key bytes, moved in through r8.
324 movdqu xmm1, [KEY]
325 mov ITR, [16 + KEY]
326 movd xmm3, ITR
327
328 movdqu [KS], xmm1
329 movdqa xmm5, xmm3
330
; xmm0 = round-constant vector, xmm4 = pshufb control.
331 lea ITR, Lcon1
332 movdqu xmm0, [ITR]
333 lea ITR, Lmask192
334 movdqu xmm4, [ITR]
335
336 mov ITR, 4
337
; Each of the four iterations runs two expansion steps and stores three
; 16-byte round keys (at KS+16, KS+32 and KS+48), then advances KS by 48.
338 Lenc_192_ks_loop:
339 movdqa xmm2, xmm3
340 pshufb xmm2, xmm4
341 aesenclast xmm2, xmm0
342 pslld xmm0, 1
343
344 movdqa xmm6, xmm1
345 movdqa xmm7, xmm3
346 pslldq xmm6, 4
347 pslldq xmm7, 4
348 pxor xmm1, xmm6
349 pxor xmm3, xmm7
350 pslldq xmm6, 4
351 pxor xmm1, xmm6
352 pslldq xmm6, 4
353 pxor xmm1, xmm6
354 pxor xmm1, xmm2
; Broadcast the top dword of the new xmm1 and fold it into xmm3.
355 pshufd xmm2, xmm1, 0ffh
356 pxor xmm3, xmm2
357
; Repack the 24-byte expansion state into 16-byte round keys:
; xmm5 = low qword of old xmm5 : low qword of xmm1,
; xmm6 = high qword of xmm1    : low qword of xmm3.
358 movdqa xmm6, xmm1
359 shufpd xmm5, xmm1, 00h
360 shufpd xmm6, xmm3, 01h
361
362 movdqu [16 + KS], xmm5
363 movdqu [32 + KS], xmm6
364
; Second expansion step of this iteration.
365 movdqa xmm2, xmm3
366 pshufb xmm2, xmm4
367 aesenclast xmm2, xmm0
368 pslld xmm0, 1
369
370 movdqa xmm6, xmm1
371 movdqa xmm7, xmm3
372 pslldq xmm6, 4
373 pslldq xmm7, 4
374 pxor xmm1, xmm6
375 pxor xmm3, xmm7
376 pslldq xmm6, 4
377 pxor xmm1, xmm6
378 pslldq xmm6, 4
379 pxor xmm1, xmm6
380 pxor xmm1, xmm2
381 pshufd xmm2, xmm1, 0ffh
382 pxor xmm3, xmm2
383
384 movdqu [48 + KS], xmm1
; Carry xmm3 into the next iteration's repacking step.
385 movdqa xmm5, xmm3
386
387 lea KS, [48 + KS]
388
389 dec ITR
390 jnz Lenc_192_ks_loop
391
; KS is now the original pointer + 192, so this store covers bytes 208..223.
392 movdqu [16 + KS], xmm5
393
394 movdqu xmm7, [16*1 + rsp]
395 movdqu xmm6, [16*0 + rsp]
396 add rsp, 16*2
397 ret
398 intel_aes_encrypt_init_192 ENDP
399
; intel_aes_decrypt_init_192
;   Builds the decryption key schedule in place at KS: runs
;   intel_aes_encrypt_init_192, then reverses the order of round keys 0..12 and
;   applies aesimc to every key except the two outermost ones.
;   In:  KEY (rcx), KS (rdx) - same meaning as for intel_aes_encrypt_init_192.
400 intel_aes_decrypt_init_192 PROC
; The callee advances KS, so both pointers are preserved across the call.
401 push KS
402 push KEY
403
404 call intel_aes_encrypt_init_192
405
406 pop KEY
407 pop KS
408
; Swap round keys 0 and 12 unchanged.
409 movdqu xmm0, [0*16 + KS]
410 movdqu xmm1, [12*16 + KS]
411 movdqu [12*16 + KS], xmm0
412 movdqu [0*16 + KS], xmm1
413
; Swap pairs (1,11) .. (5,7), applying aesimc to each key.
414 i = 1
415 WHILE i LT 6
416 movdqu xmm0, [i*16 + KS]
417 movdqu xmm1, [(12-i)*16 + KS]
418
419 aesimc xmm0, xmm0
420 aesimc xmm1, xmm1
421
422 movdqu [(12-i)*16 + KS], xmm0
423 movdqu [i*16 + KS], xmm1
424
425 i = i+1
426 ENDM
427
; The middle key (6) keeps its position and only gets aesimc applied.
428 movdqu xmm0, [6*16 + KS]
429 aesimc xmm0, xmm0
430 movdqu [6*16 + KS], xmm0
431 ret
432 intel_aes_decrypt_init_192 ENDP
433
434
; intel_aes_encrypt_init_256
;   Expands a 32-byte key into fifteen 16-byte round keys.
;   In:  KEY (rcx) -> 32 key bytes; KS (rdx) -> output buffer (240 bytes are written).
;   Modifies rdx (advanced by 192), r8, xmm0-xmm5 and the flags. xmm6 and xmm7 are
;   spilled and restored; xmm7 is not otherwise referenced by this routine.
435 intel_aes_encrypt_init_256 PROC
436 sub rsp, 16*2
437 movdqu [16*0 + rsp], xmm6
438 movdqu [16*1 + rsp], xmm7
439
; Round keys 0 and 1 are the two halves of the cipher key.
440 movdqu xmm1, [16*0 + KEY]
441 movdqu xmm3, [16*1 + KEY]
442
443 movdqu [16*0 + KS], xmm1
444 movdqu [16*1 + KS], xmm3
445
; xmm0 = round-constant vector, xmm5 = pshufb control.
446 lea ITR, Lcon1
447 movdqu xmm0, [ITR]
448 lea ITR, Lmask256
449 movdqu xmm5, [ITR]
450
; xmm6 = 0, used as the aesenclast key in the second half-step below.
451 pxor xmm6, xmm6
452
453 mov ITR, 6
454
; Each of the six iterations produces two round keys (at KS+32 and KS+48),
; then advances KS by 32.
455 Lenc_256_ks_loop:
456
; Even round key: derived from xmm1 and a transformed word of xmm3, with the
; round constant applied.
457 movdqa xmm2, xmm3
458 pshufb xmm2, xmm5
459 aesenclast xmm2, xmm0
460 pslld xmm0, 1
461 movdqa xmm4, xmm1
462 pslldq xmm4, 4
463 pxor xmm1, xmm4
464 pslldq xmm4, 4
465 pxor xmm1, xmm4
466 pslldq xmm4, 4
467 pxor xmm1, xmm4
468 pxor xmm1, xmm2
469 movdqu [16*2 + KS], xmm1
470
; Odd round key: top dword of the new xmm1 is broadcast and passed through
; aesenclast with an all-zero key, so no round constant is added here.
471 pshufd xmm2, xmm1, 0ffh
472 aesenclast xmm2, xmm6
473 movdqa xmm4, xmm3
474 pslldq xmm4, 4
475 pxor xmm3, xmm4
476 pslldq xmm4, 4
477 pxor xmm3, xmm4
478 pslldq xmm4, 4
479 pxor xmm3, xmm4
480 pxor xmm3, xmm2
481 movdqu [16*3 + KS], xmm3
482
483 lea KS, [32 + KS]
484 dec ITR
485 jnz Lenc_256_ks_loop
486
; Final (15th) round key: one more even-key step, stored at original KS + 224.
487 movdqa xmm2, xmm3
488 pshufb xmm2, xmm5
489 aesenclast xmm2, xmm0
490 movdqa xmm4, xmm1
491 pslldq xmm4, 4
492 pxor xmm1, xmm4
493 pslldq xmm4, 4
494 pxor xmm1, xmm4
495 pslldq xmm4, 4
496 pxor xmm1, xmm4
497 pxor xmm1, xmm2
498 movdqu [16*2 + KS], xmm1
499
500 movdqu xmm7, [16*1 + rsp]
501 movdqu xmm6, [16*0 + rsp]
502 add rsp, 16*2
503 ret
504
505 intel_aes_encrypt_init_256 ENDP
506
507
; intel_aes_decrypt_init_256
;   Builds the decryption key schedule in place at KS: runs
;   intel_aes_encrypt_init_256, then reverses the order of the fifteen round keys
;   and applies aesimc to every key except the two outermost ones.
;   In:  KEY (rcx), KS (rdx) - same meaning as for intel_aes_encrypt_init_256.
508 intel_aes_decrypt_init_256 PROC
; The callee advances KS, so both pointers are preserved across the call.
509 push KS
510 push KEY
511
512 call intel_aes_encrypt_init_256
513
514 pop KEY
515 pop KS
516
; Swap round keys 0 and 14 unchanged.
517 movdqu xmm0, [0*16 + KS]
518 movdqu xmm1, [14*16 + KS]
519 movdqu [14*16 + KS], xmm0
520 movdqu [0*16 + KS], xmm1
521
; Swap pairs (1,13) .. (6,8), applying aesimc to each key.
522 i = 1
523 WHILE i LT 7
524 movdqu xmm0, [i*16 + KS]
525 movdqu xmm1, [(14-i)*16 + KS]
526
527 aesimc xmm0, xmm0
528 aesimc xmm1, xmm1
529
530 movdqu [(14-i)*16 + KS], xmm0
531 movdqu [i*16 + KS], xmm1
532
533 i = i+1
534 ENDM
535
; The middle key (7) keeps its position and only gets aesimc applied.
536 movdqu xmm0, [7*16 + KS]
537 aesimc xmm0, xmm0
538 movdqu [7*16 + KS], xmm0
539 ret
540 intel_aes_decrypt_init_256 ENDP
541
542
543
; gen_aes_cbc_enc_func rnds
;   Expands to the complete body of a CBC encryption routine, including the ret.
;     rnds - number of AES rounds (10, 12 or 14 in this file's instantiations).
;   Inputs taken from registers: ctx (rcx) and output (rdx).
;   `input` and `inputLen` are loaded from [rsp+40] and [rsp+48]; this assumes they
;   are the fifth and sixth arguments of a Microsoft x64 call - TODO confirm.
;   The chaining value is read from, and written back to, offset 16 of the
;   original ctx pointer. Blocks are processed strictly one at a time.
;   Only whole 16-byte blocks are processed. rax is always 0 on return.
;   xmm6-xmm8 are saved on the stack and restored.
544 gen_aes_cbc_enc_func MACRO rnds
545
546 LOCAL loop1
547 LOCAL bail
548
549 mov input, [rsp + 1*8 + 8*4]
550 mov inputLen, [rsp + 1*8 + 8*5]
551
552 sub rsp, 3*16
553
554 movdqu [rsp + 0*16], xmm6
555 movdqu [rsp + 1*16], xmm7
556 movdqu [rsp + 2*16], xmm8
557
; Round keys are addressed relative to ctx+48 from here on.
558 lea ctx, [48+ctx]
559
; xmm0 = current chaining value (original ctx + 16).
560 movdqu xmm0, [-32+ctx]
561
; Keep round keys 0..5 resident in xmm2..xmm7 for the whole loop.
562 movdqu xmm2, [0*16 + ctx]
563 movdqu xmm3, [1*16 + ctx]
564 movdqu xmm4, [2*16 + ctx]
565 movdqu xmm5, [3*16 + ctx]
566 movdqu xmm6, [4*16 + ctx]
567 movdqu xmm7, [5*16 + ctx]
568
569 loop1:
570 cmp inputLen, 1*16
571 jb bail
572
; xmm0 = previous ciphertext XOR plaintext XOR round key 0.
573 movdqu xmm1, [input]
574 pxor xmm1, xmm2
575 pxor xmm0, xmm1
576
; Rounds 1..5 with the resident keys.
577 aesenc xmm0, xmm3
578 aesenc xmm0, xmm4
579 aesenc xmm0, xmm5
580 aesenc xmm0, xmm6
581 aesenc xmm0, xmm7
582
; Rounds 6..rnds-1 and the final round load their keys through xmm8.
583 i = 6
584 WHILE i LT rnds
585 movdqu xmm8, [i*16 + ctx]
586 aesenc xmm0, xmm8
587 i = i+1
588 ENDM
589 movdqu xmm8, [rnds*16 + ctx]
590 aesenclast xmm0, xmm8
591
; xmm0 is both the output block and the chaining value for the next block.
592 movdqu [output], xmm0
593
594 lea input, [1*16 + input]
595 lea output, [1*16 + output]
596 sub inputLen, 1*16
597 jmp loop1
598
599 bail:
; Persist the last ciphertext block as the new chaining value.
600 movdqu [-32+ctx], xmm0
601
602 xor rax, rax
603
604 movdqu xmm6, [rsp + 0*16]
605 movdqu xmm7, [rsp + 1*16]
606 movdqu xmm8, [rsp + 2*16]
607 add rsp, 3*16
608 ret
609
610 ENDM
611
; gen_aes_cbc_dec_func rnds
;   Expands to the complete body of a CBC decryption routine, including the ret.
;     rnds - number of AES rounds (10, 12 or 14 in this file's instantiations).
;   Inputs taken from registers: ctx (rcx) and output (rdx).
;   `input` and `inputLen` are loaded from [rsp+40] and [rsp+48]; this assumes they
;   are the fifth and sixth arguments of a Microsoft x64 call - TODO confirm.
;   The chaining value lives at offset 16 of the original ctx pointer and is
;   updated to the last ciphertext block consumed.
;   Only whole 16-byte blocks are processed. rax is always 0 on return.
;   xmm6-xmm8 are saved on the stack and restored.
612 gen_aes_cbc_dec_func MACRO rnds
613
614 LOCAL loop8
615 LOCAL loop1
616 LOCAL dec1
617 LOCAL bail
618
619 mov input, [rsp + 1*8 + 8*4]
620 mov inputLen, [rsp + 1*8 + 8*5]
621
622 sub rsp, 3*16
623
624 movdqu [rsp + 0*16], xmm6
625 movdqu [rsp + 1*16], xmm7
626 movdqu [rsp + 2*16], xmm8
627
; Round keys are addressed relative to ctx+48 from here on.
628 lea ctx, [48+ctx]
629
; Main loop: eight blocks (128 bytes) per iteration while at least 128 bytes remain.
630 loop8:
631 cmp inputLen, 8*16
632 jb dec1
633
634 movdqu xmm0, [0*16 + input]
635 movdqu xmm1, [1*16 + input]
636 movdqu xmm2, [2*16 + input]
637 movdqu xmm3, [3*16 + input]
638 movdqu xmm4, [4*16 + input]
639 movdqu xmm5, [5*16 + input]
640 movdqu xmm6, [6*16 + input]
641 movdqu xmm7, [7*16 + input]
642
; XOR round key 0 into all eight blocks.
643 movdqu xmm8, [0*16 + ctx]
644 pxor xmm0, xmm8
645 pxor xmm1, xmm8
646 pxor xmm2, xmm8
647 pxor xmm3, xmm8
648 pxor xmm4, xmm8
649 pxor xmm5, xmm8
650 pxor xmm6, xmm8
651 pxor xmm7, xmm8
652
653 i = 1
654 WHILE i LT rnds
655 aes_dec_rnd i
656 i = i+1
657 ENDM
658 aes_dec_last_rnd rnds
659
; Undo the chaining: block 0 is XORed with the stored chaining value, block k
; with ciphertext block k-1. The ciphertext is re-read from `input` before
; anything is stored to `output`.
; NOTE(review): presumably so that input and output may alias - confirm.
660 movdqu xmm8, [-32 + ctx]
661 pxor xmm0, xmm8
662 movdqu xmm8, [0*16 + input]
663 pxor xmm1, xmm8
664 movdqu xmm8, [1*16 + input]
665 pxor xmm2, xmm8
666 movdqu xmm8, [2*16 + input]
667 pxor xmm3, xmm8
668 movdqu xmm8, [3*16 + input]
669 pxor xmm4, xmm8
670 movdqu xmm8, [4*16 + input]
671 pxor xmm5, xmm8
672 movdqu xmm8, [5*16 + input]
673 pxor xmm6, xmm8
674 movdqu xmm8, [6*16 + input]
675 pxor xmm7, xmm8
; xmm8 = last ciphertext block of this batch; becomes the new chaining value.
676 movdqu xmm8, [7*16 + input]
677
678 movdqu [0*16 + output], xmm0
679 movdqu [1*16 + output], xmm1
680 movdqu [2*16 + output], xmm2
681 movdqu [3*16 + output], xmm3
682 movdqu [4*16 + output], xmm4
683 movdqu [5*16 + output], xmm5
684 movdqu [6*16 + output], xmm6
685 movdqu [7*16 + output], xmm7
686 movdqu [-32 + ctx], xmm8
687
688 lea input, [8*16 + input]
689 lea output, [8*16 + output]
690 sub inputLen, 8*16
691 jmp loop8
692 dec1:
693
; Tail: xmm3 carries the chaining value across single-block iterations.
694 movdqu xmm3, [-32 + ctx]
695
696 loop1:
697 cmp inputLen, 1*16
698 jb bail
699
; Keep a copy of the ciphertext in xmm4; it is the next chaining value.
700 movdqu xmm0, [input]
701 movdqa xmm4, xmm0
702 movdqu xmm7, [0*16 + ctx]
703 pxor xmm0, xmm7
704
705 i = 1
706 WHILE i LT rnds
707 movdqu xmm7, [i*16 + ctx]
708 aesdec xmm0, xmm7
709 i = i+1
710 ENDM
711 movdqu xmm7, [rnds*16 + ctx]
712 aesdeclast xmm0, xmm7
; Plaintext = decrypted block XOR chaining value.
713 pxor xmm3, xmm0
714
715 movdqu [output], xmm3
716 movdqa xmm3, xmm4
717
718 lea input, [1*16 + input]
719 lea output, [1*16 + output]
720 sub inputLen, 1*16
721 jmp loop1
722
723 bail:
; Persist the chaining value for the next call.
724 movdqu [-32 + ctx], xmm3
725 xor rax, rax
726
727 movdqu xmm6, [rsp + 0*16]
728 movdqu xmm7, [rsp + 1*16]
729 movdqu xmm8, [rsp + 2*16]
730 add rsp, 3*16
731 ret
732 ENDM
733
; CBC encryption, 10 rounds: body generated by gen_aes_cbc_enc_func.
734 intel_aes_encrypt_cbc_128 PROC
735 gen_aes_cbc_enc_func 10
736 intel_aes_encrypt_cbc_128 ENDP
737
; CBC encryption, 12 rounds: body generated by gen_aes_cbc_enc_func.
738 intel_aes_encrypt_cbc_192 PROC
739 gen_aes_cbc_enc_func 12
740 intel_aes_encrypt_cbc_192 ENDP
741
; CBC encryption, 14 rounds: body generated by gen_aes_cbc_enc_func.
742 intel_aes_encrypt_cbc_256 PROC
743 gen_aes_cbc_enc_func 14
744 intel_aes_encrypt_cbc_256 ENDP
745
; CBC decryption, 10 rounds: body generated by gen_aes_cbc_dec_func.
746 intel_aes_decrypt_cbc_128 PROC
747 gen_aes_cbc_dec_func 10
748 intel_aes_decrypt_cbc_128 ENDP
749
; CBC decryption, 12 rounds: body generated by gen_aes_cbc_dec_func.
750 intel_aes_decrypt_cbc_192 PROC
751 gen_aes_cbc_dec_func 12
752 intel_aes_decrypt_cbc_192 ENDP
753
; CBC decryption, 14 rounds: body generated by gen_aes_cbc_dec_func.
754 intel_aes_decrypt_cbc_256 PROC
755 gen_aes_cbc_dec_func 14
756 intel_aes_decrypt_cbc_256 ENDP
757
758
759
; Register aliases for the CTR routine:
;   ctrCtx  - copy of the incoming first argument (the CTR-mode context pointer),
;   CTR     - scratch for the byte-swapped, key-masked counter dword,
;   CTRSave - running counter value kept in host byte order.
760 ctrCtx textequ <r10>
761 CTR textequ <r11d>
762 CTRSave textequ <eax>
763
; gen_aes_ctr_func rnds
;   Expands to the complete body of a CTR-mode routine, including the ret.
;     rnds - number of AES rounds (10, 12 or 14 in this file's instantiations).
;          The unrolled round loop below requires rnds > 8.
;   Inputs taken from registers: rcx = CTR context pointer, output (rdx).
;   `input` and `inputLen` are loaded from [rsp+40] and [rsp+48]; this assumes they
;   are the fifth and sixth arguments of a Microsoft x64 call - TODO confirm.
;   The CTR context is read as: pointer at offset 8 (to a cipher context whose
;   round keys start at +48) and a 16-byte counter block at offset 16.
;   Only the last dword of the counter block is incremented, as a byte-swapped
;   32-bit value; nothing here carries into the other twelve bytes.
;   NOTE(review): a wrap of that 32-bit counter is not handled in this routine -
;   confirm the caller bounds the number of blocks accordingly.
;   Only whole 16-byte blocks are processed. rax is always 0 on return.
;   Uses r10, r11 and rbp (saved and restored); xmm6-xmm8 are saved and restored.
764 gen_aes_ctr_func MACRO rnds
765
766 LOCAL loop8
767 LOCAL loop1
768 LOCAL enc1
769 LOCAL bail
770
771 mov input, [rsp + 8*1 + 4*8]
772 mov inputLen, [rsp + 8*1 + 5*8]
773
; Keep the CTR context in r10; ctx becomes the round-key base (inner ctx + 48).
774 mov ctrCtx, ctx
775 mov ctx, [8+ctrCtx]
776 lea ctx, [48+ctx]
777
778 sub rsp, 3*16
779 movdqu [rsp + 0*16], xmm6
780 movdqu [rsp + 1*16], xmm7
781 movdqu [rsp + 2*16], xmm8
782
783
; Carve out a 16-byte-aligned, 128-byte scratch area for eight counter blocks;
; rbp remembers the pre-alignment stack pointer.
784 push rbp
785 mov rbp, rsp
786 sub rsp, 8*16
787 and rsp, -16
788
789
; xmm0 = counter block XOR round key 0; CTRSave = counter dword in host order.
790 movdqu xmm0, [16+ctrCtx]
791 mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
792 bswap CTRSave
793 movdqu xmm1, [ctx + 0*16]
794
795 pxor xmm0, xmm1
796
; Fill all eight slots with the key-masked block; slots 1..7 then have their
; last dword patched with counter+1 .. counter+7.
797 movdqa [rsp + 0*16], xmm0
798 movdqa [rsp + 1*16], xmm0
799 movdqa [rsp + 2*16], xmm0
800 movdqa [rsp + 3*16], xmm0
801 movdqa [rsp + 4*16], xmm0
802 movdqa [rsp + 5*16], xmm0
803 movdqa [rsp + 6*16], xmm0
804 movdqa [rsp + 7*16], xmm0
805
; Each group: bump the counter, swap back to stored byte order, XOR with the
; matching dword of round key 0, and write it into the slot.
806 inc CTRSave
807 mov CTR, CTRSave
808 bswap CTR
809 xor CTR, DWORD PTR [ctx + 3*4]
810 mov DWORD PTR [rsp + 1*16 + 3*4], CTR
811
812 inc CTRSave
813 mov CTR, CTRSave
814 bswap CTR
815 xor CTR, DWORD PTR [ctx + 3*4]
816 mov DWORD PTR [rsp + 2*16 + 3*4], CTR
817
818 inc CTRSave
819 mov CTR, CTRSave
820 bswap CTR
821 xor CTR, DWORD PTR [ctx + 3*4]
822 mov DWORD PTR [rsp + 3*16 + 3*4], CTR
823
824 inc CTRSave
825 mov CTR, CTRSave
826 bswap CTR
827 xor CTR, DWORD PTR [ctx + 3*4]
828 mov DWORD PTR [rsp + 4*16 + 3*4], CTR
829
830 inc CTRSave
831 mov CTR, CTRSave
832 bswap CTR
833 xor CTR, DWORD PTR [ctx + 3*4]
834 mov DWORD PTR [rsp + 5*16 + 3*4], CTR
835
836 inc CTRSave
837 mov CTR, CTRSave
838 bswap CTR
839 xor CTR, DWORD PTR [ctx + 3*4]
840 mov DWORD PTR [rsp + 6*16 + 3*4], CTR
841
842 inc CTRSave
843 mov CTR, CTRSave
844 bswap CTR
845 xor CTR, DWORD PTR [ctx + 3*4]
846 mov DWORD PTR [rsp + 7*16 + 3*4], CTR
847
848
; Main loop: eight blocks (128 bytes) per iteration while at least 128 bytes remain.
849 loop8:
850 cmp inputLen, 8*16
851 jb loop1
852
; The slots already contain round key 0, so no separate whitening XOR is needed.
853 movdqu xmm0, [0*16 + rsp]
854 movdqu xmm1, [1*16 + rsp]
855 movdqu xmm2, [2*16 + rsp]
856 movdqu xmm3, [3*16 + rsp]
857 movdqu xmm4, [4*16 + rsp]
858 movdqu xmm5, [5*16 + rsp]
859 movdqu xmm6, [6*16 + rsp]
860 movdqu xmm7, [7*16 + rsp]
861
; Rounds 1..8 are interleaved with refilling slot i-1 with the next counter
; value, so the scratch area is ready for the following batch.
862 i = 1
863 WHILE i LE 8
864 aes_rnd i
865
866 inc CTRSave
867 mov CTR, CTRSave
868 bswap CTR
869 xor CTR, DWORD PTR [ctx + 3*4]
870 mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
871
872 i = i+1
873 ENDM
; Remaining rounds 9..rnds-1, then the final round.
874 WHILE i LT rnds
875 aes_rnd i
876 i = i+1
877 ENDM
878 aes_last_rnd rnds
879
; XOR the key stream with the input blocks.
880 movdqu xmm8, [0*16 + input]
881 pxor xmm0, xmm8
882 movdqu xmm8, [1*16 + input]
883 pxor xmm1, xmm8
884 movdqu xmm8, [2*16 + input]
885 pxor xmm2, xmm8
886 movdqu xmm8, [3*16 + input]
887 pxor xmm3, xmm8
888 movdqu xmm8, [4*16 + input]
889 pxor xmm4, xmm8
890 movdqu xmm8, [5*16 + input]
891 pxor xmm5, xmm8
892 movdqu xmm8, [6*16 + input]
893 pxor xmm6, xmm8
894 movdqu xmm8, [7*16 + input]
895 pxor xmm7, xmm8
896
897 movdqu [0*16 + output], xmm0
898 movdqu [1*16 + output], xmm1
899 movdqu [2*16 + output], xmm2
900 movdqu [3*16 + output], xmm3
901 movdqu [4*16 + output], xmm4
902 movdqu [5*16 + output], xmm5
903 movdqu [6*16 + output], xmm6
904 movdqu [7*16 + output], xmm7
905
906 lea input, [8*16 + input]
907 lea output, [8*16 + output]
908 sub inputLen, 8*16
909 jmp loop8
910
911
; Tail loop: fewer than 128 bytes remain, so at most seven slots are consumed.
912 loop1:
913 cmp inputLen, 1*16
914 jb bail
915
; Take the next prepared counter block and step rsp past it; rsp is restored
; from rbp at exit.
916 movdqu xmm0, [rsp]
917 add rsp, 16
918
919 i = 1
920 WHILE i LT rnds
921 movdqu xmm7, [i*16 + ctx]
922 aesenc xmm0, xmm7
923 i = i+1
924 ENDM
925 movdqu xmm7, [rnds*16 + ctx]
926 aesenclast xmm0, xmm7
927
928 movdqu xmm7, [input]
929 pxor xmm0, xmm7
930 movdqu [output], xmm0
931
932 lea input, [1*16 + input]
933 lea output, [1*16 + output]
934 sub inputLen, 1*16
935 jmp loop1
936
937 bail:
938
; [rsp] is the first unused slot; strip round key 0 to recover the plain
; counter block and store it back into the CTR context.
939 movdqu xmm0, [rsp]
940 movdqu xmm1, [ctx + 0*16]
941 pxor xmm0, xmm1
942 movdqu [16+ctrCtx], xmm0
943
944
945 xor rax, rax
946 mov rsp, rbp
947 pop rbp
948
949 movdqu xmm6, [rsp + 0*16]
950 movdqu xmm7, [rsp + 1*16]
951 movdqu xmm8, [rsp + 2*16]
952 add rsp, 3*16
953
954 ret
955 ENDM
956
957
; CTR mode, 10 rounds: body generated by gen_aes_ctr_func.
958 intel_aes_encrypt_ctr_128 PROC
959 gen_aes_ctr_func 10
960 intel_aes_encrypt_ctr_128 ENDP
961
; CTR mode, 12 rounds: body generated by gen_aes_ctr_func.
962 intel_aes_encrypt_ctr_192 PROC
963 gen_aes_ctr_func 12
964 intel_aes_encrypt_ctr_192 ENDP
965
; CTR mode, 14 rounds: body generated by gen_aes_ctr_func.
966 intel_aes_encrypt_ctr_256 PROC
967 gen_aes_ctr_func 14
968 intel_aes_encrypt_ctr_256 ENDP
969
970
971 END
OLDNEW
« no previous file with comments | « nss/lib/freebl/intel-aes.h ('k') | nss/lib/freebl/intel-aes-x86-masm.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698