Chromium Code Reviews

Side by Side Diff: third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx.S

Issue 862133002: Update from https://crrev.com/312398 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff |
OLDNEW
(Empty)
1 #include "arm_arch.h"
2
3 #if __ARM_MAX_ARCH__>=7
// Constant table for the key-expansion code. aes_v8_set_encrypt_key takes
// its address with "adr x3,rcon" and reads three 16-byte rows from it.
4 .text
5 .arch armv8-a+crypto
6 .align 5
7 rcon:
8 .long 0x01,0x01,0x01,0x01 // row 0: loaded into v1; XORed into the new key word and doubled (shl) every round
9 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat: row 1, the tbl byte-index mask loaded into v2
10 .long 0x1b,0x1b,0x1b,0x1b // row 2: reloaded into v1 by the 128-bit path after its 8-iteration loop
11
// aes_v8_set_encrypt_key: expand a user key into an AES encryption key schedule.
//
// Inputs (as used by the code below):
//   x0 = pointer to the user key bytes (rejected if zero)
//   w1 = key length in bits; only 128, 192 and 256 pass the checks
//   x2 = pointer to the output key schedule (rejected if zero)
// Output:
//   x0 = 0 on success, -1 if x0 or x2 is zero, -2 if w1 is below 128,
//        above 256 or not a multiple of 64
// Effects on success:
//   round keys are written upward from [x2]; the round count (10, 12 or 14)
//   is stored as a 32-bit word at offset 240 from the original x2, and x2 is
//   left pointing at that word. aes_v8_set_decrypt_key relies on this when it
//   calls .Lenc_key.
// Clobbers: x3, w1, w12, v0-v6, flags.
12 .globl aes_v8_set_encrypt_key
13 .type aes_v8_set_encrypt_key,%function
14 .align 5
15 aes_v8_set_encrypt_key:
16 .Lenc_key: // local alias; aes_v8_set_decrypt_key enters here with bl
17 stp x29,x30,[sp,#-16]!
18 add x29,sp,#0
19 mov x3,#-1 // x3 carries the return code; -1 = zero pointer argument
20 cmp x0,#0
21 b.eq .Lenc_key_abort
22 cmp x2,#0
23 b.eq .Lenc_key_abort
24 mov x3,#-2 // -2 = unsupported key length
25 cmp w1,#128
26 b.lt .Lenc_key_abort
27 cmp w1,#256
28 b.gt .Lenc_key_abort
29 tst w1,#0x3f // reject lengths that are not a multiple of 64
30 b.ne .Lenc_key_abort
31
32 adr x3,rcon // x3 now points at the constant table
33 cmp w1,#192 // flags stay live until the b.lt/b.eq dispatch below
34
35 eor v0.16b,v0.16b,v0.16b // v0 = 0: zero fill for ext and the key operand of aese
36 ld1 {v3.16b},[x0],#16 // v3 = first 16 bytes of the user key
37 mov w1,#8 // reuse w1 as the loop counter
38 ld1 {v1.4s,v2.4s},[x3],#32 // v1 = rcon row 0, v2 = tbl mask (row 1); x3 now at row 2
39
40 b.lt .Loop128 // key length below 192
41 b.eq .L192 // key length equal to 192
42 b .L256
43
44 .align 4
45 .Loop128: // 8 iterations; each stores the current round key and derives the next
46 tbl v6.16b,{v3.16b},v2.16b // v6 = last word of v3, byte-rotated and copied to all four lanes
47 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up one 32-bit lane, zero in lane 0
48 st1 {v3.4s},[x2],#16
49 aese v6.16b,v0.16b // zero key and four equal columns: only the byte substitution changes v6
50 subs w1,w1,#1
51
52 eor v3.16b,v3.16b,v5.16b
53 ext v5.16b,v0.16b,v5.16b,#12
54 eor v3.16b,v3.16b,v5.16b
55 ext v5.16b,v0.16b,v5.16b,#12
56 eor v6.16b,v6.16b,v1.16b // fold in the round constant
57 eor v3.16b,v3.16b,v5.16b // each lane of v3 is now the XOR of itself and all lower lanes
58 shl v1.16b,v1.16b,#1 // double the round constant for the next round
59 eor v3.16b,v3.16b,v6.16b // v3 = next round key
60 b.ne .Loop128 // flags from the subs above
61
62 ld1 {v1.4s},[x3] // v1 = rcon row 2 (0x1b) for the two unrolled rounds below
63
64 tbl v6.16b,{v3.16b},v2.16b
65 ext v5.16b,v0.16b,v3.16b,#12
66 st1 {v3.4s},[x2],#16
67 aese v6.16b,v0.16b
68
69 eor v3.16b,v3.16b,v5.16b
70 ext v5.16b,v0.16b,v5.16b,#12
71 eor v3.16b,v3.16b,v5.16b
72 ext v5.16b,v0.16b,v5.16b,#12
73 eor v6.16b,v6.16b,v1.16b
74 eor v3.16b,v3.16b,v5.16b
75 shl v1.16b,v1.16b,#1
76 eor v3.16b,v3.16b,v6.16b
77
78 tbl v6.16b,{v3.16b},v2.16b
79 ext v5.16b,v0.16b,v3.16b,#12
80 st1 {v3.4s},[x2],#16
81 aese v6.16b,v0.16b
82
83 eor v3.16b,v3.16b,v5.16b
84 ext v5.16b,v0.16b,v5.16b,#12
85 eor v3.16b,v3.16b,v5.16b
86 ext v5.16b,v0.16b,v5.16b,#12
87 eor v6.16b,v6.16b,v1.16b
88 eor v3.16b,v3.16b,v5.16b
89 eor v3.16b,v3.16b,v6.16b
90 st1 {v3.4s},[x2] // last round key, stored at offset 160 without post-increment
91 add x2,x2,#0x50 // 160 + 0x50 = 240: x2 now points at the round-count slot
92
93 mov w12,#10 // round count for the 128-bit path
94 b .Ldone
95
96 .align 4
97 .L192:
98 ld1 {v4.8b},[x0],#8 // low half of v4 = key bytes 16..23
99 movi v6.16b,#8 // borrow v6.16b
100 st1 {v3.4s},[x2],#16
101 sub v2.16b,v2.16b,v6.16b // adjust the mask: every index drops by 8, so tbl picks bytes 4..7 of v4
102
103 .Loop192: // 8 iterations (w1 is still 8); each stores 8 + 16 bytes
104 tbl v6.16b,{v4.16b},v2.16b
105 ext v5.16b,v0.16b,v3.16b,#12
106 st1 {v4.8b},[x2],#8
107 aese v6.16b,v0.16b
108 subs w1,w1,#1
109
110 eor v3.16b,v3.16b,v5.16b
111 ext v5.16b,v0.16b,v5.16b,#12
112 eor v3.16b,v3.16b,v5.16b
113 ext v5.16b,v0.16b,v5.16b,#12
114 eor v3.16b,v3.16b,v5.16b
115
116 dup v5.4s,v3.s[3]
117 eor v5.16b,v5.16b,v4.16b
118 eor v6.16b,v6.16b,v1.16b
119 ext v4.16b,v0.16b,v4.16b,#12
120 shl v1.16b,v1.16b,#1
121 eor v4.16b,v4.16b,v5.16b
122 eor v3.16b,v3.16b,v6.16b
123 eor v4.16b,v4.16b,v6.16b
124 st1 {v3.4s},[x2],#16
125 b.ne .Loop192 // flags from the subs above
126
127 mov w12,#12 // round count for the 192-bit path
128 add x2,x2,#0x20 // 16 + 8*24 = 208 bytes written; +0x20 puts x2 at offset 240
129 b .Ldone
130
131 .align 4
132 .L256:
133 ld1 {v4.16b},[x0] // v4 = key bytes 16..31
134 mov w1,#7 // loop counter for .Loop256
135 mov w12,#14 // round count for the 256-bit path
136 st1 {v3.4s},[x2],#16
137
138 .Loop256: // each pass stores v4, derives and stores the next v3, then derives the next v4
139 tbl v6.16b,{v4.16b},v2.16b
140 ext v5.16b,v0.16b,v3.16b,#12
141 st1 {v4.4s},[x2],#16
142 aese v6.16b,v0.16b
143 subs w1,w1,#1
144
145 eor v3.16b,v3.16b,v5.16b
146 ext v5.16b,v0.16b,v5.16b,#12
147 eor v3.16b,v3.16b,v5.16b
148 ext v5.16b,v0.16b,v5.16b,#12
149 eor v6.16b,v6.16b,v1.16b
150 eor v3.16b,v3.16b,v5.16b
151 shl v1.16b,v1.16b,#1
152 eor v3.16b,v3.16b,v6.16b
153 st1 {v3.4s},[x2],#16
154 b.eq .Ldone // counter hit zero: 16 + 7*32 = 240 bytes written, x2 is at offset 240
155
156 dup v6.4s,v3.s[3] // just splat
157 ext v5.16b,v0.16b,v4.16b,#12
158 aese v6.16b,v0.16b // no rotation and no round constant on this half-step
159
160 eor v4.16b,v4.16b,v5.16b
161 ext v5.16b,v0.16b,v5.16b,#12
162 eor v4.16b,v4.16b,v5.16b
163 ext v5.16b,v0.16b,v5.16b,#12
164 eor v4.16b,v4.16b,v5.16b
165
166 eor v4.16b,v4.16b,v6.16b
167 b .Loop256
168
169 .Ldone:
170 str w12,[x2] // store the round count at offset 240 of the schedule
171 mov x3,#0 // success
172
173 .Lenc_key_abort:
174 mov x0,x3 // return value
175 ldr x29,[sp],#16 // pop the frame; x30 is not reloaded (nothing in this routine writes it)
176 ret
177 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
178
// aes_v8_set_decrypt_key: build a decryption key schedule.
//
// Calls .Lenc_key with x0, w1 and x2 passed through unchanged, then converts
// the schedule in place: the round keys are reversed in order, and every key
// except the first and last is passed through aesimc.
//
// Output: x0 = 0 on success, otherwise the non-zero code returned by .Lenc_key.
// Clobbers (in addition to what .Lenc_key clobbers): x2, x4, v0, v1, flags.
179 .globl aes_v8_set_decrypt_key
180 .type aes_v8_set_decrypt_key,%function
181 .align 5
182 aes_v8_set_decrypt_key:
183 stp x29,x30,[sp,#-16]!
184 add x29,sp,#0
185 bl .Lenc_key // overwrites x30, which is why the epilogue uses ldp to restore it
186
187 cmp x0,#0
188 b.ne .Ldec_key_abort // propagate the error code already in x0
189
190 sub x2,x2,#240 // restore original x2
191 mov x4,#-16 // post-index step for walking x0 downward
192 add x0,x2,x12,lsl#4 // end of key schedule
193
194 ld1 {v0.4s},[x2] // swap the first and last round keys without aesimc
195 ld1 {v1.4s},[x0]
196 st1 {v0.4s},[x0],x4
197 st1 {v1.4s},[x2],#16
198
199 .Loop_imc: // swap key pairs from both ends, applying aesimc to each
200 ld1 {v0.4s},[x2]
201 ld1 {v1.4s},[x0]
202 aesimc v0.16b,v0.16b
203 aesimc v1.16b,v1.16b
204 st1 {v0.4s},[x0],x4
205 st1 {v1.4s},[x2],#16
206 cmp x0,x2
207 b.hi .Loop_imc // continue while the upper pointer is still above the lower one
208
209 ld1 {v0.4s},[x2] // the loop exits with x0 == x2: the middle key is transformed in place
210 aesimc v0.16b,v0.16b
211 st1 {v0.4s},[x0]
212
213 eor x0,x0,x0 // return value
214 .Ldec_key_abort:
215 ldp x29,x30,[sp],#16
216 ret
217 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt: encrypt one 16-byte block.
//
// Inputs (as used by the code below):
//   x0 = pointer to the 16-byte input block
//   x1 = pointer to the 16-byte output block
//   x2 = pointer to the key schedule; the round count is read from offset 240
// Clobbers: x2 (advanced through the schedule), w3, v0-v2, flags.
218 .globl aes_v8_encrypt
219 .type aes_v8_encrypt,%function
220 .align 5
221 aes_v8_encrypt:
222 ldr w3,[x2,#240] // w3 = round count
223 ld1 {v0.4s},[x2],#16 // v0 = first round key
224 ld1 {v2.16b},[x0] // v2 = input block (the working state)
225 sub w3,w3,#2
226 ld1 {v1.4s},[x2],#16 // v1 = second round key
227
228 .Loop_enc: // two rounds per pass, with the next two keys loaded alongside
229 aese v2.16b,v0.16b
230 ld1 {v0.4s},[x2],#16
231 aesmc v2.16b,v2.16b
232 subs w3,w3,#2
233 aese v2.16b,v1.16b
234 ld1 {v1.4s},[x2],#16
235 aesmc v2.16b,v2.16b
236 b.gt .Loop_enc
237
238 aese v2.16b,v0.16b
239 ld1 {v0.4s},[x2] // v0 = last round key
240 aesmc v2.16b,v2.16b
241 aese v2.16b,v1.16b // final round: no aesmc afterwards
242 eor v2.16b,v2.16b,v0.16b // XOR with the last round key
243
244 st1 {v2.16b},[x1]
245 ret
246 .size aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt: decrypt one 16-byte block.
//
// Inputs (as used by the code below):
//   x0 = pointer to the 16-byte input block
//   x1 = pointer to the 16-byte output block
//   x2 = pointer to the key schedule; the round count is read from offset 240
// Same structure as aes_v8_encrypt, with aesd/aesimc in place of aese/aesmc.
// Clobbers: x2 (advanced through the schedule), w3, v0-v2, flags.
247 .globl aes_v8_decrypt
248 .type aes_v8_decrypt,%function
249 .align 5
250 aes_v8_decrypt:
251 ldr w3,[x2,#240] // w3 = round count
252 ld1 {v0.4s},[x2],#16 // v0 = first round key
253 ld1 {v2.16b},[x0] // v2 = input block (the working state)
254 sub w3,w3,#2
255 ld1 {v1.4s},[x2],#16 // v1 = second round key
256
257 .Loop_dec: // two rounds per pass, with the next two keys loaded alongside
258 aesd v2.16b,v0.16b
259 ld1 {v0.4s},[x2],#16
260 aesimc v2.16b,v2.16b
261 subs w3,w3,#2
262 aesd v2.16b,v1.16b
263 ld1 {v1.4s},[x2],#16
264 aesimc v2.16b,v2.16b
265 b.gt .Loop_dec
266
267 aesd v2.16b,v0.16b
268 ld1 {v0.4s},[x2] // v0 = last round key
269 aesimc v2.16b,v2.16b
270 aesd v2.16b,v1.16b // final round: no aesimc afterwards
271 eor v2.16b,v2.16b,v0.16b // XOR with the last round key
272
273 st1 {v2.16b},[x1]
274 ret
275 .size aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt: CBC-mode encryption or decryption of whole 16-byte blocks.
//
// Inputs (as used by the code below):
//   x0 = input pointer
//   x1 = output pointer
//   x2 = length in bytes; fewer than 16 returns immediately without touching
//        the output or the IV, and a trailing partial block is ignored
//   x3 = pointer to the key schedule; the round count is read from offset 240
//   x4 = pointer to the 16-byte IV; read on entry and overwritten at
//        .Lcbc_done with the last ciphertext block (the last block written
//        when encrypting, the last block read when decrypting)
//   w5 = zero selects decryption, non-zero selects encryption
// Register roles: v16/v17 rotate through the leading round keys, v20-v23 and
// v7 hold the last five round keys, and v6 carries the chaining value.
// v18/v19 hold round keys on the encrypt paths and data blocks on the
// decrypt paths.
// Clobbers: x0-x2, x5-x8, v0-v7, v16-v23, flags.
276 .globl aes_v8_cbc_encrypt
277 .type aes_v8_cbc_encrypt,%function
278 .align 5
279 aes_v8_cbc_encrypt:
280 stp x29,x30,[sp,#-16]!
281 add x29,sp,#0
282 subs x2,x2,#16 // x2 = length - 16; a borrow means less than one block
283 mov x8,#16 // x8 = post-increment step for input loads
284 b.lo .Lcbc_abort
285 csel x8,xzr,x8,eq // exactly one block: step 0 so x0 is never advanced past it
286
287 cmp w5,#0 // en- or decrypting?
288 ldr w5,[x3,#240] // w5 = round count; the cmp flags stay live until b.eq .Lcbc_dec
289 and x2,x2,#-16 // ignore any partial trailing block
290 ld1 {v6.16b},[x4] // v6 = IV
291 ld1 {v0.16b},[x0],x8 // v0 = first input block
292
293 ld1 {v16.4s-v17.4s},[x3] // load key schedule...
294 sub w5,w5,#6
295 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
296 sub w5,w5,#2 // w5 = rounds - 8
297 ld1 {v18.4s-v19.4s},[x7],#32
298 ld1 {v20.4s-v21.4s},[x7],#32
299 ld1 {v22.4s-v23.4s},[x7],#32
300 ld1 {v7.4s},[x7] // v7 = last round key
301
302 add x7,x3,#32 // x7 -> third round key
303 mov w6,w5 // w6 = inner round-loop counter
304 b.eq .Lcbc_dec // w5 argument was zero: decrypt
305
306 cmp w5,#2 // rounds - 8 == 2, i.e. a 10-round schedule
307 eor v0.16b,v0.16b,v6.16b // first block XOR IV
308 eor v5.16b,v16.16b,v7.16b // v5 = rndkey[0] ^ last round key, folded into each later input block
309 b.eq .Lcbc_enc128
310
311 .Loop_cbc_enc: // inner loop: two rounds per pass using the keys streamed through x7
312 aese v0.16b,v16.16b
313 ld1 {v16.4s},[x7],#16
314 aesmc v0.16b,v0.16b
315 subs w6,w6,#2
316 aese v0.16b,v17.16b
317 ld1 {v17.4s},[x7],#16
318 aesmc v0.16b,v0.16b
319 b.gt .Loop_cbc_enc
320
321 aese v0.16b,v16.16b
322 aesmc v0.16b,v0.16b
323 subs x2,x2,#16 // flags are used by the csel below and by b.hs at the loop bottom
324 aese v0.16b,v17.16b
325 aesmc v0.16b,v0.16b
326 csel x8,xzr,x8,eq // last block coming up: stop advancing x0
327 aese v0.16b,v18.16b
328 aesmc v0.16b,v0.16b
329 add x7,x3,#16
330 aese v0.16b,v19.16b
331 aesmc v0.16b,v0.16b
332 ld1 {v16.16b},[x0],x8 // next input block
333 aese v0.16b,v20.16b
334 aesmc v0.16b,v0.16b
335 eor v16.16b,v16.16b,v5.16b // next input ^ rndkey[0] ^ last key: serves as the first-round key next pass
336 aese v0.16b,v21.16b
337 aesmc v0.16b,v0.16b
338 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
339 aese v0.16b,v22.16b
340 aesmc v0.16b,v0.16b
341 aese v0.16b,v23.16b
342
343 mov w6,w5
344 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block, also the new chaining value
345 st1 {v6.16b},[x1],#16
346 b.hs .Loop_cbc_enc // more blocks remain (no borrow from the subs above)
347
348 b .Lcbc_done
349
350 .align 5
351 .Lcbc_enc128: // 10-round path: every round key stays in registers
352 ld1 {v2.4s-v3.4s},[x7] // v2, v3 = third and fourth round keys
353 aese v0.16b,v16.16b
354 aesmc v0.16b,v0.16b
355 b .Lenter_cbc_enc128
356 .Loop_cbc_enc128:
357 aese v0.16b,v16.16b
358 aesmc v0.16b,v0.16b
359 st1 {v6.16b},[x1],#16 // store the previous pass's ciphertext block
360 .Lenter_cbc_enc128:
361 aese v0.16b,v17.16b
362 aesmc v0.16b,v0.16b
363 subs x2,x2,#16 // flags are used by the csel below and by b.hs at the loop bottom
364 aese v0.16b,v2.16b
365 aesmc v0.16b,v0.16b
366 csel x8,xzr,x8,eq
367 aese v0.16b,v3.16b
368 aesmc v0.16b,v0.16b
369 aese v0.16b,v18.16b
370 aesmc v0.16b,v0.16b
371 aese v0.16b,v19.16b
372 aesmc v0.16b,v0.16b
373 ld1 {v16.16b},[x0],x8 // next input block
374 aese v0.16b,v20.16b
375 aesmc v0.16b,v0.16b
376 aese v0.16b,v21.16b
377 aesmc v0.16b,v0.16b
378 aese v0.16b,v22.16b
379 aesmc v0.16b,v0.16b
380 eor v16.16b,v16.16b,v5.16b // next input ^ rndkey[0] ^ last key
381 aese v0.16b,v23.16b
382 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block
383 b.hs .Loop_cbc_enc128
384
385 st1 {v6.16b},[x1],#16 // store the final ciphertext block
386 b .Lcbc_done
387 .align 5
388 .Lcbc_dec:
389 ld1 {v18.16b},[x0],#16 // v18 = next block (re-reads the first block when x8 was 0)
390 subs x2,x2,#32 // bias
391 add w6,w5,#2 // w6 = rounds - 6, the decrypt round-loop counter
392 orr v3.16b,v0.16b,v0.16b // keep ciphertext copies for chaining
393 orr v1.16b,v0.16b,v0.16b
394 orr v19.16b,v18.16b,v18.16b
395 b.lo .Lcbc_dec_tail // fewer than three blocks in total
396
397 orr v1.16b,v18.16b,v18.16b
398 ld1 {v18.16b},[x0],#16 // third block
399 orr v2.16b,v0.16b,v0.16b
400 orr v3.16b,v1.16b,v1.16b
401 orr v19.16b,v18.16b,v18.16b
402
403 .Loop3x_cbc_dec: // three blocks (v0, v1, v18) decrypted in parallel
404 aesd v0.16b,v16.16b
405 aesd v1.16b,v16.16b
406 aesd v18.16b,v16.16b
407 ld1 {v16.4s},[x7],#16
408 aesimc v0.16b,v0.16b
409 aesimc v1.16b,v1.16b
410 aesimc v18.16b,v18.16b
411 subs w6,w6,#2
412 aesd v0.16b,v17.16b
413 aesd v1.16b,v17.16b
414 aesd v18.16b,v17.16b
415 ld1 {v17.4s},[x7],#16
416 aesimc v0.16b,v0.16b
417 aesimc v1.16b,v1.16b
418 aesimc v18.16b,v18.16b
419 b.gt .Loop3x_cbc_dec
420
421 aesd v0.16b,v16.16b
422 aesd v1.16b,v16.16b
423 aesd v18.16b,v16.16b
424 eor v4.16b,v6.16b,v7.16b // chaining value ^ last round key, for the first block
425 aesimc v0.16b,v0.16b
426 aesimc v1.16b,v1.16b
427 aesimc v18.16b,v18.16b
428 eor v5.16b,v2.16b,v7.16b // same for the second block
429 aesd v0.16b,v17.16b
430 aesd v1.16b,v17.16b
431 aesd v18.16b,v17.16b
432 eor v17.16b,v3.16b,v7.16b // same for the third block (v17 is reloaded as a key below)
433 subs x2,x2,#0x30 // three blocks consumed; flags are used by csel and by b.hs below
434 aesimc v0.16b,v0.16b
435 aesimc v1.16b,v1.16b
436 aesimc v18.16b,v18.16b
437 orr v6.16b,v19.16b,v19.16b // newest ciphertext block becomes the chaining value
438 csel x6,x2,x6,lo // x6, w6, is zero at this point
439 aesd v0.16b,v20.16b
440 aesd v1.16b,v20.16b
441 aesd v18.16b,v20.16b
442 add x0,x0,x6 // x0 is adjusted in such way that
443 // at exit from the loop v1.16b-v18.16b
444 // are loaded with last "words"
445 aesimc v0.16b,v0.16b
446 aesimc v1.16b,v1.16b
447 aesimc v18.16b,v18.16b
448 mov x7,x3 // rewind the key pointer to the start of the schedule
449 aesd v0.16b,v21.16b
450 aesd v1.16b,v21.16b
451 aesd v18.16b,v21.16b
452 ld1 {v2.16b},[x0],#16 // next three input blocks into v2, v3, v19
453 aesimc v0.16b,v0.16b
454 aesimc v1.16b,v1.16b
455 aesimc v18.16b,v18.16b
456 ld1 {v3.16b},[x0],#16
457 aesd v0.16b,v22.16b
458 aesd v1.16b,v22.16b
459 aesd v18.16b,v22.16b
460 ld1 {v19.16b},[x0],#16
461 aesimc v0.16b,v0.16b
462 aesimc v1.16b,v1.16b
463 aesimc v18.16b,v18.16b
464 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
465 aesd v0.16b,v23.16b
466 aesd v1.16b,v23.16b
467 aesd v18.16b,v23.16b
468
469 add w6,w5,#2
470 eor v4.16b,v4.16b,v0.16b // final key XOR and CBC chaining in one step
471 eor v5.16b,v5.16b,v1.16b
472 eor v18.16b,v18.16b,v17.16b
473 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
474 orr v0.16b,v2.16b,v2.16b
475 st1 {v4.16b},[x1],#16
476 orr v1.16b,v3.16b,v3.16b
477 st1 {v5.16b},[x1],#16
478 st1 {v18.16b},[x1],#16
479 orr v18.16b,v19.16b,v19.16b
480 b.hs .Loop3x_cbc_dec // no borrow from the subs above: at least three more blocks
481
482 cmn x2,#0x30 // x2 == -0x30 means nothing is left
483 b.eq .Lcbc_done
484 nop
485
486 .Lcbc_dec_tail: // final one or two blocks (v18, plus v1 when there are two)
487 aesd v1.16b,v16.16b
488 aesd v18.16b,v16.16b
489 ld1 {v16.4s},[x7],#16
490 aesimc v1.16b,v1.16b
491 aesimc v18.16b,v18.16b
492 subs w6,w6,#2
493 aesd v1.16b,v17.16b
494 aesd v18.16b,v17.16b
495 ld1 {v17.4s},[x7],#16
496 aesimc v1.16b,v1.16b
497 aesimc v18.16b,v18.16b
498 b.gt .Lcbc_dec_tail
499
500 aesd v1.16b,v16.16b
501 aesd v18.16b,v16.16b
502 aesimc v1.16b,v1.16b
503 aesimc v18.16b,v18.16b
504 aesd v1.16b,v17.16b
505 aesd v18.16b,v17.16b
506 aesimc v1.16b,v1.16b
507 aesimc v18.16b,v18.16b
508 aesd v1.16b,v20.16b
509 aesd v18.16b,v20.16b
510 aesimc v1.16b,v1.16b
511 aesimc v18.16b,v18.16b
512 cmn x2,#0x20 // x2 == -0x20 means exactly one block is left; used by b.eq below
513 aesd v1.16b,v21.16b
514 aesd v18.16b,v21.16b
515 aesimc v1.16b,v1.16b
516 aesimc v18.16b,v18.16b
517 eor v5.16b,v6.16b,v7.16b // chaining value ^ last round key
518 aesd v1.16b,v22.16b
519 aesd v18.16b,v22.16b
520 aesimc v1.16b,v1.16b
521 aesimc v18.16b,v18.16b
522 eor v17.16b,v3.16b,v7.16b
523 aesd v1.16b,v23.16b
524 aesd v18.16b,v23.16b
525 b.eq .Lcbc_dec_one
526 eor v5.16b,v5.16b,v1.16b // two-block case
527 eor v17.16b,v17.16b,v18.16b
528 orr v6.16b,v19.16b,v19.16b // last ciphertext block becomes the chaining value
529 st1 {v5.16b},[x1],#16
530 st1 {v17.16b},[x1],#16
531 b .Lcbc_done
532
533 .Lcbc_dec_one:
534 eor v5.16b,v5.16b,v18.16b // one-block case: only v18 carries a result
535 orr v6.16b,v19.16b,v19.16b
536 st1 {v5.16b},[x1],#16
537
538 .Lcbc_done:
539 st1 {v6.16b},[x4] // write the chaining value back as the new IV
540 .Lcbc_abort:
541 ldr x29,[sp],#16 // pop the frame; x30 is not reloaded (nothing in this routine writes it)
542 ret
543 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit counter.
//
// Inputs (as used by the code below):
//   x0 = input pointer
//   x1 = output pointer
//   x2 = number of 16-byte blocks
//   x3 = pointer to the key schedule; the round count is read from offset 240
//   x4 = pointer to the 16-byte counter block; only its last 32-bit word is
//        incremented, and nothing is written back through x4
// Register roles: v6 keeps the original counter block as a template, v0/v1/v18
// are the counter blocks being encrypted, v16/v17 rotate through the leading
// round keys, and v20-v23 and v7 hold the last five round keys.
// Clobbers: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
// NOTE(review): x2 == 0 is not special-cased; it takes the .Lctr32_tail path,
// which stores two blocks. Callers presumably never pass 0 - confirm.
544 .globl aes_v8_ctr32_encrypt_blocks
545 .type aes_v8_ctr32_encrypt_blocks,%function
546 .align 5
547 aes_v8_ctr32_encrypt_blocks:
548 stp x29,x30,[sp,#-16]!
549 add x29,sp,#0
550 ldr w5,[x3,#240] // w5 = round count
551
552 ldr w8, [x4, #12] // w8 = last 32-bit word of the counter block
553 ld1 {v0.4s},[x4] // v0 = full counter block
554
555 ld1 {v16.4s-v17.4s},[x3] // load key schedule...
556 sub w5,w5,#4
557 mov x12,#16 // x12 = input step used by the tail path
558 cmp x2,#2 // flags stay live for the csel and the b.ls below
559 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
560 sub w5,w5,#2 // w5 = rounds - 6, the round-loop counter
561 ld1 {v20.4s-v21.4s},[x7],#32
562 ld1 {v22.4s-v23.4s},[x7],#32
563 ld1 {v7.4s},[x7] // v7 = last round key
564 add x7,x3,#32 // x7 -> third round key
565 mov w6,w5
566 csel x12,xzr,x12,lo // fewer than two blocks: step 0 so the second tail load stays in bounds
567 #ifndef __ARMEB__
568 rev w8, w8
569 #endif
570 orr v1.16b,v0.16b,v0.16b
571 add w10, w8, #1
572 orr v18.16b,v0.16b,v0.16b
573 add w8, w8, #2
574 orr v6.16b,v0.16b,v0.16b // v6 = counter-block template
575 rev w10, w10
576 mov v1.s[3],w10 // v1 = counter + 1
577 b.ls .Lctr32_tail // two blocks or fewer
578 rev w12, w8
579 sub x2,x2,#3 // bias
580 mov v18.s[3],w12 // v18 = counter + 2
581 b .Loop3x_ctr32
582
583 .align 4
584 .Loop3x_ctr32: // three counter blocks (v0, v1, v18) encrypted in parallel
585 aese v0.16b,v16.16b
586 aese v1.16b,v16.16b
587 aese v18.16b,v16.16b
588 ld1 {v16.4s},[x7],#16
589 aesmc v0.16b,v0.16b
590 aesmc v1.16b,v1.16b
591 aesmc v18.16b,v18.16b
592 subs w6,w6,#2
593 aese v0.16b,v17.16b
594 aese v1.16b,v17.16b
595 aese v18.16b,v17.16b
596 ld1 {v17.4s},[x7],#16
597 aesmc v0.16b,v0.16b
598 aesmc v1.16b,v1.16b
599 aesmc v18.16b,v18.16b
600 b.gt .Loop3x_ctr32
601
602 aese v0.16b,v16.16b
603 aese v1.16b,v16.16b
604 aese v18.16b,v16.16b
605 mov x7,x3 // rewind the key pointer to the start of the schedule
606 aesmc v4.16b,v0.16b // the state moves to v4/v5/v17 so v0/v1/v18 can be refilled
607 ld1 {v2.16b},[x0],#16
608 aesmc v5.16b,v1.16b
609 aesmc v18.16b,v18.16b
610 orr v0.16b,v6.16b,v6.16b // start the next counter blocks from the template
611 aese v4.16b,v17.16b
612 ld1 {v3.16b},[x0],#16
613 aese v5.16b,v17.16b
614 aese v18.16b,v17.16b
615 orr v1.16b,v6.16b,v6.16b
616 aesmc v4.16b,v4.16b
617 ld1 {v19.16b},[x0],#16
618 aesmc v5.16b,v5.16b
619 aesmc v17.16b,v18.16b
620 orr v18.16b,v6.16b,v6.16b
621 add w9,w8,#1 // next three counter values: w8+1, w8+2, w8+3
622 aese v4.16b,v20.16b
623 aese v5.16b,v20.16b
624 aese v17.16b,v20.16b
625 eor v2.16b,v2.16b,v7.16b // pre-XOR the input blocks with the last round key
626 add w10,w8,#2
627 aesmc v4.16b,v4.16b
628 aesmc v5.16b,v5.16b
629 aesmc v17.16b,v17.16b
630 eor v3.16b,v3.16b,v7.16b
631 add w8,w8,#3
632 aese v4.16b,v21.16b
633 aese v5.16b,v21.16b
634 aese v17.16b,v21.16b
635 eor v19.16b,v19.16b,v7.16b
636 rev w9,w9
637 aesmc v4.16b,v4.16b
638 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
639 aesmc v5.16b,v5.16b
640 aesmc v17.16b,v17.16b
641 mov v0.s[3], w9
642 rev w10,w10
643 aese v4.16b,v22.16b
644 aese v5.16b,v22.16b
645 aese v17.16b,v22.16b
646 mov v1.s[3], w10
647 rev w12,w8
648 aesmc v4.16b,v4.16b
649 aesmc v5.16b,v5.16b
650 aesmc v17.16b,v17.16b
651 mov v18.s[3], w12
652 subs x2,x2,#3 // flags are used by b.hs at the loop bottom
653 aese v4.16b,v23.16b
654 aese v5.16b,v23.16b
655 aese v17.16b,v23.16b
656
657 mov w6,w5
658 eor v2.16b,v2.16b,v4.16b // output = input ^ last key ^ final-round state
659 eor v3.16b,v3.16b,v5.16b
660 eor v19.16b,v19.16b,v17.16b
661 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
662 st1 {v2.16b},[x1],#16
663 st1 {v3.16b},[x1],#16
664 st1 {v19.16b},[x1],#16
665 b.hs .Loop3x_ctr32 // no borrow: at least three more blocks
666
667 adds x2,x2,#3 // undo the bias: x2 = blocks remaining (0, 1 or 2)
668 b.eq .Lctr32_done
669 cmp x2,#1
670 mov x12,#16
671 csel x12,xzr,x12,eq // one block left: step 0 so the second load stays in bounds
672
673 .Lctr32_tail: // final one or two blocks, using counter blocks v0 and v1
674 aese v0.16b,v16.16b
675 aese v1.16b,v16.16b
676 ld1 {v16.4s},[x7],#16
677 aesmc v0.16b,v0.16b
678 aesmc v1.16b,v1.16b
679 subs w6,w6,#2
680 aese v0.16b,v17.16b
681 aese v1.16b,v17.16b
682 ld1 {v17.4s},[x7],#16
683 aesmc v0.16b,v0.16b
684 aesmc v1.16b,v1.16b
685 b.gt .Lctr32_tail
686
687 aese v0.16b,v16.16b
688 aese v1.16b,v16.16b
689 aesmc v0.16b,v0.16b
690 aesmc v1.16b,v1.16b
691 aese v0.16b,v17.16b
692 aese v1.16b,v17.16b
693 aesmc v0.16b,v0.16b
694 aesmc v1.16b,v1.16b
695 ld1 {v2.16b},[x0],x12 // first remaining input block
696 aese v0.16b,v20.16b
697 aese v1.16b,v20.16b
698 ld1 {v3.16b},[x0] // second block, or the same block again when x12 is 0
699 aesmc v0.16b,v0.16b
700 aesmc v1.16b,v1.16b
701 aese v0.16b,v21.16b
702 aese v1.16b,v21.16b
703 aesmc v0.16b,v0.16b
704 aesmc v1.16b,v1.16b
705 aese v0.16b,v22.16b
706 aese v1.16b,v22.16b
707 eor v2.16b,v2.16b,v7.16b
708 aesmc v0.16b,v0.16b
709 aesmc v1.16b,v1.16b
710 eor v3.16b,v3.16b,v7.16b
711 aese v0.16b,v23.16b
712 aese v1.16b,v23.16b
713
714 cmp x2,#1 // exactly one block left: skip the second store
715 eor v2.16b,v2.16b,v0.16b
716 eor v3.16b,v3.16b,v1.16b
717 st1 {v2.16b},[x1],#16
718 b.eq .Lctr32_done
719 st1 {v3.16b},[x1]
720
721 .Lctr32_done:
722 ldr x29,[sp],#16 // pop the frame; x30 is not reloaded (nothing in this routine writes it)
723 ret
724 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
725 #endif
OLDNEW

Powered by Google App Engine