Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Side by Side Diff: third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx64.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__aarch64__)
2 #include <openssl/arm_arch.h>
3
4 #if __ARM_MAX_ARCH__>=7
5 .text
6 #if !defined(__clang__)
7 .arch armv8-a+crypto
8 #endif
// Constant table used by the key-schedule code. aes_v8_set_encrypt_key takes
// its address with "adr x3,.Lrcon", loads the first two rows into v1/v2 and,
// on the 128-bit path only, later reloads v1 from the third row.
9 .align 5
10 .Lrcon:
11 .long 0x01,0x01,0x01,0x01 // row 0: starting per-round constant in v1 (doubled with shl after each use)
12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat (tbl index mask, loaded into v2)
13 .long 0x1b,0x1b,0x1b,0x1b // row 2: replacement constant loaded into v1 after the 8-iteration 128-bit loop
14
// aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
//
// In:   x0 = pointer to the user key bytes (16, 24 or 32 bytes are read)
//       w1 = key length in bits
//       x2 = pointer to the output key schedule
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if w1 is not one of 128, 192, 256
// On success the round keys are stored from the original x2 upward and the
// round count (10, 12 or 14, held in w12) is stored at original x2 + 240.
// x2 is left pointing at original x2 + 240 and w12 still holds the round
// count; aes_v8_set_decrypt_key enters at .Lenc_key and relies on both.
// Modifies: x0-x3, w1, w12, v0-v6, flags.
// x30 is never modified here, so the epilogue only reloads x29.
15 .globl aes_v8_set_encrypt_key
16 .hidden aes_v8_set_encrypt_key
17 .type aes_v8_set_encrypt_key,%function
18 .align 5
19 aes_v8_set_encrypt_key:
20 .Lenc_key: // local entry point, called with bl from aes_v8_set_decrypt_key
21 stp x29,x30,[sp,#-16]!
22 add x29,sp,#0
23 mov x3,#-1 // provisional return value: NULL pointer
24 cmp x0,#0
25 b.eq .Lenc_key_abort
26 cmp x2,#0
27 b.eq .Lenc_key_abort
28 mov x3,#-2 // provisional return value: unsupported key length
29 cmp w1,#128
30 b.lt .Lenc_key_abort
31 cmp w1,#256
32 b.gt .Lenc_key_abort
33 tst w1,#0x3f // within [128,256] only multiples of 64 are accepted
34 b.ne .Lenc_key_abort
35
36 adr x3,.Lrcon // x3 now becomes the constant-table pointer
37 cmp w1,#192 // flags are consumed by the three branches below; nothing in between sets flags
38
39 eor v0.16b,v0.16b,v0.16b // v0 = 0: zero fill for ext and zero round key for aese
40 ld1 {v3.16b},[x0],#16 // v3 = first 16 key bytes
41 mov w1,#8 // reuse w1
42 ld1 {v1.4s,v2.4s},[x3],#32 // v1 = row 0 of .Lrcon, v2 = tbl mask; x3 -> row 2
43
44 b.lt .Loop128
45 b.eq .L192
46 b .L256
47
48 .align 4
// 128-bit keys: each pass stores the current round key from v3 and derives
// the next one in place. Eight passes run here; two more are unrolled below.
49 .Loop128:
50 tbl v6.16b,{v3.16b},v2.16b // v6 = bytes of v3 selected by the mask in v2
51 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up one 32-bit word, zero filled
52 st1 {v3.4s},[x2],#16 // emit current round key
53 aese v6.16b,v0.16b // AES round step on v6 with an all-zero key
54 subs w1,w1,#1 // loop counter; flags used by b.ne at the bottom
55
56 eor v3.16b,v3.16b,v5.16b // three ext/eor steps: word i of v3 becomes
57 ext v5.16b,v0.16b,v5.16b,#12 // the XOR of original words 0..i
58 eor v3.16b,v3.16b,v5.16b
59 ext v5.16b,v0.16b,v5.16b,#12
60 eor v6.16b,v6.16b,v1.16b // fold in the per-round constant
61 eor v3.16b,v3.16b,v5.16b
62 shl v1.16b,v1.16b,#1 // double the constant for the next pass
63 eor v3.16b,v3.16b,v6.16b // v3 = next round key
64 b.ne .Loop128
65
66 ld1 {v1.4s},[x3] // v1 = row 2 of .Lrcon for the last two derivations
67
68 tbl v6.16b,{v3.16b},v2.16b
69 ext v5.16b,v0.16b,v3.16b,#12
70 st1 {v3.4s},[x2],#16
71 aese v6.16b,v0.16b
72
73 eor v3.16b,v3.16b,v5.16b
74 ext v5.16b,v0.16b,v5.16b,#12
75 eor v3.16b,v3.16b,v5.16b
76 ext v5.16b,v0.16b,v5.16b,#12
77 eor v6.16b,v6.16b,v1.16b
78 eor v3.16b,v3.16b,v5.16b
79 shl v1.16b,v1.16b,#1
80 eor v3.16b,v3.16b,v6.16b
81
82 tbl v6.16b,{v3.16b},v2.16b
83 ext v5.16b,v0.16b,v3.16b,#12
84 st1 {v3.4s},[x2],#16
85 aese v6.16b,v0.16b
86
87 eor v3.16b,v3.16b,v5.16b
88 ext v5.16b,v0.16b,v5.16b,#12
89 eor v3.16b,v3.16b,v5.16b
90 ext v5.16b,v0.16b,v5.16b,#12
91 eor v6.16b,v6.16b,v1.16b
92 eor v3.16b,v3.16b,v5.16b
93 eor v3.16b,v3.16b,v6.16b
94 st1 {v3.4s},[x2] // last round key, no post-increment (x2 = original + 160)
95 add x2,x2,#0x50 // x2 = original + 240, where .Ldone stores the round count
96
97 mov w12,#10 // round count for 128-bit keys
98 b .Ldone
99
100 .align 4
// 192-bit keys: v3 holds the first 16 key bytes, the low half of v4 the
// remaining 8. Each of the 8 passes stores 8 + 16 bytes.
101 .L192:
102 ld1 {v4.8b},[x0],#8 // v4 = key bytes 16..23
103 movi v6.16b,#8 // borrow v6.16b
104 st1 {v3.4s},[x2],#16
105 sub v2.16b,v2.16b,v6.16b // adjust the mask
106
107 .Loop192:
108 tbl v6.16b,{v4.16b},v2.16b // select from v4 using the adjusted mask
109 ext v5.16b,v0.16b,v3.16b,#12
110 st1 {v4.8b},[x2],#8 // emit low 8 bytes of v4
111 aese v6.16b,v0.16b
112 subs w1,w1,#1 // loop counter; flags used by b.ne at the bottom
113
114 eor v3.16b,v3.16b,v5.16b
115 ext v5.16b,v0.16b,v5.16b,#12
116 eor v3.16b,v3.16b,v5.16b
117 ext v5.16b,v0.16b,v5.16b,#12
118 eor v3.16b,v3.16b,v5.16b
119
120 dup v5.4s,v3.s[3] // splat the top word of the updated v3
121 eor v5.16b,v5.16b,v4.16b
122 eor v6.16b,v6.16b,v1.16b // fold in the per-round constant
123 ext v4.16b,v0.16b,v4.16b,#12
124 shl v1.16b,v1.16b,#1 // double the constant for the next pass
125 eor v4.16b,v4.16b,v5.16b
126 eor v3.16b,v3.16b,v6.16b
127 eor v4.16b,v4.16b,v6.16b
128 st1 {v3.4s},[x2],#16
129 b.ne .Loop192
130
131 mov w12,#12 // round count for 192-bit keys
132 add x2,x2,#0x20 // 16 + 8*24 = 208 bytes written; +0x20 gives original + 240
133 b .Ldone
134
135 .align 4
// 256-bit keys: v3 and v4 hold the two 16-byte halves of the key. Each pass
// stores v4, derives and stores the next v3, then (except on the last pass)
// derives the next v4.
136 .L256:
137 ld1 {v4.16b},[x0] // v4 = key bytes 16..31
138 mov w1,#7 // seven passes
139 mov w12,#14 // round count for 256-bit keys
140 st1 {v3.4s},[x2],#16
141
142 .Loop256:
143 tbl v6.16b,{v4.16b},v2.16b
144 ext v5.16b,v0.16b,v3.16b,#12
145 st1 {v4.4s},[x2],#16
146 aese v6.16b,v0.16b
147 subs w1,w1,#1 // flags used by b.eq .Ldone below
148
149 eor v3.16b,v3.16b,v5.16b
150 ext v5.16b,v0.16b,v5.16b,#12
151 eor v3.16b,v3.16b,v5.16b
152 ext v5.16b,v0.16b,v5.16b,#12
153 eor v6.16b,v6.16b,v1.16b
154 eor v3.16b,v3.16b,v5.16b
155 shl v1.16b,v1.16b,#1
156 eor v3.16b,v3.16b,v6.16b
157 st1 {v3.4s},[x2],#16
158 b.eq .Ldone // after the 7th pass x2 = original + 16 + 7*32 = original + 240
159
160 dup v6.4s,v3.s[3] // just splat
161 ext v5.16b,v0.16b,v4.16b,#12
162 aese v6.16b,v0.16b // no mask lookup and no round constant on this half-step
163
164 eor v4.16b,v4.16b,v5.16b
165 ext v5.16b,v0.16b,v5.16b,#12
166 eor v4.16b,v4.16b,v5.16b
167 ext v5.16b,v0.16b,v5.16b,#12
168 eor v4.16b,v4.16b,v5.16b
169
170 eor v4.16b,v4.16b,v6.16b
171 b .Loop256
172
173 .Ldone:
174 str w12,[x2] // round count at original x2 + 240
175 mov x3,#0 // success
176
177 .Lenc_key_abort:
178 mov x0,x3 // return value
179 ldr x29,[sp],#16 // pop the frame; x30 is unchanged since entry
180 ret
181 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
182
// aes_v8_set_decrypt_key: build a decryption key schedule.
//
// In:   x0 = pointer to the user key bytes
//       w1 = key length in bits
//       x2 = pointer to the output key schedule
// Out:  x0 = 0 on success, otherwise the non-zero code returned by .Lenc_key
// Calls .Lenc_key to produce the encryption schedule, then converts it in
// place: the order of the 16-byte round keys is reversed and aesimc is
// applied to every round key except the first and the last.
// Modifies: everything .Lenc_key modifies, plus x4 and v0/v1.
183 .globl aes_v8_set_decrypt_key
184 .hidden aes_v8_set_decrypt_key
185 .type aes_v8_set_decrypt_key,%function
186 .align 5
187 aes_v8_set_decrypt_key:
188 stp x29,x30,[sp,#-16]!
189 add x29,sp,#0
190 bl .Lenc_key // overwrites x30, hence the ldp of x29/x30 in the epilogue
191
192 cmp x0,#0
193 b.ne .Ldec_key_abort // propagate the error code in x0
194
195 sub x2,x2,#240 // restore original x2
196 mov x4,#-16 // post-index stride for walking x0 downward
197 add x0,x2,x12,lsl#4 // end of key schedule
198
199 ld1 {v0.4s},[x2] // swap first and last round keys as-is (no aesimc)
200 ld1 {v1.4s},[x0]
201 st1 {v0.4s},[x0],x4
202 st1 {v1.4s},[x2],#16
203
204 .Loop_imc: // swap pairs from both ends, applying aesimc to each
205 ld1 {v0.4s},[x2]
206 ld1 {v1.4s},[x0]
207 aesimc v0.16b,v0.16b
208 aesimc v1.16b,v1.16b
209 st1 {v0.4s},[x0],x4
210 st1 {v1.4s},[x2],#16
211 cmp x0,x2
212 b.hi .Loop_imc // continue while the upper pointer is still above the lower one
213
214 ld1 {v0.4s},[x2] // remaining middle round key: aesimc only
215 aesimc v0.16b,v0.16b
216 st1 {v0.4s},[x0]
217
218 eor x0,x0,x0 // return value
219 .Ldec_key_abort:
220 ldp x29,x30,[sp],#16
221 ret
222 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt: encrypt one 16-byte block.
//
// In:   x0 = pointer to the input block (16 bytes read)
//       x1 = pointer to the output block (16 bytes written)
//       x2 = pointer to the key schedule; the round count is read from
//            offset 240 and round keys are read sequentially from offset 0
// Leaf routine: no stack frame, no return value set.
// Modifies: x2, w3, v0-v2, flags.
223 .globl aes_v8_encrypt
224 .hidden aes_v8_encrypt
225 .type aes_v8_encrypt,%function
226 .align 5
227 aes_v8_encrypt:
228 ldr w3,[x2,#240] // w3 = round count
229 ld1 {v0.4s},[x2],#16 // v0 = round key 0
230 ld1 {v2.16b},[x0] // v2 = state
231 sub w3,w3,#2 // the last two rounds are done after the loop
232 ld1 {v1.4s},[x2],#16 // v1 = round key 1
233
234 .Loop_enc: // two rounds per pass, alternating v0/v1 as the key register
235 aese v2.16b,v0.16b
236 aesmc v2.16b,v2.16b
237 ld1 {v0.4s},[x2],#16
238 subs w3,w3,#2
239 aese v2.16b,v1.16b
240 aesmc v2.16b,v2.16b
241 ld1 {v1.4s},[x2],#16
242 b.gt .Loop_enc
243
244 aese v2.16b,v0.16b
245 aesmc v2.16b,v2.16b
246 ld1 {v0.4s},[x2] // v0 = last round key
247 aese v2.16b,v1.16b // final round: no aesmc
248 eor v2.16b,v2.16b,v0.16b // XOR with the last round key
249
250 st1 {v2.16b},[x1]
251 ret
252 .size aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt: decrypt one 16-byte block.
//
// In:   x0 = pointer to the input block (16 bytes read)
//       x1 = pointer to the output block (16 bytes written)
//       x2 = pointer to the key schedule; the round count is read from
//            offset 240 and round keys are read sequentially from offset 0
// Same structure as aes_v8_encrypt with aesd/aesimc in place of aese/aesmc.
// Leaf routine: no stack frame, no return value set.
// Modifies: x2, w3, v0-v2, flags.
253 .globl aes_v8_decrypt
254 .hidden aes_v8_decrypt
255 .type aes_v8_decrypt,%function
256 .align 5
257 aes_v8_decrypt:
258 ldr w3,[x2,#240] // w3 = round count
259 ld1 {v0.4s},[x2],#16 // v0 = round key 0
260 ld1 {v2.16b},[x0] // v2 = state
261 sub w3,w3,#2 // the last two rounds are done after the loop
262 ld1 {v1.4s},[x2],#16 // v1 = round key 1
263
264 .Loop_dec: // two rounds per pass, alternating v0/v1 as the key register
265 aesd v2.16b,v0.16b
266 aesimc v2.16b,v2.16b
267 ld1 {v0.4s},[x2],#16
268 subs w3,w3,#2
269 aesd v2.16b,v1.16b
270 aesimc v2.16b,v2.16b
271 ld1 {v1.4s},[x2],#16
272 b.gt .Loop_dec
273
274 aesd v2.16b,v0.16b
275 aesimc v2.16b,v2.16b
276 ld1 {v0.4s},[x2] // v0 = last round key
277 aesd v2.16b,v1.16b // final round: no aesimc
278 eor v2.16b,v2.16b,v0.16b // XOR with the last round key
279
280 st1 {v2.16b},[x1]
281 ret
282 .size aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = length in bytes; rounded down to a multiple of 16. If it is
//            below 16 the routine returns without storing anything.
//       x3 = key schedule pointer (round count read from offset 240)
//       x4 = pointer to the 16-byte IV; read on entry, overwritten at
//            .Lcbc_done with the last ciphertext block processed
//       w5 = direction: zero selects the decrypt path (.Lcbc_dec),
//            non-zero the encrypt path
// Register roles after the common setup:
//       v6       = chaining value (IV, then previous ciphertext block)
//       v16,v17  = round keys 0 and 1 (reloaded as the code walks the schedule)
//       v18-v23  = the six round keys before the last one
//       v7       = last round key
//       x8       = post-index stride for input loads: 16, or 0 once the
//                  block being loaded is the last one, so x0 is not advanced
//                  past the end of the input
// Modifies: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags.
// x30 is never modified here, so the epilogue only reloads x29.
283 .globl aes_v8_cbc_encrypt
284 .hidden aes_v8_cbc_encrypt
285 .type aes_v8_cbc_encrypt,%function
286 .align 5
287 aes_v8_cbc_encrypt:
288 stp x29,x30,[sp,#-16]!
289 add x29,sp,#0
290 subs x2,x2,#16 // x2 = bytes remaining after the first block
291 mov x8,#16
292 b.lo .Lcbc_abort // fewer than 16 bytes: nothing to do
293 csel x8,xzr,x8,eq // exactly one block: stride 0
294
295 cmp w5,#0 // en- or decrypting?
296 ldr w5,[x3,#240] // w5 = round count (flags from the cmp stay live until b.eq .Lcbc_dec)
297 and x2,x2,#-16 // drop any partial trailing block
298 ld1 {v6.16b},[x4] // v6 = IV
299 ld1 {v0.16b},[x0],x8 // v0 = first input block
300
301 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
302 sub w5,w5,#6
303 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
304 sub w5,w5,#2 // w5 = rounds - 8 (2 / 4 / 6 for 10 / 12 / 14 rounds)
305 ld1 {v18.4s,v19.4s},[x7],#32
306 ld1 {v20.4s,v21.4s},[x7],#32
307 ld1 {v22.4s,v23.4s},[x7],#32
308 ld1 {v7.4s},[x7] // v7 = last round key
309
310 add x7,x3,#32 // x7 -> round key 2
311 mov w6,w5
312 b.eq .Lcbc_dec // w5 argument was zero: decrypt
313
// ---- Encrypt path ----
314 cmp w5,#2 // 10-round schedule?
315 eor v0.16b,v0.16b,v6.16b // first block ^= IV
316 eor v5.16b,v16.16b,v7.16b // v5 = round key 0 ^ last round key (see note at line 368)
317 b.eq .Lcbc_enc128
318
// 12- and 14-round schedules: cache pointers to round keys 1 and 4..7.
319 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2,3
320 add x7,x3,#16 // x7 -> round key 1
321 add x6,x3,#16*4 // x6 -> round key 4
322 add x12,x3,#16*5 // x12 -> round key 5
323 aese v0.16b,v16.16b
324 aesmc v0.16b,v0.16b
325 add x14,x3,#16*6 // x14 -> round key 6
326 add x3,x3,#16*7 // x3 -> round key 7 (original x3 is lost)
327 b .Lenter_cbc_enc
328
329 .align 4
330 .Loop_cbc_enc:
331 aese v0.16b,v16.16b // v16 = next plaintext ^ v5; v0 = previous state before its final key XOR
332 aesmc v0.16b,v0.16b
333 st1 {v6.16b},[x1],#16 // store the previous ciphertext block
334 .Lenter_cbc_enc:
335 aese v0.16b,v17.16b
336 aesmc v0.16b,v0.16b
337 aese v0.16b,v2.16b
338 aesmc v0.16b,v0.16b
339 ld1 {v16.4s},[x6] // round key 4
340 cmp w5,#4 // 12-round schedule?
341 aese v0.16b,v3.16b
342 aesmc v0.16b,v0.16b
343 ld1 {v17.4s},[x12] // round key 5
344 b.eq .Lcbc_enc192
345
// 14-round schedule only: two extra rounds, then load round keys 6 and 7.
346 aese v0.16b,v16.16b
347 aesmc v0.16b,v0.16b
348 ld1 {v16.4s},[x14]
349 aese v0.16b,v17.16b
350 aesmc v0.16b,v0.16b
351 ld1 {v17.4s},[x3]
352 nop
353
354 .Lcbc_enc192:
355 aese v0.16b,v16.16b
356 aesmc v0.16b,v0.16b
357 subs x2,x2,#16 // flags consumed by csel below and by b.hs at the bottom
358 aese v0.16b,v17.16b
359 aesmc v0.16b,v0.16b
360 csel x8,xzr,x8,eq // next load is the last block: stop advancing x0
361 aese v0.16b,v18.16b
362 aesmc v0.16b,v0.16b
363 aese v0.16b,v19.16b
364 aesmc v0.16b,v0.16b
365 ld1 {v16.16b},[x0],x8 // next plaintext block
366 aese v0.16b,v20.16b
367 aesmc v0.16b,v0.16b
368 eor v16.16b,v16.16b,v5.16b // pre-XOR with key0^lastkey: the aese at the loop top then folds in the chaining XOR
369 aese v0.16b,v21.16b
370 aesmc v0.16b,v0.16b
371 ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
372 aese v0.16b,v22.16b
373 aesmc v0.16b,v0.16b
374 aese v0.16b,v23.16b // final round: no aesmc
375 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block
376 b.hs .Loop_cbc_enc
377
378 st1 {v6.16b},[x1],#16 // store the last ciphertext block
379 b .Lcbc_done
380
381 .align 5
// 10-round schedule: all round keys stay resident (v16/v17, v2/v3, v18-v23, v7).
382 .Lcbc_enc128:
383 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2,3
384 aese v0.16b,v16.16b
385 aesmc v0.16b,v0.16b
386 b .Lenter_cbc_enc128
387 .Loop_cbc_enc128:
388 aese v0.16b,v16.16b // v16 = next plaintext ^ v5, as in .Loop_cbc_enc
389 aesmc v0.16b,v0.16b
390 st1 {v6.16b},[x1],#16 // store the previous ciphertext block
391 .Lenter_cbc_enc128:
392 aese v0.16b,v17.16b
393 aesmc v0.16b,v0.16b
394 subs x2,x2,#16 // flags consumed by csel below and by b.hs at the bottom
395 aese v0.16b,v2.16b
396 aesmc v0.16b,v0.16b
397 csel x8,xzr,x8,eq
398 aese v0.16b,v3.16b
399 aesmc v0.16b,v0.16b
400 aese v0.16b,v18.16b
401 aesmc v0.16b,v0.16b
402 aese v0.16b,v19.16b
403 aesmc v0.16b,v0.16b
404 ld1 {v16.16b},[x0],x8 // next plaintext block
405 aese v0.16b,v20.16b
406 aesmc v0.16b,v0.16b
407 aese v0.16b,v21.16b
408 aesmc v0.16b,v0.16b
409 aese v0.16b,v22.16b
410 aesmc v0.16b,v0.16b
411 eor v16.16b,v16.16b,v5.16b
412 aese v0.16b,v23.16b // final round: no aesmc
413 eor v6.16b,v0.16b,v7.16b // v6 = ciphertext block
414 b.hs .Loop_cbc_enc128
415
416 st1 {v6.16b},[x1],#16
417 b .Lcbc_done
418 .align 5
// ---- Decrypt path ----
// Blocks are processed three at a time in v0, v1, v18. v2, v3, v19 keep
// copies of the input blocks because each one is the chaining value for the
// block after it.
419 .Lcbc_dec:
420 ld1 {v18.16b},[x0],#16 // next block (re-reads the first block when x8 was 0)
421 subs x2,x2,#32 // bias
422 add w6,w5,#2 // w6 = round-pair loop counter
423 orr v3.16b,v0.16b,v0.16b
424 orr v1.16b,v0.16b,v0.16b
425 orr v19.16b,v18.16b,v18.16b
426 b.lo .Lcbc_dec_tail // fewer than three blocks in total
427
428 orr v1.16b,v18.16b,v18.16b
429 ld1 {v18.16b},[x0],#16
430 orr v2.16b,v0.16b,v0.16b
431 orr v3.16b,v1.16b,v1.16b
432 orr v19.16b,v18.16b,v18.16b
433
434 .Loop3x_cbc_dec:
435 aesd v0.16b,v16.16b
436 aesimc v0.16b,v0.16b
437 aesd v1.16b,v16.16b
438 aesimc v1.16b,v1.16b
439 aesd v18.16b,v16.16b
440 aesimc v18.16b,v18.16b
441 ld1 {v16.4s},[x7],#16
442 subs w6,w6,#2
443 aesd v0.16b,v17.16b
444 aesimc v0.16b,v0.16b
445 aesd v1.16b,v17.16b
446 aesimc v1.16b,v1.16b
447 aesd v18.16b,v17.16b
448 aesimc v18.16b,v18.16b
449 ld1 {v17.4s},[x7],#16
450 b.gt .Loop3x_cbc_dec
451
452 aesd v0.16b,v16.16b
453 aesimc v0.16b,v0.16b
454 aesd v1.16b,v16.16b
455 aesimc v1.16b,v1.16b
456 aesd v18.16b,v16.16b
457 aesimc v18.16b,v18.16b
458 eor v4.16b,v6.16b,v7.16b // chaining value ^ last round key, for block 0
459 subs x2,x2,#0x30 // flags consumed by csel below and by b.hs at the bottom
460 eor v5.16b,v2.16b,v7.16b // same for block 1
461 csel x6,x2,x6,lo // x6, w6, is zero at this point
462 aesd v0.16b,v17.16b
463 aesimc v0.16b,v0.16b
464 aesd v1.16b,v17.16b
465 aesimc v1.16b,v1.16b
466 aesd v18.16b,v17.16b
467 aesimc v18.16b,v18.16b
468 eor v17.16b,v3.16b,v7.16b // same for block 2
469 add x0,x0,x6 // x0 is adjusted in such way that
470 // at exit from the loop v1.16b-v18.16b
471 // are loaded with last "words"
472 orr v6.16b,v19.16b,v19.16b // new chaining value = last input block of this group
473 mov x7,x3 // rewind to round key 0
474 aesd v0.16b,v20.16b
475 aesimc v0.16b,v0.16b
476 aesd v1.16b,v20.16b
477 aesimc v1.16b,v1.16b
478 aesd v18.16b,v20.16b
479 aesimc v18.16b,v18.16b
480 ld1 {v2.16b},[x0],#16 // preload the next group of input blocks
481 aesd v0.16b,v21.16b
482 aesimc v0.16b,v0.16b
483 aesd v1.16b,v21.16b
484 aesimc v1.16b,v1.16b
485 aesd v18.16b,v21.16b
486 aesimc v18.16b,v18.16b
487 ld1 {v3.16b},[x0],#16
488 aesd v0.16b,v22.16b
489 aesimc v0.16b,v0.16b
490 aesd v1.16b,v22.16b
491 aesimc v1.16b,v1.16b
492 aesd v18.16b,v22.16b
493 aesimc v18.16b,v18.16b
494 ld1 {v19.16b},[x0],#16
495 aesd v0.16b,v23.16b // final round: no aesimc
496 aesd v1.16b,v23.16b
497 aesd v18.16b,v23.16b
498 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
499 add w6,w5,#2 // reset the round-pair loop counter
500 eor v4.16b,v4.16b,v0.16b // plaintext blocks
501 eor v5.16b,v5.16b,v1.16b
502 eor v18.16b,v18.16b,v17.16b
503 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
504 st1 {v4.16b},[x1],#16
505 orr v0.16b,v2.16b,v2.16b // move the preloaded blocks into the working registers
506 st1 {v5.16b},[x1],#16
507 orr v1.16b,v3.16b,v3.16b
508 st1 {v18.16b},[x1],#16
509 orr v18.16b,v19.16b,v19.16b
510 b.hs .Loop3x_cbc_dec
511
512 cmn x2,#0x30 // x2 == -0x30: no blocks left
513 b.eq .Lcbc_done
514 nop
515
// One or two blocks remain, held in v1 and v18.
516 .Lcbc_dec_tail:
517 aesd v1.16b,v16.16b
518 aesimc v1.16b,v1.16b
519 aesd v18.16b,v16.16b
520 aesimc v18.16b,v18.16b
521 ld1 {v16.4s},[x7],#16
522 subs w6,w6,#2
523 aesd v1.16b,v17.16b
524 aesimc v1.16b,v1.16b
525 aesd v18.16b,v17.16b
526 aesimc v18.16b,v18.16b
527 ld1 {v17.4s},[x7],#16
528 b.gt .Lcbc_dec_tail
529
530 aesd v1.16b,v16.16b
531 aesimc v1.16b,v1.16b
532 aesd v18.16b,v16.16b
533 aesimc v18.16b,v18.16b
534 aesd v1.16b,v17.16b
535 aesimc v1.16b,v1.16b
536 aesd v18.16b,v17.16b
537 aesimc v18.16b,v18.16b
538 aesd v1.16b,v20.16b
539 aesimc v1.16b,v1.16b
540 aesd v18.16b,v20.16b
541 aesimc v18.16b,v18.16b
542 cmn x2,#0x20 // x2 == -0x20: exactly one block left (flags used by b.eq below)
543 aesd v1.16b,v21.16b
544 aesimc v1.16b,v1.16b
545 aesd v18.16b,v21.16b
546 aesimc v18.16b,v18.16b
547 eor v5.16b,v6.16b,v7.16b // chaining value ^ last round key
548 aesd v1.16b,v22.16b
549 aesimc v1.16b,v1.16b
550 aesd v18.16b,v22.16b
551 aesimc v18.16b,v18.16b
552 eor v17.16b,v3.16b,v7.16b
553 aesd v1.16b,v23.16b
554 aesd v18.16b,v23.16b
555 b.eq .Lcbc_dec_one
556 eor v5.16b,v5.16b,v1.16b // two blocks: results from v1 and v18
557 eor v17.16b,v17.16b,v18.16b
558 orr v6.16b,v19.16b,v19.16b // new chaining value
559 st1 {v5.16b},[x1],#16
560 st1 {v17.16b},[x1],#16
561 b .Lcbc_done
562
563 .Lcbc_dec_one:
564 eor v5.16b,v5.16b,v18.16b // one block: result from v18
565 orr v6.16b,v19.16b,v19.16b // new chaining value
566 st1 {v5.16b},[x1],#16
567
568 .Lcbc_done:
569 st1 {v6.16b},[x4] // write the chaining value back to the IV buffer
570 .Lcbc_abort:
571 ldr x29,[sp],#16 // pop the frame; x30 is unchanged since entry
572 ret
573 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks: CTR-mode keystream XOR over whole blocks,
// incrementing only the last 32-bit word of the counter block.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = key schedule pointer (round count read from offset 240)
//       x4 = pointer to the 16-byte counter block; the word at offset 12 is
//            the counter (byte-reversed into w8 unless __ARMEB__ is defined).
//            The counter block is only read, never written back.
// Register roles after setup:
//       v6       = unmodified copy of the counter block, used as a template
//       w8       = running counter value in host order
//       v16,v17  = round keys 0 and 1 (reloaded as the code walks the schedule)
//       v20-v23  = the four round keys before the last one
//       v7       = last round key
// Modifies: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
// x30 is never modified here, so the epilogue only reloads x29.
// NOTE(review): with x2 == 0 control still reaches .Lctr32_tail, which loads
// 16 input bytes and performs both stores (cmp x2,#1 is not equal).
// Presumably callers never pass a zero block count - confirm.
574 .globl aes_v8_ctr32_encrypt_blocks
575 .hidden aes_v8_ctr32_encrypt_blocks
576 .type aes_v8_ctr32_encrypt_blocks,%function
577 .align 5
578 aes_v8_ctr32_encrypt_blocks:
579 stp x29,x30,[sp,#-16]!
580 add x29,sp,#0
581 ldr w5,[x3,#240] // w5 = round count
582
583 ldr w8, [x4, #12] // w8 = counter word as stored in memory
584 ld1 {v0.4s},[x4] // v0 = counter block
585
586 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
587 sub w5,w5,#4
588 mov x12,#16 // x12 = stride for the first input load in the tail
589 cmp x2,#2 // flags consumed by csel (line 597) and b.ls (line 608)
590 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
591 sub w5,w5,#2 // w5 = rounds - 6, the round-pair loop count
592 ld1 {v20.4s,v21.4s},[x7],#32
593 ld1 {v22.4s,v23.4s},[x7],#32
594 ld1 {v7.4s},[x7] // v7 = last round key
595 add x7,x3,#32 // x7 -> round key 2
596 mov w6,w5
597 csel x12,xzr,x12,lo // fewer than two blocks: stride 0
598 #ifndef __ARMEB__
599 rev w8, w8
600 #endif
601 orr v1.16b,v0.16b,v0.16b
602 add w10, w8, #1
603 orr v18.16b,v0.16b,v0.16b
604 add w8, w8, #2
605 orr v6.16b,v0.16b,v0.16b // keep the template
606 rev w10, w10
607 mov v1.s[3],w10 // v1 = counter block + 1
608 b.ls .Lctr32_tail // two blocks or fewer: tail only
609 rev w12, w8 // x12 reused as scratch; it is reset before the tail
610 sub x2,x2,#3 // bias
611 mov v18.s[3],w12 // v18 = counter block + 2
612 b .Loop3x_ctr32
613
614 .align 4
// Three counter blocks (v0, v1, v18) are encrypted in parallel.
615 .Loop3x_ctr32:
616 aese v0.16b,v16.16b
617 aesmc v0.16b,v0.16b
618 aese v1.16b,v16.16b
619 aesmc v1.16b,v1.16b
620 aese v18.16b,v16.16b
621 aesmc v18.16b,v18.16b
622 ld1 {v16.4s},[x7],#16
623 subs w6,w6,#2
624 aese v0.16b,v17.16b
625 aesmc v0.16b,v0.16b
626 aese v1.16b,v17.16b
627 aesmc v1.16b,v1.16b
628 aese v18.16b,v17.16b
629 aesmc v18.16b,v18.16b
630 ld1 {v17.4s},[x7],#16
631 b.gt .Loop3x_ctr32
632
// The remaining rounds continue in v4, v5, v17 so that v0, v1, v18 can be
// rebuilt from the template v6 with the next three counter values.
633 aese v0.16b,v16.16b
634 aesmc v4.16b,v0.16b
635 aese v1.16b,v16.16b
636 aesmc v5.16b,v1.16b
637 ld1 {v2.16b},[x0],#16 // input block 0
638 orr v0.16b,v6.16b,v6.16b
639 aese v18.16b,v16.16b
640 aesmc v18.16b,v18.16b
641 ld1 {v3.16b},[x0],#16 // input block 1
642 orr v1.16b,v6.16b,v6.16b
643 aese v4.16b,v17.16b
644 aesmc v4.16b,v4.16b
645 aese v5.16b,v17.16b
646 aesmc v5.16b,v5.16b
647 ld1 {v19.16b},[x0],#16 // input block 2
648 mov x7,x3 // rewind to round key 0
649 aese v18.16b,v17.16b
650 aesmc v17.16b,v18.16b
651 orr v18.16b,v6.16b,v6.16b
652 add w9,w8,#1
653 aese v4.16b,v20.16b
654 aesmc v4.16b,v4.16b
655 aese v5.16b,v20.16b
656 aesmc v5.16b,v5.16b
657 eor v2.16b,v2.16b,v7.16b // pre-XOR the input with the last round key
658 add w10,w8,#2
659 aese v17.16b,v20.16b
660 aesmc v17.16b,v17.16b
661 eor v3.16b,v3.16b,v7.16b
662 add w8,w8,#3 // advance the running counter by three
663 aese v4.16b,v21.16b
664 aesmc v4.16b,v4.16b
665 aese v5.16b,v21.16b
666 aesmc v5.16b,v5.16b
667 eor v19.16b,v19.16b,v7.16b
668 rev w9,w9
669 aese v17.16b,v21.16b
670 aesmc v17.16b,v17.16b
671 mov v0.s[3], w9
672 rev w10,w10
673 aese v4.16b,v22.16b
674 aesmc v4.16b,v4.16b
675 aese v5.16b,v22.16b
676 aesmc v5.16b,v5.16b
677 mov v1.s[3], w10
678 rev w12,w8
679 aese v17.16b,v22.16b
680 aesmc v17.16b,v17.16b
681 mov v18.s[3], w12
682 subs x2,x2,#3 // flags consumed by b.hs at the bottom of the loop
683 aese v4.16b,v23.16b // final round: no aesmc
684 aese v5.16b,v23.16b
685 aese v17.16b,v23.16b
686
687 eor v2.16b,v2.16b,v4.16b // output = (input ^ last key) ^ final-round state
688 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
689 st1 {v2.16b},[x1],#16
690 eor v3.16b,v3.16b,v5.16b
691 mov w6,w5 // reset the round-pair loop counter
692 st1 {v3.16b},[x1],#16
693 eor v19.16b,v19.16b,v17.16b
694 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
695 st1 {v19.16b},[x1],#16
696 b.hs .Loop3x_ctr32
697
698 adds x2,x2,#3 // undo the bias: x2 = blocks left (0, 1 or 2)
699 b.eq .Lctr32_done
700 cmp x2,#1
701 mov x12,#16
702 csel x12,xzr,x12,eq // one block left: stride 0, so the second load re-reads it
703
// One or two blocks; two counter blocks (v0, v1) are always processed.
704 .Lctr32_tail:
705 aese v0.16b,v16.16b
706 aesmc v0.16b,v0.16b
707 aese v1.16b,v16.16b
708 aesmc v1.16b,v1.16b
709 ld1 {v16.4s},[x7],#16
710 subs w6,w6,#2
711 aese v0.16b,v17.16b
712 aesmc v0.16b,v0.16b
713 aese v1.16b,v17.16b
714 aesmc v1.16b,v1.16b
715 ld1 {v17.4s},[x7],#16
716 b.gt .Lctr32_tail
717
718 aese v0.16b,v16.16b
719 aesmc v0.16b,v0.16b
720 aese v1.16b,v16.16b
721 aesmc v1.16b,v1.16b
722 aese v0.16b,v17.16b
723 aesmc v0.16b,v0.16b
724 aese v1.16b,v17.16b
725 aesmc v1.16b,v1.16b
726 ld1 {v2.16b},[x0],x12 // first input block; x0 advances only if a second block exists
727 aese v0.16b,v20.16b
728 aesmc v0.16b,v0.16b
729 aese v1.16b,v20.16b
730 aesmc v1.16b,v1.16b
731 ld1 {v3.16b},[x0]
732 aese v0.16b,v21.16b
733 aesmc v0.16b,v0.16b
734 aese v1.16b,v21.16b
735 aesmc v1.16b,v1.16b
736 eor v2.16b,v2.16b,v7.16b // pre-XOR the input with the last round key
737 aese v0.16b,v22.16b
738 aesmc v0.16b,v0.16b
739 aese v1.16b,v22.16b
740 aesmc v1.16b,v1.16b
741 eor v3.16b,v3.16b,v7.16b
742 aese v0.16b,v23.16b // final round: no aesmc
743 aese v1.16b,v23.16b
744
745 cmp x2,#1 // flags consumed by b.eq below
746 eor v2.16b,v2.16b,v0.16b
747 eor v3.16b,v3.16b,v1.16b
748 st1 {v2.16b},[x1],#16
749 b.eq .Lctr32_done // exactly one block: skip the second store
750 st1 {v3.16b},[x1]
751
752 .Lctr32_done:
753 ldr x29,[sp],#16 // pop the frame; x30 is unchanged since entry
754 ret
755 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
756 #endif
757 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/err_data.c ('k') | third_party/boringssl/linux-aarch64/crypto/bn/armv8-mont.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698