Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(210)

Side by Side Diff: third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx64.S

Issue 1319703002: Breaking Change: merge BoringSSL branch into master (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__aarch64__)
2 #include "arm_arch.h"
3
4 #if __ARM_MAX_ARCH__>=7
5 .text
6 #if !defined(__clang__)
7 .arch armv8-a+crypto // only emitted for non-clang assemblers; selects the crypto extension so aese/aesd/aesmc/aesimc assemble
8 #endif
9 .align 5
10 .Lrcon: // constant table read through x3 by aes_v8_set_encrypt_key
11 .long 0x01,0x01,0x01,0x01 // first round constant in every lane; loaded into v1 and doubled (shl #1) after each use
12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat: tbl index mask loaded into v2
13 .long 0x1b,0x1b,0x1b,0x1b // round constant reloaded into v1 once the 8 iterations of .Loop128 are done
14
15 .globl aes_v8_set_encrypt_key // key expansion. In: x0 = user key, w1 = key size in bits, x2 = output key schedule
16 .type aes_v8_set_encrypt_key,%function // Out: x0 = 0 on success, -1 if x0 or x2 is null, -2 if w1 is not 128/192/256
17 .align 5
18 aes_v8_set_encrypt_key:
19 .Lenc_key: // local entry point, also reached via bl from aes_v8_set_decrypt_key
20 stp x29,x30,[sp,#-16]! // push frame record
21 add x29,sp,#0
22 mov x3,#-1 // x3 holds the pending return value; -1 = null pointer argument
23 cmp x0,#0
24 b.eq .Lenc_key_abort
25 cmp x2,#0
26 b.eq .Lenc_key_abort
27 mov x3,#-2 // -2 = unsupported key size
28 cmp w1,#128
29 b.lt .Lenc_key_abort
30 cmp w1,#256
31 b.gt .Lenc_key_abort
32 tst w1,#0x3f // inside 128..256 only multiples of 64 pass
33 b.ne .Lenc_key_abort
34
35 adr x3,.Lrcon // x3 is repurposed as the constant-table pointer until .Ldone
36 cmp w1,#192 // flags are consumed by the three-way branch below; nothing in between modifies them
37
38 eor v0.16b,v0.16b,v0.16b // v0 = 0: zero round key for aese and zero fill source for ext
39 ld1 {v3.16b},[x0],#16 // v3 = first 16 bytes of the user key
40 mov w1,#8 // reuse w1 as the loop counter
41 ld1 {v1.4s,v2.4s},[x3],#32 // v1 = round constant, v2 = rotate-n-splat mask; x3 now points at the 0x1b row
42
43 b.lt .Loop128
44 b.eq .L192
45 b .L256
46
47 .align 4
48 .Loop128: // 128-bit key: 8 iterations, each stores the round key in v3 and derives the next one
49 tbl v6.16b,{v3.16b},v2.16b // v6 = bytes of v3 selected by the mask in v2
50 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up by one 32-bit word, zeros shifted in
51 st1 {v3.4s},[x2],#16 // emit the current round key
52 aese v6.16b,v0.16b // zero round key, so only the substitution part of aese affects v6
53 subs w1,w1,#1 // flags are kept until the b.ne at the bottom of the loop
54
55 eor v3.16b,v3.16b,v5.16b // three shift-and-xor steps accumulate a running xor of the words of v3
56 ext v5.16b,v0.16b,v5.16b,#12
57 eor v3.16b,v3.16b,v5.16b
58 ext v5.16b,v0.16b,v5.16b,#12
59 eor v6.16b,v6.16b,v1.16b // mix in the round constant
60 eor v3.16b,v3.16b,v5.16b
61 shl v1.16b,v1.16b,#1 // double the round constant for the next iteration
62 eor v3.16b,v3.16b,v6.16b // v3 = next round key
63 b.ne .Loop128
64
65 ld1 {v1.4s},[x3] // v1 = 0x1b row of .Lrcon for the two remaining rounds
66
67 tbl v6.16b,{v3.16b},v2.16b // same step as the loop body, unrolled
68 ext v5.16b,v0.16b,v3.16b,#12
69 st1 {v3.4s},[x2],#16
70 aese v6.16b,v0.16b
71
72 eor v3.16b,v3.16b,v5.16b
73 ext v5.16b,v0.16b,v5.16b,#12
74 eor v3.16b,v3.16b,v5.16b
75 ext v5.16b,v0.16b,v5.16b,#12
76 eor v6.16b,v6.16b,v1.16b
77 eor v3.16b,v3.16b,v5.16b
78 shl v1.16b,v1.16b,#1 // 0x1b doubled for the final step
79 eor v3.16b,v3.16b,v6.16b
80
81 tbl v6.16b,{v3.16b},v2.16b // final step, again unrolled
82 ext v5.16b,v0.16b,v3.16b,#12
83 st1 {v3.4s},[x2],#16
84 aese v6.16b,v0.16b
85
86 eor v3.16b,v3.16b,v5.16b
87 ext v5.16b,v0.16b,v5.16b,#12
88 eor v3.16b,v3.16b,v5.16b
89 ext v5.16b,v0.16b,v5.16b,#12
90 eor v6.16b,v6.16b,v1.16b
91 eor v3.16b,v3.16b,v5.16b
92 eor v3.16b,v3.16b,v6.16b
93 st1 {v3.4s},[x2] // last round key, stored without post-increment (x2 is 160 bytes past the start)
94 add x2,x2,#0x50 // 160 + 0x50 = 240: x2 now addresses the rounds field
95
96 mov w12,#10 // round count written at .Ldone
97 b .Ldone
98
99 .align 4
100 .L192: // 192-bit key
101 ld1 {v4.8b},[x0],#8 // v4 = remaining 8 bytes of the user key
102 movi v6.16b,#8 // borrow v6.16b
103 st1 {v3.4s},[x2],#16 // emit the first 16 key bytes
104 sub v2.16b,v2.16b,v6.16b // adjust the mask: every tbl index lowered by 8 so it selects from the low half of v4
105
106 .Loop192: // 8 iterations (w1 = 8), each emits 8 + 16 bytes of schedule
107 tbl v6.16b,{v4.16b},v2.16b
108 ext v5.16b,v0.16b,v3.16b,#12
109 st1 {v4.8b},[x2],#8 // emit the low 8 bytes of v4
110 aese v6.16b,v0.16b // zero round key, as in .Loop128
111 subs w1,w1,#1 // flags are kept until the b.ne at the bottom of the loop
112
113 eor v3.16b,v3.16b,v5.16b
114 ext v5.16b,v0.16b,v5.16b,#12
115 eor v3.16b,v3.16b,v5.16b
116 ext v5.16b,v0.16b,v5.16b,#12
117 eor v3.16b,v3.16b,v5.16b
118
119 dup v5.4s,v3.s[3] // splat the top word of the partially updated v3
120 eor v5.16b,v5.16b,v4.16b
121 eor v6.16b,v6.16b,v1.16b // mix in the round constant
122 ext v4.16b,v0.16b,v4.16b,#12
123 shl v1.16b,v1.16b,#1 // double the round constant
124 eor v4.16b,v4.16b,v5.16b
125 eor v3.16b,v3.16b,v6.16b
126 eor v4.16b,v4.16b,v6.16b
127 st1 {v3.4s},[x2],#16 // emit the next 16 bytes of schedule
128 b.ne .Loop192
129
130 mov w12,#12 // round count written at .Ldone
131 add x2,x2,#0x20 // 16 + 8*24 = 208 bytes written; +0x20 brings x2 to offset 240
132 b .Ldone
133
134 .align 4
135 .L256: // 256-bit key
136 ld1 {v4.16b},[x0] // v4 = second 16 bytes of the user key
137 mov w1,#7 // 7 loop iterations
138 mov w12,#14 // round count written at .Ldone
139 st1 {v3.4s},[x2],#16 // emit the first 16 key bytes
140
141 .Loop256:
142 tbl v6.16b,{v4.16b},v2.16b
143 ext v5.16b,v0.16b,v3.16b,#12
144 st1 {v4.4s},[x2],#16 // emit v4
145 aese v6.16b,v0.16b // zero round key, as in .Loop128
146 subs w1,w1,#1 // flags are consumed by the b.eq after the store below
147
148 eor v3.16b,v3.16b,v5.16b
149 ext v5.16b,v0.16b,v5.16b,#12
150 eor v3.16b,v3.16b,v5.16b
151 ext v5.16b,v0.16b,v5.16b,#12
152 eor v6.16b,v6.16b,v1.16b // mix in the round constant
153 eor v3.16b,v3.16b,v5.16b
154 shl v1.16b,v1.16b,#1 // double the round constant
155 eor v3.16b,v3.16b,v6.16b
156 st1 {v3.4s},[x2],#16 // emit the updated v3
157 b.eq .Ldone // counter reached zero: x2 is at 16 + 7*32 = 240
158
159 dup v6.4s,v3.s[3] // just splat
160 ext v5.16b,v0.16b,v4.16b,#12
161 aese v6.16b,v0.16b // no round constant is mixed in on this half-step
162
163 eor v4.16b,v4.16b,v5.16b
164 ext v5.16b,v0.16b,v5.16b,#12
165 eor v4.16b,v4.16b,v5.16b
166 ext v5.16b,v0.16b,v5.16b,#12
167 eor v4.16b,v4.16b,v5.16b
168
169 eor v4.16b,v4.16b,v6.16b // v4 = next 16 bytes of schedule
170 b .Loop256
171
172 .Ldone:
173 str w12,[x2] // store the round count at offset 240 of the schedule
174 mov x3,#0 // success
175
176 .Lenc_key_abort:
177 mov x0,x3 // return value
178 ldr x29,[sp],#16 // pop the frame; x30 is never modified in this function, so it is not reloaded
179 ret
180 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
181
182 .globl aes_v8_set_decrypt_key // builds a decryption key schedule. In: x0 = user key, w1 = key size in bits, x2 = output schedule
183 .type aes_v8_set_decrypt_key,%function // Out: x0 = 0 on success, otherwise the error code produced by .Lenc_key
184 .align 5
185 aes_v8_set_decrypt_key:
186 stp x29,x30,[sp,#-16]! // x30 must be saved: the bl below overwrites it
187 add x29,sp,#0
188 bl .Lenc_key // build the encryption schedule first; x0 = status, w12 = round count
189
190 cmp x0,#0
191 b.ne .Ldec_key_abort // pass the error code through unchanged
192
193 sub x2,x2,#240 // restore original x2
194 mov x4,#-16 // post-index step used to walk x0 backwards
195 add x0,x2,x12,lsl#4 // end of key schedule
196
197 ld1 {v0.4s},[x2] // swap the first and last round keys as they are (no aesimc)
198 ld1 {v1.4s},[x0]
199 st1 {v0.4s},[x0],x4
200 st1 {v1.4s},[x2],#16
201
202 .Loop_imc: // swap round keys pairwise from both ends, applying aesimc to each
203 ld1 {v0.4s},[x2]
204 ld1 {v1.4s},[x0]
205 aesimc v0.16b,v0.16b
206 aesimc v1.16b,v1.16b
207 st1 {v0.4s},[x0],x4
208 st1 {v1.4s},[x2],#16
209 cmp x0,x2
210 b.hi .Loop_imc // continue while the upper pointer is still above the lower one
211
212 ld1 {v0.4s},[x2] // remaining middle round key: aesimc and store back
213 aesimc v0.16b,v0.16b
214 st1 {v0.4s},[x0]
215
216 eor x0,x0,x0 // return value
217 .Ldec_key_abort:
218 ldp x29,x30,[sp],#16 // restore frame pointer and return address
219 ret
220 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
221 .globl aes_v8_encrypt // encrypts one 16-byte block. In: x0 = input block, x1 = output block, x2 = key schedule
222 .type aes_v8_encrypt,%function
223 .align 5
224 aes_v8_encrypt:
225 ldr w3,[x2,#240] // w3 = round count stored at offset 240 of the schedule
226 ld1 {v0.4s},[x2],#16 // v0 = round key 0
227 ld1 {v2.16b},[x0] // v2 = input block (the cipher state)
228 sub w3,w3,#2 // the last two rounds are handled after the loop
229 ld1 {v1.4s},[x2],#16 // v1 = round key 1
230
231 .Loop_enc: // two rounds per iteration, next round keys loaded in between
232 aese v2.16b,v0.16b
233 aesmc v2.16b,v2.16b
234 ld1 {v0.4s},[x2],#16
235 subs w3,w3,#2 // flags are consumed by the b.gt below
236 aese v2.16b,v1.16b
237 aesmc v2.16b,v2.16b
238 ld1 {v1.4s},[x2],#16
239 b.gt .Loop_enc
240
241 aese v2.16b,v0.16b
242 aesmc v2.16b,v2.16b
243 ld1 {v0.4s},[x2] // v0 = last round key
244 aese v2.16b,v1.16b // final round: no aesmc
245 eor v2.16b,v2.16b,v0.16b // xor in the last round key
246
247 st1 {v2.16b},[x1] // write the result block
248 ret
249 .size aes_v8_encrypt,.-aes_v8_encrypt
250 .globl aes_v8_decrypt // decrypts one 16-byte block. In: x0 = input block, x1 = output block, x2 = key schedule
251 .type aes_v8_decrypt,%function
252 .align 5
253 aes_v8_decrypt:
254 ldr w3,[x2,#240] // w3 = round count stored at offset 240 of the schedule
255 ld1 {v0.4s},[x2],#16 // v0 = first round key in the schedule
256 ld1 {v2.16b},[x0] // v2 = input block (the cipher state)
257 sub w3,w3,#2 // the last two rounds are handled after the loop
258 ld1 {v1.4s},[x2],#16 // v1 = second round key in the schedule
259
260 .Loop_dec: // two rounds per iteration, next round keys loaded in between
261 aesd v2.16b,v0.16b
262 aesimc v2.16b,v2.16b
263 ld1 {v0.4s},[x2],#16
264 subs w3,w3,#2 // flags are consumed by the b.gt below
265 aesd v2.16b,v1.16b
266 aesimc v2.16b,v2.16b
267 ld1 {v1.4s},[x2],#16
268 b.gt .Loop_dec
269
270 aesd v2.16b,v0.16b
271 aesimc v2.16b,v2.16b
272 ld1 {v0.4s},[x2] // v0 = last round key in the schedule
273 aesd v2.16b,v1.16b // final round: no aesimc
274 eor v2.16b,v2.16b,v0.16b // xor in the last round key
275
276 st1 {v2.16b},[x1] // write the result block
277 ret
278 .size aes_v8_decrypt,.-aes_v8_decrypt
279 .globl aes_v8_cbc_encrypt // CBC mode. In: x0 = input, x1 = output, x2 = length in bytes, x3 = key schedule, x4 = IV (read and updated), w5 = 0 to decrypt, non-zero to encrypt
280 .type aes_v8_cbc_encrypt,%function
281 .align 5
282 aes_v8_cbc_encrypt:
283 stp x29,x30,[sp,#-16]! // push frame record
284 add x29,sp,#0
285 subs x2,x2,#16 // x2 = length - 16; flags feed both the b.lo and the csel below
286 mov x8,#16 // x8 = input post-increment step
287 b.lo .Lcbc_abort // less than one block: return without touching output or IV
288 csel x8,xzr,x8,eq // exactly one block: step 0, so x0 is not advanced by the load at 294
289
290 cmp w5,#0 // en- or decrypting?
291 ldr w5,[x3,#240] // w5 = round count; the flags from the cmp above stay live until the b.eq at 307
292 and x2,x2,#-16 // round the remaining length down to a multiple of 16
293 ld1 {v6.16b},[x4] // v6 = IV / chaining value
294 ld1 {v0.16b},[x0],x8 // v0 = first input block
295
296 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
297 sub w5,w5,#6
298 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
299 sub w5,w5,#2 // w5 = rounds - 8
300 ld1 {v18.4s,v19.4s},[x7],#32 // v18-v23 and v7 hold the last 7 round keys for the whole call
301 ld1 {v20.4s,v21.4s},[x7],#32
302 ld1 {v22.4s,v23.4s},[x7],#32
303 ld1 {v7.4s},[x7] // v7 = last round key
304
305 add x7,x3,#32 // x7 = address of the third round key
306 mov w6,w5
307 b.eq .Lcbc_dec // w5 was zero on entry: decrypt
308
309 cmp w5,#2 // rounds - 8 == 2 selects the dedicated 10-round path
310 eor v0.16b,v0.16b,v6.16b // first block xor IV
311 eor v5.16b,v16.16b,v7.16b // v5 = first round key xor last round key, pre-mixed into each following input block
312 b.eq .Lcbc_enc128
313
314 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2 and 3
315 add x7,x3,#16 // x7 = address of round key 1
316 add x6,x3,#16*4 // x6, x12, x14, x3 = addresses of round keys 4, 5, 6, 7
317 add x12,x3,#16*5
318 aese v0.16b,v16.16b
319 aesmc v0.16b,v0.16b
320 add x14,x3,#16*6
321 add x3,x3,#16*7 // x3 no longer points at the start of the schedule on this path
322 b .Lenter_cbc_enc
323
324 .align 4
325 .Loop_cbc_enc:
326 aese v0.16b,v16.16b // v16 = next input block xor v5 (prepared at 363)
327 aesmc v0.16b,v0.16b
328 st1 {v6.16b},[x1],#16 // store the previous output block
329 .Lenter_cbc_enc:
330 aese v0.16b,v17.16b
331 aesmc v0.16b,v0.16b
332 aese v0.16b,v2.16b
333 aesmc v0.16b,v0.16b
334 ld1 {v16.4s},[x6] // v16 = round key 4
335 cmp w5,#4 // rounds - 8 == 4: skip the two extra rounds below
336 aese v0.16b,v3.16b
337 aesmc v0.16b,v0.16b
338 ld1 {v17.4s},[x12] // v17 = round key 5
339 b.eq .Lcbc_enc192
340
341 aese v0.16b,v16.16b
342 aesmc v0.16b,v0.16b
343 ld1 {v16.4s},[x14] // v16 = round key 6
344 aese v0.16b,v17.16b
345 aesmc v0.16b,v0.16b
346 ld1 {v17.4s},[x3] // v17 = round key 7
347 nop
348
349 .Lcbc_enc192:
350 aese v0.16b,v16.16b
351 aesmc v0.16b,v0.16b
352 subs x2,x2,#16 // flags stay live until the b.hs at 371
353 aese v0.16b,v17.16b
354 aesmc v0.16b,v0.16b
355 csel x8,xzr,x8,eq // remaining length hit zero: stop advancing x0 for the look-ahead load
356 aese v0.16b,v18.16b
357 aesmc v0.16b,v0.16b
358 aese v0.16b,v19.16b
359 aesmc v0.16b,v0.16b
360 ld1 {v16.16b},[x0],x8 // look-ahead load of the next input block
361 aese v0.16b,v20.16b
362 aesmc v0.16b,v0.16b
363 eor v16.16b,v16.16b,v5.16b // next input xor (first round key xor last round key)
364 aese v0.16b,v21.16b
365 aesmc v0.16b,v0.16b
366 ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
367 aese v0.16b,v22.16b
368 aesmc v0.16b,v0.16b
369 aese v0.16b,v23.16b // final round: no aesmc
370 eor v6.16b,v0.16b,v7.16b // v6 = output block = state xor last round key
371 b.hs .Loop_cbc_enc // loop while the subs at 352 did not borrow
372
373 st1 {v6.16b},[x1],#16 // store the final output block
374 b .Lcbc_done
375
376 .align 5
377 .Lcbc_enc128: // 10-round path: the whole key schedule stays in registers
378 ld1 {v2.4s,v3.4s},[x7] // v2,v3 = round keys 2 and 3
379 aese v0.16b,v16.16b
380 aesmc v0.16b,v0.16b
381 b .Lenter_cbc_enc128
382 .Loop_cbc_enc128:
383 aese v0.16b,v16.16b // v16 = next input block xor v5 (prepared at 406)
384 aesmc v0.16b,v0.16b
385 st1 {v6.16b},[x1],#16 // store the previous output block
386 .Lenter_cbc_enc128:
387 aese v0.16b,v17.16b
388 aesmc v0.16b,v0.16b
389 subs x2,x2,#16 // flags stay live until the b.hs at 409
390 aese v0.16b,v2.16b
391 aesmc v0.16b,v0.16b
392 csel x8,xzr,x8,eq // remaining length hit zero: stop advancing x0 for the look-ahead load
393 aese v0.16b,v3.16b
394 aesmc v0.16b,v0.16b
395 aese v0.16b,v18.16b
396 aesmc v0.16b,v0.16b
397 aese v0.16b,v19.16b
398 aesmc v0.16b,v0.16b
399 ld1 {v16.16b},[x0],x8 // look-ahead load of the next input block
400 aese v0.16b,v20.16b
401 aesmc v0.16b,v0.16b
402 aese v0.16b,v21.16b
403 aesmc v0.16b,v0.16b
404 aese v0.16b,v22.16b
405 aesmc v0.16b,v0.16b
406 eor v16.16b,v16.16b,v5.16b // next input xor (first round key xor last round key)
407 aese v0.16b,v23.16b // final round: no aesmc
408 eor v6.16b,v0.16b,v7.16b // v6 = output block = state xor last round key
409 b.hs .Loop_cbc_enc128 // loop while the subs at 389 did not borrow
410
411 st1 {v6.16b},[x1],#16 // store the final output block
412 b .Lcbc_done
413 .align 5
414 .Lcbc_dec: // decryption: three blocks per iteration, then a one- or two-block tail
415 ld1 {v18.16b},[x0],#16 // v18 = next 16 input bytes at x0
416 subs x2,x2,#32 // bias
417 add w6,w5,#2 // w6 = counter for the two-rounds-per-pass loops
418 orr v3.16b,v0.16b,v0.16b // orr with identical sources is a register copy: v3 = v0
419 orr v1.16b,v0.16b,v0.16b // v1 = v0
420 orr v19.16b,v18.16b,v18.16b // v19 = v18, kept as the next chaining value
421 b.lo .Lcbc_dec_tail // fewer than three blocks in total
422
423 orr v1.16b,v18.16b,v18.16b // v1 = second input block
424 ld1 {v18.16b},[x0],#16 // v18 = third input block
425 orr v2.16b,v0.16b,v0.16b // v2, v3, v19 = untouched copies of the three input blocks
426 orr v3.16b,v1.16b,v1.16b
427 orr v19.16b,v18.16b,v18.16b
428
429 .Loop3x_cbc_dec: // v0, v1, v18 are processed in parallel, two rounds per pass
430 aesd v0.16b,v16.16b
431 aesimc v0.16b,v0.16b
432 aesd v1.16b,v16.16b
433 aesimc v1.16b,v1.16b
434 aesd v18.16b,v16.16b
435 aesimc v18.16b,v18.16b
436 ld1 {v16.4s},[x7],#16
437 subs w6,w6,#2 // flags are consumed by the b.gt at 445
438 aesd v0.16b,v17.16b
439 aesimc v0.16b,v0.16b
440 aesd v1.16b,v17.16b
441 aesimc v1.16b,v1.16b
442 aesd v18.16b,v17.16b
443 aesimc v18.16b,v18.16b
444 ld1 {v17.4s},[x7],#16
445 b.gt .Loop3x_cbc_dec
446
447 aesd v0.16b,v16.16b
448 aesimc v0.16b,v0.16b
449 aesd v1.16b,v16.16b
450 aesimc v1.16b,v1.16b
451 aesd v18.16b,v16.16b
452 aesimc v18.16b,v18.16b
453 eor v4.16b,v6.16b,v7.16b // v4 = chaining value xor last round key (mask for the first output)
454 subs x2,x2,#0x30 // three blocks consumed; flags stay live until the b.hs at 505
455 eor v5.16b,v2.16b,v7.16b // v5 = first input block xor last round key (mask for the second output)
456 csel x6,x2,x6,lo // x6, w6, is zero at this point
457 aesd v0.16b,v17.16b
458 aesimc v0.16b,v0.16b
459 aesd v1.16b,v17.16b
460 aesimc v1.16b,v1.16b
461 aesd v18.16b,v17.16b
462 aesimc v18.16b,v18.16b
463 eor v17.16b,v3.16b,v7.16b // v17 = second input block xor last round key (mask for the third output)
464 add x0,x0,x6 // x0 is adjusted in such way that
465 // at exit from the loop v1.16b-v18.16b
466 // are loaded with last "words"
467 orr v6.16b,v19.16b,v19.16b // new chaining value = third input block of this group
468 mov x7,x3 // rewind the round-key pointer to the start of the schedule
469 aesd v0.16b,v20.16b
470 aesimc v0.16b,v0.16b
471 aesd v1.16b,v20.16b
472 aesimc v1.16b,v1.16b
473 aesd v18.16b,v20.16b
474 aesimc v18.16b,v18.16b
475 ld1 {v2.16b},[x0],#16 // v2, v3, v19 = next three input blocks
476 aesd v0.16b,v21.16b
477 aesimc v0.16b,v0.16b
478 aesd v1.16b,v21.16b
479 aesimc v1.16b,v1.16b
480 aesd v18.16b,v21.16b
481 aesimc v18.16b,v18.16b
482 ld1 {v3.16b},[x0],#16
483 aesd v0.16b,v22.16b
484 aesimc v0.16b,v0.16b
485 aesd v1.16b,v22.16b
486 aesimc v1.16b,v1.16b
487 aesd v18.16b,v22.16b
488 aesimc v18.16b,v18.16b
489 ld1 {v19.16b},[x0],#16
490 aesd v0.16b,v23.16b // final round: no aesimc
491 aesd v1.16b,v23.16b
492 aesd v18.16b,v23.16b
493 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
494 add w6,w5,#2 // reset the round counter
495 eor v4.16b,v4.16b,v0.16b // apply the three masks: last round key and CBC chaining in one xor each
496 eor v5.16b,v5.16b,v1.16b
497 eor v18.16b,v18.16b,v17.16b
498 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
499 st1 {v4.16b},[x1],#16
500 orr v0.16b,v2.16b,v2.16b // move the freshly loaded blocks into the working registers
501 st1 {v5.16b},[x1],#16
502 orr v1.16b,v3.16b,v3.16b
503 st1 {v18.16b},[x1],#16
504 orr v18.16b,v19.16b,v19.16b
505 b.hs .Loop3x_cbc_dec // loop while the subs at 454 did not borrow
506
507 cmn x2,#0x30 // x2 == -0x30: nothing left after the three-block groups
508 b.eq .Lcbc_done
509 nop
510
511 .Lcbc_dec_tail: // one or two blocks remain; v1 and v18 are processed in parallel
512 aesd v1.16b,v16.16b
513 aesimc v1.16b,v1.16b
514 aesd v18.16b,v16.16b
515 aesimc v18.16b,v18.16b
516 ld1 {v16.4s},[x7],#16
517 subs w6,w6,#2 // flags are consumed by the b.gt at 523
518 aesd v1.16b,v17.16b
519 aesimc v1.16b,v1.16b
520 aesd v18.16b,v17.16b
521 aesimc v18.16b,v18.16b
522 ld1 {v17.4s},[x7],#16
523 b.gt .Lcbc_dec_tail
524
525 aesd v1.16b,v16.16b
526 aesimc v1.16b,v1.16b
527 aesd v18.16b,v16.16b
528 aesimc v18.16b,v18.16b
529 aesd v1.16b,v17.16b
530 aesimc v1.16b,v1.16b
531 aesd v18.16b,v17.16b
532 aesimc v18.16b,v18.16b
533 aesd v1.16b,v20.16b
534 aesimc v1.16b,v1.16b
535 aesd v18.16b,v20.16b
536 aesimc v18.16b,v18.16b
537 cmn x2,#0x20 // x2 == -0x20 means a single block is left; flags are consumed by the b.eq at 550
538 aesd v1.16b,v21.16b
539 aesimc v1.16b,v1.16b
540 aesd v18.16b,v21.16b
541 aesimc v18.16b,v18.16b
542 eor v5.16b,v6.16b,v7.16b // v5 = chaining value xor last round key
543 aesd v1.16b,v22.16b
544 aesimc v1.16b,v1.16b
545 aesd v18.16b,v22.16b
546 aesimc v18.16b,v18.16b
547 eor v17.16b,v3.16b,v7.16b // v17 = saved input block in v3 xor last round key
548 aesd v1.16b,v23.16b // final round: no aesimc
549 aesd v18.16b,v23.16b
550 b.eq .Lcbc_dec_one
551 eor v5.16b,v5.16b,v1.16b // two blocks: first output from v1, second from v18
552 eor v17.16b,v17.16b,v18.16b
553 orr v6.16b,v19.16b,v19.16b // new chaining value = last input block
554 st1 {v5.16b},[x1],#16
555 st1 {v17.16b},[x1],#16
556 b .Lcbc_done
557
558 .Lcbc_dec_one: // single block: only v18 contributes to the output
559 eor v5.16b,v5.16b,v18.16b
560 orr v6.16b,v19.16b,v19.16b // new chaining value = last input block
561 st1 {v5.16b},[x1],#16
562
563 .Lcbc_done:
564 st1 {v6.16b},[x4] // write the updated chaining value back to the IV buffer
565 .Lcbc_abort:
566 ldr x29,[sp],#16 // pop the frame; x30 is never modified in this function, so it is not reloaded
567 ret
568 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
569 .globl aes_v8_ctr32_encrypt_blocks // CTR mode with a 32-bit counter. In: x0 = input, x1 = output, x2 = number of 16-byte blocks, x3 = key schedule, x4 = counter block (only read here)
570 .type aes_v8_ctr32_encrypt_blocks,%function
571 .align 5
572 aes_v8_ctr32_encrypt_blocks:
573 stp x29,x30,[sp,#-16]! // push frame record
574 add x29,sp,#0
575 ldr w5,[x3,#240] // w5 = round count
576
577 ldr w8, [x4, #12] // w8 = last 32-bit word of the counter block
578 ld1 {v0.4s},[x4] // v0 = full counter block
579
580 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
581 sub w5,w5,#4
582 mov x12,#16 // x12 = input step used by the tail's first load
583 cmp x2,#2 // flags stay live for the csel at 591 and the b.ls at 602
584 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
585 sub w5,w5,#2 // w5 = rounds - 6
586 ld1 {v20.4s,v21.4s},[x7],#32 // v20-v23 and v7 hold the last 5 round keys for the whole call
587 ld1 {v22.4s,v23.4s},[x7],#32
588 ld1 {v7.4s},[x7] // v7 = last round key
589 add x7,x3,#32 // x7 = address of the third round key
590 mov w6,w5 // w6 = round loop counter
591 csel x12,xzr,x12,lo // fewer than two blocks: step 0 so the tail does not advance x0
592 #ifndef __ARMEB__
593 rev w8, w8 // byte-swap the counter word unless building big-endian
594 #endif
595 orr v1.16b,v0.16b,v0.16b // orr with identical sources is a register copy: v1 = v0
596 add w10, w8, #1
597 orr v18.16b,v0.16b,v0.16b // v18 = v0
598 add w8, w8, #2
599 orr v6.16b,v0.16b,v0.16b // v6 = pristine copy of the counter block, used as a template later
600 rev w10, w10
601 mov v1.s[3],w10 // v1 = counter block with counter + 1
602 b.ls .Lctr32_tail // two blocks or fewer: tail only
603 rev w12, w8 // w12 is reused as a scratch register on this path
604 sub x2,x2,#3 // bias
605 mov v18.s[3],w12 // v18 = counter block with counter + 2
606 b .Loop3x_ctr32
607
608 .align 4
609 .Loop3x_ctr32: // three counter blocks (v0, v1, v18) encrypted in parallel, two rounds per pass
610 aese v0.16b,v16.16b
611 aesmc v0.16b,v0.16b
612 aese v1.16b,v16.16b
613 aesmc v1.16b,v1.16b
614 aese v18.16b,v16.16b
615 aesmc v18.16b,v18.16b
616 ld1 {v16.4s},[x7],#16
617 subs w6,w6,#2 // flags are consumed by the b.gt at 625
618 aese v0.16b,v17.16b
619 aesmc v0.16b,v0.16b
620 aese v1.16b,v17.16b
621 aesmc v1.16b,v1.16b
622 aese v18.16b,v17.16b
623 aesmc v18.16b,v18.16b
624 ld1 {v17.4s},[x7],#16
625 b.gt .Loop3x_ctr32
626
627 aese v0.16b,v16.16b
628 aesmc v4.16b,v0.16b // the state moves to v4/v5/v17 so v0/v1/v18 can be rebuilt for the next group
629 aese v1.16b,v16.16b
630 aesmc v5.16b,v1.16b
631 ld1 {v2.16b},[x0],#16 // v2, v3, v19 = three input blocks
632 orr v0.16b,v6.16b,v6.16b // restart v0 from the counter template
633 aese v18.16b,v16.16b
634 aesmc v18.16b,v18.16b
635 ld1 {v3.16b},[x0],#16
636 orr v1.16b,v6.16b,v6.16b // restart v1 from the counter template
637 aese v4.16b,v17.16b
638 aesmc v4.16b,v4.16b
639 aese v5.16b,v17.16b
640 aesmc v5.16b,v5.16b
641 ld1 {v19.16b},[x0],#16
642 mov x7,x3 // rewind the round-key pointer to the start of the schedule
643 aese v18.16b,v17.16b
644 aesmc v17.16b,v18.16b
645 orr v18.16b,v6.16b,v6.16b // restart v18 from the counter template
646 add w9,w8,#1 // next three counter values: w8+1, w8+2, w8+3
647 aese v4.16b,v20.16b
648 aesmc v4.16b,v4.16b
649 aese v5.16b,v20.16b
650 aesmc v5.16b,v5.16b
651 eor v2.16b,v2.16b,v7.16b // pre-mix the last round key into the input blocks
652 add w10,w8,#2
653 aese v17.16b,v20.16b
654 aesmc v17.16b,v17.16b
655 eor v3.16b,v3.16b,v7.16b
656 add w8,w8,#3
657 aese v4.16b,v21.16b
658 aesmc v4.16b,v4.16b
659 aese v5.16b,v21.16b
660 aesmc v5.16b,v5.16b
661 eor v19.16b,v19.16b,v7.16b
662 rev w9,w9
663 aese v17.16b,v21.16b
664 aesmc v17.16b,v17.16b
665 mov v0.s[3], w9 // insert the new counter words into lane 3 of each block
666 rev w10,w10
667 aese v4.16b,v22.16b
668 aesmc v4.16b,v4.16b
669 aese v5.16b,v22.16b
670 aesmc v5.16b,v5.16b
671 mov v1.s[3], w10
672 rev w12,w8
673 aese v17.16b,v22.16b
674 aesmc v17.16b,v17.16b
675 mov v18.s[3], w12
676 subs x2,x2,#3 // three blocks consumed; flags stay live until the b.hs at 690
677 aese v4.16b,v23.16b // final round: no aesmc
678 aese v5.16b,v23.16b
679 aese v17.16b,v23.16b
680
681 eor v2.16b,v2.16b,v4.16b // output = (input xor last round key) xor state
682 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
683 st1 {v2.16b},[x1],#16
684 eor v3.16b,v3.16b,v5.16b
685 mov w6,w5 // reset the round counter
686 st1 {v3.16b},[x1],#16
687 eor v19.16b,v19.16b,v17.16b
688 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
689 st1 {v19.16b},[x1],#16
690 b.hs .Loop3x_ctr32 // loop while the subs at 676 did not borrow
691
692 adds x2,x2,#3 // undo the bias: x2 = blocks still to do (0, 1 or 2)
693 b.eq .Lctr32_done
694 cmp x2,#1
695 mov x12,#16
696 csel x12,xzr,x12,eq // one block left: step 0 so the tail does not advance x0
697
698 .Lctr32_tail: // one or two blocks; v0 and v1 are processed in parallel
699 aese v0.16b,v16.16b
700 aesmc v0.16b,v0.16b
701 aese v1.16b,v16.16b
702 aesmc v1.16b,v1.16b
703 ld1 {v16.4s},[x7],#16
704 subs w6,w6,#2 // flags are consumed by the b.gt at 710
705 aese v0.16b,v17.16b
706 aesmc v0.16b,v0.16b
707 aese v1.16b,v17.16b
708 aesmc v1.16b,v1.16b
709 ld1 {v17.4s},[x7],#16
710 b.gt .Lctr32_tail
711
712 aese v0.16b,v16.16b
713 aesmc v0.16b,v0.16b
714 aese v1.16b,v16.16b
715 aesmc v1.16b,v1.16b
716 aese v0.16b,v17.16b
717 aesmc v0.16b,v0.16b
718 aese v1.16b,v17.16b
719 aesmc v1.16b,v1.16b
720 ld1 {v2.16b},[x0],x12 // first input block; x0 advances by x12 (0 or 16)
721 aese v0.16b,v20.16b
722 aesmc v0.16b,v0.16b
723 aese v1.16b,v20.16b
724 aesmc v1.16b,v1.16b
725 ld1 {v3.16b},[x0] // second input block (re-reads the first one when x12 is 0)
726 aese v0.16b,v21.16b
727 aesmc v0.16b,v0.16b
728 aese v1.16b,v21.16b
729 aesmc v1.16b,v1.16b
730 eor v2.16b,v2.16b,v7.16b // pre-mix the last round key into the input blocks
731 aese v0.16b,v22.16b
732 aesmc v0.16b,v0.16b
733 aese v1.16b,v22.16b
734 aesmc v1.16b,v1.16b
735 eor v3.16b,v3.16b,v7.16b
736 aese v0.16b,v23.16b // final round: no aesmc
737 aese v1.16b,v23.16b
738
739 cmp x2,#1 // flags are consumed by the b.eq at 743
740 eor v2.16b,v2.16b,v0.16b
741 eor v3.16b,v3.16b,v1.16b
742 st1 {v2.16b},[x1],#16
743 b.eq .Lctr32_done // x2 == 1: skip the second store
744 st1 {v3.16b},[x1]
745
746 .Lctr32_done:
747 ldr x29,[sp],#16 // pop the frame; x30 is never modified in this function, so it is not reloaded
748 ret
749 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
750 #endif // __ARM_MAX_ARCH__>=7
751 #endif // __aarch64__
OLDNEW
« no previous file with comments | « third_party/boringssl/err_data.c ('k') | third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx64.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698