| OLD | NEW |
| (Empty) |
| 1 #if defined(__aarch64__) | |
| 2 #include <openssl/arm_arch.h> | |
| 3 | |
| 4 #if __ARM_MAX_ARCH__>=7 | |
| 5 .text | |
| 6 #if !defined(__clang__) | |
| 7 .arch armv8-a+crypto | |
| 8 #endif | |
| 9 .align 5 | |
| 10 .Lrcon: // constant table for key expansion; addressed through x3 in aes_v8_set_encrypt_key | |
| 11 .long 0x01,0x01,0x01,0x01 // initial round constant, loaded into v1 and doubled after each use | |
| 12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat (tbl index mask, loaded into v2) | |
| 13 .long 0x1b,0x1b,0x1b,0x1b // round constant reloaded into v1 for the last two 128-bit expansion steps | |
| 14 | |
| 15 .globl aes_v8_set_encrypt_key | |
| 16 .hidden aes_v8_set_encrypt_key | |
| 17 .type aes_v8_set_encrypt_key,%function | |
| 18 .align 5 | |
| 19 aes_v8_set_encrypt_key: // in: x0 = user key, w1 = key size in bits, x2 = key schedule to fill; out: x0 = 0, -1 or -2 | |
| 20 .Lenc_key: // local alias of the entry point; aes_v8_set_decrypt_key reaches it with bl | |
| 21 stp x29,x30,[sp,#-16]! // push frame record | |
| 22 add x29,sp,#0 // x29 = sp | |
| 23 mov x3,#-1 // provisional result: -1 for a NULL pointer | |
| 24 cmp x0,#0 | |
| 25 b.eq .Lenc_key_abort // user key pointer is NULL | |
| 26 cmp x2,#0 | |
| 27 b.eq .Lenc_key_abort // key schedule pointer is NULL | |
| 28 mov x3,#-2 // provisional result: -2 for a rejected key size | |
| 29 cmp w1,#128 | |
| 30 b.lt .Lenc_key_abort // fewer than 128 bits | |
| 31 cmp w1,#256 | |
| 32 b.gt .Lenc_key_abort // more than 256 bits | |
| 33 tst w1,#0x3f | |
| 34 b.ne .Lenc_key_abort // not a multiple of 64, so only 128, 192 and 256 get through | |
| 35 | |
| 36 adr x3,.Lrcon // x3 = constant table (x3 is set to 0 again at .Ldone) | |
| 37 cmp w1,#192 // flags stay live until the three branches below | |
| 38 | |
| 39 eor v0.16b,v0.16b,v0.16b // v0 = 0 | |
| 40 ld1 {v3.16b},[x0],#16 // v3 = key bytes 0..15; x0 += 16 | |
| 41 mov w1,#8 // reuse w1 as the loop counter | |
| 42 ld1 {v1.4s,v2.4s},[x3],#32 // v1 = round constant, v2 = tbl mask; x3 now points at the third table row | |
| 43 | |
| 44 b.lt .Loop128 // 128-bit key | |
| 45 b.eq .L192 // 192-bit key | |
| 46 b .L256 // 256-bit key | |
| 47 | |
| 48 .align 4 | |
| 49 .Loop128: // one expansion step per pass, 8 passes | |
| 50 tbl v6.16b,{v3.16b},v2.16b // v6 = last word of v3, byte-rotated and copied to all four lanes | |
| 51 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up one word, zero in the lowest word | |
| 52 st1 {v3.4s},[x2],#16 // store the current round key; x2 += 16 | |
| 53 aese v6.16b,v0.16b // aese with an all-zero key, used for its byte substitution | |
| 54 subs w1,w1,#1 // flags are consumed by the b.ne at the end of the pass | |
| 55 | |
| 56 eor v3.16b,v3.16b,v5.16b // this and the next four rows: every word of v3 becomes the XOR of itself and all lower words | |
| 57 ext v5.16b,v0.16b,v5.16b,#12 | |
| 58 eor v3.16b,v3.16b,v5.16b | |
| 59 ext v5.16b,v0.16b,v5.16b,#12 | |
| 60 eor v6.16b,v6.16b,v1.16b // fold the round constant into the substituted word | |
| 61 eor v3.16b,v3.16b,v5.16b | |
| 62 shl v1.16b,v1.16b,#1 // double the round constant for the next step | |
| 63 eor v3.16b,v3.16b,v6.16b // v3 = next round key | |
| 64 b.ne .Loop128 | |
| 65 | |
| 66 ld1 {v1.4s},[x3] // v1 = third row of .Lrcon (0x1b) | |
| 67 | |
| 68 tbl v6.16b,{v3.16b},v2.16b // same expansion step as in the loop, unrolled | |
| 69 ext v5.16b,v0.16b,v3.16b,#12 | |
| 70 st1 {v3.4s},[x2],#16 | |
| 71 aese v6.16b,v0.16b | |
| 72 | |
| 73 eor v3.16b,v3.16b,v5.16b | |
| 74 ext v5.16b,v0.16b,v5.16b,#12 | |
| 75 eor v3.16b,v3.16b,v5.16b | |
| 76 ext v5.16b,v0.16b,v5.16b,#12 | |
| 77 eor v6.16b,v6.16b,v1.16b | |
| 78 eor v3.16b,v3.16b,v5.16b | |
| 79 shl v1.16b,v1.16b,#1 | |
| 80 eor v3.16b,v3.16b,v6.16b | |
| 81 | |
| 82 tbl v6.16b,{v3.16b},v2.16b // final expansion step; the round constant is not doubled afterwards | |
| 83 ext v5.16b,v0.16b,v3.16b,#12 | |
| 84 st1 {v3.4s},[x2],#16 | |
| 85 aese v6.16b,v0.16b | |
| 86 | |
| 87 eor v3.16b,v3.16b,v5.16b | |
| 88 ext v5.16b,v0.16b,v5.16b,#12 | |
| 89 eor v3.16b,v3.16b,v5.16b | |
| 90 ext v5.16b,v0.16b,v5.16b,#12 | |
| 91 eor v6.16b,v6.16b,v1.16b | |
| 92 eor v3.16b,v3.16b,v5.16b | |
| 93 eor v3.16b,v3.16b,v6.16b | |
| 94 st1 {v3.4s},[x2] // store the last round key at offset 160, no post-increment | |
| 95 add x2,x2,#0x50 // x2 = schedule base + 240 | |
| 96 | |
| 97 mov w12,#10 // round count for 128-bit keys | |
| 98 b .Ldone | |
| 99 | |
| 100 .align 4 | |
| 101 .L192: | |
| 102 ld1 {v4.8b},[x0],#8 // low half of v4 = key bytes 16..23 | |
| 103 movi v6.16b,#8 // borrow v6.16b | |
| 104 st1 {v3.4s},[x2],#16 // store the first 16 key bytes | |
| 105 sub v2.16b,v2.16b,v6.16b // adjust the mask (indices drop by 8, so tbl selects from the low half of v4) | |
| 106 | |
| 107 .Loop192: // 8 passes, each storing 8 + 16 bytes | |
| 108 tbl v6.16b,{v4.16b},v2.16b | |
| 109 ext v5.16b,v0.16b,v3.16b,#12 | |
| 110 st1 {v4.8b},[x2],#8 // store the low 8 bytes of v4; x2 += 8 | |
| 111 aese v6.16b,v0.16b // aese with an all-zero key | |
| 112 subs w1,w1,#1 // flags are consumed by the b.ne at the end of the pass | |
| 113 | |
| 114 eor v3.16b,v3.16b,v5.16b // this and the next four rows: running XOR across the words of v3 | |
| 115 ext v5.16b,v0.16b,v5.16b,#12 | |
| 116 eor v3.16b,v3.16b,v5.16b | |
| 117 ext v5.16b,v0.16b,v5.16b,#12 | |
| 118 eor v3.16b,v3.16b,v5.16b | |
| 119 | |
| 120 dup v5.4s,v3.s[3] // copy the top word of v3 to every lane | |
| 121 eor v5.16b,v5.16b,v4.16b | |
| 122 eor v6.16b,v6.16b,v1.16b // fold in the round constant | |
| 123 ext v4.16b,v0.16b,v4.16b,#12 // v4 moved up one word, zero in the lowest word | |
| 124 shl v1.16b,v1.16b,#1 // double the round constant | |
| 125 eor v4.16b,v4.16b,v5.16b | |
| 126 eor v3.16b,v3.16b,v6.16b | |
| 127 eor v4.16b,v4.16b,v6.16b | |
| 128 st1 {v3.4s},[x2],#16 // store 16 more bytes of schedule | |
| 129 b.ne .Loop192 | |
| 130 | |
| 131 mov w12,#12 // round count for 192-bit keys | |
| 132 add x2,x2,#0x20 // x2 = schedule base + 240 (16 + 8*24 + 32) | |
| 133 b .Ldone | |
| 134 | |
| 135 .align 4 | |
| 136 .L256: | |
| 137 ld1 {v4.16b},[x0] // v4 = key bytes 16..31 | |
| 138 mov w1,#7 // 7 passes | |
| 139 mov w12,#14 // round count for 256-bit keys | |
| 140 st1 {v3.4s},[x2],#16 // store the first 16 key bytes | |
| 141 | |
| 142 .Loop256: | |
| 143 tbl v6.16b,{v4.16b},v2.16b // v6 = last word of v4, byte-rotated and copied to all four lanes | |
| 144 ext v5.16b,v0.16b,v3.16b,#12 | |
| 145 st1 {v4.4s},[x2],#16 | |
| 146 aese v6.16b,v0.16b // aese with an all-zero key | |
| 147 subs w1,w1,#1 // flags are consumed by the b.eq below | |
| 148 | |
| 149 eor v3.16b,v3.16b,v5.16b | |
| 150 ext v5.16b,v0.16b,v5.16b,#12 | |
| 151 eor v3.16b,v3.16b,v5.16b | |
| 152 ext v5.16b,v0.16b,v5.16b,#12 | |
| 153 eor v6.16b,v6.16b,v1.16b // fold in the round constant | |
| 154 eor v3.16b,v3.16b,v5.16b | |
| 155 shl v1.16b,v1.16b,#1 // double the round constant | |
| 156 eor v3.16b,v3.16b,v6.16b | |
| 157 st1 {v3.4s},[x2],#16 | |
| 158 b.eq .Ldone // after the 7th pass x2 = schedule base + 240 (16 + 7*32) | |
| 159 | |
| 160 dup v6.4s,v3.s[3] // just splat | |
| 161 ext v5.16b,v0.16b,v4.16b,#12 | |
| 162 aese v6.16b,v0.16b // aese with an all-zero key; no round constant in this half-step | |
| 163 | |
| 164 eor v4.16b,v4.16b,v5.16b // this and the next four rows: running XOR across the words of v4 | |
| 165 ext v5.16b,v0.16b,v5.16b,#12 | |
| 166 eor v4.16b,v4.16b,v5.16b | |
| 167 ext v5.16b,v0.16b,v5.16b,#12 | |
| 168 eor v4.16b,v4.16b,v5.16b | |
| 169 | |
| 170 eor v4.16b,v4.16b,v6.16b | |
| 171 b .Loop256 | |
| 172 | |
| 173 .Ldone: | |
| 174 str w12,[x2] // store the round count; x2 is at byte offset 240 on every path | |
| 175 mov x3,#0 // success | |
| 176 | |
| 177 .Lenc_key_abort: | |
| 178 mov x0,x3 // return value | |
| 179 ldr x29,[sp],#16 // pop the frame record, reloading x29 only | |
| 180 ret | |
| 181 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key | |
| 182 | |
| 183 .globl aes_v8_set_decrypt_key | |
| 184 .hidden aes_v8_set_decrypt_key | |
| 185 .type aes_v8_set_decrypt_key,%function | |
| 186 .align 5 | |
| 187 aes_v8_set_decrypt_key: // same arguments as aes_v8_set_encrypt_key; builds that schedule, then reverses and transforms it in place | |
| 188 stp x29,x30,[sp,#-16]! // push frame record; x30 must survive the bl below | |
| 189 add x29,sp,#0 // x29 = sp | |
| 190 bl .Lenc_key // build the encryption schedule; leaves x2 = base + 240 and w12 = round count | |
| 191 | |
| 192 cmp x0,#0 | |
| 193 b.ne .Ldec_key_abort // pass a non-zero result straight back to the caller | |
| 194 | |
| 195 sub x2,x2,#240 // restore original x2 | |
| 196 mov x4,#-16 // post-index step for walking x0 downwards | |
| 197 add x0,x2,x12,lsl#4 // end of key schedule | |
| 198 | |
| 199 ld1 {v0.4s},[x2] // swap the first and last round keys without aesimc | |
| 200 ld1 {v1.4s},[x0] | |
| 201 st1 {v0.4s},[x0],x4 | |
| 202 st1 {v1.4s},[x2],#16 | |
| 203 | |
| 204 .Loop_imc: // swap the remaining pairs from both ends, applying aesimc to each key | |
| 205 ld1 {v0.4s},[x2] | |
| 206 ld1 {v1.4s},[x0] | |
| 207 aesimc v0.16b,v0.16b | |
| 208 aesimc v1.16b,v1.16b | |
| 209 st1 {v0.4s},[x0],x4 // x0 -= 16 | |
| 210 st1 {v1.4s},[x2],#16 // x2 += 16 | |
| 211 cmp x0,x2 | |
| 212 b.hi .Loop_imc // continue while the upper pointer is still above the lower one | |
| 213 | |
| 214 ld1 {v0.4s},[x2] // middle round key: transformed and written back through x0 | |
| 215 aesimc v0.16b,v0.16b | |
| 216 st1 {v0.4s},[x0] | |
| 217 | |
| 218 eor x0,x0,x0 // return value | |
| 219 .Ldec_key_abort: | |
| 220 ldp x29,x30,[sp],#16 // pop frame record, restoring x29 and x30 | |
| 221 ret | |
| 222 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key | |
| 223 .globl aes_v8_encrypt | |
| 224 .hidden aes_v8_encrypt | |
| 225 .type aes_v8_encrypt,%function | |
| 226 .align 5 | |
| 227 aes_v8_encrypt: // in: x0 = 16-byte input block, x1 = 16-byte output block, x2 = key schedule | |
| 228 ldr w3,[x2,#240] // w3 = round count stored at offset 240 of the schedule | |
| 229 ld1 {v0.4s},[x2],#16 // v0 = round key 0 | |
| 230 ld1 {v2.16b},[x0] // v2 = input block | |
| 231 sub w3,w3,#2 // the last two rounds are handled after the loop | |
| 232 ld1 {v1.4s},[x2],#16 // v1 = round key 1 | |
| 233 | |
| 234 .Loop_enc: // two rounds per pass, reloading v0 and v1 with the next two round keys | |
| 235 aese v2.16b,v0.16b | |
| 236 aesmc v2.16b,v2.16b | |
| 237 ld1 {v0.4s},[x2],#16 | |
| 238 subs w3,w3,#2 // flags are consumed by the b.gt below | |
| 239 aese v2.16b,v1.16b | |
| 240 aesmc v2.16b,v2.16b | |
| 241 ld1 {v1.4s},[x2],#16 | |
| 242 b.gt .Loop_enc | |
| 243 | |
| 244 aese v2.16b,v0.16b | |
| 245 aesmc v2.16b,v2.16b | |
| 246 ld1 {v0.4s},[x2] // v0 = last round key | |
| 247 aese v2.16b,v1.16b // final round: no aesmc | |
| 248 eor v2.16b,v2.16b,v0.16b // add the last round key | |
| 249 | |
| 250 st1 {v2.16b},[x1] // write the result block | |
| 251 ret | |
| 252 .size aes_v8_encrypt,.-aes_v8_encrypt | |
| 253 .globl aes_v8_decrypt | |
| 254 .hidden aes_v8_decrypt | |
| 255 .type aes_v8_decrypt,%function | |
| 256 .align 5 | |
| 257 aes_v8_decrypt: // in: x0 = 16-byte input block, x1 = 16-byte output block, x2 = key schedule | |
| 258 ldr w3,[x2,#240] // w3 = round count stored at offset 240 of the schedule | |
| 259 ld1 {v0.4s},[x2],#16 // v0 = first round key in the schedule | |
| 260 ld1 {v2.16b},[x0] // v2 = input block | |
| 261 sub w3,w3,#2 // the last two rounds are handled after the loop | |
| 262 ld1 {v1.4s},[x2],#16 // v1 = second round key in the schedule | |
| 263 | |
| 264 .Loop_dec: // two rounds per pass, reloading v0 and v1 with the next two round keys | |
| 265 aesd v2.16b,v0.16b | |
| 266 aesimc v2.16b,v2.16b | |
| 267 ld1 {v0.4s},[x2],#16 | |
| 268 subs w3,w3,#2 // flags are consumed by the b.gt below | |
| 269 aesd v2.16b,v1.16b | |
| 270 aesimc v2.16b,v2.16b | |
| 271 ld1 {v1.4s},[x2],#16 | |
| 272 b.gt .Loop_dec | |
| 273 | |
| 274 aesd v2.16b,v0.16b | |
| 275 aesimc v2.16b,v2.16b | |
| 276 ld1 {v0.4s},[x2] // v0 = last round key in the schedule | |
| 277 aesd v2.16b,v1.16b // final round: no aesimc | |
| 278 eor v2.16b,v2.16b,v0.16b // add the last round key | |
| 279 | |
| 280 st1 {v2.16b},[x1] // write the result block | |
| 281 ret | |
| 282 .size aes_v8_decrypt,.-aes_v8_decrypt | |
| 283 .globl aes_v8_cbc_encrypt | |
| 284 .hidden aes_v8_cbc_encrypt | |
| 285 .type aes_v8_cbc_encrypt,%function | |
| 286 .align 5 | |
| 287 aes_v8_cbc_encrypt: // in: x0 = input, x1 = output, x2 = length in bytes, x3 = key schedule, x4 = IV (rewritten on exit), w5 = 0 to decrypt, non-zero to encrypt | |
| 288 stp x29,x30,[sp,#-16]! // push frame record | |
| 289 add x29,sp,#0 // x29 = sp | |
| 290 subs x2,x2,#16 | |
| 291 mov x8,#16 // x8 = input step | |
| 292 b.lo .Lcbc_abort // fewer than 16 bytes: nothing is processed and the IV is left untouched | |
| 293 csel x8,xzr,x8,eq // exactly one block: step 0, so the load below does not advance x0 | |
| 294 | |
| 295 cmp w5,#0 // en- or decrypting? | |
| 296 ldr w5,[x3,#240] // w5 = round count; the flags from the cmp stay live until b.eq .Lcbc_dec | |
| 297 and x2,x2,#-16 // drop any partial trailing block from the length | |
| 298 ld1 {v6.16b},[x4] // v6 = IV | |
| 299 ld1 {v0.16b},[x0],x8 // v0 = first input block | |
| 300 | |
| 301 ld1 {v16.4s,v17.4s},[x3] // load key schedule... | |
| 302 sub w5,w5,#6 | |
| 303 add x7,x3,x5,lsl#4 // pointer to last 7 round keys | |
| 304 sub w5,w5,#2 // w5 = round count - 8 | |
| 305 ld1 {v18.4s,v19.4s},[x7],#32 | |
| 306 ld1 {v20.4s,v21.4s},[x7],#32 | |
| 307 ld1 {v22.4s,v23.4s},[x7],#32 | |
| 308 ld1 {v7.4s},[x7] // v7 = last round key | |
| 309 | |
| 310 add x7,x3,#32 // x7 = address of rndkey[2] | |
| 311 mov w6,w5 | |
| 312 b.eq .Lcbc_dec // the enc argument was 0 | |
| 313 | |
| 314 cmp w5,#2 // round count - 8 == 2, i.e. 10 rounds | |
| 315 eor v0.16b,v0.16b,v6.16b // first block ^= IV | |
| 316 eor v5.16b,v16.16b,v7.16b // v5 = rndkey[0] ^ last round key, XORed into each following input block | |
| 317 b.eq .Lcbc_enc128 | |
| 318 | |
| 319 ld1 {v2.4s,v3.4s},[x7] // v2, v3 = rndkey[2], rndkey[3] | |
| 320 add x7,x3,#16 // x7 = address of rndkey[1] | |
| 321 add x6,x3,#16*4 // x6 = address of rndkey[4] | |
| 322 add x12,x3,#16*5 // x12 = address of rndkey[5] | |
| 323 aese v0.16b,v16.16b | |
| 324 aesmc v0.16b,v0.16b | |
| 325 add x14,x3,#16*6 // x14 = address of rndkey[6] | |
| 326 add x3,x3,#16*7 // x3 = address of rndkey[7] | |
| 327 b .Lenter_cbc_enc | |
| 328 | |
| 329 .align 4 | |
| 330 .Loop_cbc_enc: | |
| 331 aese v0.16b,v16.16b // v16 here is the next input block ^ v5, so chaining and the first key addition share one aese | |
| 332 aesmc v0.16b,v0.16b | |
| 333 st1 {v6.16b},[x1],#16 // store the previous result block | |
| 334 .Lenter_cbc_enc: | |
| 335 aese v0.16b,v17.16b | |
| 336 aesmc v0.16b,v0.16b | |
| 337 aese v0.16b,v2.16b | |
| 338 aesmc v0.16b,v0.16b | |
| 339 ld1 {v16.4s},[x6] // v16 = rndkey[4] | |
| 340 cmp w5,#4 // round count - 8 == 4, i.e. 12 rounds | |
| 341 aese v0.16b,v3.16b | |
| 342 aesmc v0.16b,v0.16b | |
| 343 ld1 {v17.4s},[x12] // v17 = rndkey[5] | |
| 344 b.eq .Lcbc_enc192 // 12 rounds: skip the two extra rounds below | |
| 345 | |
| 346 aese v0.16b,v16.16b | |
| 347 aesmc v0.16b,v0.16b | |
| 348 ld1 {v16.4s},[x14] // v16 = rndkey[6] | |
| 349 aese v0.16b,v17.16b | |
| 350 aesmc v0.16b,v0.16b | |
| 351 ld1 {v17.4s},[x3] // v17 = rndkey[7] | |
| 352 nop | |
| 353 | |
| 354 .Lcbc_enc192: | |
| 355 aese v0.16b,v16.16b | |
| 356 aesmc v0.16b,v0.16b | |
| 357 subs x2,x2,#16 // one block consumed; flags are consumed by the csel and the b.hs below | |
| 358 aese v0.16b,v17.16b | |
| 359 aesmc v0.16b,v0.16b | |
| 360 csel x8,xzr,x8,eq // stop advancing x0 once the remaining length reaches 0 | |
| 361 aese v0.16b,v18.16b | |
| 362 aesmc v0.16b,v0.16b | |
| 363 aese v0.16b,v19.16b | |
| 364 aesmc v0.16b,v0.16b | |
| 365 ld1 {v16.16b},[x0],x8 // v16 = next input block | |
| 366 aese v0.16b,v20.16b | |
| 367 aesmc v0.16b,v0.16b | |
| 368 eor v16.16b,v16.16b,v5.16b // pre-mix with rndkey[0] ^ last round key | |
| 369 aese v0.16b,v21.16b | |
| 370 aesmc v0.16b,v0.16b | |
| 371 ld1 {v17.4s},[x7] // re-pre-load rndkey[1] | |
| 372 aese v0.16b,v22.16b | |
| 373 aesmc v0.16b,v0.16b | |
| 374 aese v0.16b,v23.16b // final round: no aesmc | |
| 375 eor v6.16b,v0.16b,v7.16b // v6 = result block; v0 keeps the value without the last round key | |
| 376 b.hs .Loop_cbc_enc // loop while the subs above did not borrow | |
| 377 | |
| 378 st1 {v6.16b},[x1],#16 // store the last result block | |
| 379 b .Lcbc_done | |
| 380 | |
| 381 .align 5 | |
| 382 .Lcbc_enc128: // 10-round variant: all round keys stay in registers | |
| 383 ld1 {v2.4s,v3.4s},[x7] // v2, v3 = rndkey[2], rndkey[3] | |
| 384 aese v0.16b,v16.16b | |
| 385 aesmc v0.16b,v0.16b | |
| 386 b .Lenter_cbc_enc128 | |
| 387 .Loop_cbc_enc128: | |
| 388 aese v0.16b,v16.16b // v16 here is the next input block ^ v5 | |
| 389 aesmc v0.16b,v0.16b | |
| 390 st1 {v6.16b},[x1],#16 // store the previous result block | |
| 391 .Lenter_cbc_enc128: | |
| 392 aese v0.16b,v17.16b | |
| 393 aesmc v0.16b,v0.16b | |
| 394 subs x2,x2,#16 // one block consumed; flags are consumed by the csel and the b.hs below | |
| 395 aese v0.16b,v2.16b | |
| 396 aesmc v0.16b,v0.16b | |
| 397 csel x8,xzr,x8,eq // stop advancing x0 once the remaining length reaches 0 | |
| 398 aese v0.16b,v3.16b | |
| 399 aesmc v0.16b,v0.16b | |
| 400 aese v0.16b,v18.16b | |
| 401 aesmc v0.16b,v0.16b | |
| 402 aese v0.16b,v19.16b | |
| 403 aesmc v0.16b,v0.16b | |
| 404 ld1 {v16.16b},[x0],x8 // v16 = next input block | |
| 405 aese v0.16b,v20.16b | |
| 406 aesmc v0.16b,v0.16b | |
| 407 aese v0.16b,v21.16b | |
| 408 aesmc v0.16b,v0.16b | |
| 409 aese v0.16b,v22.16b | |
| 410 aesmc v0.16b,v0.16b | |
| 411 eor v16.16b,v16.16b,v5.16b // pre-mix with rndkey[0] ^ last round key | |
| 412 aese v0.16b,v23.16b // final round: no aesmc | |
| 413 eor v6.16b,v0.16b,v7.16b // v6 = result block | |
| 414 b.hs .Loop_cbc_enc128 // loop while the subs above did not borrow | |
| 415 | |
| 416 st1 {v6.16b},[x1],#16 // store the last result block | |
| 417 b .Lcbc_done | |
| 418 .align 5 | |
| 419 .Lcbc_dec: | |
| 420 ld1 {v18.16b},[x0],#16 // v18 = next 16 input bytes (the first block again when x8 was 0) | |
| 421 subs x2,x2,#32 // bias | |
| 422 add w6,w5,#2 // w6 = round counter for the two-rounds-per-pass loops | |
| 423 orr v3.16b,v0.16b,v0.16b // v3 = copy of v0 | |
| 424 orr v1.16b,v0.16b,v0.16b // v1 = copy of v0 | |
| 425 orr v19.16b,v18.16b,v18.16b // v19 = copy of v18 | |
| 426 b.lo .Lcbc_dec_tail // fewer than three blocks in total | |
| 427 | |
| 428 orr v1.16b,v18.16b,v18.16b // v1 = second block | |
| 429 ld1 {v18.16b},[x0],#16 // v18 = third block | |
| 430 orr v2.16b,v0.16b,v0.16b // v2, v3, v19 keep untouched copies of the three input blocks for chaining | |
| 431 orr v3.16b,v1.16b,v1.16b | |
| 432 orr v19.16b,v18.16b,v18.16b | |
| 433 | |
| 434 .Loop3x_cbc_dec: // three blocks (v0, v1, v18) in flight, two rounds per pass | |
| 435 aesd v0.16b,v16.16b | |
| 436 aesimc v0.16b,v0.16b | |
| 437 aesd v1.16b,v16.16b | |
| 438 aesimc v1.16b,v1.16b | |
| 439 aesd v18.16b,v16.16b | |
| 440 aesimc v18.16b,v18.16b | |
| 441 ld1 {v16.4s},[x7],#16 | |
| 442 subs w6,w6,#2 // flags are consumed by the b.gt below | |
| 443 aesd v0.16b,v17.16b | |
| 444 aesimc v0.16b,v0.16b | |
| 445 aesd v1.16b,v17.16b | |
| 446 aesimc v1.16b,v1.16b | |
| 447 aesd v18.16b,v17.16b | |
| 448 aesimc v18.16b,v18.16b | |
| 449 ld1 {v17.4s},[x7],#16 | |
| 450 b.gt .Loop3x_cbc_dec | |
| 451 | |
| 452 aesd v0.16b,v16.16b | |
| 453 aesimc v0.16b,v0.16b | |
| 454 aesd v1.16b,v16.16b | |
| 455 aesimc v1.16b,v1.16b | |
| 456 aesd v18.16b,v16.16b | |
| 457 aesimc v18.16b,v18.16b | |
| 458 eor v4.16b,v6.16b,v7.16b // v4 = IV ^ last round key | |
| 459 subs x2,x2,#0x30 // three blocks consumed; flags are consumed by the csel and the b.hs further down | |
| 460 eor v5.16b,v2.16b,v7.16b // v5 = first input block ^ last round key | |
| 461 csel x6,x2,x6,lo // x6, w6, is zero at this point | |
| 462 aesd v0.16b,v17.16b | |
| 463 aesimc v0.16b,v0.16b | |
| 464 aesd v1.16b,v17.16b | |
| 465 aesimc v1.16b,v1.16b | |
| 466 aesd v18.16b,v17.16b | |
| 467 aesimc v18.16b,v18.16b | |
| 468 eor v17.16b,v3.16b,v7.16b // v17 = second input block ^ last round key | |
| 469 add x0,x0,x6 // x0 is adjusted in such way that | |
| 470 // at exit from the loop v1.16b-v18.16b | |
| 471 // are loaded with last "words" | |
| 472 orr v6.16b,v19.16b,v19.16b // third input block becomes the chaining value for the next group | |
| 473 mov x7,x3 // rewind the round-key pointer to the start of the schedule | |
| 474 aesd v0.16b,v20.16b | |
| 475 aesimc v0.16b,v0.16b | |
| 476 aesd v1.16b,v20.16b | |
| 477 aesimc v1.16b,v1.16b | |
| 478 aesd v18.16b,v20.16b | |
| 479 aesimc v18.16b,v18.16b | |
| 480 ld1 {v2.16b},[x0],#16 // load the next three input blocks into v2, v3, v19 | |
| 481 aesd v0.16b,v21.16b | |
| 482 aesimc v0.16b,v0.16b | |
| 483 aesd v1.16b,v21.16b | |
| 484 aesimc v1.16b,v1.16b | |
| 485 aesd v18.16b,v21.16b | |
| 486 aesimc v18.16b,v18.16b | |
| 487 ld1 {v3.16b},[x0],#16 | |
| 488 aesd v0.16b,v22.16b | |
| 489 aesimc v0.16b,v0.16b | |
| 490 aesd v1.16b,v22.16b | |
| 491 aesimc v1.16b,v1.16b | |
| 492 aesd v18.16b,v22.16b | |
| 493 aesimc v18.16b,v18.16b | |
| 494 ld1 {v19.16b},[x0],#16 | |
| 495 aesd v0.16b,v23.16b // final round: no aesimc | |
| 496 aesd v1.16b,v23.16b | |
| 497 aesd v18.16b,v23.16b | |
| 498 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] | |
| 499 add w6,w5,#2 // reset the round counter | |
| 500 eor v4.16b,v4.16b,v0.16b // output block 1 | |
| 501 eor v5.16b,v5.16b,v1.16b // output block 2 | |
| 502 eor v18.16b,v18.16b,v17.16b // output block 3 | |
| 503 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] | |
| 504 st1 {v4.16b},[x1],#16 | |
| 505 orr v0.16b,v2.16b,v2.16b // move the freshly loaded blocks into the working registers | |
| 506 st1 {v5.16b},[x1],#16 | |
| 507 orr v1.16b,v3.16b,v3.16b | |
| 508 st1 {v18.16b},[x1],#16 | |
| 509 orr v18.16b,v19.16b,v19.16b | |
| 510 b.hs .Loop3x_cbc_dec // loop while the subs #0x30 above did not borrow | |
| 511 | |
| 512 cmn x2,#0x30 // x2 == -48 means no blocks are left | |
| 513 b.eq .Lcbc_done | |
| 514 nop | |
| 515 | |
| 516 .Lcbc_dec_tail: // one or two remaining blocks, carried in v1 and v18 | |
| 517 aesd v1.16b,v16.16b | |
| 518 aesimc v1.16b,v1.16b | |
| 519 aesd v18.16b,v16.16b | |
| 520 aesimc v18.16b,v18.16b | |
| 521 ld1 {v16.4s},[x7],#16 | |
| 522 subs w6,w6,#2 // flags are consumed by the b.gt below | |
| 523 aesd v1.16b,v17.16b | |
| 524 aesimc v1.16b,v1.16b | |
| 525 aesd v18.16b,v17.16b | |
| 526 aesimc v18.16b,v18.16b | |
| 527 ld1 {v17.4s},[x7],#16 | |
| 528 b.gt .Lcbc_dec_tail | |
| 529 | |
| 530 aesd v1.16b,v16.16b | |
| 531 aesimc v1.16b,v1.16b | |
| 532 aesd v18.16b,v16.16b | |
| 533 aesimc v18.16b,v18.16b | |
| 534 aesd v1.16b,v17.16b | |
| 535 aesimc v1.16b,v1.16b | |
| 536 aesd v18.16b,v17.16b | |
| 537 aesimc v18.16b,v18.16b | |
| 538 aesd v1.16b,v20.16b | |
| 539 aesimc v1.16b,v1.16b | |
| 540 aesd v18.16b,v20.16b | |
| 541 aesimc v18.16b,v18.16b | |
| 542 cmn x2,#0x20 // x2 == -32 means exactly one block is left; flags are consumed by the b.eq below | |
| 543 aesd v1.16b,v21.16b | |
| 544 aesimc v1.16b,v1.16b | |
| 545 aesd v18.16b,v21.16b | |
| 546 aesimc v18.16b,v18.16b | |
| 547 eor v5.16b,v6.16b,v7.16b // v5 = chaining value ^ last round key | |
| 548 aesd v1.16b,v22.16b | |
| 549 aesimc v1.16b,v1.16b | |
| 550 aesd v18.16b,v22.16b | |
| 551 aesimc v18.16b,v18.16b | |
| 552 eor v17.16b,v3.16b,v7.16b // v17 = saved input block in v3 ^ last round key | |
| 553 aesd v1.16b,v23.16b // final round: no aesimc | |
| 554 aesd v18.16b,v23.16b | |
| 555 b.eq .Lcbc_dec_one | |
| 556 eor v5.16b,v5.16b,v1.16b // two blocks left: first output block | |
| 557 eor v17.16b,v17.16b,v18.16b // second output block | |
| 558 orr v6.16b,v19.16b,v19.16b // v6 = last input block, written back as the IV at .Lcbc_done | |
| 559 st1 {v5.16b},[x1],#16 | |
| 560 st1 {v17.16b},[x1],#16 | |
| 561 b .Lcbc_done | |
| 562 | |
| 563 .Lcbc_dec_one: | |
| 564 eor v5.16b,v5.16b,v18.16b // single output block | |
| 565 orr v6.16b,v19.16b,v19.16b // v6 = last input block, written back as the IV below | |
| 566 st1 {v5.16b},[x1],#16 | |
| 567 | |
| 568 .Lcbc_done: | |
| 569 st1 {v6.16b},[x4] // write the updated chaining value back to the IV buffer | |
| 570 .Lcbc_abort: | |
| 571 ldr x29,[sp],#16 // pop the frame record, reloading x29 only | |
| 572 ret | |
| 573 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt | |
| 574 .globl aes_v8_ctr32_encrypt_blocks | |
| 575 .hidden aes_v8_ctr32_encrypt_blocks | |
| 576 .type aes_v8_ctr32_encrypt_blocks,%function | |
| 577 .align 5 | |
| 578 aes_v8_ctr32_encrypt_blocks: // in: x0 = input, x1 = output, x2 = number of 16-byte blocks, x3 = key schedule, x4 = 16-byte counter block (only read here) | |
| 579 stp x29,x30,[sp,#-16]! // push frame record | |
| 580 add x29,sp,#0 // x29 = sp | |
| 581 ldr w5,[x3,#240] // w5 = round count stored at offset 240 of the schedule | |
| 582 | |
| 583 ldr w8, [x4, #12] // w8 = last 32-bit word of the counter block | |
| 584 ld1 {v0.4s},[x4] // v0 = whole counter block | |
| 585 | |
| 586 ld1 {v16.4s,v17.4s},[x3] // load key schedule... | |
| 587 sub w5,w5,#4 | |
| 588 mov x12,#16 // x12 = input step used by the tail code | |
| 589 cmp x2,#2 // flags stay live until the csel and the b.ls below | |
| 590 add x7,x3,x5,lsl#4 // pointer to last 5 round keys | |
| 591 sub w5,w5,#2 // w5 = round count - 6 | |
| 592 ld1 {v20.4s,v21.4s},[x7],#32 | |
| 593 ld1 {v22.4s,v23.4s},[x7],#32 | |
| 594 ld1 {v7.4s},[x7] // v7 = last round key | |
| 595 add x7,x3,#32 // x7 = address of rndkey[2] | |
| 596 mov w6,w5 // w6 = round counter for the two-rounds-per-pass loops | |
| 597 csel x12,xzr,x12,lo // fewer than two blocks: step 0, so the tail does not advance x0 | |
| 598 #ifndef __ARMEB__ | |
| 599 rev w8, w8 // byte-swap the counter word on little-endian builds so it can be incremented with add | |
| 600 #endif | |
| 601 orr v1.16b,v0.16b,v0.16b // v1 = copy of the counter block | |
| 602 add w10, w8, #1 | |
| 603 orr v18.16b,v0.16b,v0.16b // v18 = copy of the counter block | |
| 604 add w8, w8, #2 | |
| 605 orr v6.16b,v0.16b,v0.16b // v6 = template used to rebuild counter blocks later | |
| 606 rev w10, w10 | |
| 607 mov v1.s[3],w10 // v1 carries counter + 1 | |
| 608 b.ls .Lctr32_tail // two blocks or fewer go straight to the tail (a count of 0 is not special-cased) | |
| 609 rev w12, w8 // x12 is reused as scratch here; it is set again before the tail | |
| 610 sub x2,x2,#3 // bias | |
| 611 mov v18.s[3],w12 // v18 carries counter + 2 | |
| 612 b .Loop3x_ctr32 | |
| 613 | |
| 614 .align 4 | |
| 615 .Loop3x_ctr32: // three counter blocks (v0, v1, v18) in flight, two rounds per pass | |
| 616 aese v0.16b,v16.16b | |
| 617 aesmc v0.16b,v0.16b | |
| 618 aese v1.16b,v16.16b | |
| 619 aesmc v1.16b,v1.16b | |
| 620 aese v18.16b,v16.16b | |
| 621 aesmc v18.16b,v18.16b | |
| 622 ld1 {v16.4s},[x7],#16 | |
| 623 subs w6,w6,#2 // flags are consumed by the b.gt below | |
| 624 aese v0.16b,v17.16b | |
| 625 aesmc v0.16b,v0.16b | |
| 626 aese v1.16b,v17.16b | |
| 627 aesmc v1.16b,v1.16b | |
| 628 aese v18.16b,v17.16b | |
| 629 aesmc v18.16b,v18.16b | |
| 630 ld1 {v17.4s},[x7],#16 | |
| 631 b.gt .Loop3x_ctr32 | |
| 632 | |
| 633 aese v0.16b,v16.16b | |
| 634 aesmc v4.16b,v0.16b // block 1 continues in v4, freeing v0 for the next counter | |
| 635 aese v1.16b,v16.16b | |
| 636 aesmc v5.16b,v1.16b // block 2 continues in v5, freeing v1 | |
| 637 ld1 {v2.16b},[x0],#16 // v2 = input block 1 | |
| 638 orr v0.16b,v6.16b,v6.16b // v0 = counter template | |
| 639 aese v18.16b,v16.16b | |
| 640 aesmc v18.16b,v18.16b | |
| 641 ld1 {v3.16b},[x0],#16 // v3 = input block 2 | |
| 642 orr v1.16b,v6.16b,v6.16b // v1 = counter template | |
| 643 aese v4.16b,v17.16b | |
| 644 aesmc v4.16b,v4.16b | |
| 645 aese v5.16b,v17.16b | |
| 646 aesmc v5.16b,v5.16b | |
| 647 ld1 {v19.16b},[x0],#16 // v19 = input block 3 | |
| 648 mov x7,x3 // rewind the round-key pointer to the start of the schedule | |
| 649 aese v18.16b,v17.16b | |
| 650 aesmc v17.16b,v18.16b // block 3 continues in v17; the round key is reloaded into v17 at the end of the pass | |
| 651 orr v18.16b,v6.16b,v6.16b // v18 = counter template | |
| 652 add w9,w8,#1 // counter values for the next three blocks | |
| 653 aese v4.16b,v20.16b | |
| 654 aesmc v4.16b,v4.16b | |
| 655 aese v5.16b,v20.16b | |
| 656 aesmc v5.16b,v5.16b | |
| 657 eor v2.16b,v2.16b,v7.16b // input block 1 ^ last round key | |
| 658 add w10,w8,#2 | |
| 659 aese v17.16b,v20.16b | |
| 660 aesmc v17.16b,v17.16b | |
| 661 eor v3.16b,v3.16b,v7.16b // input block 2 ^ last round key | |
| 662 add w8,w8,#3 | |
| 663 aese v4.16b,v21.16b | |
| 664 aesmc v4.16b,v4.16b | |
| 665 aese v5.16b,v21.16b | |
| 666 aesmc v5.16b,v5.16b | |
| 667 eor v19.16b,v19.16b,v7.16b // input block 3 ^ last round key | |
| 668 rev w9,w9 | |
| 669 aese v17.16b,v21.16b | |
| 670 aesmc v17.16b,v17.16b | |
| 671 mov v0.s[3], w9 // install the next three counter words in v0, v1, v18 | |
| 672 rev w10,w10 | |
| 673 aese v4.16b,v22.16b | |
| 674 aesmc v4.16b,v4.16b | |
| 675 aese v5.16b,v22.16b | |
| 676 aesmc v5.16b,v5.16b | |
| 677 mov v1.s[3], w10 | |
| 678 rev w12,w8 | |
| 679 aese v17.16b,v22.16b | |
| 680 aesmc v17.16b,v17.16b | |
| 681 mov v18.s[3], w12 | |
| 682 subs x2,x2,#3 // three blocks consumed; flags are consumed by the b.hs at the end of the pass | |
| 683 aese v4.16b,v23.16b // final round: no aesmc | |
| 684 aese v5.16b,v23.16b | |
| 685 aese v17.16b,v23.16b | |
| 686 | |
| 687 eor v2.16b,v2.16b,v4.16b // output block 1 | |
| 688 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] | |
| 689 st1 {v2.16b},[x1],#16 | |
| 690 eor v3.16b,v3.16b,v5.16b // output block 2 | |
| 691 mov w6,w5 // reset the round counter | |
| 692 st1 {v3.16b},[x1],#16 | |
| 693 eor v19.16b,v19.16b,v17.16b // output block 3 | |
| 694 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] | |
| 695 st1 {v19.16b},[x1],#16 | |
| 696 b.hs .Loop3x_ctr32 // loop while the subs above did not borrow | |
| 697 | |
| 698 adds x2,x2,#3 // undo the bias: x2 = blocks still to do (0, 1 or 2) | |
| 699 b.eq .Lctr32_done | |
| 700 cmp x2,#1 | |
| 701 mov x12,#16 // restore the input step | |
| 702 csel x12,xzr,x12,eq // one block left: step 0 | |
| 703 | |
| 704 .Lctr32_tail: // up to two blocks, carried in v0 and v1 | |
| 705 aese v0.16b,v16.16b | |
| 706 aesmc v0.16b,v0.16b | |
| 707 aese v1.16b,v16.16b | |
| 708 aesmc v1.16b,v1.16b | |
| 709 ld1 {v16.4s},[x7],#16 | |
| 710 subs w6,w6,#2 // flags are consumed by the b.gt below | |
| 711 aese v0.16b,v17.16b | |
| 712 aesmc v0.16b,v0.16b | |
| 713 aese v1.16b,v17.16b | |
| 714 aesmc v1.16b,v1.16b | |
| 715 ld1 {v17.4s},[x7],#16 | |
| 716 b.gt .Lctr32_tail | |
| 717 | |
| 718 aese v0.16b,v16.16b | |
| 719 aesmc v0.16b,v0.16b | |
| 720 aese v1.16b,v16.16b | |
| 721 aesmc v1.16b,v1.16b | |
| 722 aese v0.16b,v17.16b | |
| 723 aesmc v0.16b,v0.16b | |
| 724 aese v1.16b,v17.16b | |
| 725 aesmc v1.16b,v1.16b | |
| 726 ld1 {v2.16b},[x0],x12 // v2 = input block; x0 advances by 16, or by 0 when only one block is left | |
| 727 aese v0.16b,v20.16b | |
| 728 aesmc v0.16b,v0.16b | |
| 729 aese v1.16b,v20.16b | |
| 730 aesmc v1.16b,v1.16b | |
| 731 ld1 {v3.16b},[x0] // v3 = second input block, or the same block again when the step was 0 | |
| 732 aese v0.16b,v21.16b | |
| 733 aesmc v0.16b,v0.16b | |
| 734 aese v1.16b,v21.16b | |
| 735 aesmc v1.16b,v1.16b | |
| 736 eor v2.16b,v2.16b,v7.16b // input ^ last round key | |
| 737 aese v0.16b,v22.16b | |
| 738 aesmc v0.16b,v0.16b | |
| 739 aese v1.16b,v22.16b | |
| 740 aesmc v1.16b,v1.16b | |
| 741 eor v3.16b,v3.16b,v7.16b | |
| 742 aese v0.16b,v23.16b // final round: no aesmc | |
| 743 aese v1.16b,v23.16b | |
| 744 | |
| 745 cmp x2,#1 // flags are consumed by the b.eq below | |
| 746 eor v2.16b,v2.16b,v0.16b // first output block | |
| 747 eor v3.16b,v3.16b,v1.16b // second output block | |
| 748 st1 {v2.16b},[x1],#16 | |
| 749 b.eq .Lctr32_done // only one block was due: skip the second store | |
| 750 st1 {v3.16b},[x1] | |
| 751 | |
| 752 .Lctr32_done: | |
| 753 ldr x29,[sp],#16 // pop the frame record, reloading x29 only | |
| 754 ret | |
| 755 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks | |
| 756 #endif | |
| 757 #endif | |
| OLD | NEW |