| OLD | NEW |
| (Empty) |
| 1 #if defined(__arm__) | |
| 2 #include <openssl/arm_arch.h> | |
| 3 | |
| 4 #if __ARM_MAX_ARCH__>=7 | |
| 5 .text | |
| 6 .arch armv7-a | |
| 7 .fpu neon | |
| 8 .code 32 | |
| 9 .align 5 | |
| 10 .Lrcon: @ constants for the key-schedule code below, read through r3 | |
| 11 .long 0x01,0x01,0x01,0x01 @ initial round constant, one copy per 32-bit lane | |
| 12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat: vtbl byte indices that rotate the last word and copy it to every lane | |
| 13 .long 0x1b,0x1b,0x1b,0x1b @ round constant reloaded for the last two AES-128 rounds | |
| 14 | |
| 15 .globl aes_v8_set_encrypt_key @ Expand a user AES key into the round-key schedule used for encryption. | |
| 16 .hidden aes_v8_set_encrypt_key @ In: r0 = user key bytes, r1 = key length in bits, r2 = output key schedule. | |
| 17 .type aes_v8_set_encrypt_key,%function @ Out: r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is not 128, 192 or 256. | |
| 18 .align 5 @ On success the round keys are at [r2], the round count (10/12/14) at [r2,#240]; r2 is left at schedule+240 and r12 holds the round count. | |
| 19 aes_v8_set_encrypt_key: | |
| 20 .Lenc_key: @ internal entry point, also reached by bl from aes_v8_set_decrypt_key | |
| 21 mov r3,#-1 @ r3 = pending return value: -1 for a null pointer | |
| 22 cmp r0,#0 | |
| 23 beq .Lenc_key_abort | |
| 24 cmp r2,#0 | |
| 25 beq .Lenc_key_abort | |
| 26 mov r3,#-2 @ from here on a failure means an unsupported key length | |
| 27 cmp r1,#128 | |
| 28 blt .Lenc_key_abort | |
| 29 cmp r1,#256 | |
| 30 bgt .Lenc_key_abort | |
| 31 tst r1,#0x3f @ must be a multiple of 64 bits | |
| 32 bne .Lenc_key_abort | |
| 33 | |
| 34 adr r3,.Lrcon @ r3 now walks the constant table | |
| 35 cmp r1,#192 @ flags are consumed by the blt/beq dispatch below | |
| 36 | |
| 37 veor q0,q0,q0 @ q0 = 0: zero filler for vext and all-zero key for aese | |
| 38 vld1.8 {q3},[r0]! @ q3 = key bytes 0..15 | |
| 39 mov r1,#8 @ reuse r1 as the loop counter | |
| 40 vld1.32 {q1,q2},[r3]! @ q1 = round constant, q2 = rotate-n-splat mask; r3 -> 0x1b row | |
| 41 | |
| 42 blt .Loop128 @ 128-bit key | |
| 43 beq .L192 @ 192-bit key | |
| 44 b .L256 @ 256-bit key | |
| 45 | |
| 46 .align 4 | |
| 47 .Loop128: | |
| 48 vtbl.8 d20,{q3},d4 @ q10 = last word of q3, rotated and copied to every lane | |
| 49 vtbl.8 d21,{q3},d5 | |
| 50 vext.8 q9,q0,q3,#12 @ q9 = q3 moved up by one word, zero-filled | |
| 51 vst1.32 {q3},[r2]! @ emit the current round key | |
| 52 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 53 subs r1,r1,#1 @ flags are used by the bne at the end of the loop | |
| 54 | |
| 55 veor q3,q3,q9 | |
| 56 vext.8 q9,q0,q9,#12 | |
| 57 veor q3,q3,q9 | |
| 58 vext.8 q9,q0,q9,#12 | |
| 59 veor q10,q10,q1 @ fold in the round constant | |
| 60 veor q3,q3,q9 | |
| 61 vshl.u8 q1,q1,#1 @ next round constant = current one shifted left by 1 | |
| 62 veor q3,q3,q10 @ q3 = next round key | |
| 63 bne .Loop128 | |
| 64 | |
| 65 vld1.32 {q1},[r3] @ q1 = 0x1b for the remaining two rounds | |
| 66 | |
| 67 vtbl.8 d20,{q3},d4 | |
| 68 vtbl.8 d21,{q3},d5 | |
| 69 vext.8 q9,q0,q3,#12 | |
| 70 vst1.32 {q3},[r2]! | |
| 71 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 72 | |
| 73 veor q3,q3,q9 | |
| 74 vext.8 q9,q0,q9,#12 | |
| 75 veor q3,q3,q9 | |
| 76 vext.8 q9,q0,q9,#12 | |
| 77 veor q10,q10,q1 | |
| 78 veor q3,q3,q9 | |
| 79 vshl.u8 q1,q1,#1 | |
| 80 veor q3,q3,q10 | |
| 81 | |
| 82 vtbl.8 d20,{q3},d4 | |
| 83 vtbl.8 d21,{q3},d5 | |
| 84 vext.8 q9,q0,q3,#12 | |
| 85 vst1.32 {q3},[r2]! | |
| 86 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 87 | |
| 88 veor q3,q3,q9 | |
| 89 vext.8 q9,q0,q9,#12 | |
| 90 veor q3,q3,q9 | |
| 91 vext.8 q9,q0,q9,#12 | |
| 92 veor q10,q10,q1 | |
| 93 veor q3,q3,q9 | |
| 94 veor q3,q3,q10 | |
| 95 vst1.32 {q3},[r2] @ final round key, stored without post-increment | |
| 96 add r2,r2,#0x50 @ 160 bytes already stepped over + 0x50 = schedule+240 | |
| 97 | |
| 98 mov r12,#10 @ 10 rounds for a 128-bit key | |
| 99 b .Ldone | |
| 100 | |
| 101 .align 4 | |
| 102 .L192: | |
| 103 vld1.8 {d16},[r0]! @ d16 = key bytes 16..23 | |
| 104 vmov.i8 q10,#8 @ borrow q10 | |
| 105 vst1.32 {q3},[r2]! @ emit key bytes 0..15 | |
| 106 vsub.i8 q2,q2,q10 @ adjust the mask | |
| 107 | |
| 108 .Loop192: | |
| 109 vtbl.8 d20,{q8},d4 | |
| 110 vtbl.8 d21,{q8},d5 | |
| 111 vext.8 q9,q0,q3,#12 | |
| 112 vst1.32 {d16},[r2]! @ emit the low 8 bytes of q8 | |
| 113 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 114 subs r1,r1,#1 @ flags are used by the bne at the end of the loop | |
| 115 | |
| 116 veor q3,q3,q9 | |
| 117 vext.8 q9,q0,q9,#12 | |
| 118 veor q3,q3,q9 | |
| 119 vext.8 q9,q0,q9,#12 | |
| 120 veor q3,q3,q9 | |
| 121 | |
| 122 vdup.32 q9,d7[1] @ copy the last word of q3 to every lane | |
| 123 veor q9,q9,q8 | |
| 124 veor q10,q10,q1 | |
| 125 vext.8 q8,q0,q8,#12 | |
| 126 vshl.u8 q1,q1,#1 | |
| 127 veor q8,q8,q9 | |
| 128 veor q3,q3,q10 | |
| 129 veor q8,q8,q10 | |
| 130 vst1.32 {q3},[r2]! @ emit the next 16 bytes of schedule | |
| 131 bne .Loop192 | |
| 132 | |
| 133 mov r12,#12 @ 12 rounds for a 192-bit key | |
| 134 add r2,r2,#0x20 @ 208 bytes written + 0x20 = schedule+240 | |
| 135 b .Ldone | |
| 136 | |
| 137 .align 4 | |
| 138 .L256: | |
| 139 vld1.8 {q8},[r0] @ q8 = key bytes 16..31 | |
| 140 mov r1,#7 @ seven loop iterations | |
| 141 mov r12,#14 @ 14 rounds for a 256-bit key | |
| 142 vst1.32 {q3},[r2]! @ emit key bytes 0..15 | |
| 143 | |
| 144 .Loop256: | |
| 145 vtbl.8 d20,{q8},d4 | |
| 146 vtbl.8 d21,{q8},d5 | |
| 147 vext.8 q9,q0,q3,#12 | |
| 148 vst1.32 {q8},[r2]! | |
| 149 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 150 subs r1,r1,#1 @ flags are used by the beq below | |
| 151 | |
| 152 veor q3,q3,q9 | |
| 153 vext.8 q9,q0,q9,#12 | |
| 154 veor q3,q3,q9 | |
| 155 vext.8 q9,q0,q9,#12 | |
| 156 veor q10,q10,q1 | |
| 157 veor q3,q3,q9 | |
| 158 vshl.u8 q1,q1,#1 | |
| 159 veor q3,q3,q10 | |
| 160 vst1.32 {q3},[r2]! | |
| 161 beq .Ldone @ counter reached zero: 240 bytes written, r2 = schedule+240 | |
| 162 | |
| 163 vdup.32 q10,d7[1] @ copy the last word of the new q3 to every lane (no rotation here) | |
| 164 vext.8 q9,q0,q8,#12 | |
| 165 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 | |
| 166 | |
| 167 veor q8,q8,q9 | |
| 168 vext.8 q9,q0,q9,#12 | |
| 169 veor q8,q8,q9 | |
| 170 vext.8 q9,q0,q9,#12 | |
| 171 veor q8,q8,q9 | |
| 172 | |
| 173 veor q8,q8,q10 | |
| 174 b .Loop256 | |
| 175 | |
| 176 .Ldone: | |
| 177 str r12,[r2] @ round count goes right after the round keys (offset 240) | |
| 178 mov r3,#0 @ success | |
| 179 | |
| 180 .Lenc_key_abort: | |
| 181 mov r0,r3 @ return value | |
| 182 | |
| 183 bx lr | |
| 184 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key | |
| 185 | |
| 186 .globl aes_v8_set_decrypt_key @ Build the decryption key schedule: run the encryption key expansion, then reverse the round-key order and apply aesimc to every key except the first and last. | |
| 187 .hidden aes_v8_set_decrypt_key @ In: r0 = user key bytes, r1 = key length in bits, r2 = output key schedule (all passed through to .Lenc_key). | |
| 188 .type aes_v8_set_decrypt_key,%function @ Out: r0 = 0 on success, otherwise the non-zero code returned by .Lenc_key. | |
| 189 .align 5 | |
| 190 aes_v8_set_decrypt_key: | |
| 191 stmdb sp!,{r4,lr} @ save r4 and the return address | |
| 192 bl .Lenc_key @ on success leaves r2 = schedule+240 and r12 = round count | |
| 193 | |
| 194 cmp r0,#0 | |
| 195 bne .Ldec_key_abort @ propagate the error code in r0 | |
| 196 | |
| 197 sub r2,r2,#240 @ restore original r2 | |
| 198 mov r4,#-16 @ negative stride for walking down from the end | |
| 199 add r0,r2,r12,lsl#4 @ end of key schedule | |
| 200 | |
| 201 vld1.32 {q0},[r2] @ swap the first and last round keys unchanged | |
| 202 vld1.32 {q1},[r0] | |
| 203 vst1.32 {q0},[r0],r4 | |
| 204 vst1.32 {q1},[r2]! | |
| 205 | |
| 206 .Loop_imc: @ swap one pair of inner round keys per iteration, transforming both | |
| 207 vld1.32 {q0},[r2] | |
| 208 vld1.32 {q1},[r0] | |
| 209 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 210 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 211 vst1.32 {q0},[r0],r4 | |
| 212 vst1.32 {q1},[r2]! | |
| 213 cmp r0,r2 | |
| 214 bhi .Loop_imc @ continue while the upper pointer is still above the lower one | |
| 215 | |
| 216 vld1.32 {q0},[r2] @ middle round key (r0 == r2 here for an even round count) | |
| 217 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 218 vst1.32 {q0},[r0] | |
| 219 | |
| 220 eor r0,r0,r0 @ return value | |
| 221 .Ldec_key_abort: | |
| 222 ldmia sp!,{r4,pc} @ restore r4 and return | |
| 223 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key | |
| 224 .globl aes_v8_encrypt @ Encrypt a single 16-byte block. | |
| 225 .hidden aes_v8_encrypt @ In: r0 = input block, r1 = output block, r2 = key schedule (round count is read from [r2,#240]). | |
| 226 .type aes_v8_encrypt,%function @ Uses r3 and q0-q2 as scratch and advances r2; no return value is set. | |
| 227 .align 5 | |
| 228 aes_v8_encrypt: | |
| 229 ldr r3,[r2,#240] @ r3 = number of rounds | |
| 230 vld1.32 {q0},[r2]! @ q0 = round key 0 | |
| 231 vld1.8 {q2},[r0] @ q2 = input block | |
| 232 sub r3,r3,#2 @ the last two rounds are done after the loop | |
| 233 vld1.32 {q1},[r2]! @ q1 = round key 1 | |
| 234 | |
| 235 .Loop_enc: @ two rounds per iteration, alternating q0 and q1 as the round key | |
| 236 .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 | |
| 237 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 | |
| 238 vld1.32 {q0},[r2]! | |
| 239 subs r3,r3,#2 | |
| 240 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 | |
| 241 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 | |
| 242 vld1.32 {q1},[r2]! | |
| 243 bgt .Loop_enc | |
| 244 | |
| 245 .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 | |
| 246 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 | |
| 247 vld1.32 {q0},[r2] @ q0 = last round key | |
| 248 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 | |
| 249 veor q2,q2,q0 @ final round has no aesmc; XOR in the last round key | |
| 250 | |
| 251 vst1.8 {q2},[r1] @ write the output block | |
| 252 bx lr | |
| 253 .size aes_v8_encrypt,.-aes_v8_encrypt | |
| 254 .globl aes_v8_decrypt @ Decrypt a single 16-byte block. | |
| 255 .hidden aes_v8_decrypt @ In: r0 = input block, r1 = output block, r2 = key schedule (round count is read from [r2,#240]). | |
| 256 .type aes_v8_decrypt,%function @ Uses r3 and q0-q2 as scratch and advances r2; no return value is set. | |
| 257 .align 5 | |
| 258 aes_v8_decrypt: | |
| 259 ldr r3,[r2,#240] @ r3 = number of rounds | |
| 260 vld1.32 {q0},[r2]! @ q0 = round key 0 | |
| 261 vld1.8 {q2},[r0] @ q2 = input block | |
| 262 sub r3,r3,#2 @ the last two rounds are done after the loop | |
| 263 vld1.32 {q1},[r2]! @ q1 = round key 1 | |
| 264 | |
| 265 .Loop_dec: @ two rounds per iteration, alternating q0 and q1 as the round key | |
| 266 .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 | |
| 267 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 | |
| 268 vld1.32 {q0},[r2]! | |
| 269 subs r3,r3,#2 | |
| 270 .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 | |
| 271 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 | |
| 272 vld1.32 {q1},[r2]! | |
| 273 bgt .Loop_dec | |
| 274 | |
| 275 .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 | |
| 276 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 | |
| 277 vld1.32 {q0},[r2] @ q0 = last round key | |
| 278 .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 | |
| 279 veor q2,q2,q0 @ final round has no aesimc; XOR in the last round key | |
| 280 | |
| 281 vst1.8 {q2},[r1] @ write the output block | |
| 282 bx lr | |
| 283 .size aes_v8_decrypt,.-aes_v8_decrypt | |
| 284 .globl aes_v8_cbc_encrypt @ AES-CBC encryption or decryption of whole 16-byte blocks. | |
| 285 .hidden aes_v8_cbc_encrypt @ In: r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule, [sp] = pointer to the 16-byte IV, [sp,#4] = direction (non-zero encrypts, zero decrypts). | |
| 286 .type aes_v8_cbc_encrypt,%function @ Returns without touching memory if r2 < 16; otherwise the length is rounded down to a multiple of 16 and q6 is stored back to the IV buffer on exit. | |
| 287 .align 5 | |
| 288 aes_v8_cbc_encrypt: | |
| 289 mov ip,sp @ ip -> stack arguments, captured before anything is pushed | |
| 290 stmdb sp!,{r4,r5,r6,r7,r8,lr} | |
| 291 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so | |
| 292 ldmia ip,{r4,r5} @ load remaining args | |
| 293 subs r2,r2,#16 @ r2 = length - 16 | |
| 294 mov r8,#16 @ r8 = post-increment applied to r0 after each input load | |
| 295 blo .Lcbc_abort @ fewer than 16 bytes: nothing to do | |
| 296 moveq r8,#0 @ exactly one block: do not advance r0 past it | |
| 297 | |
| 298 cmp r5,#0 @ en- or decrypting? | |
| 299 ldr r5,[r3,#240] @ r5 = number of rounds (flags from the cmp stay live until the beq below) | |
| 300 and r2,r2,#-16 @ round the remaining length down to whole blocks | |
| 301 vld1.8 {q6},[r4] @ q6 = IV | |
| 302 vld1.8 {q0},[r0],r8 @ q0 = first input block | |
| 303 | |
| 304 vld1.32 {q8,q9},[r3] @ load key schedule... | |
| 305 sub r5,r5,#6 | |
| 306 add r7,r3,r5,lsl#4 @ pointer to last 7 round keys | |
| 307 sub r5,r5,#2 @ r5 = rounds - 8 (2, 4 or 6 for 10, 12 or 14 rounds) | |
| 308 vld1.32 {q10,q11},[r7]! | |
| 309 vld1.32 {q12,q13},[r7]! | |
| 310 vld1.32 {q14,q15},[r7]! | |
| 311 vld1.32 {q7},[r7] @ q7 = last round key | |
| 312 | |
| 313 add r7,r3,#32 @ r7 -> rndkey[2] | |
| 314 mov r6,r5 | |
| 315 beq .Lcbc_dec @ direction argument was zero | |
| 316 | |
| 317 cmp r5,#2 @ r5 == 2 means 10 rounds | |
| 318 veor q0,q0,q6 @ first input block XOR IV | |
| 319 veor q5,q8,q7 @ q5 = rndkey[0] XOR last round key | |
| 320 beq .Lcbc_enc128 | |
| 321 | |
| 322 vld1.32 {q2,q3},[r7] @ q2,q3 = rndkey[2], rndkey[3] | |
| 323 add r7,r3,#16 @ r7 -> rndkey[1] | |
| 324 add r6,r3,#16*4 @ r6 -> rndkey[4] | |
| 325 add r12,r3,#16*5 @ r12 -> rndkey[5] | |
| 326 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 327 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 328 add r14,r3,#16*6 @ r14 -> rndkey[6] | |
| 329 add r3,r3,#16*7 @ r3 -> rndkey[7] | |
| 330 b .Lenter_cbc_enc | |
| 331 | |
| 332 .align 4 | |
| 333 .Loop_cbc_enc: | |
| 334 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 335 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 336 vst1.8 {q6},[r1]! @ store the previous output block | |
| 337 .Lenter_cbc_enc: | |
| 338 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 339 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 340 .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 | |
| 341 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 342 vld1.32 {q8},[r6] | |
| 343 cmp r5,#4 @ r5 == 4 means 12 rounds | |
| 344 .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 | |
| 345 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 346 vld1.32 {q9},[r12] | |
| 347 beq .Lcbc_enc192 @ skip the two extra rounds of the 14-round case | |
| 348 | |
| 349 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 350 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 351 vld1.32 {q8},[r14] | |
| 352 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 353 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 354 vld1.32 {q9},[r3] | |
| 355 nop | |
| 356 | |
| 357 .Lcbc_enc192: | |
| 358 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 359 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 360 subs r2,r2,#16 @ one block consumed; flags drive the moveq and bhs below | |
| 361 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 362 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 363 moveq r8,#0 @ last block coming up: stop advancing r0 | |
| 364 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 | |
| 365 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 366 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 | |
| 367 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 368 vld1.8 {q8},[r0],r8 @ q8 = next input block | |
| 369 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 | |
| 370 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 371 veor q8,q8,q5 @ pre-mix it with rndkey[0] XOR last round key, so the next aese q0,q8 chains in the output block | |
| 372 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 | |
| 373 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 374 vld1.32 {q9},[r7] @ re-pre-load rndkey[1] | |
| 375 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 | |
| 376 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 377 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 | |
| 378 veor q6,q0,q7 @ q6 = output block (state XOR last round key) | |
| 379 bhs .Loop_cbc_enc | |
| 380 | |
| 381 vst1.8 {q6},[r1]! @ store the final output block | |
| 382 b .Lcbc_done | |
| 383 | |
| 384 .align 5 | |
| 385 .Lcbc_enc128: @ 10-round variant: rndkey[0..3] and the last 7 keys all stay in registers | |
| 386 vld1.32 {q2,q3},[r7] @ q2,q3 = rndkey[2], rndkey[3] | |
| 387 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 388 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 389 b .Lenter_cbc_enc128 | |
| 390 .Loop_cbc_enc128: | |
| 391 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 392 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 393 vst1.8 {q6},[r1]! @ store the previous output block | |
| 394 .Lenter_cbc_enc128: | |
| 395 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 396 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 397 subs r2,r2,#16 @ one block consumed; flags drive the moveq and bhs below | |
| 398 .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 | |
| 399 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 400 moveq r8,#0 @ last block coming up: stop advancing r0 | |
| 401 .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 | |
| 402 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 403 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 | |
| 404 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 405 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 | |
| 406 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 407 vld1.8 {q8},[r0],r8 @ q8 = next input block | |
| 408 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 | |
| 409 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 410 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 | |
| 411 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 412 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 | |
| 413 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 414 veor q8,q8,q5 @ pre-mix the next block with rndkey[0] XOR last round key | |
| 415 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 | |
| 416 veor q6,q0,q7 @ q6 = output block (state XOR last round key) | |
| 417 bhs .Loop_cbc_enc128 | |
| 418 | |
| 419 vst1.8 {q6},[r1]! @ store the final output block | |
| 420 b .Lcbc_done | |
| 421 .align 5 | |
| 422 .Lcbc_dec: | |
| 423 vld1.8 {q10},[r0]! @ q10 = next input block (the first block again when r8 was 0) | |
| 424 subs r2,r2,#32 @ bias | |
| 425 add r6,r5,#2 @ r6 = round-loop counter | |
| 426 vorr q3,q0,q0 @ keep copies of the input blocks: they are needed for the chaining XOR | |
| 427 vorr q1,q0,q0 | |
| 428 vorr q11,q10,q10 | |
| 429 blo .Lcbc_dec_tail @ fewer than three blocks in total | |
| 430 | |
| 431 vorr q1,q10,q10 | |
| 432 vld1.8 {q10},[r0]! @ q10 = third input block | |
| 433 vorr q2,q0,q0 | |
| 434 vorr q3,q1,q1 | |
| 435 vorr q11,q10,q10 | |
| 436 | |
| 437 .Loop3x_cbc_dec: @ three blocks (q0, q1, q10) are decrypted in parallel | |
| 438 .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 | |
| 439 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 440 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 | |
| 441 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 442 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 | |
| 443 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 444 vld1.32 {q8},[r7]! | |
| 445 subs r6,r6,#2 | |
| 446 .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 | |
| 447 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 448 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 | |
| 449 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 450 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 | |
| 451 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 452 vld1.32 {q9},[r7]! | |
| 453 bgt .Loop3x_cbc_dec | |
| 454 | |
| 455 .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 | |
| 456 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 457 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 | |
| 458 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 459 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 | |
| 460 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 461 veor q4,q6,q7 @ q4 = chaining value XOR last round key, for output block 0 | |
| 462 subs r2,r2,#0x30 @ three more blocks accounted for; flags drive the movlo here and the bhs at the end of the loop | |
| 463 veor q5,q2,q7 @ q5 = saved input block 0 XOR last round key, for output block 1 | |
| 464 movlo r6,r2 @ r6 is zero at this point, so this only backs r0 up when fewer than three blocks remain | |
| 465 .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 | |
| 466 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 467 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 | |
| 468 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 469 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 | |
| 470 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 471 veor q9,q3,q7 @ q9 = saved input block 1 XOR last round key, for output block 2 | |
| 472 add r0,r0,r6 @ r0 is adjusted in such a way that | |
| 473 @ at exit from the loop q1 and q10 | |
| 474 @ are loaded with the last "words" | |
| 475 vorr q6,q11,q11 @ q6 = saved input block 2, the chaining value for the next batch | |
| 476 mov r7,r3 @ rewind r7 to rndkey[0] | |
| 477 .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 | |
| 478 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 479 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 | |
| 480 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 481 .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 | |
| 482 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 483 vld1.8 {q2},[r0]! @ load the next three input blocks while the current ones finish | |
| 484 .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 | |
| 485 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 486 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 | |
| 487 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 488 .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 | |
| 489 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 490 vld1.8 {q3},[r0]! | |
| 491 .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 | |
| 492 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 | |
| 493 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 | |
| 494 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 495 .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 | |
| 496 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 497 vld1.8 {q11},[r0]! | |
| 498 .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 | |
| 499 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 | |
| 500 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 | |
| 501 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] | |
| 502 add r6,r5,#2 @ reload the round-loop counter | |
| 503 veor q4,q4,q0 @ finish the three output blocks | |
| 504 veor q5,q5,q1 | |
| 505 veor q10,q10,q9 | |
| 506 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] | |
| 507 vst1.8 {q4},[r1]! | |
| 508 vorr q0,q2,q2 @ move the freshly loaded blocks into the working registers | |
| 509 vst1.8 {q5},[r1]! | |
| 510 vorr q1,q3,q3 | |
| 511 vst1.8 {q10},[r1]! | |
| 512 vorr q10,q11,q11 | |
| 513 bhs .Loop3x_cbc_dec @ flags still from the subs r2,r2,#0x30 above | |
| 514 | |
| 515 cmn r2,#0x30 @ r2 == -0x30 means no blocks remain | |
| 516 beq .Lcbc_done | |
| 517 nop | |
| 518 | |
| 519 .Lcbc_dec_tail: @ one or two remaining blocks, carried in q1 and q10 | |
| 520 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 | |
| 521 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 522 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 | |
| 523 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 524 vld1.32 {q8},[r7]! | |
| 525 subs r6,r6,#2 | |
| 526 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 | |
| 527 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 528 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 | |
| 529 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 530 vld1.32 {q9},[r7]! | |
| 531 bgt .Lcbc_dec_tail | |
| 532 | |
| 533 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 | |
| 534 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 535 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 | |
| 536 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 537 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 | |
| 538 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 539 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 | |
| 540 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 541 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 | |
| 542 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 543 .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 | |
| 544 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 545 cmn r2,#0x20 @ r2 == -0x20 means exactly one block remains; flags are used by the beq below | |
| 546 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 | |
| 547 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 548 .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 | |
| 549 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 550 veor q5,q6,q7 @ q5 = chaining value XOR last round key | |
| 551 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 | |
| 552 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 | |
| 553 .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 | |
| 554 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 | |
| 555 veor q9,q3,q7 @ q9 = saved input block XOR last round key | |
| 556 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 | |
| 557 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 | |
| 558 beq .Lcbc_dec_one | |
| 559 veor q5,q5,q1 @ two blocks: outputs come from q1 and q10 | |
| 560 veor q9,q9,q10 | |
| 561 vorr q6,q11,q11 @ q6 = last input block, written back as the new IV | |
| 562 vst1.8 {q5},[r1]! | |
| 563 vst1.8 {q9},[r1]! | |
| 564 b .Lcbc_done | |
| 565 | |
| 566 .Lcbc_dec_one: | |
| 567 veor q5,q5,q10 @ one block: only the q10 result is used | |
| 568 vorr q6,q11,q11 @ q6 = last input block, written back as the new IV | |
| 569 vst1.8 {q5},[r1]! | |
| 570 | |
| 571 .Lcbc_done: | |
| 572 vst1.8 {q6},[r4] @ write q6 back to the caller's IV buffer | |
| 573 .Lcbc_abort: | |
| 574 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ restore the saved d8-d15 | |
| 575 ldmia sp!,{r4,r5,r6,r7,r8,pc} @ restore core registers and return | |
| 576 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt | |
| 577 .globl aes_v8_ctr32_encrypt_blocks @ AES-CTR over whole blocks; only the 32-bit word in bytes 12..15 of the counter block is incremented. | |
| 578 .hidden aes_v8_ctr32_encrypt_blocks @ In: r0 = input, r1 = output, r2 = number of 16-byte blocks, r3 = key schedule, [sp] = pointer to the 16-byte counter block. | |
| 579 .type aes_v8_ctr32_encrypt_blocks,%function @ Three blocks per main-loop iteration, then a one- or two-block tail; the counter block in memory is read but never written back. | |
| 580 .align 5 | |
| 581 aes_v8_ctr32_encrypt_blocks: | |
| 582 mov ip,sp @ ip -> stack arguments, captured before anything is pushed | |
| 583 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} | |
| 584 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so | |
| 585 ldr r4, [ip] @ load remaining arg | |
| 586 ldr r5,[r3,#240] @ r5 = number of rounds | |
| 587 | |
| 588 ldr r8, [r4, #12] @ r8 = counter word (bytes 12..15 of the counter block) | |
| 589 vld1.32 {q0},[r4] @ q0 = counter block | |
| 590 | |
| 591 vld1.32 {q8,q9},[r3] @ load key schedule... | |
| 592 sub r5,r5,#4 | |
| 593 mov r12,#16 @ r12 = input post-increment used by the tail | |
| 594 cmp r2,#2 @ flags are consumed by the movlo and bls below | |
| 595 add r7,r3,r5,lsl#4 @ pointer to last 5 round keys | |
| 596 sub r5,r5,#2 @ r5 = rounds - 6, the round-loop counter reload value | |
| 597 vld1.32 {q12,q13},[r7]! | |
| 598 vld1.32 {q14,q15},[r7]! | |
| 599 vld1.32 {q7},[r7] @ q7 = last round key | |
| 600 add r7,r3,#32 @ r7 -> rndkey[2] | |
| 601 mov r6,r5 | |
| 602 movlo r12,#0 @ fewer than two blocks: the tail must not advance r0 | |
| 603 #ifndef __ARMEB__ | |
| 604 rev r8, r8 @ little-endian build: byte-swap the counter word so it can be incremented with add | |
| 605 #endif | |
| 606 vorr q1,q0,q0 | |
| 607 add r10, r8, #1 | |
| 608 vorr q10,q0,q0 | |
| 609 add r8, r8, #2 | |
| 610 vorr q6,q0,q0 @ q6 = template counter block, copied for every new block | |
| 611 rev r10, r10 | |
| 612 vmov.32 d3[1],r10 @ q1 = counter block with counter + 1 | |
| 613 bls .Lctr32_tail @ r2 <= 2: straight to the tail. NOTE(review): r2 == 0 also lands here and stores two blocks - presumably callers never pass zero; confirm | |
| 614 rev r12, r8 | |
| 615 sub r2,r2,#3 @ bias | |
| 616 vmov.32 d21[1],r12 @ q10 = counter block with counter + 2 | |
| 617 b .Loop3x_ctr32 | |
| 618 | |
| 619 .align 4 | |
| 620 .Loop3x_ctr32: @ three counter blocks (q0, q1, q10) are encrypted in parallel | |
| 621 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 622 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 623 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 | |
| 624 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 625 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 | |
| 626 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 | |
| 627 vld1.32 {q8},[r7]! | |
| 628 subs r6,r6,#2 | |
| 629 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 630 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 631 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 | |
| 632 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 633 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 | |
| 634 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 | |
| 635 vld1.32 {q9},[r7]! | |
| 636 bgt .Loop3x_ctr32 | |
| 637 | |
| 638 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 639 .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 | |
| 640 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 | |
| 641 .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 | |
| 642 vld1.8 {q2},[r0]! @ q2 = input block 0 | |
| 643 vorr q0,q6,q6 @ q0, q1, q10 are rebuilt from the template for the next batch while q4, q5, q9 finish this one | |
| 644 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 | |
| 645 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 | |
| 646 vld1.8 {q3},[r0]! @ q3 = input block 1 | |
| 647 vorr q1,q6,q6 | |
| 648 .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 | |
| 649 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 | |
| 650 .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 | |
| 651 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 | |
| 652 vld1.8 {q11},[r0]! @ q11 = input block 2 | |
| 653 mov r7,r3 @ rewind r7 to rndkey[0] | |
| 654 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 | |
| 655 .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 | |
| 656 vorr q10,q6,q6 | |
| 657 add r9,r8,#1 @ next three counter values: r8+1, r8+2, r8+3 | |
| 658 .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 | |
| 659 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 | |
| 660 .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 | |
| 661 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 | |
| 662 veor q2,q2,q7 @ pre-XOR the input with the last round key | |
| 663 add r10,r8,#2 | |
| 664 .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 | |
| 665 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 | |
| 666 veor q3,q3,q7 | |
| 667 add r8,r8,#3 | |
| 668 .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 | |
| 669 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 | |
| 670 .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 | |
| 671 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 | |
| 672 veor q11,q11,q7 | |
| 673 rev r9,r9 | |
| 674 .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 | |
| 675 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 | |
| 676 vmov.32 d1[1], r9 @ insert the new counter words into lane 3 of q0, q1 and q10 | |
| 677 rev r10,r10 | |
| 678 .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 | |
| 679 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 | |
| 680 .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 | |
| 681 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 | |
| 682 vmov.32 d3[1], r10 | |
| 683 rev r12,r8 | |
| 684 .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 | |
| 685 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 | |
| 686 vmov.32 d21[1], r12 | |
| 687 subs r2,r2,#3 @ three blocks done; flags are used by the bhs at the end of the loop | |
| 688 .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 | |
| 689 .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 | |
| 690 .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 | |
| 691 | |
| 692 veor q2,q2,q4 @ output = input XOR last round key XOR state | |
| 693 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] | |
| 694 vst1.8 {q2},[r1]! | |
| 695 veor q3,q3,q5 | |
| 696 mov r6,r5 @ reload the round-loop counter | |
| 697 vst1.8 {q3},[r1]! | |
| 698 veor q11,q11,q9 | |
| 699 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] | |
| 700 vst1.8 {q11},[r1]! | |
| 701 bhs .Loop3x_ctr32 | |
| 702 | |
| 703 adds r2,r2,#3 @ undo the bias: r2 = blocks left (0, 1 or 2) | |
| 704 beq .Lctr32_done | |
| 705 cmp r2,#1 | |
| 706 mov r12,#16 | |
| 707 moveq r12,#0 @ one block left: the tail must not advance r0 | |
| 708 | |
| 709 .Lctr32_tail: @ encrypts the counter blocks in q0 and q1; the second result is stored only when r2 != 1 | |
| 710 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 711 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 712 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 | |
| 713 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 714 vld1.32 {q8},[r7]! | |
| 715 subs r6,r6,#2 | |
| 716 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 717 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 718 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 | |
| 719 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 720 vld1.32 {q9},[r7]! | |
| 721 bgt .Lctr32_tail | |
| 722 | |
| 723 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 | |
| 724 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 725 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 | |
| 726 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 727 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 | |
| 728 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 729 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 | |
| 730 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 731 vld1.8 {q2},[r0],r12 @ q2 = first remaining input block; r0 advances only if a second block exists | |
| 732 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 | |
| 733 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 734 .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 | |
| 735 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 736 vld1.8 {q3},[r0] @ q3 = second remaining input block (the first one again when r12 is 0) | |
| 737 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 | |
| 738 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 739 .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 | |
| 740 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 741 veor q2,q2,q7 @ pre-XOR the input with the last round key | |
| 742 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 | |
| 743 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 | |
| 744 .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 | |
| 745 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 | |
| 746 veor q3,q3,q7 | |
| 747 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 | |
| 748 .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 | |
| 749 | |
| 750 cmp r2,#1 @ flags are used by the beq below | |
| 751 veor q2,q2,q0 | |
| 752 veor q3,q3,q1 | |
| 753 vst1.8 {q2},[r1]! @ first tail output block | |
| 754 beq .Lctr32_done @ r2 == 1: the second result is discarded | |
| 755 vst1.8 {q3},[r1] @ second tail output block | |
| 756 | |
| 757 .Lctr32_done: | |
| 758 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ restore the saved d8-d15 | |
| 759 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} @ restore core registers and return | |
| 760 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks | |
| 761 #endif | |
| 762 #endif | |
| OLD | NEW |