OLD | NEW |
(Empty) | |
; AesOpt.asm -- Intel's AES.
; 2009-12-12 : Igor Pavlov : Public domain
| 3 |
include 7zAsm.asm

MY_ASM_START

; Argument and register aliases shared by every procedure in this file.
; r0..r7, x0..x7 and REG_SIZE are not defined here; presumably they come from
; 7zAsm.asm (verify there).
ifdef x64
  ; the block count arrives in a register
  num     equ r8
else
  ; non-x64 build: enable the XMM instruction set for the assembler
  .xmm
  ; the block count is read through r4; this is a text equate, so the
  ; reference is resolved where `num` is used (inside MY_PROLOG)
  num     equ [r4 + REG_SIZE * 4]
endif

rN      equ r0                  ; blocks still to process (loaded from num)
rD      equ r2                  ; current position in the data buffer
; MY_PROLOG reg
;   Entry sequence shared by the procedures in this file.
;     reg : xmm register that receives the 16 bytes stored at [r1].
;   Effects:
;     - x64 builds only: xmm6 / xmm7 are stored at [r4 + 8] / [r4 + 8 + 16]
;       (MY_EPILOG reloads them from the same two slots).
;     - r3, r5 and r6 are pushed.
;     - reg = 16 bytes at [r1]            (movdqa: [r1] must be 16-byte aligned)
;     - x6  = (dword at [r1 + 16]) * 32
;     - rN  = num
;     - r1 is advanced by 32.
;   NOTE(review): the layout behind r1 (16-byte chaining/counter value, a
;   round-count field at +16, key material from +32) is inferred from how the
;   procedures use it -- confirm against the C caller.
MY_PROLOG macro reg:req
  ifdef x64
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
  endif

        push    r3
        push    r5
        push    r6

        movdqa  reg, [r1]
        mov     x6, [r1 + 16]
        shl     x6, 5                   ; x6 *= 32

        ; in non-x64 builds num is an r4-relative memory operand, so this load
        ; has to stay after the three pushes above
        mov     rN, num
        add     r1, 32
endm
| 38 |
; MY_EPILOG
;   Exit sequence matching MY_PROLOG: pops r6, r5, r3 (reverse push order),
;   reloads xmm6 / xmm7 from [r4 + 8] / [r4 + 8 + 16] in x64 builds, then
;   expands MY_ENDP (defined outside this file) to finish the procedure.
MY_EPILOG macro
        pop     r6
        pop     r5
        pop     r3

  ifdef x64
        ; r4 is back at its entry value here, so these are the slots that
        ; MY_PROLOG filled before pushing anything
        movdqa  xmm7, [r4 + 8 + 16]
        movdqa  xmm6, [r4 + 8]
  endif

        MY_ENDP
endm
| 51 |
; Number of blocks handled per pass of the unrolled loops (they occupy
; xmm0 .. xmm<ways-1>), and the same quantity expressed in bytes.
ways    equ 4
ways16  equ (16 * ways)
| 54 |
; OP_W insn, src
;   Expands to `insn xmm<i>, src` for i = 0 .. ways-1.
;   `insn` may be an instruction or a two-argument macro.  `src` is pasted
;   textually, so it may mention the assembly-time variable i
;   (e.g. [rD + i * 16]) to obtain a different operand per register.
;   Leaves i equal to ways.
OP_W macro insn, src
  i = 0
  while i lt ways
        insn    @CatStr(xmm,%i), src
    i = i + 1
  endm
endm
| 62 |
; LOAD_OP insn, keyDisp
;   Single-block form: applies `insn` to xmm0 with the 16-byte memory operand
;   at [r1 + r3 keyDisp].  keyDisp is a signed displacement written with its
;   sign (+0, +16, -16, ...), because it is pasted directly after r3.
LOAD_OP macro insn:req, keyDisp:req
        insn    xmm0, [r1 + r3 keyDisp]
endm
| 66 |
; LOAD_OP_W insn, keyDisp
;   Multi-block form of LOAD_OP: loads the 16 bytes at [r1 + r3 keyDisp] into
;   xmm7 once, then applies `insn xmm<i>, xmm7` to xmm0 .. xmm<ways-1>.
;   Clobbers xmm7.  keyDisp carries its own sign, as for LOAD_OP.
LOAD_OP_W macro insn:req, keyDisp:req
        movdqa  xmm7, [r1 + r3 keyDisp]
        OP_W    insn, xmm7
endm
| 71 |
| 72 |
; ---------- AES-CBC Decode ----------
| 74 |
; CBC_DEC_UPDATE blk, dataOffs
;   Chaining step for one decrypted block held in `blk`:
;     blk ^= xmm6                 (xmm6 = previous chaining value)
;     xmm6 = 16 bytes at [rD + dataOffs]   (read before the slot is overwritten)
;     [rD + dataOffs] = blk
;   The order of the three instructions is significant.
CBC_DEC_UPDATE macro blk, dataOffs
        pxor    blk, xmm6
        movdqa  xmm6, [rD + dataOffs]
        movdqa  [rD + dataOffs], blk
endm
| 80 |
; DECODE ldop
;   Runs the aesdec rounds through a loader macro `ldop` (LOAD_OP or
;   LOAD_OP_W) that takes an instruction and a displacement.
;   Key operands are taken at [r1 + r3 + 16] first, then pairs at
;   [r1 + r3] and [r1 + r3 - 16] while x3 is stepped down by 32 until it
;   reaches zero; aesdeclast then uses [r1 + r3] with r3 == 0.
;   In : x3 = starting byte offset; the loop only ends when x3 hits exactly 0,
;        so it must be a non-zero multiple of 32.
;   Out: x3 = 0; flags clobbered.
DECODE macro ldop:req
        ldop    aesdec, +16
  @@:
        ldop    aesdec, +0
        ldop    aesdec, -16
        sub     x3, 32
        jnz     @B
        ldop    aesdeclast, +0
endm
| 90 |
; AesCbc_Decode_Intel
;   In-place AES-CBC decryption with aesdec / aesdeclast.
;   In : r1  -> state consumed by MY_PROLOG (chaining value at [r1], dword at
;               [r1 + 16] scaled by 32 into x6, key material addressed
;               relative to r1 + 32)
;        rD  -> data; accessed with movdqa, so it must be 16-byte aligned
;        num =  number of 16-byte blocks
;   Out: data decrypted in place; the last ciphertext block consumed is
;        written back to the chaining slot (the original [r1]).
;   Register use: xmm6 = previous ciphertext block, x6 = starting key offset
;   for each block, x3/r3 = running key offset inside DECODE,
;   xmm7 = key scratch (wide loop), xmm1 = saved ciphertext (single loop).
MY_PROC AesCbc_Decode_Intel, 3
        MY_PROLOG xmm6                  ; xmm6 = chaining value

        sub     x6, 32

        jmp     cbcDec_wideCheck

  align 16
  cbcDec_wide:
        ; decrypt `ways` blocks at once in xmm0 .. xmm<ways-1>
        OP_W    movdqa, [rD + i * 16]
        mov     x3, x6
        LOAD_OP_W pxor, +32
        DECODE  LOAD_OP_W
        OP_W    CBC_DEC_UPDATE, i * 16  ; un-chain and store, block by block
        add     rD, ways16
  cbcDec_wideCheck:
        sub     rN, ways
        jnc     cbcDec_wide             ; no borrow: at least `ways` blocks were left

        add     rN, ways                ; undo the last subtraction: rN = leftover blocks
        jmp     cbcDec_oneCheck

  cbcDec_one:
        mov     x3, x6
        LOAD_OP movdqa, +32             ; xmm0 = key at [r1 + r3 + 32]
        movdqa  xmm1, [rD]              ; keep the ciphertext for the next block's chaining
        pxor    xmm0, xmm1
        DECODE  LOAD_OP
        pxor    xmm0, xmm6
        movdqa  [rD], xmm0
        movdqa  xmm6, xmm1
        add     rD, 16
  cbcDec_oneCheck:
        sub     rN, 1
        jnc     cbcDec_one

        ; r1 was advanced by 32 in MY_PROLOG, so this is the slot the chaining
        ; value was loaded from
        movdqa  [r1 - 32], xmm6
        MY_EPILOG
| 129 |
| 130 |
; ---------- AES-CBC Encode ----------
| 132 |
; ENCODE ldop
;   Runs the aesenc rounds through a loader macro `ldop` (LOAD_OP or
;   LOAD_OP_W) that takes an instruction and a displacement.
;   Key operands are taken at [r1 + r3 - 16] first, then pairs at
;   [r1 + r3] and [r1 + r3 + 16] while r3 is stepped up by 32 until it
;   reaches zero; aesenclast then uses [r1 + r3] with r3 == 0.
;   In : r3 = negative byte offset; the loop only ends when r3 hits exactly 0,
;        so it must be a non-zero multiple of 32.
;   Out: r3 = 0; flags clobbered.
ENCODE macro ldop:req
        ldop    aesenc, -16
  @@:
        ldop    aesenc, +0
        ldop    aesenc, +16
        add     r3, 32
        jnz     @B
        ldop    aesenclast, +0
endm
| 142 |
; AesCbc_Encode_Intel
;   In-place AES-CBC encryption with aesenc / aesenclast, one block at a time
;   (each block depends on the previous ciphertext).
;   In : r1  -> state consumed by MY_PROLOG (chaining value at [r1])
;        rD  -> data; accessed as an aligned 16-byte memory operand
;        num =  number of 16-byte blocks
;   Out: data encrypted in place; the last ciphertext block is written back
;        to the chaining slot (the original [r1]).
;   Register use: xmm0 = chaining value / block being encrypted,
;   r1 = key base moved forward by x6 bytes, r6 = 32 - x6 (negative start
;   offset), r3 = running key offset inside ENCODE.
MY_PROC AesCbc_Encode_Intel, 3
        MY_PROLOG xmm0                  ; xmm0 = chaining value

        ; address the keys as [r1 + negative offset] so ENCODE can count up to 0
        add     r1, r6
        neg     r6
        add     r6, 32

        jmp     cbcEnc_check

  align 16
  cbcEnc_block:
        mov     r3, r6
        pxor    xmm0, [r1 + r3 - 32]    ; first key operand
        pxor    xmm0, [rD]              ; plaintext ^ previous ciphertext
        ENCODE  LOAD_OP
        movdqa  [rD], xmm0
        add     rD, 16
  cbcEnc_check:
        sub     rN, 1
        jnc     cbcEnc_block

        ; r1 + r6 is the post-prolog r1 plus 32, so subtracting 64 yields the
        ; address the chaining value was loaded from
        movdqa  [r1 + r6 - 64], xmm0
        MY_EPILOG
| 166 |
| 167 |
; ---------- AES-CTR ----------
| 169 |
; XOR_UPD_1 blk, dataOffs
;   blk ^= 16 bytes at [rD + dataOffs]   (aligned memory operand)
XOR_UPD_1 macro blk, dataOffs
        pxor    blk, [rD + dataOffs]
endm
| 173 |
; XOR_UPD_2 blk, dataOffs
;   Stores blk to the 16 bytes at [rD + dataOffs]   (aligned store)
XOR_UPD_2 macro blk, dataOffs
        movdqa  [rD + dataOffs], blk
endm
| 177 |
; AesCtr_Code_Intel
;   AES-CTR: XORs each 16-byte block at rD, in place, with the AES encryption
;   of the next counter value.
;   In : r1  -> state consumed by MY_PROLOG (counter block at [r1])
;        rD  -> data; accessed as aligned 16-byte memory operands
;        num =  number of 16-byte blocks
;   Out: data transformed in place; the updated counter block is written back
;        to the slot it was loaded from (the original [r1]).
;   Counter: paddq adds {1, 0} to xmm6, i.e. the low 64 bits are incremented
;   and the high 64 bits are left alone; the increment happens BEFORE each
;   block is generated.
;   Register use: xmm6 = counter, xmm7 = increment / key scratch,
;   r1 = key base moved forward by x6 bytes, r6 = 32 - x6 (negative start
;   offset), r3 = running key offset inside ENCODE.
;   The {1, 0} increment is built in a register (mov + movd) rather than in
;   scratch memory below the stack pointer: this procedure never reserves
;   that area, and the Windows x64 ABI treats memory beyond the stack pointer
;   as volatile, so a constant kept there is not guaranteed to survive.
;   r5 is consequently unused here (MY_PROLOG / MY_EPILOG still save and
;   restore it).
MY_PROC AesCtr_Code_Intel, 3
        MY_PROLOG xmm6                  ; xmm6 = counter block

        ; address the keys as [r1 + negative offset] so ENCODE can count up to 0
        add     r1, r6
        neg     r6
        add     r6, 32

        jmp     check2_c

  align 16
  nextBlocks2_c:
        ; LOAD_OP_W overwrites xmm7 with key data on every pass, so the
        ; increment is rebuilt here.  x3 is free: r3 is reloaded from r6 below.
        ; movd zero-extends, giving xmm7 = {1, 0}.
        mov     x3, 1
        movd    xmm7, x3

        ; xmm0 .. xmm<ways-1> = next `ways` counter values
        i = 0
        rept ways
        paddq   xmm6, xmm7
        movdqa  @CatStr(xmm,%i), xmm6
        i = i + 1
        endm

        mov     r3, r6
        LOAD_OP_W pxor, -32             ; first key operand
        ENCODE  LOAD_OP_W
        OP_W    XOR_UPD_1, i * 16       ; keystream ^= data
        OP_W    XOR_UPD_2, i * 16       ; store result over the data
        add     rD, ways16
  check2_c:
        sub     rN, ways
        jnc     nextBlocks2_c           ; no borrow: at least `ways` blocks were left

        add     rN, ways                ; undo the last subtraction: rN = leftover blocks

        ; LOAD_OP only touches xmm0, so the increment built here stays valid
        ; for the whole single-block loop.
        mov     x3, 1
        movd    xmm7, x3
        jmp     check_c

  nextBlock_c:
        paddq   xmm6, xmm7
        mov     r3, r6
        movdqa  xmm0, [r1 + r3 - 32]    ; first key operand
        pxor    xmm0, xmm6
        ENCODE  LOAD_OP
        XOR_UPD_1 xmm0, 0
        XOR_UPD_2 xmm0, 0
        add     rD, 16
  check_c:
        sub     rN, 1
        jnc     nextBlock_c

        ; r1 + r6 is the post-prolog r1 plus 32, so subtracting 64 yields the
        ; address the counter block was loaded from
        movdqa  [r1 + r6 - 64], xmm6
        MY_EPILOG
| 236 |
; end of module
end
OLD | NEW |