Index: third_party/lzma_sdk/Asm/x86/AesOpt.asm |
=================================================================== |
--- third_party/lzma_sdk/Asm/x86/AesOpt.asm (revision 0) |
+++ third_party/lzma_sdk/Asm/x86/AesOpt.asm (revision 0) |
@@ -0,0 +1,237 @@ |
+; AesOpt.asm -- Intel's AES. |
+; 2009-12-12 : Igor Pavlov : Public domain |
+ |
+include 7zAsm.asm |
+ |
+MY_ASM_START |
+ |
+ifndef x64 |
+ .xmm |
+endif |
+ |
+ifdef x64 |
+ num equ r8 |
+else |
+ num equ [r4 + REG_SIZE * 4] |
+endif |
+ |
+rD equ r2 |
+rN equ r0 |
+ |
+MY_PROLOG macro reg:req |
+ ifdef x64 |
+ movdqa [r4 + 8], xmm6 |
+ movdqa [r4 + 8 + 16], xmm7 |
+ endif |
+ |
+ push r3 |
+ push r5 |
+ push r6 |
+ |
+ mov rN, num |
+ mov x6, [r1 + 16] |
+ shl x6, 5 |
+ |
+ movdqa reg, [r1] |
+ add r1, 32 |
+endm |
+ |
+MY_EPILOG macro |
+ pop r6 |
+ pop r5 |
+ pop r3 |
+ |
+ ifdef x64 |
+ movdqa xmm6, [r4 + 8] |
+ movdqa xmm7, [r4 + 8 + 16] |
+ endif |
+ |
+ MY_ENDP |
+endm |
+ |
+ways equ 4 |
+ways16 equ (ways * 16) |
+ |
+OP_W macro op, op2 |
+ i = 0 |
+ rept ways |
+ op @CatStr(xmm,%i), op2 |
+ i = i + 1 |
+ endm |
+endm |
+ |
+LOAD_OP macro op:req, offs:req |
+ op xmm0, [r1 + r3 offs] |
+endm |
+ |
+LOAD_OP_W macro op:req, offs:req |
+ movdqa xmm7, [r1 + r3 offs] |
+ OP_W op, xmm7 |
+endm |
+ |
+ |
+; ---------- AES-CBC Decode ---------- |
+ |
+CBC_DEC_UPDATE macro reg, offs |
+ pxor reg, xmm6 |
+ movdqa xmm6, [rD + offs] |
+ movdqa [rD + offs], reg |
+endm |
+ |
+DECODE macro op:req |
+ op aesdec, +16 |
+ @@: |
+ op aesdec, +0 |
+ op aesdec, -16 |
+ sub x3, 32 |
+ jnz @B |
+ op aesdeclast, +0 |
+endm |
+ |
+MY_PROC AesCbc_Decode_Intel, 3 |
+ MY_PROLOG xmm6 |
+ |
+ sub x6, 32 |
+ |
+ jmp check2 |
+ |
+ align 16 |
+ nextBlocks2: |
+ mov x3, x6 |
+ OP_W movdqa, [rD + i * 16] |
+ LOAD_OP_W pxor, +32 |
+ DECODE LOAD_OP_W |
+ OP_W CBC_DEC_UPDATE, i * 16 |
+ add rD, ways16 |
+ check2: |
+ sub rN, ways |
+ jnc nextBlocks2 |
+ |
+ add rN, ways |
+ jmp check |
+ |
+ nextBlock: |
+ mov x3, x6 |
+ movdqa xmm1, [rD] |
+ LOAD_OP movdqa, +32 |
+ pxor xmm0, xmm1 |
+ DECODE LOAD_OP |
+ pxor xmm0, xmm6 |
+ movdqa [rD], xmm0 |
+ movdqa xmm6, xmm1 |
+ add rD, 16 |
+ check: |
+ sub rN, 1 |
+ jnc nextBlock |
+ |
+ movdqa [r1 - 32], xmm6 |
+ MY_EPILOG |
+ |
+ |
+; ---------- AES-CBC Encode ---------- |
+ |
+ENCODE macro op:req |
+ op aesenc, -16 |
+ @@: |
+ op aesenc, +0 |
+ op aesenc, +16 |
+ add r3, 32 |
+ jnz @B |
+ op aesenclast, +0 |
+endm |
+ |
+MY_PROC AesCbc_Encode_Intel, 3 |
+ MY_PROLOG xmm0 |
+ |
+ add r1, r6 |
+ neg r6 |
+ add r6, 32 |
+ |
+ jmp check_e |
+ |
+ align 16 |
+ nextBlock_e: |
+ mov r3, r6 |
+ pxor xmm0, [rD] |
+ pxor xmm0, [r1 + r3 - 32] |
+ ENCODE LOAD_OP |
+ movdqa [rD], xmm0 |
+ add rD, 16 |
+ check_e: |
+ sub rN, 1 |
+ jnc nextBlock_e |
+ |
+ movdqa [r1 + r6 - 64], xmm0 |
+ MY_EPILOG |
+ |
+ |
+; ---------- AES-CTR ---------- |
+ |
+XOR_UPD_1 macro reg, offs |
+ pxor reg, [rD + offs] |
+endm |
+ |
+XOR_UPD_2 macro reg, offs |
+ movdqa [rD + offs], reg |
+endm |
+ |
+MY_PROC AesCtr_Code_Intel, 3 |
+ MY_PROLOG xmm6 |
+ |
+ mov r5, r4 |
+ shr r5, 4 |
+ dec r5 |
+ shl r5, 4 |
+ |
+ mov DWORD PTR [r5], 1 |
+ mov DWORD PTR [r5 + 4], 0 |
+ mov DWORD PTR [r5 + 8], 0 |
+ mov DWORD PTR [r5 + 12], 0 |
+ |
+ add r1, r6 |
+ neg r6 |
+ add r6, 32 |
+ |
+ jmp check2_c |
+ |
+ align 16 |
+ nextBlocks2_c: |
+ movdqa xmm7, [r5] |
+ |
+ i = 0 |
+ rept ways |
+ paddq xmm6, xmm7 |
+ movdqa @CatStr(xmm,%i), xmm6 |
+ i = i + 1 |
+ endm |
+ |
+ mov r3, r6 |
+ LOAD_OP_W pxor, -32 |
+ ENCODE LOAD_OP_W |
+ OP_W XOR_UPD_1, i * 16 |
+ OP_W XOR_UPD_2, i * 16 |
+ add rD, ways16 |
+ check2_c: |
+ sub rN, ways |
+ jnc nextBlocks2_c |
+ |
+ add rN, ways |
+ jmp check_c |
+ |
+ nextBlock_c: |
+ paddq xmm6, [r5] |
+ mov r3, r6 |
+ movdqa xmm0, [r1 + r3 - 32] |
+ pxor xmm0, xmm6 |
+ ENCODE LOAD_OP |
+ XOR_UPD_1 xmm0, 0 |
+ XOR_UPD_2 xmm0, 0 |
+ add rD, 16 |
+ check_c: |
+ sub rN, 1 |
+ jnc nextBlock_c |
+ |
+ movdqa [r1 + r6 - 64], xmm6 |
+ MY_EPILOG |
+ |
+end |
Property changes on: third_party\lzma_sdk\Asm\x86\AesOpt.asm |
___________________________________________________________________ |
Added: svn:executable |
+ * |