| Index: mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
|
| diff --git a/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..2120c18f9dec923591ab87cdd5ae9b4d111067ad
|
| --- /dev/null
|
| +++ b/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
|
| @@ -0,0 +1,388 @@
|
| +; This Source Code Form is subject to the terms of the Mozilla Public
|
| +; License, v. 2.0. If a copy of the MPL was not distributed with this
|
| +; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
| +
|
| +;
|
| +; This code was converted from mpi_amd64_gas.asm to MASM syntax for x64.
|
| +;
|
| +
|
| +; ------------------------------------------------------------------------
|
| +;
|
| +; Implementation of s_mpv_mul_set_vec which exploits
|
| +; the 64X64->128 bit unsigned multiply instruction.
|
| +;
|
| +; ------------------------------------------------------------------------
|
| +
|
| +; r = a * digit, r and a are vectors of length len
|
| +; returns the carry digit
|
| +; r and a are 64 bit aligned.
|
| +;
|
| +; uint64_t
|
| +; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
| +;
|
| +
|
| +.CODE
|
| +
|
| +s_mpv_mul_set_vec64 PROC
|
| +
|
| + ; Parameter register compatibility: the GAS-derived body below expects
|
| + ; (r, a, len, digit) in rdi, rsi, rdx, rcx, while this MASM entry point
|
| + ; receives them in rcx, rdx, r8d, r9. Save rdi/rsi, then remap.
|
| +
|
| + push rdi ; restored at L17
|
| + push rsi ; restored at L17
|
| +
|
| + mov rdi, rcx ; rdi = r
|
| + mov rsi, rdx ; rsi = a
|
| + mov edx, r8d ; rdx = len (32-bit move zero-extends into rdx)
|
| + mov rcx, r9 ; rcx = digit
|
| +
|
| + xor r9, r9 ; cy = 0; must precede the len test because L17 returns r9
|
| + test rdx, rdx
|
| + jz L17 ; if (len == 0) return (0)
|
| + mov r8, rdx ; r8 = digits remaining; rdx is clobbered by mul
|
| + ; From here on: r9 = carry (cy), rcx = digit, r11 = preloaded next a[] digit.
|
| +
|
| +L15: ; unrolled loop: 8 digits per pass while r8 >= 8
|
| + cmp r8, 8
|
| + jb L16 ; fewer than 8 digits left: finish in the tail
|
| + mov rax, [0+rsi]
|
| + mov r11, [8+rsi] ; preload a[1]
|
| + mul rcx ; rdx:rax = a[0] * digit
|
| + add rax, r9 ; low word += cy
|
| + adc rdx, 0 ; propagate into the high word
|
| + mov [0+rdi], rax ; r[0] = low word
|
| + mov r9, rdx ; cy = high word
|
| + mov rax, r11
|
| + mov r11, [16+rsi] ; preload a[2]
|
| + mul rcx ; rdx:rax = a[1] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [8+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [24+rsi] ; preload a[3]
|
| + mul rcx ; rdx:rax = a[2] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [16+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [32+rsi] ; preload a[4]
|
| + mul rcx ; rdx:rax = a[3] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [24+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [40+rsi] ; preload a[5]
|
| + mul rcx ; rdx:rax = a[4] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [32+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [48+rsi] ; preload a[6]
|
| + mul rcx ; rdx:rax = a[5] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [40+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [56+rsi] ; preload a[7]
|
| + mul rcx ; rdx:rax = a[6] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [48+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mul rcx ; rdx:rax = a[7] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [56+rdi], rax
|
| + mov r9, rdx
|
| + add rsi, 64 ; advance a by 8 digits
|
| + add rdi, 64 ; advance r by 8 digits
|
| + sub r8, 8
|
| + jz L17 ; nothing left: return cy
|
| + jmp L15
|
| +
|
| +L16: ; tail: r8 is 1..7 here, one digit per step
|
| + mov rax, [0+rsi]
|
| + mul rcx ; rdx:rax = a[0] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [0+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [8+rsi]
|
| + mul rcx ; rdx:rax = a[1] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [8+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [16+rsi]
|
| + mul rcx ; rdx:rax = a[2] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [16+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [24+rsi]
|
| + mul rcx ; rdx:rax = a[3] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [24+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [32+rsi]
|
| + mul rcx ; rdx:rax = a[4] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [32+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [40+rsi]
|
| + mul rcx ; rdx:rax = a[5] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [40+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17
|
| + mov rax, [48+rsi]
|
| + mul rcx ; rdx:rax = a[6] * digit
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [48+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L17 ; 7th tail digit is always the last; falls through to L17 either way
|
| +
|
| +L17: ; common exit: return the carry digit
|
| + mov rax, r9
|
| + pop rsi
|
| + pop rdi
|
| + ret
|
| +
|
| +s_mpv_mul_set_vec64 ENDP
|
| +
|
| +
|
| +;------------------------------------------------------------------------
|
| +;
|
| +; Implementation of s_mpv_mul_add_vec which exploits
|
| +; the 64X64->128 bit unsigned multiply instruction.
|
| +;
|
| +;------------------------------------------------------------------------
|
| +
|
| +; r += a * digit, r and a are vectors of length len
|
| +; returns the carry digit
|
| +; r and a are 64 bit aligned.
|
| +;
|
| +; uint64_t
|
| +; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
| +;
|
| +
|
| +s_mpv_mul_add_vec64 PROC
|
| +
|
| + ; Parameter register compatibility: the GAS-derived body below expects
|
| + ; (r, a, len, digit) in rdi, rsi, rdx, rcx, while this MASM entry point
|
| + ; receives them in rcx, rdx, r8d, r9. Save rdi/rsi, then remap.
|
| +
|
| + push rdi ; restored at L27
|
| + push rsi ; restored at L27
|
| +
|
| + mov rdi, rcx ; rdi = r
|
| + mov rsi, rdx ; rsi = a
|
| + mov edx, r8d ; rdx = len (32-bit move zero-extends into rdx)
|
| + mov rcx, r9 ; rcx = digit
|
| +
|
| + xor r9, r9 ; cy = 0; must precede the len test because L27 returns r9
|
| + test rdx, rdx
|
| + jz L27 ; if (len == 0) return (0)
|
| + mov r8, rdx ; r8 = digits remaining; rdx is clobbered by mul
|
| + ; From here on: r9 = carry (cy), rcx = digit, r10 = r[] digit, r11 = next a[] digit.
|
| +
|
| +L25: ; unrolled loop: 8 digits per pass while r8 >= 8
|
| + cmp r8, 8
|
| + jb L26 ; fewer than 8 digits left: finish in the tail
|
| + mov rax, [0+rsi]
|
| + mov r10, [0+rdi] ; r10 = r[0]
|
| + mov r11, [8+rsi] ; preload a[1]
|
| + mul rcx ; rdx:rax = a[0] * digit
|
| + add rax, r10 ; low word += r[0]
|
| + adc rdx, 0
|
| + mov r10, [8+rdi] ; preload r[1]
|
| + add rax, r9 ; low word += cy
|
| + adc rdx, 0
|
| + mov [0+rdi], rax ; r[0] = low word
|
| + mov r9, rdx ; cy = high word
|
| + mov rax, r11
|
| + mov r11, [16+rsi] ; preload a[2]
|
| + mul rcx ; rdx:rax = a[1] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [16+rdi] ; preload r[2]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [8+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [24+rsi] ; preload a[3]
|
| + mul rcx ; rdx:rax = a[2] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [24+rdi] ; preload r[3]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [16+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [32+rsi] ; preload a[4]
|
| + mul rcx ; rdx:rax = a[3] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [32+rdi] ; preload r[4]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [24+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [40+rsi] ; preload a[5]
|
| + mul rcx ; rdx:rax = a[4] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [40+rdi] ; preload r[5]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [32+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [48+rsi] ; preload a[6]
|
| + mul rcx ; rdx:rax = a[5] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [48+rdi] ; preload r[6]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [40+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mov r11, [56+rsi] ; preload a[7]
|
| + mul rcx ; rdx:rax = a[6] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + mov r10, [56+rdi] ; preload r[7]
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [48+rdi], rax
|
| + mov r9, rdx
|
| + mov rax, r11
|
| + mul rcx ; rdx:rax = a[7] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [56+rdi], rax
|
| + mov r9, rdx
|
| + add rsi, 64 ; advance a by 8 digits
|
| + add rdi, 64 ; advance r by 8 digits
|
| + sub r8, 8
|
| + jz L27 ; nothing left: return cy
|
| + jmp L25
|
| +
|
| +L26: ; tail: r8 is 1..7 here, one digit per step
|
| + mov rax, [0+rsi]
|
| + mov r10, [0+rdi] ; r10 = r[0]
|
| + mul rcx ; rdx:rax = a[0] * digit
|
| + add rax, r10 ; low word += r[0]
|
| + adc rdx, 0
|
| + add rax, r9 ; low word += cy
|
| + adc rdx, 0
|
| + mov [0+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [8+rsi]
|
| + mov r10, [8+rdi]
|
| + mul rcx ; rdx:rax = a[1] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [8+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [16+rsi]
|
| + mov r10, [16+rdi]
|
| + mul rcx ; rdx:rax = a[2] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [16+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [24+rsi]
|
| + mov r10, [24+rdi]
|
| + mul rcx ; rdx:rax = a[3] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [24+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [32+rsi]
|
| + mov r10, [32+rdi]
|
| + mul rcx ; rdx:rax = a[4] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [32+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [40+rsi]
|
| + mov r10, [40+rdi]
|
| + mul rcx ; rdx:rax = a[5] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [40+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27
|
| + mov rax, [48+rsi]
|
| + mov r10, [48+rdi]
|
| + mul rcx ; rdx:rax = a[6] * digit
|
| + add rax, r10
|
| + adc rdx, 0
|
| + add rax, r9
|
| + adc rdx, 0
|
| + mov [48+rdi], rax
|
| + mov r9, rdx
|
| + dec r8
|
| + jz L27 ; 7th tail digit is always the last; falls through to L27 either way
|
| +
|
| +L27: ; common exit: return the carry digit
|
| + mov rax, r9
|
| +
|
| + pop rsi
|
| + pop rdi
|
| + ret
|
| +
|
| +s_mpv_mul_add_vec64 ENDP
|
| +
|
| +END
|
|
|