Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm

Issue 11738002: Include 64-bit optimized assembly on Windows when building x64 (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/nss
Patch Set: Update checkout script Created 7 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
diff --git a/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
new file mode 100644
index 0000000000000000000000000000000000000000..2120c18f9dec923591ab87cdd5ae9b4d111067ad
--- /dev/null
+++ b/mozilla/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
@@ -0,0 +1,388 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;
+; This code is converted from mpi_amd64_gas.asm for MASM for x64.
+;
+
+; ------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_set_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+; ------------------------------------------------------------------------
+
+; r = a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+.CODE
+
+s_mpv_mul_set_vec64 PROC
+    ; Win64 entry: rcx = r, rdx = a, r8d = len, r9 = digit; the carry is returned in rax.
+    ; The body was converted from GAS code that takes its arguments in
+    ; rdi, rsi, rdx, rcx, so the parameter registers are shuffled first.
+    ; rdi and rsi are pushed here and popped again at L17.
+
+    push    rdi                     ; rdi/rsi are callee-saved in the Microsoft x64 ABI
+    push    rsi                     ; NOTE(review): no PROC FRAME/.pushreg unwind data is emitted
+
+    mov     rdi, rcx                ; rdi = r (destination vector)
+    mov     rsi, rdx                ; rsi = a (source vector)
+    mov     edx, r8d                ; rdx = len; the 32-bit write clears bits 63:32
+    mov     rcx, r9                 ; rcx = digit (multiplier for every mul below)
+
+    xor     r9, r9                  ; carry = 0 before the len test, so len == 0 returns 0
+    test    rdx, rdx
+    jz      L17                     ; nothing to do: return the zero carry
+    mov     r8, rdx                 ; r8 = digits remaining (mul overwrites rdx)
+    ; From here on: r8 = digits left (non-zero), r9 = carry into the next digit.
+
+L15:                                ; unrolled path: 8 digits per pass
+    cmp     r8, 8
+    jb      L16                     ; fewer than 8 left: finish one digit at a time
+    mov     rax, [rsi]
+    mov     r11, [8+rsi]            ; r11 preloads the next source digit
+    mul     rcx                     ; rdx:rax = a[0] * digit
+    add     rax, r9                 ; add the incoming carry
+    adc     rdx, 0                  ; propagate into the high half
+    mov     [0+rdi], rax            ; r[0] = low half
+    mov     r9, rdx                 ; high half becomes the next carry
+    mov     rax, r11                ; digit 1
+    mov     r11, [16+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [8+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 2
+    mov     r11, [24+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [16+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 3
+    mov     r11, [32+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [24+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 4
+    mov     r11, [40+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [32+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 5
+    mov     r11, [48+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [40+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 6
+    mov     r11, [56+rsi]
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [48+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 7 (last of the pass, no preload)
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [56+rdi], rax
+    mov     r9, rdx
+    add     rsi, 64                 ; advance both vectors by 8 digits
+    add     rdi, 64
+    sub     r8, 8
+    jz      L17                     ; length was a multiple of 8: done
+    jmp     L15
+
+L16:                                ; tail: 1..7 digits remain, handled one by one
+    mov     rax, [0+rsi]            ; tail digit 0
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [0+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [8+rsi]            ; tail digit 1
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [8+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [16+rsi]           ; tail digit 2
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [16+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [24+rsi]           ; tail digit 3
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [24+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [32+rsi]           ; tail digit 4
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [32+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [40+rsi]           ; tail digit 5
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [40+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17
+    mov     rax, [48+rsi]           ; tail digit 6 (at most 7 digits reach the tail)
+    mul     rcx
+    add     rax, r9
+    adc     rdx, 0
+    mov     [48+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L17                     ; falls through to L17 either way
+
+L17:                                ; common exit
+    mov     rax, r9                 ; return the final carry digit
+    pop     rsi                     ; restore in reverse push order
+    pop     rdi
+    ret
+
+s_mpv_mul_set_vec64 ENDP
+
+
+;------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_add_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+;------------------------------------------------------------------------
+
+; r += a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+s_mpv_mul_add_vec64 PROC
+    ; Win64 entry: rcx = r, rdx = a, r8d = len, r9 = digit; the carry is returned in rax.
+    ; The body was converted from GAS code that takes its arguments in
+    ; rdi, rsi, rdx, rcx, so the parameter registers are shuffled first.
+    ; rdi and rsi are pushed here and popped again at L27.
+
+    push    rdi                     ; rdi/rsi are callee-saved in the Microsoft x64 ABI
+    push    rsi                     ; NOTE(review): no PROC FRAME/.pushreg unwind data is emitted
+
+    mov     rdi, rcx                ; rdi = r (vector that is read and updated)
+    mov     rsi, rdx                ; rsi = a (source vector)
+    mov     edx, r8d                ; rdx = len; the 32-bit write clears bits 63:32
+    mov     rcx, r9                 ; rcx = digit (multiplier for every mul below)
+
+    xor     r9, r9                  ; carry = 0 before the len test, so len == 0 returns 0
+    test    rdx, rdx
+    jz      L27                     ; nothing to do: return the zero carry
+    mov     r8, rdx                 ; r8 = digits remaining (mul overwrites rdx)
+    ; From here on: r8 = digits left (non-zero), r9 = carry into the next digit.
+
+L25:                                ; unrolled path: 8 digits per pass
+    cmp     r8, 8
+    jb      L26                     ; fewer than 8 left: finish one digit at a time
+    mov     rax, [0+rsi]
+    mov     r10, [0+rdi]            ; r10 = current r digit to accumulate into
+    mov     r11, [8+rsi]            ; r11 preloads the next source digit
+    mul     rcx                     ; rdx:rax = a[0] * digit
+    add     rax, r10                ; + r[0]
+    adc     rdx, 0
+    mov     r10, [8+rdi]            ; preload the next r digit
+    add     rax, r9                 ; + incoming carry
+    adc     rdx, 0
+    mov     [0+rdi], rax            ; r[0] = low half
+    mov     r9, rdx                 ; high half becomes the next carry
+    mov     rax, r11                ; digit 1
+    mov     r11, [16+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [16+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [8+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 2
+    mov     r11, [24+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [24+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [16+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 3
+    mov     r11, [32+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [32+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [24+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 4
+    mov     r11, [40+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [40+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [32+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 5
+    mov     r11, [48+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [48+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [40+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 6
+    mov     r11, [56+rsi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    mov     r10, [56+rdi]
+    add     rax, r9
+    adc     rdx, 0
+    mov     [48+rdi], rax
+    mov     r9, rdx
+    mov     rax, r11                ; digit 7 (last of the pass, no preloads)
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [56+rdi], rax
+    mov     r9, rdx
+    add     rsi, 64                 ; advance both vectors by 8 digits
+    add     rdi, 64
+    sub     r8, 8
+    jz      L27                     ; length was a multiple of 8: done
+    jmp     L25
+
+L26:                                ; tail: 1..7 digits remain, handled one by one
+    mov     rax, [0+rsi]            ; tail digit 0
+    mov     r10, [0+rdi]
+    mul     rcx
+    add     rax, r10                ; + r digit
+    adc     rdx, 0
+    add     rax, r9                 ; + incoming carry
+    adc     rdx, 0
+    mov     [0+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [8+rsi]            ; tail digit 1
+    mov     r10, [8+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [8+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [16+rsi]           ; tail digit 2
+    mov     r10, [16+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [16+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [24+rsi]           ; tail digit 3
+    mov     r10, [24+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [24+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [32+rsi]           ; tail digit 4
+    mov     r10, [32+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [32+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [40+rsi]           ; tail digit 5
+    mov     r10, [40+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [40+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27
+    mov     rax, [48+rsi]           ; tail digit 6 (at most 7 digits reach the tail)
+    mov     r10, [48+rdi]
+    mul     rcx
+    add     rax, r10
+    adc     rdx, 0
+    add     rax, r9
+    adc     rdx, 0
+    mov     [48+rdi], rax
+    mov     r9, rdx
+    dec     r8
+    jz      L27                     ; falls through to L27 either way
+
+L27:                                ; common exit
+    mov     rax, r9                 ; return the final carry digit
+
+    pop     rsi                     ; restore in reverse push order
+    pop     rdi
+    ret
+
+s_mpv_mul_add_vec64 ENDP
+
+END
« no previous file with comments | « mozilla/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm ('k') | nss.gyp » ('j') | nss.gyp » ('J')

Powered by Google App Engine
This is Rietveld 408576698