Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Unified Diff: newlib/libc/machine/x86_64/memset.S

Issue 957063002: Avoid string instructions in x86_64 memset (Closed) Base URL: https://chromium.googlesource.com/native_client/nacl-newlib.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: newlib/libc/machine/x86_64/memset.S
diff --git a/newlib/libc/machine/x86_64/memset.S b/newlib/libc/machine/x86_64/memset.S
index c16cd58106ee87c78b82f640a273e63496ddcca4..f7a4165de755894c841e3ee391ef19abc463b1fb 100644
--- a/newlib/libc/machine/x86_64/memset.S
+++ b/newlib/libc/machine/x86_64/memset.S
@@ -13,78 +13,126 @@
.global SYM (memset)
SOTYPE_FUNCTION(memset)
+ /*
+ * Avoid revealing the sandbox base address. In particular, this code never
+ * reads from r11, and it avoids the string instructions (stos) because they
+ * leave a full 64-bit value in rdi.
+ */
+
SYM (memset):
movl edi, r9d /* Save return value */
movl esi, eax
movl edx, ecx
cmpl $16, edx
- jb byte_set
+ jb .Lbyte_set
movl edi, r8d /* Align on quad word boundary */
andl $7, r8d
- jz quadword_aligned
+ jz .Lquadword_aligned
movl $8, ecx
subl r8d, ecx
subl ecx, edx
- rep stosb al, %nacl:(rdi), r15
+.Lheader_loop:
+ movb al, %nacl:(r15, rdi)
+ inc edi
+ dec ecx
+ jnz .Lheader_loop
+
movl edx, ecx
-quadword_aligned:
+.Lquadword_aligned:
movabs $0x0101010101010101, r8
movzbl sil, eax
imul r8, rax
cmpl $256, edx
- jb quadword_set
+ jb .Lquadword_set
- shrl $7, ecx /* Store 128 bytes at a time with minimum cache polution */
+ shrl $7, ecx /* Store 128 bytes at a time */
+
+ /* Save the frame pointer and use it as a base to avoid repeated masking */
+ pushq $0
+ movl ebp, (rsp)
+ naclrestbp edi, r15
.p2align 4
-loop:
- movntiq rax, %nacl: (r15,rdi)
- movntiq rax, %nacl: 8 (r15,rdi)
- movntiq rax, %nacl: 16 (r15,rdi)
- movntiq rax, %nacl: 24 (r15,rdi)
- movntiq rax, %nacl: 32 (r15,rdi)
- movntiq rax, %nacl: 40 (r15,rdi)
- movntiq rax, %nacl: 48 (r15,rdi)
- movntiq rax, %nacl: 56 (r15,rdi)
- movntiq rax, %nacl: 64 (r15,rdi)
- movntiq rax, %nacl: 72 (r15,rdi)
- movntiq rax, %nacl: 80 (r15,rdi)
- movntiq rax, %nacl: 88 (r15,rdi)
- movntiq rax, %nacl: 96 (r15,rdi)
- movntiq rax, %nacl: 104 (r15,rdi)
- movntiq rax, %nacl: 112 (r15,rdi)
- movntiq rax, %nacl: 120 (r15,rdi)
-
- leal 128 (rdi), edi
+.Lquadword_aligned_loop:
+
+ movq rax, (rbp)
+ movq rax, 8 (rbp)
+ movq rax, 16 (rbp)
+ movq rax, 24 (rbp)
+ movq rax, 32 (rbp)
+ movq rax, 40 (rbp)
+ movq rax, 48 (rbp)
+ movq rax, 56 (rbp)
+ movq rax, 64 (rbp)
+ movq rax, 72 (rbp)
+ movq rax, 80 (rbp)
+ movq rax, 88 (rbp)
+ movq rax, 96 (rbp)
+ movq rax, 104 (rbp)
+ movq rax, 112 (rbp)
+ movq rax, 120 (rbp)
+
+ .bundle_lock
+ leal 128 (rbp), ebp
+ add r15, rbp
+ .bundle_unlock
dec ecx
- jnz loop
+ jnz .Lquadword_aligned_loop
+
+ movl ebp, edi
+ popq r8
+ naclrestbp r8d, r15
- sfence
movl edx, ecx
andl $127, ecx
- rep stosb al, %nacl:(rdi), r15
+ jz .Lfooter_loop_end
+.Lfooter_loop:
+ movb al, %nacl:(r15, rdi)
+ inc edi
+ dec ecx
+ jnz .Lfooter_loop
+.Lfooter_loop_end:
movl r9d, eax
pop r11
nacljmp r11d, r15
-byte_set:
- rep stosb al, %nacl:(rdi), r15
+.Lbyte_set:
+ testl ecx, ecx
+ jz .Lbyte_set_end
+.Lbyte_set_loop:
+ movb al, %nacl:(r15, rdi)
+ inc edi
+ dec ecx
+ jnz .Lbyte_set_loop
+.Lbyte_set_end:
movl r9d, eax
pop r11
nacljmp r11d, r15
-quadword_set:
+.Lquadword_set:
shrl $3, ecx
+ jz .Lquadword_loop_end
.p2align 4
- rep stosq rax, %nacl:(rdi), r15
+.Lquadword_loop:
+ movq rax, %nacl:(r15, rdi)
+ add $8, edi
+ dec ecx
+ jnz .Lquadword_loop
+.Lquadword_loop_end:
movl edx, ecx
andl $7, ecx
- rep stosb al, %nacl:(rdi), r15 /* Store the remaining bytes */
+ jz .Lquadword_footer_end
+.Lquadword_footer:
+ movb al, %nacl:(r15, rdi)
+ inc edi
+ dec ecx
+ jnz .Lquadword_footer
+.Lquadword_footer_end:
movl r9d, eax
pop r11
nacljmp r11d, r15
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698