| Index: newlib/libc/machine/x86_64/memset.S
|
| diff --git a/newlib/libc/machine/x86_64/memset.S b/newlib/libc/machine/x86_64/memset.S
|
| index c16cd58106ee87c78b82f640a273e63496ddcca4..f7a4165de755894c841e3ee391ef19abc463b1fb 100644
|
| --- a/newlib/libc/machine/x86_64/memset.S
|
| +++ b/newlib/libc/machine/x86_64/memset.S
|
| @@ -13,78 +13,126 @@
|
| .global SYM (memset)
|
| SOTYPE_FUNCTION(memset)
|
|
|
| + /*
|
| + * Avoid revealing the sandbox base address. In particular, we never read from
|
| + * r11, and avoid using the string instructions (stos) because they leave the
|
| + * full 64 bits in rdi.
|
| + */
|
| +
|
| SYM (memset):
|
| movl edi, r9d /* Save return value */
|
| movl esi, eax
|
| movl edx, ecx
|
| cmpl $16, edx
|
| - jb byte_set
|
| + jb .Lbyte_set /* Fewer than 16 bytes: use the plain byte loop */
|
|
|
| movl edi, r8d /* Align on quad word boundary */
|
| andl $7, r8d
|
| - jz quadword_aligned
|
| + jz .Lquadword_aligned /* Low three address bits are zero: no header */
|
| movl $8, ecx
|
| subl r8d, ecx
|
| subl ecx, edx
|
| - rep stosb al, %nacl:(rdi), r15
|
| +.Lheader_loop: /* Store the 8 - (edi & 7) leading bytes one at a time */
|
| + movb al, %nacl:(r15, rdi) /* al = fill byte (eax was loaded from esi) */
|
| + inc edi /* 32-bit increment zero-extends into rdi */
|
| + dec ecx
|
| + jnz .Lheader_loop /* Loop until the header count reaches zero */
|
| +
|
| movl edx, ecx
|
|
|
| -quadword_aligned:
|
| +.Lquadword_aligned: /* edi is 8-byte aligned here; ecx = edx = bytes left */
|
| movabs $0x0101010101010101, r8
|
| movzbl sil, eax
|
| imul r8, rax
|
| cmpl $256, edx
|
| - jb quadword_set
|
| + jb .Lquadword_set /* Under 256 bytes: skip the unrolled 128-byte loop */
|
|
|
| - shrl $7, ecx /* Store 128 bytes at a time with minimum cache polution */
|
| + shrl $7, ecx /* Store 128 bytes at a time */
|
| +
|
| + /* Save the frame pointer and use it as a base to avoid repeated masking */
|
| + pushq $0 /* Push a zeroed 8-byte stack slot */
|
| + movl ebp, (rsp) /* Keep the low 32 bits of rbp in that slot */
|
| + naclrestbp edi, r15 /* NaCl pseudo-op; presumably rbp = r15 + edi -- TODO confirm */
|
|
|
| .p2align 4
|
| -loop:
|
| - movntiq rax, %nacl: (r15,rdi)
|
| - movntiq rax, %nacl: 8 (r15,rdi)
|
| - movntiq rax, %nacl: 16 (r15,rdi)
|
| - movntiq rax, %nacl: 24 (r15,rdi)
|
| - movntiq rax, %nacl: 32 (r15,rdi)
|
| - movntiq rax, %nacl: 40 (r15,rdi)
|
| - movntiq rax, %nacl: 48 (r15,rdi)
|
| - movntiq rax, %nacl: 56 (r15,rdi)
|
| - movntiq rax, %nacl: 64 (r15,rdi)
|
| - movntiq rax, %nacl: 72 (r15,rdi)
|
| - movntiq rax, %nacl: 80 (r15,rdi)
|
| - movntiq rax, %nacl: 88 (r15,rdi)
|
| - movntiq rax, %nacl: 96 (r15,rdi)
|
| - movntiq rax, %nacl: 104 (r15,rdi)
|
| - movntiq rax, %nacl: 112 (r15,rdi)
|
| - movntiq rax, %nacl: 120 (r15,rdi)
|
| -
|
| - leal 128 (rdi), edi
|
| +.Lquadword_aligned_loop: /* Each pass stores 16 quadwords (128 bytes) */
|
| +
|
| + movq rax, (rbp) /* rax = fill byte replicated into all 8 bytes */
|
| + movq rax, 8 (rbp)
|
| + movq rax, 16 (rbp)
|
| + movq rax, 24 (rbp)
|
| + movq rax, 32 (rbp)
|
| + movq rax, 40 (rbp)
|
| + movq rax, 48 (rbp)
|
| + movq rax, 56 (rbp)
|
| + movq rax, 64 (rbp)
|
| + movq rax, 72 (rbp)
|
| + movq rax, 80 (rbp)
|
| + movq rax, 88 (rbp)
|
| + movq rax, 96 (rbp)
|
| + movq rax, 104 (rbp)
|
| + movq rax, 112 (rbp)
|
| + movq rax, 120 (rbp)
|
| +
|
| + .bundle_lock /* Keep the two-instruction rbp update in one bundle */
|
| + leal 128 (rbp), ebp /* ebp = low 32 bits of rbp + 128 */
|
| + add r15, rbp /* Add r15 back to form the next store base */
|
| + .bundle_unlock
|
|
|
| dec ecx
|
| - jnz loop
|
| + jnz .Lquadword_aligned_loop /* ecx counts 128-byte chunks left */
|
| +
|
| + movl ebp, edi /* Carry the 32-bit position back into edi */
|
| + popq r8 /* Fetch the ebp value saved before the loop */
|
| + naclrestbp r8d, r15 /* Put the saved frame pointer back in rbp */
|
|
|
| - sfence
|
| movl edx, ecx
|
| andl $127, ecx
|
| - rep stosb al, %nacl:(rdi), r15
|
| + jz .Lfooter_loop_end /* Count was a multiple of 128: no tail bytes */
|
| +.Lfooter_loop: /* Store the remaining 1..127 bytes one at a time */
|
| + movb al, %nacl:(r15, rdi) /* al is still the fill byte (low byte of rax) */
|
| + inc edi /* 32-bit increment zero-extends into rdi */
|
| + dec ecx
|
| + jnz .Lfooter_loop /* Loop until the tail count reaches zero */
|
| +.Lfooter_loop_end: /* Fall through to the return sequence */
|
| movl r9d, eax
|
| pop r11
|
| nacljmp r11d, r15
|
|
|
|
|
| -byte_set:
|
| - rep stosb al, %nacl:(rdi), r15
|
| +.Lbyte_set: /* Reached with ecx = byte count, which is below 16 */
|
| + testl ecx, ecx /* The loop below runs at least once, so test for zero */
|
| + jz .Lbyte_set_end /* Zero-length fill: store nothing */
|
| +.Lbyte_set_loop: /* Store ecx bytes one at a time */
|
| + movb al, %nacl:(r15, rdi) /* al = fill byte (eax was loaded from esi) */
|
| + inc edi /* 32-bit increment zero-extends into rdi */
|
| + dec ecx
|
| + jnz .Lbyte_set_loop /* Loop until the count reaches zero */
|
| +.Lbyte_set_end: /* Fall through to the return sequence */
|
| movl r9d, eax
|
| pop r11
|
| nacljmp r11d, r15
|
|
|
|
|
| -quadword_set:
|
| +.Lquadword_set: /* Reached with ecx = edx = bytes left, below 256 */
|
| shrl $3, ecx
|
| + jz .Lquadword_loop_end /* No whole quadwords to store */
|
| .p2align 4
|
| - rep stosq rax, %nacl:(rdi), r15
|
| +.Lquadword_loop: /* Store ecx quadwords of the replicated fill byte */
|
| + movq rax, %nacl:(r15, rdi) /* rax = fill byte replicated into all 8 bytes */
|
| + add $8, edi /* Advance by one quadword; 32-bit write zero-extends */
|
| + dec ecx
|
| + jnz .Lquadword_loop /* Loop until the quadword count reaches zero */
|
| +.Lquadword_loop_end: /* Next: the tail that does not fill a quadword */
|
| movl edx, ecx
|
| andl $7, ecx
|
| - rep stosb al, %nacl:(rdi), r15 /* Store the remaining bytes */
|
| + jz .Lquadword_footer_end /* Count was a multiple of 8: no tail bytes */
|
| +.Lquadword_footer: /* Store the remaining 1..7 bytes one at a time */
|
| + movb al, %nacl:(r15, rdi) /* al is the low byte of the replicated pattern */
|
| + inc edi /* 32-bit increment zero-extends into rdi */
|
| + dec ecx
|
| + jnz .Lquadword_footer /* Loop until the tail count reaches zero */
|
| +.Lquadword_footer_end: /* Fall through to the return sequence */
|
| movl r9d, eax
|
| pop r11
|
| nacljmp r11d, r15
|
|
|