Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(477)

Side by Side Diff: newlib/libc/machine/x86_64/memset.S

Issue 957063002: Avoid string instructions in x86_64 memset (Closed) Base URL: https://chromium.googlesource.com/native_client/nacl-newlib.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * ==================================================== 2 * ====================================================
3 * Copyright (C) 2007 by Ellips BV. All rights reserved. 3 * Copyright (C) 2007 by Ellips BV. All rights reserved.
4 * 4 *
5 * Permission to use, copy, modify, and distribute this 5 * Permission to use, copy, modify, and distribute this
6 * software is freely granted, provided that this notice 6 * software is freely granted, provided that this notice
7 * is preserved. 7 * is preserved.
8 * ==================================================== 8 * ====================================================
9 */ 9 */
10 10
11 #include "x86_64mach.h" 11 #include "x86_64mach.h"
12 12
13 .global SYM (memset) 13 .global SYM (memset)
14 SOTYPE_FUNCTION(memset) 14 SOTYPE_FUNCTION(memset)
15 15
16 /*
17 * Avoid revealing the sandbox base address. In particular, we never read from
18 * r11, and avoid using the string instructions (stos) because they leave the
19 * full 64 bits in rdi.
20 */
21
16 SYM (memset): 22 SYM (memset):
17 movl edi, r9d /* Save return value */ 23 movl edi, r9d /* Save return value */
18 movl esi, eax 24 movl esi, eax
19 movl edx, ecx 25 movl edx, ecx
20 cmpl $16, edx 26 cmpl $16, edx
21 jb byte_set 27 jb .Lbyte_set
22 28
23 movl edi, r8d /* Align on quad word boundary */ 29 movl edi, r8d /* Align on quad word boundary */
24 andl $7, r8d 30 andl $7, r8d
25 jz quadword_aligned 31 jz .Lquadword_aligned
26 movl $8, ecx 32 movl $8, ecx
27 subl r8d, ecx 33 subl r8d, ecx
28 subl ecx, edx 34 subl ecx, edx
29 rep stosb al, %nacl:(rdi), r15 35 .Lheader_loop:
36 movb al, %nacl:(r15, rdi)
37 inc edi
38 dec ecx
39 jnz .Lheader_loop
40
30 movl edx, ecx 41 movl edx, ecx
31 42
32 quadword_aligned: 43 .Lquadword_aligned:
33 movabs $0x0101010101010101, r8 44 movabs $0x0101010101010101, r8
34 movzbl sil, eax 45 movzbl sil, eax
35 imul r8, rax 46 imul r8, rax
36 cmpl $256, edx 47 cmpl $256, edx
37 jb quadword_set 48 jb .Lquadword_set
38 49
39 shrl $7, ecx /* Store 128 bytes at a time with minimum cache pollution */ 50 shrl $7, ecx /* Store 128 bytes at a time */
51
52 /* Save the frame pointer and use it as a base to avoid repeated masking */
53 pushq $0
54 movl ebp, (rsp)
55 naclrestbp edi, r15
40 56
41 .p2align 4 57 .p2align 4
42 loop: 58 .Lquadword_aligned_loop:
43 movntiq rax, %nacl: (r15,rdi)
44 movntiq rax, %nacl: 8 (r15,rdi)
45 movntiq rax, %nacl: 16 (r15,rdi)
46 movntiq rax, %nacl: 24 (r15,rdi)
47 movntiq rax, %nacl: 32 (r15,rdi)
48 movntiq rax, %nacl: 40 (r15,rdi)
49 movntiq rax, %nacl: 48 (r15,rdi)
50 movntiq rax, %nacl: 56 (r15,rdi)
51 movntiq rax, %nacl: 64 (r15,rdi)
52 movntiq rax, %nacl: 72 (r15,rdi)
53 movntiq rax, %nacl: 80 (r15,rdi)
54 movntiq rax, %nacl: 88 (r15,rdi)
55 movntiq rax, %nacl: 96 (r15,rdi)
56 movntiq rax, %nacl: 104 (r15,rdi)
57 movntiq rax, %nacl: 112 (r15,rdi)
58 movntiq rax, %nacl: 120 (r15,rdi)
59 59
60 leal 128 (rdi), edi 60 movq rax, (rbp)
61 movq rax, 8 (rbp)
62 movq rax, 16 (rbp)
63 movq rax, 24 (rbp)
64 movq rax, 32 (rbp)
65 movq rax, 40 (rbp)
66 movq rax, 48 (rbp)
67 movq rax, 56 (rbp)
68 movq rax, 64 (rbp)
69 movq rax, 72 (rbp)
70 movq rax, 80 (rbp)
71 movq rax, 88 (rbp)
72 movq rax, 96 (rbp)
73 movq rax, 104 (rbp)
74 movq rax, 112 (rbp)
75 movq rax, 120 (rbp)
76
77 .bundle_lock
78 leal 128 (rbp), ebp
79 add r15, rbp
80 .bundle_unlock
61 81
62 dec ecx 82 dec ecx
63 jnz loop 83 jnz .Lquadword_aligned_loop
64 84
65 sfence 85 movl ebp, edi
86 popq r8
87 naclrestbp r8d, r15
88
66 movl edx, ecx 89 movl edx, ecx
67 andl $127, ecx 90 andl $127, ecx
68 rep stosb al, %nacl:(rdi), r15 91 jz .Lfooter_loop_end
92 .Lfooter_loop:
93 movb al, %nacl:(r15, rdi)
94 inc edi
95 dec ecx
96 jnz .Lfooter_loop
97 .Lfooter_loop_end:
69 movl r9d, eax 98 movl r9d, eax
70 pop r11 99 pop r11
71 nacljmp r11d, r15 100 nacljmp r11d, r15
72 101
73 102
74 byte_set: 103 .Lbyte_set:
75 rep stosb al, %nacl:(rdi), r15 104 testl ecx, ecx
105 jz .Lbyte_set_end
106 .Lbyte_set_loop:
107 movb al, %nacl:(r15, rdi)
108 inc edi
109 dec ecx
110 jnz .Lbyte_set_loop
111 .Lbyte_set_end:
76 movl r9d, eax 112 movl r9d, eax
77 pop r11 113 pop r11
78 nacljmp r11d, r15 114 nacljmp r11d, r15
79 115
80 116
81 quadword_set: 117 .Lquadword_set:
82 shrl $3, ecx 118 shrl $3, ecx
119 jz .Lquadword_loop_end
83 .p2align 4 120 .p2align 4
84 rep stosq rax, %nacl:(rdi), r15 121 .Lquadword_loop:
122 movq rax, %nacl:(r15, rdi)
123 add $8, edi
124 dec ecx
125 jnz .Lquadword_loop
126 .Lquadword_loop_end:
85 movl edx, ecx 127 movl edx, ecx
86 andl $7, ecx 128 andl $7, ecx
87 rep stosb al, %nacl:(rdi), r15 /* Store the remaining bytes */ 129 jz .Lquadword_footer_end
130 .Lquadword_footer:
131 movb al, %nacl:(r15, rdi)
132 inc edi
133 dec ecx
134 jnz .Lquadword_footer
135 .Lquadword_footer_end:
88 movl r9d, eax 136 movl r9d, eax
89 pop r11 137 pop r11
90 nacljmp r11d, r15 138 nacljmp r11d, r15
91 139
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698