Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: newlib/libc/machine/x86_64/memset.S

Issue 957063002: Avoid string instructions in x86_64 memset (Closed) Base URL: https://chromium.googlesource.com/native_client/nacl-newlib.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * ==================================================== 2 * ====================================================
3 * Copyright (C) 2007 by Ellips BV. All rights reserved. 3 * Copyright (C) 2007 by Ellips BV. All rights reserved.
4 * 4 *
5 * Permission to use, copy, modify, and distribute this 5 * Permission to use, copy, modify, and distribute this
6 * software is freely granted, provided that this notice 6 * software is freely granted, provided that this notice
7 * is preserved. 7 * is preserved.
8 * ==================================================== 8 * ====================================================
9 */ 9 */
10 10
11 #include "x86_64mach.h" 11 #include "x86_64mach.h"
12 12
13 .global SYM (memset) 13 .global SYM (memset)
14 SOTYPE_FUNCTION(memset) 14 SOTYPE_FUNCTION(memset)
15 15
16 /*
17 * Avoid revealing the sandbox base address. In particular, we never read from
18 * r11, and avoid using the string instructions (stos) because they leave the
19 * full 64 bits in rdi.
20 */
21
16 SYM (memset): 22 SYM (memset):
17 movl edi, r9d /* Save return value */ 23 movl edi, r9d /* Save return value */
18 movl esi, eax 24 movl esi, eax
19 movl edx, ecx 25 movl edx, ecx
20 cmpl $16, edx 26 cmpl $16, edx
21 jb byte_set 27 jb .Lbyte_set
22 28
23 movl edi, r8d /* Align on quad word boundary */ 29 movl edi, r8d /* Align on quad word boundary */
24 andl $7, r8d 30 andl $7, r8d
25 jz quadword_aligned 31 jz .Lquadword_aligned
26 movl $8, ecx 32 movl $8, ecx
27 subl r8d, ecx 33 subl r8d, ecx
28 subl ecx, edx 34 subl ecx, edx
29 rep stosb al, %nacl:(rdi), r15 35 .Lheader_loop:
36 movb al, %nacl:(r15, rdi)
37 inc edi
38 dec ecx
39 jnz .Lheader_loop
40
30 movl edx, ecx 41 movl edx, ecx
31 42
32 quadword_aligned: 43 .Lquadword_aligned:
33 movabs $0x0101010101010101, r8 44 movabs $0x0101010101010101, r8
34 movzbl sil, eax 45 movzbl sil, eax
35 imul r8, rax 46 imul r8, rax
36 cmpl $256, edx 47 cmpl $256, edx
37 jb quadword_set 48 jb .Lquadword_set
38 49
39 shrl $7, ecx /* Store 128 bytes at a time with minimum cache pollution */ 50 shrl $7, ecx /* Store 128 bytes at a time */
40 51
52 pushq $0
53 movl ebp, (rsp)
41 .p2align 4 54 .p2align 4
42 loop: 55 .Lquadword_aligned_loop:
43 movntiq rax, %nacl: (r15,rdi) 56 naclrestbp edi, r15
44 movntiq rax, %nacl: 8 (r15,rdi) 57 movq rax, (rbp)
45 movntiq rax, %nacl: 16 (r15,rdi) 58 movq rax, 8 (rbp)
46 movntiq rax, %nacl: 24 (r15,rdi) 59 movq rax, 16 (rbp)
47 movntiq rax, %nacl: 32 (r15,rdi) 60 movq rax, 24 (rbp)
48 movntiq rax, %nacl: 40 (r15,rdi) 61 movq rax, 32 (rbp)
49 movntiq rax, %nacl: 48 (r15,rdi) 62 movq rax, 40 (rbp)
50 movntiq rax, %nacl: 56 (r15,rdi) 63 movq rax, 48 (rbp)
51 movntiq rax, %nacl: 64 (r15,rdi) 64 movq rax, 56 (rbp)
52 movntiq rax, %nacl: 72 (r15,rdi) 65 movq rax, 64 (rbp)
53 movntiq rax, %nacl: 80 (r15,rdi) 66 movq rax, 72 (rbp)
54 movntiq rax, %nacl: 88 (r15,rdi) 67 movq rax, 80 (rbp)
55 movntiq rax, %nacl: 96 (r15,rdi) 68 movq rax, 88 (rbp)
56 movntiq rax, %nacl: 104 (r15,rdi) 69 movq rax, 96 (rbp)
57 movntiq rax, %nacl: 112 (r15,rdi) 70 movq rax, 104 (rbp)
58 movntiq rax, %nacl: 120 (r15,rdi) 71 movq rax, 112 (rbp)
72 movq rax, 120 (rbp)
59 73
60 leal 128 (rdi), edi 74 leal 128 (rdi), edi
jvoung (off chromium) 2015/02/27 20:33:52 Might have been able to add to ebp and then restor
Derek Schuff 2015/02/27 21:03:19 I like it. Done.
61 75
62 dec ecx 76 dec ecx
63 jnz loop 77 jnz .Lquadword_aligned_loop
64 78
65 sfence 79 popq r8
80 naclrestbp r8d, r15
81
66 movl edx, ecx 82 movl edx, ecx
67 andl $127, ecx 83 andl $127, ecx
68 rep stosb al, %nacl:(rdi), r15 84 jz .Lfooter_loop_end
85 .Lfooter_loop:
86 movb al, %nacl:(r15, rdi)
87 inc edi
88 dec ecx
89 jnz .Lfooter_loop
90 .Lfooter_loop_end:
69 movl r9d, eax 91 movl r9d, eax
70 pop r11 92 pop r11
71 nacljmp r11d, r15 93 nacljmp r11d, r15
72 94
73 95
74 byte_set: 96 .Lbyte_set:
75 rep stosb al, %nacl:(rdi), r15 97 testl ecx, ecx
98 jz .Lbyte_set_end
99 .Lbyte_set_loop:
100 movb al, %nacl:(r15, rdi)
101 inc edi
102 dec ecx
103 jnz .Lbyte_set_loop
104 .Lbyte_set_end:
76 movl r9d, eax 105 movl r9d, eax
77 pop r11 106 pop r11
78 nacljmp r11d, r15 107 nacljmp r11d, r15
79 108
80 109
81 quadword_set: 110 .Lquadword_set:
82 shrl $3, ecx 111 shrl $3, ecx
112 jz .Lquadword_loop_end
83 .p2align 4 113 .p2align 4
84 rep stosq rax, %nacl:(rdi), r15 114 .Lquadword_loop:
115 movq rax, %nacl:(r15, rdi)
116 add $8, edi
117 dec ecx
118 jnz .Lquadword_loop
119 .Lquadword_loop_end:
85 movl edx, ecx 120 movl edx, ecx
86 andl $7, ecx 121 andl $7, ecx
87 rep stosb al, %nacl:(rdi), r15 /* Store the remaining bytes */ 122 jz .Lquadword_footer_end
123 .Lquadword_footer:
124 movb al, %nacl:(r15, rdi)
125 inc edi
126 dec ecx
127 jnz .Lquadword_footer
128 .Lquadword_footer_end:
88 movl r9d, eax 129 movl r9d, eax
89 pop r11 130 pop r11
90 nacljmp r11d, r15 131 nacljmp r11d, r15
91 132
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698