OLD | NEW |
---|---|
1 /* | 1 /* |
2 * ==================================================== | 2 * ==================================================== |
3 * Copyright (C) 2007 by Ellips BV. All rights reserved. | 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. |
4 * | 4 * |
5 * Permission to use, copy, modify, and distribute this | 5 * Permission to use, copy, modify, and distribute this |
6 * software is freely granted, provided that this notice | 6 * software is freely granted, provided that this notice |
7 * is preserved. | 7 * is preserved. |
8 * ==================================================== | 8 * ==================================================== |
9 */ | 9 */ |
10 | 10 |
11 #include "x86_64mach.h" | 11 #include "x86_64mach.h" |
12 | 12 |
13 .global SYM (memcpy) | 13 .global SYM (memcpy) |
14 SOTYPE_FUNCTION(memcpy) | 14 SOTYPE_FUNCTION(memcpy) |
15 | 15 |
16 SYM (memcpy): | 16 SYM (memcpy): |
17 movl edi, eax /* Store destination in return value */ | 17 movl edi, eax /* Store destination in return value */ |
18 cmpl $16, edx | 18 cmpl $16, edx |
19 jb byte_copy | 19 jb .Lbyte_copy |
Mark Seaborn
2015/02/18 20:03:38
FWIW, they already *are* local as long as there's
Derek Schuff
2015/02/18 20:47:40
OK, I guess technically the effect that I wanted i
| |
20 | 20 |
21 movl edi, r8d /* Align destination on quad word boundary */ | 21 movl edi, r8d /* Align destination on quad word boundary */ |
22 andl $7, r8d | 22 andl $7, r8d |
23 jz quadword_aligned | 23 jz .Lquadword_aligned |
24 movl $8, ecx | 24 movl $8, ecx |
25 subl r8d, ecx | 25 subl r8d, ecx |
26 subl ecx, edx | 26 subl ecx, edx |
27 | 27 |
28 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 28 .Lheader_loop: |
29 movb %nacl:(r15, rsi), r8b | |
30 inc esi | |
31 movb r8b, %nacl:(r15, rdi) | |
32 inc edi | |
33 dec ecx | |
34 jnz .Lheader_loop | |
29 | 35 |
30 quadword_aligned: | 36 .Lquadword_aligned: |
31 movl esi, esi /* We must clear top half for prefetch */ | |
32 cmpl $256, edx | 37 cmpl $256, edx |
33 jb quadword_copy | 38 jb .Lquadword_copy |
34 | 39 |
35 pushq rax | 40 pushq rax |
36 pushq r12 | 41 pushq r12 |
37 pushq r13 | 42 pushq r13 |
38 pushq r14 | 43 pushq r14 |
39 | 44 |
40 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ | 45 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ |
41 shrl $7, ecx | 46 shrl $7, ecx |
42 | 47 |
48 /* Avoid revealing the sandbox base address. | |
Mark Seaborn
2015/02/18 20:03:38
Nit: Should this use the NaCl style for multiline
Derek Schuff
2015/02/18 20:47:40
Done.
| |
49 * In particular this means that we don't do the following: | |
50 * movq 32(r15,rsi), r11 | |
51 * ... | |
52 * movq r11, %nacl:32(r15,rdi) | |
53 * because the latter instruction might be reached via a direct or | |
54 * indirect jump when r11 contains the sandbox base address in its | |
55 * top 32 bits, and this would write the sandbox base address into | |
56 * memory. We treat r11 as a write-only register to avoid | |
57 * revealing the sandbox base address to user code. | |
58 * Instead, we spill rdx and use that. Additionally, we avoid string | |
59 * instructions (movs) because they leave the full 64 bits in rsi/rdi. | |
60 */ | |
61 pushq $0 | |
62 movl ebp, (rsp) | |
63 pushq rdx | |
Mark Seaborn
2015/02/18 20:03:38
Nit: fix operand's indentation alignment
Derek Schuff
2015/02/18 20:47:40
Done.
| |
43 .p2align 4 | 64 .p2align 4 |
44 loop: | |
45 prefetchnta 768 (r15,rsi) | |
46 prefetchnta 832 (r15,rsi) | |
47 | 65 |
48 movq %nacl: (r15,rsi), rax | 66 .Lloop: |
49 movq %nacl: 8 (r15,rsi), r8 | 67 naclrestbp esi, r15 |
50 movq %nacl: 16 (r15,rsi), r9 | 68 movq (rbp), rax |
51 movq %nacl: 24 (r15,rsi), r10 | 69 movq 8 (rbp), r8 |
52 movq %nacl: 32 (r15,rsi), r11 | 70 movq 16 (rbp), r9 |
53 movq %nacl: 40 (r15,rsi), r12 | 71 movq 24 (rbp), r10 |
54 movq %nacl: 48 (r15,rsi), r13 | 72 movq 32 (rbp), rdx |
55 movq %nacl: 56 (r15,rsi), r14 | 73 movq 40 (rbp), r12 |
74 movq 48 (rbp), r13 | |
75 movq 56 (rbp), r14 | |
56 | 76 |
57 movntiq rax, %nacl: (r15,rdi) | 77 naclrestbp edi, r15 |
58 movntiq r8 , %nacl: 8 (r15,rdi) | 78 movq rax, (rbp) |
59 movntiq r9 , %nacl: 16 (r15,rdi) | 79 movq r8 , 8 (rbp) |
60 movntiq r10, %nacl: 24 (r15,rdi) | 80 movq r9 , 16 (rbp) |
61 movntiq r11, %nacl: 32 (r15,rdi) | 81 movq r10, 24 (rbp) |
62 movntiq r12, %nacl: 40 (r15,rdi) | 82 movq rdx, 32 (rbp) |
63 movntiq r13, %nacl: 48 (r15,rdi) | 83 movq r12, 40 (rbp) |
64 movntiq r14, %nacl: 56 (r15,rdi) | 84 movq r13, 48 (rbp) |
85 movq r14, 56 (rbp) | |
65 | 86 |
66 movq %nacl: 64 (r15,rsi), rax | 87 naclrestbp esi, r15 |
67 movq %nacl: 72 (r15,rsi), r8 | 88 movq 64 (rbp), rax |
68 movq %nacl: 80 (r15,rsi), r9 | 89 movq 72 (rbp), r8 |
69 movq %nacl: 88 (r15,rsi), r10 | 90 movq 80 (rbp), r9 |
70 movq %nacl: 96 (r15,rsi), r11 | 91 movq 88 (rbp), r10 |
71 movq %nacl: 104 (r15,rsi), r12 | 92 movq 96 (rbp), rdx |
72 movq %nacl: 112 (r15,rsi), r13 | 93 movq 104 (rbp), r12 |
73 movq %nacl: 120 (r15,rsi), r14 | 94 movq 112 (rbp), r13 |
95 movq 120 (rbp), r14 | |
74 | 96 |
75 movntiq rax, %nacl: 64 (r15,rdi) | 97 naclrestbp edi, r15 |
76 movntiq r8 , %nacl: 72 (r15,rdi) | 98 movq rax, 64 (rbp) |
77 movntiq r9 , %nacl: 80 (r15,rdi) | 99 movq r8 , 72 (rbp) |
78 movntiq r10, %nacl: 88 (r15,rdi) | 100 movq r9 , 80 (rbp) |
79 movntiq r11, %nacl: 96 (r15,rdi) | 101 movq r10, 88 (rbp) |
80 movntiq r12, %nacl: 104 (r15,rdi) | 102 movq rdx, 96 (rbp) |
81 movntiq r13, %nacl: 112 (r15,rdi) | 103 movq r12, 104 (rbp) |
82 movntiq r14, %nacl: 120 (r15,rdi) | 104 movq r13, 112 (rbp) |
105 movq r14, 120 (rbp) | |
83 | 106 |
84 leal 128 (rsi), esi | 107 leal 128 (rsi), esi |
85 leal 128 (rdi), edi | 108 leal 128 (rdi), edi |
86 | 109 |
87 dec ecx | 110 dec ecx |
88 jnz loop | 111 jnz .Lloop |
89 | 112 |
90 sfence | 113 popq rcx |
Mark Seaborn
2015/02/18 20:03:38
Please note the removal of this sfence in the comm
Derek Schuff
2015/02/18 20:47:40
correct, and done.
|
Mark Seaborn
2015/02/18 20:03:38
Earlier you push rdx but here you pop rcx. Not su
Derek Schuff
2015/02/18 20:47:40
This replaces line 91 of the original which just c
Derek Schuff
2015/02/18 20:49:24
(I forgot to add "and rdx is not used anymore in t
|
91 movl edx, ecx | 114 popq rax |
115 naclrestbp eax, r15 | |
92 andl $127, ecx | 116 andl $127, ecx |
93 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 117 jz .Lrep1_end |
118 .Lrep1: | |
119 movb %nacl:(r15, rsi), r8b | |
120 inc esi | |
121 movb r8b, %nacl:(r15, rdi) | |
122 inc edi | |
123 dec ecx | |
124 jnz .Lrep1 | |
125 .Lrep1_end: | |
94 popq r14 | 126 popq r14 |
95 popq r13 | 127 popq r13 |
96 popq r12 | 128 popq r12 |
97 popq rax | 129 popq rax |
98 pop r11 | 130 pop r11 |
99 nacljmp r11d, r15 | 131 nacljmp r11d, r15 |
100 | 132 |
101 | 133 |
102 byte_copy: | 134 .Lbyte_copy: |
103 movl edx, ecx | 135 testl edx, edx |
104 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 136 jz .Lbyte_copy_end |
137 .Lbyte_copy_loop: | |
138 movb %nacl:(r15, rsi), r8b | |
139 inc esi | |
140 movb r8b, %nacl:(r15, rdi) | |
141 inc edi | |
142 dec edx | |
143 jnz .Lbyte_copy_loop | |
144 .Lbyte_copy_end: | |
105 pop r11 | 145 pop r11 |
106 nacljmp r11d, r15 | 146 nacljmp r11d, r15 |
107 | 147 |
108 | 148 |
109 quadword_copy: | 149 .Lquadword_copy: |
110 movl edx, ecx | 150 movl edx, ecx |
111 shrl $3, ecx | 151 shrl $3, ecx |
152 jz .Lrep2_end | |
112 .p2align 4 | 153 .p2align 4 |
113 rep movsq %nacl:(rsi), %nacl:(rdi), r15 | 154 .Lrep2: |
155 movq %nacl:(r15, rsi), r8 | |
156 add $8, esi | |
157 movq r8, %nacl:(r15, rdi) | |
158 add $8, edi | |
159 dec ecx | |
160 jnz .Lrep2 | |
161 .Lrep2_end: | |
114 movl edx, ecx | 162 movl edx, ecx |
115 andl $7, ecx | 163 andl $7, ecx |
116 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ | 164 jz .Lrep3_end |
165 .Lrep3: | |
166 /* Copy the remaining bytes */ | |
167 movb %nacl:(r15, rsi), r8b | |
168 inc esi | |
169 movb r8b, %nacl:(r15, rdi) | |
170 inc edi | |
171 dec ecx | |
172 jnz .Lrep3 | |
173 .Lrep3_end: | |
117 pop r11 | 174 pop r11 |
118 nacljmp r11d, r15 | 175 nacljmp r11d, r15 |
OLD | NEW |