OLD | NEW |
1 /* | 1 /* |
2 * ==================================================== | 2 * ==================================================== |
3 * Copyright (C) 2007 by Ellips BV. All rights reserved. | 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. |
4 * | 4 * |
5 * Permission to use, copy, modify, and distribute this | 5 * Permission to use, copy, modify, and distribute this |
6 * software is freely granted, provided that this notice | 6 * software is freely granted, provided that this notice |
7 * is preserved. | 7 * is preserved. |
8 * ==================================================== | 8 * ==================================================== |
9 */ | 9 */ |
10 | 10 |
11 #include "x86_64mach.h" | 11 #include "x86_64mach.h" |
12 | 12 |
13 .global SYM (memcpy) | 13 .global SYM (memcpy) |
14 SOTYPE_FUNCTION(memcpy) | 14 SOTYPE_FUNCTION(memcpy) |
15 | 15 |
16 SYM (memcpy): | 16 SYM (memcpy): |
17 movl edi, eax /* Store destination in return value */ | 17 movl edi, eax /* Store destination in return value */ |
18 cmpl $16, edx | 18 cmpl $16, edx |
19 jb byte_copy | 19 jb .Lbyte_copy |
20 | 20 |
21 movl edi, r8d /* Align destination on quad word boundary */ | 21 movl edi, r8d /* Align destination on quad word boundary */ |
22 andl $7, r8d | 22 andl $7, r8d |
23 jz quadword_aligned | 23 jz .Lquadword_aligned |
24 movl $8, ecx | 24 movl $8, ecx |
25 subl r8d, ecx | 25 subl r8d, ecx |
26 subl ecx, edx | 26 subl ecx, edx |
27 | 27 |
28 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 28 .Lheader_loop: |
| 29 movb %nacl:(r15, rsi), r8b |
| 30 inc esi |
| 31 movb r8b, %nacl:(r15, rdi) |
| 32 inc edi |
| 33 dec ecx |
| 34 jnz .Lheader_loop |
29 | 35 |
30 quadword_aligned: | 36 .Lquadword_aligned: |
31 movl esi, esi /* We must clear top half for prefetch */ | |
32 cmpl $256, edx | 37 cmpl $256, edx |
33 jb quadword_copy | 38 jb .Lquadword_copy |
34 | 39 |
35 pushq rax | 40 pushq rax |
36 pushq r12 | 41 pushq r12 |
37 pushq r13 | 42 pushq r13 |
38 pushq r14 | 43 pushq r14 |
39 | 44 |
40      movl edx, ecx /* Copy 128 bytes at a time with minimum cache polution */ | 45      movl edx, ecx /* Copy 128 bytes at a time */ |
41 shrl $7, ecx | 46 shrl $7, ecx |
42 | 47 |
| 48 /* |
| 49 * Avoid revealing the sandbox base address. |
| 50 * In particular this means that we don't do the following: |
| 51 * movq 32(r15,rsi), r11 |
| 52 * ... |
| 53 * movq r11, %nacl:32(r15,rdi) |
| 54 * because the latter instruction might be reached via a direct or |
| 55 * indirect jump when r11 contains the sandbox base address in its |
| 56 * top 32 bits, and this would write the sandbox base address into |
| 57 * memory. We treat r11 as a write-only register to avoid |
| 58 * revealing the sandbox base address to user code. |
| 59 * Instead, we spill rdx and use that. Additionally, we avoid string |
| 60 * instructions (movs) because they leave the full 64 bits in rsi/rdi. |
| 61 */ |
| 62 pushq $0 |
| 63 movl ebp, (rsp) |
| 64 pushq rdx /* Save byte count */ |
43 .p2align 4 | 65 .p2align 4 |
44 loop: | |
45 prefetchnta 768 (r15,rsi) | |
46 prefetchnta 832 (r15,rsi) | |
47 | 66 |
48 movq %nacl: (r15,rsi), rax | 67 .Lloop: |
49 movq %nacl: 8 (r15,rsi), r8 | 68 naclrestbp esi, r15 |
50 movq %nacl: 16 (r15,rsi), r9 | 69 movq (rbp), rax |
51 movq %nacl: 24 (r15,rsi), r10 | 70 movq 8 (rbp), r8 |
52 movq %nacl: 32 (r15,rsi), r11 | 71 movq 16 (rbp), r9 |
53 movq %nacl: 40 (r15,rsi), r12 | 72 movq 24 (rbp), r10 |
54 movq %nacl: 48 (r15,rsi), r13 | 73 movq 32 (rbp), rdx |
55 movq %nacl: 56 (r15,rsi), r14 | 74 movq 40 (rbp), r12 |
| 75 movq 48 (rbp), r13 |
| 76 movq 56 (rbp), r14 |
56 | 77 |
57 movntiq rax, %nacl: (r15,rdi) | 78 naclrestbp edi, r15 |
58 movntiq r8 , %nacl: 8 (r15,rdi) | 79 movq rax, (rbp) |
59 movntiq r9 , %nacl: 16 (r15,rdi) | 80 movq r8 , 8 (rbp) |
60 movntiq r10, %nacl: 24 (r15,rdi) | 81 movq r9 , 16 (rbp) |
61 movntiq r11, %nacl: 32 (r15,rdi) | 82 movq r10, 24 (rbp) |
62 movntiq r12, %nacl: 40 (r15,rdi) | 83 movq rdx, 32 (rbp) |
63 movntiq r13, %nacl: 48 (r15,rdi) | 84 movq r12, 40 (rbp) |
64 movntiq r14, %nacl: 56 (r15,rdi) | 85 movq r13, 48 (rbp) |
| 86 movq r14, 56 (rbp) |
65 | 87 |
66 movq %nacl: 64 (r15,rsi), rax | 88 naclrestbp esi, r15 |
67 movq %nacl: 72 (r15,rsi), r8 | 89 movq 64 (rbp), rax |
68 movq %nacl: 80 (r15,rsi), r9 | 90 movq 72 (rbp), r8 |
69 movq %nacl: 88 (r15,rsi), r10 | 91 movq 80 (rbp), r9 |
70 movq %nacl: 96 (r15,rsi), r11 | 92 movq 88 (rbp), r10 |
71 movq %nacl: 104 (r15,rsi), r12 | 93 movq 96 (rbp), rdx |
72 movq %nacl: 112 (r15,rsi), r13 | 94 movq 104 (rbp), r12 |
73 movq %nacl: 120 (r15,rsi), r14 | 95 movq 112 (rbp), r13 |
| 96 movq 120 (rbp), r14 |
74 | 97 |
75 movntiq rax, %nacl: 64 (r15,rdi) | 98 naclrestbp edi, r15 |
76 movntiq r8 , %nacl: 72 (r15,rdi) | 99 movq rax, 64 (rbp) |
77 movntiq r9 , %nacl: 80 (r15,rdi) | 100 movq r8 , 72 (rbp) |
78 movntiq r10, %nacl: 88 (r15,rdi) | 101 movq r9 , 80 (rbp) |
79 movntiq r11, %nacl: 96 (r15,rdi) | 102 movq r10, 88 (rbp) |
80 movntiq r12, %nacl: 104 (r15,rdi) | 103 movq rdx, 96 (rbp) |
81 movntiq r13, %nacl: 112 (r15,rdi) | 104 movq r12, 104 (rbp) |
82 movntiq r14, %nacl: 120 (r15,rdi) | 105 movq r13, 112 (rbp) |
| 106 movq r14, 120 (rbp) |
83 | 107 |
84 leal 128 (rsi), esi | 108 leal 128 (rsi), esi |
85 leal 128 (rdi), edi | 109 leal 128 (rdi), edi |
86 | 110 |
87 dec ecx | 111 dec ecx |
88 jnz loop | 112 jnz .Lloop |
89 | 113 |
90 sfence | 114 popq rcx /* Restore byte count */ |
91 movl edx, ecx | 115 popq rax |
| 116 naclrestbp eax, r15 |
| 117 /* Copy the remaining bytes */ |
92 andl $127, ecx | 118 andl $127, ecx |
93 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 119 jz .Lrep1_end |
| 120 .Lrep1: |
| 121 movb %nacl:(r15, rsi), r8b |
| 122 inc esi |
| 123 movb r8b, %nacl:(r15, rdi) |
| 124 inc edi |
| 125 dec ecx |
| 126 jnz .Lrep1 |
| 127 .Lrep1_end: |
94 popq r14 | 128 popq r14 |
95 popq r13 | 129 popq r13 |
96 popq r12 | 130 popq r12 |
97 popq rax | 131 popq rax |
98 pop r11 | 132 pop r11 |
99 nacljmp r11d, r15 | 133 nacljmp r11d, r15 |
100 | 134 |
101 | 135 |
102 byte_copy: | 136 .Lbyte_copy: |
103 movl edx, ecx | 137 testl edx, edx |
104 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 138 jz .Lbyte_copy_end |
| 139 .Lbyte_copy_loop: |
| 140 movb %nacl:(r15, rsi), r8b |
| 141 inc esi |
| 142 movb r8b, %nacl:(r15, rdi) |
| 143 inc edi |
| 144 dec edx |
| 145 jnz .Lbyte_copy_loop |
| 146 .Lbyte_copy_end: |
105 pop r11 | 147 pop r11 |
106 nacljmp r11d, r15 | 148 nacljmp r11d, r15 |
107 | 149 |
108 | 150 |
109 quadword_copy: | 151 .Lquadword_copy: |
110 movl edx, ecx | 152 movl edx, ecx |
111 shrl $3, ecx | 153 shrl $3, ecx |
| 154 jz .Lrep2_end |
112 .p2align 4 | 155 .p2align 4 |
113 rep movsq %nacl:(rsi), %nacl:(rdi), r15 | 156 .Lrep2: |
114 movl edx, ecx | 157 movq %nacl:(r15, rsi), r8 |
115 andl $7, ecx | 158 add $8, esi |
116 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ | 159 movq r8, %nacl:(r15, rdi) |
| 160 add $8, edi |
| 161 dec ecx |
| 162 jnz .Lrep2 |
| 163 .Lrep2_end: |
| 164 andl $7, edx |
| 165 jz .Lrep3_end |
| 166 .Lrep3: |
| 167 /* Copy the remaining bytes */ |
| 168 movb %nacl:(r15, rsi), r8b |
| 169 inc esi |
| 170 movb r8b, %nacl:(r15, rdi) |
| 171 inc edi |
| 172 dec edx |
| 173 jnz .Lrep3 |
| 174 .Lrep3_end: |
117 pop r11 | 175 pop r11 |
118 nacljmp r11d, r15 | 176 nacljmp r11d, r15 |
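For orientation, here is a rough C-level sketch of the copy strategy the rewritten assembly follows: a bytewise copy for fewer than 16 bytes, alignment of the destination to an 8-byte boundary, 128-byte blocks moved through general-purpose registers for large copies, 8-byte words otherwise, and a bytewise tail. The function name memcpy_sketch and the plain C loops are illustrative assumptions only; the sketch deliberately ignores the NaCl constraints (r15-relative addressing, keeping the sandbox base out of any register that is written to memory) that the real assembly exists to satisfy.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Sketch of the control flow only; this is not the NaCl routine and
 * observes none of its sandboxing rules.
 */
void *memcpy_sketch(void *dst, const void *src, size_t n)
{
    unsigned char *d = dst;
    const unsigned char *s = src;
    void *ret = dst;                     /* return value is the destination */

    if (n < 16) {                        /* .Lbyte_copy: short copies, bytewise */
        while (n--)
            *d++ = *s++;
        return ret;
    }

    /* .Lheader_loop: align the destination on an 8-byte boundary */
    size_t head = (8 - ((uintptr_t)d & 7)) & 7;
    n -= head;
    while (head--)
        *d++ = *s++;

    if (n >= 256) {                      /* .Lloop: 128-byte blocks */
        size_t blocks = n >> 7;
        n &= 127;
        while (blocks--) {
            for (int i = 0; i < 16; i++) {   /* sixteen 8-byte moves per block */
                uint64_t w;
                memcpy(&w, s, 8);
                memcpy(d, &w, 8);
                s += 8;
                d += 8;
            }
        }
    } else {                             /* .Lquadword_copy: 8-byte moves */
        size_t quads = n >> 3;
        n &= 7;
        while (quads--) {
            uint64_t w;
            memcpy(&w, s, 8);
            memcpy(d, &w, 8);
            s += 8;
            d += 8;
        }
    }

    while (n--)                          /* .Lrep1 / .Lrep3: remaining tail bytes */
        *d++ = *s++;
    return ret;
}

Each branch corresponds to one of the labels in the new code (.Lbyte_copy, .Lheader_loop, .Lloop with its .Lrep1 tail, and .Lquadword_copy with its .Lrep3 tail); the assembly additionally reloads rbp via naclrestbp before every group of loads or stores so that all memory operands stay inside the sandbox.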