OLD | NEW |
---|---|
1 /* | 1 /* |
2 * ==================================================== | 2 * ==================================================== |
3 * Copyright (C) 2007 by Ellips BV. All rights reserved. | 3 * Copyright (C) 2007 by Ellips BV. All rights reserved. |
4 * | 4 * |
5 * Permission to use, copy, modify, and distribute this | 5 * Permission to use, copy, modify, and distribute this |
6 * software is freely granted, provided that this notice | 6 * software is freely granted, provided that this notice |
7 * is preserved. | 7 * is preserved. |
8 * ==================================================== | 8 * ==================================================== |
9 */ | 9 */ |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... | |
33 jb quadword_copy | 33 jb quadword_copy |
34 | 34 |
35 pushq rax | 35 pushq rax |
36 pushq r12 | 36 pushq r12 |
37 pushq r13 | 37 pushq r13 |
38 pushq r14 | 38 pushq r14 |
39 | 39 |
40 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ | 40 movl edx, ecx /* Copy 128 bytes at a time with minimum cache pollution */ |
41 shrl $7, ecx | 41 shrl $7, ecx |
42 | 42 |
43 /* Avoid revealing the sandbox base address. | |
44 * In particular this means that we don't do the following: | |
45 * movq 32(r15,rsi), r11 | |
46 * ... | |
47 * movq r11, %nacl:32(r15,rdi) | |
48 * because the latter instruction might be reached via a direct or | |
49 * indirect jump when r11 contains the sandbox base address in its | |
50 * top 32 bits, and this would write the sandbox base address into | |
51 * memory. We treat r11 as a write-only register to avoid | |
52 * revealing the sandbox base address to user code. | |
53 * Instead, we spill rdx and use that. | |
54 */ | |
55 pushq rdx | |
43 .p2align 4 | 56 .p2align 4 |
44 loop: | 57 loop: |
45 prefetchnta 768 (r15,rsi) | 58 prefetchnta 768 (r15,rsi) |
46 prefetchnta 832 (r15,rsi) | 59 prefetchnta 832 (r15,rsi) |
47 | 60 |
48 movq %nacl: (r15,rsi), rax | 61 movq %nacl: (r15,rsi), rax |
Mark Seaborn
2015/02/11 02:23:45
Aside: if we want to optimise this, we could use r
Derek Schuff
2015/02/11 23:16:06
Done.
| |
49 movq %nacl: 8 (r15,rsi), r8 | 62 movq %nacl: 8 (r15,rsi), r8 |
50 movq %nacl: 16 (r15,rsi), r9 | 63 movq %nacl: 16 (r15,rsi), r9 |
51 movq %nacl: 24 (r15,rsi), r10 | 64 movq %nacl: 24 (r15,rsi), r10 |
52 movq %nacl: 32 (r15,rsi), r11 | 65 movq %nacl: 32 (r15,rsi), rdx |
53 movq %nacl: 40 (r15,rsi), r12 | 66 movq %nacl: 40 (r15,rsi), r12 |
54 movq %nacl: 48 (r15,rsi), r13 | 67 movq %nacl: 48 (r15,rsi), r13 |
55 movq %nacl: 56 (r15,rsi), r14 | 68 movq %nacl: 56 (r15,rsi), r14 |
56 | 69 |
57 movntiq rax, %nacl: (r15,rdi) | 70 movntiq rax, %nacl: (r15,rdi) |
58 movntiq r8 , %nacl: 8 (r15,rdi) | 71 movntiq r8 , %nacl: 8 (r15,rdi) |
59 movntiq r9 , %nacl: 16 (r15,rdi) | 72 movntiq r9 , %nacl: 16 (r15,rdi) |
60 movntiq r10, %nacl: 24 (r15,rdi) | 73 movntiq r10, %nacl: 24 (r15,rdi) |
61 movntiq r11, %nacl: 32 (r15,rdi) | 74 movntiq rdx, %nacl: 32 (r15,rdi) |
62 movntiq r12, %nacl: 40 (r15,rdi) | 75 movntiq r12, %nacl: 40 (r15,rdi) |
63 movntiq r13, %nacl: 48 (r15,rdi) | 76 movntiq r13, %nacl: 48 (r15,rdi) |
64 movntiq r14, %nacl: 56 (r15,rdi) | 77 movntiq r14, %nacl: 56 (r15,rdi) |
65 | 78 |
66 movq %nacl: 64 (r15,rsi), rax | 79 movq %nacl: 64 (r15,rsi), rax |
67 movq %nacl: 72 (r15,rsi), r8 | 80 movq %nacl: 72 (r15,rsi), r8 |
68 movq %nacl: 80 (r15,rsi), r9 | 81 movq %nacl: 80 (r15,rsi), r9 |
69 movq %nacl: 88 (r15,rsi), r10 | 82 movq %nacl: 88 (r15,rsi), r10 |
70 movq %nacl: 96 (r15,rsi), r11 | 83 movq %nacl: 96 (r15,rsi), rdx |
71 movq %nacl: 104 (r15,rsi), r12 | 84 movq %nacl: 104 (r15,rsi), r12 |
72 movq %nacl: 112 (r15,rsi), r13 | 85 movq %nacl: 112 (r15,rsi), r13 |
73 movq %nacl: 120 (r15,rsi), r14 | 86 movq %nacl: 120 (r15,rsi), r14 |
74 | 87 |
75 movntiq rax, %nacl: 64 (r15,rdi) | 88 movntiq rax, %nacl: 64 (r15,rdi) |
76 movntiq r8 , %nacl: 72 (r15,rdi) | 89 movntiq r8 , %nacl: 72 (r15,rdi) |
77 movntiq r9 , %nacl: 80 (r15,rdi) | 90 movntiq r9 , %nacl: 80 (r15,rdi) |
78 movntiq r10, %nacl: 88 (r15,rdi) | 91 movntiq r10, %nacl: 88 (r15,rdi) |
79 movntiq r11, %nacl: 96 (r15,rdi) | 92 movntiq rdx, %nacl: 96 (r15,rdi) |
80 movntiq r12, %nacl: 104 (r15,rdi) | 93 movntiq r12, %nacl: 104 (r15,rdi) |
81 movntiq r13, %nacl: 112 (r15,rdi) | 94 movntiq r13, %nacl: 112 (r15,rdi) |
82 movntiq r14, %nacl: 120 (r15,rdi) | 95 movntiq r14, %nacl: 120 (r15,rdi) |
83 | 96 |
84 leal 128 (rsi), esi | 97 leal 128 (rsi), esi |
85 leal 128 (rdi), edi | 98 leal 128 (rdi), edi |
86 | 99 |
87 dec ecx | 100 dec ecx |
88 jnz loop | 101 jnz loop |
89 | 102 |
90 sfence | 103 sfence |
91 movl edx, ecx | 104 popq rcx |
92 andl $127, ecx | 105 andl $127, ecx |
93 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 106 rep movsb %nacl:(rsi), %nacl:(rdi), r15 |
94 popq r14 | 107 popq r14 |
95 popq r13 | 108 popq r13 |
96 popq r12 | 109 popq r12 |
97 popq rax | 110 popq rax |
98 pop r11 | 111 pop r11 |
99 nacljmp r11d, r15 | 112 nacljmp r11d, r15 |
100 | 113 |
101 | 114 |
102 byte_copy: | 115 byte_copy: |
103 movl edx, ecx | 116 movl edx, ecx |
104 rep movsb %nacl:(rsi), %nacl:(rdi), r15 | 117 rep movsb %nacl:(rsi), %nacl:(rdi), r15 |
Mark Seaborn
2015/02/11 02:23:45
This leaves base-address-extended addresses in reg
Derek Schuff
2015/02/11 18:19:11
Would it be sufficient just to clear rsi and rdi a
Derek Schuff
2015/02/11 18:36:00
...except I guess if the movsb causes a fault, the
Mark Seaborn
2015/02/11 19:21:51
In principle, yes. As you say, the instructions t
Derek Schuff
2015/02/11 23:16:06
OK, as we discussed I just made them all explicit
| |
105 pop r11 | 118 pop r11 |
106 nacljmp r11d, r15 | 119 nacljmp r11d, r15 |
107 | 120 |
108 | 121 |
109 quadword_copy: | 122 quadword_copy: |
110 movl edx, ecx | 123 movl edx, ecx |
111 shrl $3, ecx | 124 shrl $3, ecx |
112 .p2align 4 | 125 .p2align 4 |
113 rep movsq %nacl:(rsi), %nacl:(rdi), r15 | 126 rep movsq %nacl:(rsi), %nacl:(rdi), r15 |
114 movl edx, ecx | 127 movl edx, ecx |
115 andl $7, ecx | 128 andl $7, ecx |
116 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ | 129 rep movsb %nacl:(rsi), %nacl:(rdi), r15 /* Copy the remaining bytes */ |
117 pop r11 | 130 pop r11 |
118 nacljmp r11d, r15 | 131 nacljmp r11d, r15 |
OLD | NEW |