OLD | NEW |
| (Empty) |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox_impl.h" | |
6 #include "syscall_table.h" | |
7 | |
8 namespace playground { | |
9 | |
10 void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub, | |
11 SecureMem::Args* secureMem) { | |
12 SecureMem::Args args = { { { { { 0 } } } } }; | |
13 args.self = &args; | |
14 args.newSecureMem = secureMem; | |
15 args.processFdPub = processFdPub; | |
16 args.cloneFdPub = cloneFdPub; | |
17 #if defined(__x86_64__) | |
18 asm volatile( | |
19 "push %%rbx\n" | |
20 "push %%rbp\n" | |
21 "mov %0, %%rbp\n" // %rbp = args | |
22 "xor %%rbx, %%rbx\n" // initial sequence number | |
23 "lea 999f(%%rip), %%r15\n" // continue in same thread | |
24 | |
25 // Signal handlers are process-wide. This means that for security | |
26 // reasons, we cannot allow that the trusted thread ever executes any | |
27 // signal handlers. | |
28 // We prevent the execution of signal handlers by setting a signal | |
29 // mask that blocks all signals. In addition, we make sure that the | |
30 // stack pointer is invalid. | |
31 // We cannot reset the signal mask until after we have enabled | |
32 // Seccomp mode. Our sigprocmask() wrapper would normally do this by | |
33 // raising a signal, modifying the signal mask in the kernel-generated | |
34 // signal frame, and then calling sigreturn(). This presents a bit of | |
35 // a Catch-22, as all signals are masked and we can therefore not | |
36 // raise any signal that would allow us to generate the signal stack | |
37 // frame. | |
38 // Instead, we have to create the signal stack frame prior to entering | |
39 // Seccomp mode. This incidentally also helps us to restore the | |
40 // signal mask to the same value that it had prior to entering the | |
41 // sandbox. | |
42 // The signal wrapper for clone() is the second entry point into this | |
43 // code (by means of sending an IPC to its trusted thread). It goes | |
44 // through the same steps of creating a signal stack frame on the | |
45 // newly created thread's stack prior to cloning. See clone.cc for | |
46 // details. | |
47 "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000 | |
48 "mov %%rsp, %%rcx\n" | |
49 "int $0\n" // push a signal stack frame (see clone.cc) | |
50 "mov %%rcx, 0xA0(%%rsp)\n" // pop stack upon call to sigreturn() | |
51 "mov %%rsp, %%r9\n" | |
52 "mov $2, %%rdi\n" // how = SIG_SETMASK | |
53 "pushq $-1\n" | |
54 "mov %%rsp, %%rsi\n" // set = full mask | |
55 "xor %%rdx, %%rdx\n" // old_set = NULL | |
56 "mov $8, %%r10\n" // mask all 64 signals | |
57 "mov $14, %%eax\n" // NR_rt_sigprocmask | |
58 "syscall\n" | |
59 "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code | |
60 "jmp 20f\n" // create trusted thread | |
61 | |
62 // TODO(markus): Coalesce the read() operations by reading into a bigger | |
63 // buffer. | |
64 | |
65 // Parameters: | |
66 // *%fs: secure memory region | |
67 // the page following this one contains the scratch space | |
68 // %r13: thread's side of threadFd | |
69 // %r15: processFdPub | |
70 | |
71 // Local variables: | |
72 // %rbx: sequence number for trusted calls | |
73 | |
74 // Temporary variables: | |
75 // %r8: child stack | |
76 // %r9: system call number, child stack | |
77 // %rbp: secure memory of previous thread | |
78 | |
79 // Layout of secure shared memory region (c.f. securemem.h): | |
80 // 0x00: pointer to the secure shared memory region (i.e. self) | |
81 // 0x08: sequence number; must match %rbx | |
82 // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2 | |
83 // 0x18: system call number; passed to syscall in %rax | |
84 // 0x20: first argument; passed to syscall in %rdi | |
85 // 0x28: second argument; passed to syscall in %rsi | |
86 // 0x30: third argument; passed to syscall in %rdx | |
87 // 0x38: fourth argument; passed to syscall in %r10 | |
88 // 0x40: fifth argument; passed to syscall in %r8 | |
89 // 0x48: sixth argument; passed to syscall in %r9 | |
90 // 0x50: stored return address for clone() system call | |
91 // 0x58: stored %rbp value for clone() system call | |
92 // 0x60: stored %rbx value for clone() system call | |
93 // 0x68: stored %rcx value for clone() system call | |
94 // 0x70: stored %rdx value for clone() system call | |
95 // 0x78: stored %rsi value for clone() system call | |
96 // 0x80: stored %rdi value for clone() system call | |
97 // 0x88: stored %r8 value for clone() system call | |
98 // 0x90: stored %r9 value for clone() system call | |
99 // 0x98: stored %r10 value for clone() system call | |
100 // 0xA0: stored %r11 value for clone() system call | |
101 // 0xA8: stored %r12 value for clone() system call | |
102 // 0xB0: stored %r13 value for clone() system call | |
103 // 0xB8: stored %r14 value for clone() system call | |
104 // 0xC0: stored %r15 value for clone() system call | |
105 // 0xC8: new shared memory for clone() | |
106 // 0xD0: processFdPub for talking to trusted process | |
107 // 0xD4: cloneFdPub for talking to trusted process | |
108 // 0xD8: set to non-zero, if in debugging mode | |
109 // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE) | |
110 // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE) | |
111 // 0xE8: thread id (TLS_TID) | |
112 // 0xF0: threadFdPub (TLS_THREAD_FD) | |
113 // 0x200-0x1000: securely passed verified file name(s) | |
114 | |
115 // Layout of (untrusted) scratch space: | |
116 // 0x00: syscall number; passed in %rax | |
117 // 0x04: first argument; passed in %rdi | |
118 // 0x0C: second argument; passed in %rsi | |
119 // 0x14: third argument; passed in %rdx | |
120 // 0x1C: fourth argument; passed in %r10 | |
121 // 0x24: fifth argument; passed in %r8 | |
122 // 0x2C: sixth argument; passed in %r9 | |
123 // 0x34: return value | |
124 // 0x3C: RDTSCP result (%eax) | |
125 // 0x40: RDTSCP result (%edx) | |
126 // 0x44: RDTSCP result (%ecx) | |
127 // 0x48: last system call (not used on x86-64) | |
128 // 0x4C: number of consecutive calls to a time fnc (not used on x86-64) | |
129 // 0x50: nesting level of system calls (for debugging purposes only) | |
130 // 0x54: signal mask | |
131 // 0x5C: in SEGV handler | |
132 | |
133 // We use the %fs register for accessing the secure read-only page, and | |
134 // the untrusted scratch space immediately following it. The segment | |
135 // register and the local descriptor table is set up by passing | |
136 // appropriate arguments to clone(). | |
137 | |
138 "0:xor %%rsp, %%rsp\n" | |
139 "mov $2, %%ebx\n" // %rbx = initial sequence number | |
140 | |
141 // Read request from untrusted thread, or from trusted process. In either | |
142 // case, the data that we read has to be considered untrusted. | |
143 // read(threadFd, &scratch, 4) | |
144 "1:xor %%rax, %%rax\n" // NR_read | |
145 "mov %%r13, %%rdi\n" // fd = threadFd | |
146 "mov %%fs:0x0, %%rsi\n" // secure_mem | |
147 "add $0x1000, %%rsi\n" // buf = &scratch | |
148 "mov $4, %%edx\n" // len = 4 | |
149 "2:syscall\n" | |
150 "cmp $-4, %%rax\n" // EINTR | |
151 "jz 2b\n" | |
152 "cmp %%rdx, %%rax\n" | |
153 "jnz 25f\n" // exit process | |
154 | |
155 // Retrieve system call number. It is crucial that we only dereference | |
156 // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and | |
157 // we must use the value that we have read the first time. | |
158 "mov 0(%%rsi), %%eax\n" | |
159 | |
160 // If syscall number is -1, execute an unlocked system call from the | |
161 // secure memory area | |
162 "cmp $-1, %%eax\n" | |
163 "jnz 5f\n" | |
164 "3:cmp %%rbx, %%fs:0x8\n" | |
165 "jne 25f\n" // exit process | |
166 "cmp %%fs:0x10, %%eax\n" | |
167 "jne 25f\n" // exit process | |
168 "mov %%fs:0x18, %%rax\n" | |
169 "mov %%fs:0x20, %%rdi\n" | |
170 "mov %%fs:0x28, %%rsi\n" | |
171 "mov %%fs:0x30, %%rdx\n" | |
172 "mov %%fs:0x38, %%r10\n" | |
173 "mov %%fs:0x40, %%r8\n" | |
174 "mov %%fs:0x48, %%r9\n" | |
175 "cmp %%rbx, %%fs:0x8\n" | |
176 "jne 25f\n" // exit process | |
177 "add $2, %%rbx\n" | |
178 | |
179 // shmget() gets some special treatment. Whenever we return from this | |
180 // system call, we remember the most recently returned SysV shm id. | |
181 "cmp $29, %%eax\n" // NR_shmget | |
182 "jnz 4f\n" | |
183 "syscall\n" | |
184 "mov %%rax, %%r8\n" | |
185 "mov $56, %%eax\n" // NR_clone | |
186 "mov $17, %%edi\n" // flags = SIGCHLD | |
187 "mov $1, %%esi\n" // stack = 1 | |
188 "syscall\n" | |
189 "test %%rax, %%rax\n" | |
190 "js 25f\n" // exit process | |
191 "mov %%rax, %%rdi\n" | |
192 "jnz 8f\n" // wait for child, then return result | |
193 "mov %%fs:0x0, %%rdi\n" // start = secure_mem | |
194 "mov $4096, %%esi\n" // len = 4096 | |
195 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
196 "mov $10, %%eax\n" // NR_mprotect | |
197 "syscall\n" | |
198 "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id | |
199 "xor %%rdi, %%rdi\n" | |
200 | |
201 // When debugging messages are enabled, warn about expensive system calls | |
202 #ifndef NDEBUG | |
203 "cmpw $0, %%fs:0xD8\n" // debug mode | |
204 "jz 27f\n" | |
205 "mov $1, %%eax\n" // NR_write | |
206 "mov $2, %%edi\n" // fd = stderr | |
207 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" | |
208 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
209 "syscall\n" | |
210 "xor %%rdi, %%rdi\n" | |
211 #endif | |
212 | |
213 "jmp 27f\n" // exit program, no message | |
214 "4:syscall\n" | |
215 "jmp 15f\n" // return result | |
216 | |
217 // If syscall number is -2, execute locked system call from the | |
218 // secure memory area | |
219 "5:jg 12f\n" | |
220 "cmp $-2, %%eax\n" | |
221 "jnz 9f\n" | |
222 "cmp %%rbx, %%fs:0x8\n" | |
223 "jne 25f\n" // exit process | |
224 "cmp %%eax, %%fs:0x10\n" | |
225 "jne 25f\n" // exit process | |
226 | |
227 // When debugging messages are enabled, warn about expensive system calls | |
228 #ifndef NDEBUG | |
229 "cmpw $0, %%fs:0xD8\n" // debug mode | |
230 "jz 6f\n" | |
231 "mov $1, %%eax\n" // NR_write | |
232 "mov $2, %%edi\n" // fd = stderr | |
233 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" | |
234 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
235 "syscall\n" | |
236 "6:" | |
237 #endif | |
238 | |
239 "mov %%fs:0x18, %%rax\n" | |
240 "mov %%fs:0x20, %%rdi\n" | |
241 "mov %%fs:0x28, %%rsi\n" | |
242 "mov %%fs:0x30, %%rdx\n" | |
243 "mov %%fs:0x38, %%r10\n" | |
244 "mov %%fs:0x40, %%r8\n" | |
245 "mov %%fs:0x48, %%r9\n" | |
246 "cmp %%rbx, %%fs:0x8\n" | |
247 "jne 25f\n" // exit process | |
248 | |
249 // clone() has unusual calling conventions and must be handled specially | |
250 "cmp $56, %%rax\n" // NR_clone | |
251 "jz 19f\n" | |
252 | |
253 // exit() terminates trusted thread | |
254 "cmp $60, %%eax\n" // NR_exit | |
255 "jz 18f\n" | |
256 | |
257 // Perform requested system call | |
258 "syscall\n" | |
259 | |
260 // Unlock mutex | |
261 "7:cmp %%rbx, %%fs:0x8\n" | |
262 "jne 25f\n" // exit process | |
263 "add $2, %%rbx\n" | |
264 "mov %%rax, %%r8\n" | |
265 "mov $56, %%eax\n" // NR_clone | |
266 "mov $17, %%rdi\n" // flags = SIGCHLD | |
267 "mov $1, %%rsi\n" // stack = 1 | |
268 "syscall\n" | |
269 "test %%rax, %%rax\n" | |
270 "js 25f\n" // exit process | |
271 "jz 22f\n" // unlock and exit | |
272 "mov %%rax, %%rdi\n" | |
273 "8:xor %%rsi, %%rsi\n" | |
274 "xor %%rdx, %%rdx\n" | |
275 "xor %%r10, %%r10\n" | |
276 "mov $61, %%eax\n" // NR_wait4 | |
277 "syscall\n" | |
278 "cmp $-4, %%eax\n" // EINTR | |
279 "jz 8b\n" | |
280 "mov %%r8, %%rax\n" | |
281 "jmp 15f\n" // return result | |
282 | |
283 // If syscall number is -3, read the time stamp counter | |
284 "9:cmp $-3, %%eax\n" | |
285 "jnz 10f\n" | |
286 "rdtsc\n" // sets %edx:%eax | |
287 "xor %%rcx, %%rcx\n" | |
288 "jmp 11f\n" | |
289 "10:cmp $-4, %%eax\n" | |
290 "jnz 12f\n" | |
291 "rdtscp\n" // sets %edx:%eax and %ecx | |
292 "11:add $0x3C, %%rsi\n" | |
293 "mov %%eax, 0(%%rsi)\n" | |
294 "mov %%edx, 4(%%rsi)\n" | |
295 "mov %%ecx, 8(%%rsi)\n" | |
296 "mov $12, %%edx\n" | |
297 "jmp 16f\n" // return result | |
298 | |
299 // Check in syscallTable whether this system call is unrestricted | |
300 "12:mov %%rax, %%r9\n" | |
301 #ifndef NDEBUG | |
302 "cmpw $0, %%fs:0xD8\n" // debug mode | |
303 "jnz 13f\n" | |
304 #endif | |
305 "cmp playground$maxSyscall(%%rip), %%eax\n" | |
306 "ja 25f\n" // exit process | |
307 "shl $4, %%rax\n" | |
308 "lea playground$syscallTable(%%rip), %%rdi\n" | |
309 "add %%rdi, %%rax\n" | |
310 "mov 0(%%rax), %%rax\n" | |
311 "cmp $1, %%rax\n" | |
312 "jne 25f\n" // exit process | |
313 | |
314 // Default behavior for unrestricted system calls is to just execute | |
315 // them. Read the remaining arguments first. | |
316 "13:mov %%rsi, %%r8\n" | |
317 "xor %%rax, %%rax\n" // NR_read | |
318 "mov %%r13, %%rdi\n" // fd = threadFd | |
319 "add $4, %%rsi\n" // buf = &scratch + 4 | |
320 "mov $48, %%edx\n" // len = 6*sizeof(void *) | |
321 "14:syscall\n" | |
322 "cmp $-4, %%rax\n" // EINTR | |
323 "jz 14b\n" | |
324 "cmp %%rdx, %%rax\n" | |
325 "jnz 25f\n" // exit process | |
326 "mov %%r9, %%rax\n" | |
327 "mov 0x04(%%r8), %%rdi\n" | |
328 "mov 0x0C(%%r8), %%rsi\n" | |
329 "mov 0x14(%%r8), %%rdx\n" | |
330 "mov 0x1C(%%r8), %%r10\n" | |
331 "mov 0x2C(%%r8), %%r9\n" | |
332 "mov 0x24(%%r8), %%r8\n" | |
333 "cmp $231, %%rax\n" // NR_exit_group | |
334 "jz 27f\n" // exit program, no message | |
335 "syscall\n" | |
336 | |
337 // Return result of system call to sandboxed thread | |
338 "15:mov %%fs:0x0, %%rsi\n" // secure_mem | |
339 "add $0x1034, %%rsi\n" // buf = &scratch + 52 | |
340 "mov %%rax, (%%rsi)\n" | |
341 "mov $8, %%edx\n" // len = 8 | |
342 "16:mov %%r13, %%rdi\n" // fd = threadFd | |
343 "mov $1, %%eax\n" // NR_write | |
344 "17:syscall\n" | |
345 "cmp %%rdx, %%rax\n" | |
346 "jz 1b\n" | |
347 "cmp $-4, %%rax\n" // EINTR | |
348 "jz 17b\n" | |
349 "jmp 25f\n" // exit process | |
350 | |
351 // NR_exit: | |
352 // Exit trusted thread after cleaning up resources | |
353 "18:mov %%fs:0x0, %%rsi\n" // secure_mem | |
354 "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub | |
355 "mov $3, %%eax\n" // NR_close | |
356 "syscall\n" | |
357 "mov %%rsi, %%rdi\n" // start = secure_mem | |
358 "mov $8192, %%esi\n" // length = 8192 | |
359 "xor %%rdx, %%rdx\n" // prot = PROT_NONE | |
360 "mov $10, %%eax\n" // NR_mprotect | |
361 "syscall\n" | |
362 "mov %%r13, %%rdi\n" // fd = threadFd | |
363 "mov $3, %%eax\n" // NR_close | |
364 "syscall\n" | |
365 "mov $56, %%eax\n" // NR_clone | |
366 "mov $17, %%rdi\n" // flags = SIGCHLD | |
367 "mov $1, %%rsi\n" // stack = 1 | |
368 "syscall\n" | |
369 "mov %%rax, %%rdi\n" | |
370 "test %%rax, %%rax\n" | |
371 "js 27f\n" // exit process | |
372 "jne 21f\n" // reap helper, exit thread | |
373 "jmp 22f\n" // unlock mutex | |
374 | |
375 // NR_clone: | |
376 // Original trusted thread calls clone() to create new nascent | |
377 // thread. This thread is (typically) fully privileged and shares all | |
378 // resources with the caller (i.e. the previous trusted thread), | |
379 // and by extension it shares all resources with the sandbox'd | |
380 // threads. | |
381 "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem | |
382 "mov %%rsi, %%r15\n" // remember child stack | |
383 "mov $1, %%rsi\n" // stack = 1 | |
384 "syscall\n" // calls NR_clone | |
385 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values | |
386 "jae 7b\n" // unlock mutex, return result | |
387 "add $2, %%rbx\n" | |
388 "test %%rax, %%rax\n" | |
389 "jne 15b\n" // return result | |
390 | |
391 // In nascent thread, now. | |
392 "sub $2, %%rbx\n" | |
393 | |
394 // We want to maintain an invalid %rsp whenever we access untrusted | |
395 // memory. This ensures that even if an attacker can trick us into | |
396 // triggering a SIGSEGV, we will never successfully execute a signal | |
397 // handler. | |
398 // Signal handlers are inherently dangerous, as an attacker could trick | |
399 // us into returning to the wrong address by adjusting the signal stack | |
400 // right before the handler returns. | |
401 // N.B. While POSIX is curiously silent about this, it appears that on | |
402 // Linux, alternate signal stacks are a per-thread property. That is | |
403 // good. It means that this security mechanism works, even if the | |
404 // sandboxed thread manages to set up an alternate signal stack. | |
405 // | |
406 // TODO(markus): We currently do not support emulating calls to | |
407 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc | |
408 // for a discussion on how to fix this, if this ever becomes necessary. | |
409 "mov %%r15, %%r9\n" // %r9 = child_stack | |
410 "xor %%r15, %%r15\n" // Request to return from clone() when done | |
411 | |
412 // Get thread id of nascent thread | |
413 "20:mov $186, %%eax\n" // NR_gettid | |
414 "syscall\n" | |
415 "mov %%rax, %%r14\n" | |
416 | |
417 // Nascent thread creates socketpair() for sending requests to | |
418 // trusted thread. | |
419 // We can create the filehandles on the child's stack. Filehandles are | |
420 // always treated as untrusted. | |
421 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) | |
422 "sub $0x10, %%r9\n" | |
423 "mov %%r15, 8(%%r9)\n" // preserve return address on child stack | |
424 "mov $53, %%eax\n" // NR_socketpair | |
425 "mov $1, %%edi\n" // domain = AF_UNIX | |
426 "mov $1, %%esi\n" // type = SOCK_STREAM | |
427 "xor %%rdx, %%rdx\n" // protocol = 0 | |
428 "mov %%r9, %%r10\n" // sv = child_stack | |
429 "syscall\n" | |
430 "test %%rax, %%rax\n" | |
431 "jz 28f\n" | |
432 | |
433 // If things went wrong, we don't have an (easy) way of signaling | |
434 // the parent. For our purposes, it is sufficient to fail with a | |
435 // fatal error. | |
436 "jmp 25f\n" // exit process | |
437 "21:xor %%rsi, %%rsi\n" | |
438 "xor %%rdx, %%rdx\n" | |
439 "xor %%r10, %%r10\n" | |
440 "mov $61, %%eax\n" // NR_wait4 | |
441 "syscall\n" | |
442 "cmp $-4, %%eax\n" // EINTR | |
443 "jz 21b\n" | |
444 "jmp 23f\n" // exit thread (no message) | |
445 "22:lea playground$syscall_mutex(%%rip), %%rdi\n" | |
446 "mov $4096, %%esi\n" | |
447 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
448 "mov $10, %%eax\n" // NR_mprotect | |
449 "syscall\n" | |
450 "lock; addl $0x80000000, (%%rdi)\n" | |
451 "jz 23f\n" // exit thread | |
452 "mov $1, %%edx\n" | |
453 "mov %%rdx, %%rsi\n" // FUTEX_WAKE | |
454 "mov $202, %%eax\n" // NR_futex | |
455 "syscall\n" | |
456 "23:mov $60, %%eax\n" // NR_exit | |
457 "mov $1, %%edi\n" // status = 1 | |
458 "24:syscall\n" | |
459 "25:mov $1, %%eax\n" // NR_write | |
460 "mov $2, %%edi\n" // fd = stderr | |
461 "lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected" | |
462 "mov $101f-100f, %%edx\n" // len = strlen(msg) | |
463 "syscall\n" | |
464 "26:mov $1, %%edi\n" | |
465 "27:mov $231, %%eax\n" // NR_exit_group | |
466 "jmp 24b\n" | |
467 | |
468 // The first page is mapped read-only for use as securely shared memory | |
469 "28:mov 0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory | |
470 "cmp %%rbx, 8(%%rbp)\n" | |
471 "jne 25b\n" // exit process | |
472 "mov $10, %%eax\n" // NR_mprotect | |
473 "mov %%r12, %%rdi\n" // addr = secure_mem | |
474 "mov $4096, %%esi\n" // len = 4096 | |
475 "mov $1, %%edx\n" // prot = PROT_READ | |
476 "syscall\n" | |
477 | |
478 // The second page is used as scratch space by the trusted thread. | |
479 // Make it writable. | |
480 "mov $10, %%eax\n" // NR_mprotect | |
481 "add $4096, %%rdi\n" // addr = secure_mem + 4096 | |
482 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
483 "syscall\n" | |
484 | |
485 // Call clone() to create the new trusted thread. | |
486 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| | |
487 // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL, | |
488 // tls) | |
489 "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack) | |
490 "mov $56, %%eax\n" // NR_clone | |
491 "mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS | |
492 "mov $1, %%rsi\n" // stack = 1 | |
493 "mov %%r12, %%r8\n" // tls = new_secure_mem | |
494 "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub | |
495 "cmp %%rbx, 8(%%rbp)\n" | |
496 "jne 25b\n" // exit process | |
497 "syscall\n" | |
498 "test %%rax, %%rax\n" | |
499 "js 25b\n" // exit process | |
500 "jz 0b\n" // invoke trustedThreadFnc() | |
501 | |
502 // Copy the caller's signal mask | |
503 "mov 0x1054(%%rbp), %%rax\n" | |
504 "mov %%rax, 0x1054(%%r12)\n" | |
505 | |
506 // Done creating trusted thread. We can now get ready to return to caller | |
507 "mov %%r9, %%r8\n" // %r8 = child_stack | |
508 "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub | |
509 | |
510 // Set up thread local storage with information on how to talk to | |
511 // trusted thread and trusted process. | |
512 "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS; | |
513 "mov $158, %%eax\n" // NR_arch_prctl | |
514 "mov $0x1001, %%edi\n" // option = ARCH_SET_GS | |
515 "syscall\n" | |
516 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values | |
517 "jae 25b\n" // exit process | |
518 | |
519 // Check whether this is the initial thread, or a newly created one. | |
520 // At startup we run the same code as when we create a new thread. At | |
521 // the very top of this function, you will find that we push 999(%rip) | |
522 // on the stack. That is the signal that we should return on the same | |
523 // stack rather than return to where clone was called. | |
524 "mov 8(%%r8), %%r15\n" | |
525 "add $0x10, %%r8\n" | |
526 "test %%r15, %%r15\n" | |
527 "jne 29f\n" | |
528 | |
529 // Returning from clone() into the newly created thread is special. We | |
530 // cannot unroll the stack, as we just set up a new stack for this | |
531 // thread. We have to explicitly restore CPU registers to the values | |
532 // that they had when the program originally called clone(). | |
533 // We patch the register values in the signal stack frame so that we | |
534 // can ask sigreturn() to restore all registers for us. | |
535 "sub $0x8, %%r8\n" | |
536 "mov 0x50(%%rbp), %%rax\n" | |
537 "mov %%rax, 0x00(%%r8)\n" // return address | |
538 "xor %%rax, %%rax\n" | |
539 "mov %%rax, 0x98(%%r8)\n" // %rax = 0 | |
540 "mov 0x58(%%rbp), %%rax\n" | |
541 "mov %%rax, 0x80(%%r8)\n" // %rbp | |
542 "mov 0x60(%%rbp), %%rax\n" | |
543 "mov %%rax, 0x88(%%r8)\n" // %rbx | |
544 "mov 0x68(%%rbp), %%rax\n" | |
545 "mov %%rax, 0xA0(%%r8)\n" // %rcx | |
546 "mov 0x70(%%rbp), %%rax\n" | |
547 "mov %%rax, 0x90(%%r8)\n" // %rdx | |
548 "mov 0x78(%%rbp), %%rax\n" | |
549 "mov %%rax, 0x78(%%r8)\n" // %rsi | |
550 "mov 0x80(%%rbp), %%rax\n" | |
551 "mov %%rax, 0x70(%%r8)\n" // %rdi | |
552 "mov 0x88(%%rbp), %%rax\n" | |
553 "mov %%rax, 0x30(%%r8)\n" // %r8 | |
554 "mov 0x90(%%rbp), %%rax\n" | |
555 "mov %%rax, 0x38(%%r8)\n" // %r9 | |
556 "mov 0x98(%%rbp), %%rax\n" | |
557 "mov %%rax, 0x40(%%r8)\n" // %r10 | |
558 "mov 0xA0(%%rbp), %%rax\n" | |
559 "mov %%rax, 0x48(%%r8)\n" // %r11 | |
560 "mov 0xA8(%%rbp), %%rax\n" | |
561 "mov %%rax, 0x50(%%r8)\n" // %r12 | |
562 "mov 0xB0(%%rbp), %%rax\n" | |
563 "mov %%rax, 0x58(%%r8)\n" // %r13 | |
564 "mov 0xB8(%%rbp), %%rax\n" | |
565 "mov %%rax, 0x60(%%r8)\n" // %r14 | |
566 "mov 0xC0(%%rbp), %%rax\n" | |
567 "mov %%rax, 0x68(%%r8)\n" // %r15 | |
568 "cmp %%rbx, 8(%%rbp)\n" | |
569 "jne 25b\n" // exit process | |
570 | |
571 // Nascent thread launches a helper that doesn't share any of our | |
572 // resources, except for pages mapped as MAP_SHARED. | |
573 // clone(SIGCHLD, stack=1) | |
574 "29:mov $56, %%eax\n" // NR_clone | |
575 "mov $17, %%rdi\n" // flags = SIGCHLD | |
576 "mov $1, %%rsi\n" // stack = 1 | |
577 "syscall\n" | |
578 "test %%rax, %%rax\n" | |
579 "js 25b\n" // exit process | |
580 "jne 31f\n" | |
581 | |
582 // Use sendmsg() to send to the trusted process the file handles for | |
583 // communicating with the new trusted thread. We also send the address | |
584 // of the secure memory area (for sanity checks) and the thread id. | |
585 "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub() | |
586 "cmp %%rbx, 8(%%rbp)\n" | |
587 "jne 25b\n" // exit process | |
588 | |
589 // 0x00 msg: | |
590 // 0x00 msg_name ($0) | |
591 // 0x08 msg_namelen ($0) | |
592 // 0x10 msg_iov (%r8 + 0x44) | |
593 // 0x18 msg_iovlen ($1) | |
594 // 0x20 msg_control (%r8 + 0x54) | |
595 // 0x28 msg_controllen ($0x18) | |
596 // 0x30 data: | |
597 // 0x30 msg_flags/err ($0) | |
598 // 0x34 secure_mem (%r12) | |
599 // 0x3C threadId (%r14d) | |
600 // 0x40 threadFdPub (%r9d) | |
601 // 0x44 iov: | |
602 // 0x44 iov_base (%r8 + 0x30) | |
603 // 0x4C iov_len ($0x14) | |
604 // 0x54 cmsg: | |
605 // 0x54 cmsg_len ($0x18) | |
606 // 0x5C cmsg_level ($1, SOL_SOCKET) | |
607 // 0x60 cmsg_type ($1, SCM_RIGHTS) | |
608 // 0x64 threadFdPub (%r9d) | |
609 // 0x68 threadFd (%r13d) | |
610 // 0x6C | |
611 "sub $0x6C, %%r8\n" | |
612 "xor %%rdx, %%rdx\n" // flags = 0 | |
613 "mov %%rdx, 0x00(%%r8)\n" // msg_name | |
614 "mov %%edx, 0x08(%%r8)\n" // msg_namelen | |
615 "mov %%edx, 0x30(%%r8)\n" // msg_flags | |
616 "mov $1, %%r11d\n" | |
617 "mov %%r11, 0x18(%%r8)\n" // msg_iovlen | |
618 "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level | |
619 "mov %%r11d, 0x60(%%r8)\n" // cmsg_type | |
620 "lea 0x30(%%r8), %%r11\n" | |
621 "mov %%r11, 0x44(%%r8)\n" // iov_base | |
622 "add $0x14, %%r11\n" | |
623 "mov %%r11, 0x10(%%r8)\n" // msg_iov | |
624 "add $0x10, %%r11\n" | |
625 "mov %%r11, 0x20(%%r8)\n" // msg_control | |
626 "mov $0x14, %%r11d\n" | |
627 "mov %%r11, 0x4C(%%r8)\n" // iov_len | |
628 "add $4, %%r11d\n" | |
629 "mov %%r11, 0x28(%%r8)\n" // msg_controllen | |
630 "mov %%r11, 0x54(%%r8)\n" // cmsg_len | |
631 "mov %%r12, 0x34(%%r8)\n" // secure_mem | |
632 "mov %%r14d, 0x3C(%%r8)\n" // threadId | |
633 "mov %%r9d, 0x40(%%r8)\n" // threadFdPub | |
634 "mov %%r9d, 0x64(%%r8)\n" // threadFdPub | |
635 "mov %%r13d, 0x68(%%r8)\n" // threadFd | |
636 "mov $46, %%eax\n" // NR_sendmsg | |
637 "mov %%r8, %%rsi\n" // msg | |
638 "syscall\n" | |
639 | |
640 // Release syscall_mutex_. This signals the trusted process that | |
641 // it can write into the original thread's secure memory again. | |
642 "mov $10, %%eax\n" // NR_mprotect | |
643 "lea playground$syscall_mutex(%%rip), %%rdi\n" | |
644 "mov $4096, %%esi\n" | |
645 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE | |
646 "syscall\n" | |
647 "cmp %%rbx, 8(%%rbp)\n" | |
648 "jne 25b\n" // exit process | |
649 "lock; addl $0x80000000, (%%rdi)\n" | |
650 "jz 30f\n" // exit process (no error message) | |
651 "mov $1, %%edx\n" | |
652 "mov %%rdx, %%rsi\n" // FUTEX_WAKE | |
653 "mov $202, %%eax\n" // NR_futex | |
654 "syscall\n" | |
655 "30:xor %%rdi, %%rdi\n" | |
656 "jmp 27b\n" // exit process (no error message) | |
657 | |
658 // Reap helper | |
659 "31:mov %%rax, %%rdi\n" | |
660 "32:lea -4(%%r8), %%rsi\n" | |
661 "xor %%rdx, %%rdx\n" | |
662 "xor %%r10, %%r10\n" | |
663 "mov $61, %%eax\n" // NR_wait4 | |
664 "syscall\n" | |
665 "cmp $-4, %%eax\n" // EINTR | |
666 "jz 32b\n" | |
667 "mov -4(%%r8), %%eax\n" | |
668 "test %%rax, %%rax\n" | |
669 "jnz 26b\n" // exit process (no error message) | |
670 | |
671 // Release privileges by entering seccomp mode. | |
672 "mov $157, %%eax\n" // NR_prctl | |
673 "mov $22, %%edi\n" // PR_SET_SECCOMP | |
674 "mov $1, %%esi\n" | |
675 "syscall\n" | |
676 "test %%rax, %%rax\n" | |
677 "jnz 25b\n" // exit process | |
678 | |
679 // We can finally start using the stack. Signal handlers no longer pose | |
680 // a threat to us. | |
681 "mov %%r8, %%rsp\n" | |
682 | |
683 // Back in the newly created sandboxed thread, wait for trusted process | |
684 // to receive request. It is possible for an attacker to make us | |
685 // continue even before the trusted process is done. This is OK. It'll | |
686 // result in us putting stale values into the new thread's TLS. But that | |
687 // data is considered untrusted anyway. | |
688 "push %%rax\n" | |
689 "mov $1, %%edx\n" // len = 1 | |
690 "mov %%rsp, %%rsi\n" // buf = %rsp | |
691 "mov %%r9, %%rdi\n" // fd = threadFdPub | |
692 "33:xor %%rax, %%rax\n" // NR_read | |
693 "syscall\n" | |
694 "cmp $-4, %%rax\n" // EINTR | |
695 "jz 33b\n" | |
696 "cmp %%rdx, %%rax\n" | |
697 "jne 25b\n" // exit process | |
698 "pop %%rax\n" | |
699 | |
700 // Return to caller. We are in the new thread, now. | |
701 "test %%r15, %%r15\n" | |
702 "jnz 34f\n" // Returning to createTrustedThread() | |
703 | |
704 // Returning to the place where clone() had been called. We rely on | |
705 // using rt_sigreturn() for restoring our registers. The caller already | |
706 // created a signal stack frame, and we patched the register values | |
707 // with the ones that were in effect prior to calling sandbox_clone(). | |
708 "pop %%r15\n" | |
709 "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip | |
710 "mov $15, %%eax\n" // NR_rt_sigreturn | |
711 "syscall\n" | |
712 | |
713 ".pushsection \".rodata\"\n" | |
714 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" | |
715 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" | |
716 "102:\n" | |
717 ".popsection\n" | |
718 | |
719 "999:pop %%rbp\n" | |
720 "pop %%rbx\n" | |
721 : | |
722 : "g"(&args) | |
723 : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", | |
724 "r13", "r14", "r15", "rsp", "memory" | |
725 #elif defined(__i386__) | |
726 struct user_desc u; | |
727 u.entry_number = (typeof u.entry_number)-1; | |
728 u.base_addr = 0; | |
729 u.limit = 0xfffff; | |
730 u.seg_32bit = 1; | |
731 u.contents = 0; | |
732 u.read_exec_only = 0; | |
733 u.limit_in_pages = 1; | |
734 u.seg_not_present = 0; | |
735 u.useable = 1; | |
736 SysCalls sys; | |
737 if (sys.set_thread_area(&u) < 0) { | |
738 die("Cannot set up thread local storage"); | |
739 } | |
740 asm volatile("movw %w0, %%fs" | |
741 : | |
742 : "q"(8*u.entry_number+3)); | |
743 asm volatile( | |
744 "push %%ebx\n" | |
745 "push %%ebp\n" | |
746 | |
747 // Signal handlers are process-wide. This means that for security | |
748 // reasons, we cannot allow that the trusted thread ever executes any | |
749 // signal handlers. | |
750 // We prevent the execution of signal handlers by setting a signal | |
751 // mask that blocks all signals. In addition, we make sure that the | |
752 // stack pointer is invalid. | |
753 // We cannot reset the signal mask until after we have enabled | |
754 // Seccomp mode. Our sigprocmask() wrapper would normally do this by | |
755 // raising a signal, modifying the signal mask in the kernel-generated | |
756 // signal frame, and then calling sigreturn(). This presents a bit of | |
757 // a Catch-22, as all signals are masked and we can therefore not | |
758 // raise any signal that would allow us to generate the signal stack | |
759 // frame. | |
760 // Instead, we have to create the signal stack frame prior to entering | |
761 // Seccomp mode. This incidentally also helps us to restore the | |
762 // signal mask to the same value that it had prior to entering the | |
763 // sandbox. | |
764 // The signal wrapper for clone() is the second entry point into this | |
765 // code (by means of sending an IPC to its trusted thread). It goes | |
766 // through the same steps of creating a signal stack frame on the | |
767 // newly created thread's stack prior to cloning. See clone.cc for | |
768 // details. | |
769 "mov %0, %%edi\n" // create signal stack before accessing MMX | |
770 "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000 | |
771 "mov %%esp, %%ebp\n" | |
772 "int $0\n" // push a signal stack frame (see clone.cc) | |
773 "mov %%ebp, 0x1C(%%esp)\n" // pop stack upon call to sigreturn() | |
774 "mov %%esp, %%ebp\n" | |
775 "mov $2, %%ebx\n" // how = SIG_SETMASK | |
776 "pushl $-1\n" | |
777 "pushl $-1\n" | |
778 "mov %%esp, %%ecx\n" // set = full mask | |
779 "xor %%edx, %%edx\n" // old_set = NULL | |
780 "mov $8, %%esi\n" // mask all 64 signals | |
781 "mov $175, %%eax\n" // NR_rt_sigprocmask | |
782 "int $0x80\n" | |
783 "mov $126, %%eax\n" // NR_sigprocmask | |
784 "int $0x80\n" | |
785 "xor %%esp, %%esp\n" // invalidate the stack in all trusted code | |
786 "movd %%edi, %%mm6\n" // %mm6 = args | |
787 "lea 999f, %%edi\n" // continue in same thread | |
788 "movd %%edi, %%mm3\n" | |
789 "xor %%edi, %%edi\n" // initial sequence number | |
790 "movd %%edi, %%mm2\n" | |
791 "jmp 20f\n" // create trusted thread | |
792 | |
793 // TODO(markus): Coalesce the read() operations by reading into a bigger | |
794 // buffer. | |
795 | |
796 // Parameters: | |
797 // %mm0: thread's side of threadFd | |
798 // %mm1: processFdPub | |
799 // %mm3: return address after creation of new trusted thread | |
800 // %mm5: secure memory region | |
801 // the page following this one contains the scratch space | |
802 | |
803 // Local variables: | |
804 // %mm2: sequence number for trusted calls | |
805 // %mm4: thread id | |
806 | |
807 // Temporary variables: | |
808 // %ebp: system call number | |
809 // %mm6: secure memory of previous thread | |
810 // %mm7: temporary variable for spilling data | |
811 | |
812 // Layout of secure shared memory region (c.f. securemem.h): | |
813 // 0x00: pointer to the secure shared memory region (i.e. self) | |
814 // 0x04: sequence number; must match %mm2 | |
815 // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2 | |
816 // 0x0C: system call number; passed to syscall in %eax | |
817 // 0x10: first argument; passed to syscall in %ebx | |
818 // 0x14: second argument; passed to syscall in %ecx | |
819 // 0x18: third argument; passed to syscall in %edx | |
820 // 0x1C: fourth argument; passed to syscall in %esi | |
821 // 0x20: fifth argument; passed to syscall in %edi | |
822 // 0x24: sixth argument; passed to syscall in %ebp | |
823 // 0x28: stored return address for clone() system call | |
824 // 0x2C: stored %ebp value for clone() system call | |
825 // 0x30: stored %edi value for clone() system call | |
826 // 0x34: stored %esi value for clone() system call | |
827 // 0x38: stored %edx value for clone() system call | |
828 // 0x3C: stored %ecx value for clone() system call | |
829 // 0x40: stored %ebx value for clone() system call | |
830 // 0x44: new shared memory for clone() | |
831 // 0x48: processFdPub for talking to trusted process | |
832 // 0x4C: cloneFdPub for talking to trusted process | |
833 // 0x50: set to non-zero, if in debugging mode | |
834 // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE) | |
835 // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE) | |
836 // 0x60: thread id (TLS_TID) | |
837 // 0x68: threadFdPub (TLS_THREAD_FD) | |
838 // 0x200-0x1000: securely passed verified file name(s) | |
839 | |
840 // Layout of (untrusted) scratch space: | |
841 // 0x00: syscall number; passed in %eax | |
842 // 0x04: first argument; passed in %ebx | |
843 // 0x08: second argument; passed in %ecx | |
844 // 0x0C: third argument; passed in %edx | |
845 // 0x10: fourth argument; passed in %esi | |
846 // 0x14: fifth argument; passed in %edi | |
847 // 0x18: sixth argument; passed in %ebp | |
848 // 0x1C: return value | |
849 // 0x20: RDTSCP result (%eax) | |
850 // 0x24: RDTSCP result (%edx) | |
851 // 0x28: RDTSCP result (%ecx) | |
852 // 0x2C: last system call (updated in syscall.cc) | |
853 // 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday) | |
854 // 0x34: nesting level of system calls (for debugging purposes only) | |
855 // 0x38: signal mask | |
856 // 0x40: in SEGV handler | |
857 | |
858 "0:xor %%esp, %%esp\n" | |
859 "mov $2, %%eax\n" // %mm2 = initial sequence number | |
860 "movd %%eax, %%mm2\n" | |
861 | |
862 // Read request from untrusted thread, or from trusted process. In either | |
863 // case, the data that we read has to be considered untrusted. | |
864 // read(threadFd, &scratch, 4) | |
865 "1:mov $3, %%eax\n" // NR_read | |
866 "movd %%mm0, %%ebx\n" // fd = threadFd | |
867 "movd %%mm5, %%ecx\n" // secure_mem | |
868 "add $0x1000, %%ecx\n" // buf = &scratch | |
869 "mov $4, %%edx\n" // len = 4 | |
870 "2:int $0x80\n" | |
871 "cmp $-4, %%eax\n" // EINTR | |
872 "jz 2b\n" | |
873 "cmp %%edx, %%eax\n" | |
874 "jnz 25f\n" // exit process | |
875 | |
876 // Retrieve system call number. It is crucial that we only dereference | |
877 // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and | |
878 // we must use the value that we have read the first time. | |
879 "mov 0(%%ecx), %%eax\n" | |
880 | |
881 // If syscall number is -1, execute an unlocked system call from the | |
882 // secure memory area | |
883 "cmp $-1, %%eax\n" | |
884 "jnz 5f\n" | |
885 "3:movd %%mm2, %%ebp\n" | |
886 "cmp %%ebp, 0x4-0x1000(%%ecx)\n" | |
887 "jne 25f\n" // exit process | |
888 "cmp 0x08-0x1000(%%ecx), %%eax\n" | |
889 "jne 25f\n" // exit process | |
890 "mov 0x0C-0x1000(%%ecx), %%eax\n" | |
891 "mov 0x10-0x1000(%%ecx), %%ebx\n" | |
892 "mov 0x18-0x1000(%%ecx), %%edx\n" | |
893 "mov 0x1C-0x1000(%%ecx), %%esi\n" | |
894 "mov 0x20-0x1000(%%ecx), %%edi\n" | |
895 "mov 0x24-0x1000(%%ecx), %%ebp\n" | |
896 "mov 0x14-0x1000(%%ecx), %%ecx\n" | |
897 "movd %%edi, %%mm4\n" | |
898 "movd %%ebp, %%mm7\n" | |
899 "movd %%mm2, %%ebp\n" | |
900 "movd %%mm5, %%edi\n" | |
901 "cmp %%ebp, 4(%%edi)\n" | |
902 "jne 25f\n" // exit process | |
903 "add $2, %%ebp\n" | |
904 "movd %%ebp, %%mm2\n" | |
905 "movd %%mm4, %%edi\n" | |
906 "movd %%mm7, %%ebp\n" | |
907 | |
908 // shmget() gets some special treatment. Whenever we return from this | |
909 // system call, we remember the most recently returned SysV shm id. | |
910 "cmp $117, %%eax\n" // NR_ipc | |
911 "jnz 4f\n" | |
912 "cmp $23, %%ebx\n" // shmget() | |
913 "jnz 4f\n" | |
914 "int $0x80\n" | |
915 "mov %%eax, %%ebp\n" | |
916 "mov $120, %%eax\n" // NR_clone | |
917 "mov $17, %%ebx\n" // flags = SIGCHLD | |
918 "mov $1, %%ecx\n" // stack = 1 | |
919 "int $0x80\n" | |
920 "test %%eax, %%eax\n" | |
921 "js 25f\n" // exit process | |
922 "mov %%eax, %%ebx\n" | |
923 "jnz 8f\n" // wait for child, then return result | |
924 "movd %%mm5, %%ebx\n" // start = secure_mem | |
925 "mov $4096, %%ecx\n" // len = 4096 | |
926 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
927 "mov $125, %%eax\n" // NR_mprotect | |
928 "int $0x80\n" | |
929 "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id | |
930 "xor %%ebx, %%ebx\n" | |
931 | |
932 // When debugging messages are enabled, warn about expensive system calls | |
933 #ifndef NDEBUG | |
934 "movd %%mm5, %%ecx\n" | |
935 "cmpw $0, 0x50(%%ecx)\n" // debug mode | |
936 "jz 27f\n" | |
937 "mov $4, %%eax\n" // NR_write | |
938 "mov $2, %%ebx\n" // fd = stderr | |
939 "lea 101f, %%ecx\n" // "This is an expensive system call" | |
940 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
941 "int $0x80\n" | |
942 "xor %%ebx, %%ebx\n" | |
943 #endif | |
944 | |
945 "jmp 27f\n" // exit program, no message | |
946 "4:int $0x80\n" | |
947 "jmp 15f\n" // return result | |
948 | |
949 // If syscall number is -2, execute locked system call from the | |
950 // secure memory area | |
951 "5:jg 12f\n" | |
952 "cmp $-2, %%eax\n" | |
953 "jnz 9f\n" | |
954 "movd %%mm2, %%ebp\n" | |
955 "cmp %%ebp, 0x4-0x1000(%%ecx)\n" | |
956 "jne 25f\n" // exit process | |
957 "cmp %%eax, 0x8-0x1000(%%ecx)\n" | |
958 "jne 25f\n" // exit process | |
959 | |
960 // When debugging messages are enabled, warn about expensive system calls | |
961 #ifndef NDEBUG | |
962 "cmpw $0, 0x50-0x1000(%%ecx)\n" | |
963 "jz 6f\n" // debug mode | |
964 "mov %%ecx, %%ebp\n" | |
965 "mov $4, %%eax\n" // NR_write | |
966 "mov $2, %%ebx\n" // fd = stderr | |
967 "lea 101f, %%ecx\n" // "This is an expensive system call" | |
968 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
969 "int $0x80\n" | |
970 "mov %%ebp, %%ecx\n" | |
971 "6:" | |
972 #endif | |
973 | |
974 "mov 0x0C-0x1000(%%ecx), %%eax\n" | |
975 "mov 0x10-0x1000(%%ecx), %%ebx\n" | |
976 "mov 0x18-0x1000(%%ecx), %%edx\n" | |
977 "mov 0x1C-0x1000(%%ecx), %%esi\n" | |
978 "mov 0x20-0x1000(%%ecx), %%edi\n" | |
979 "mov 0x24-0x1000(%%ecx), %%ebp\n" | |
980 "mov 0x14-0x1000(%%ecx), %%ecx\n" | |
981 "movd %%edi, %%mm4\n" | |
982 "movd %%ebp, %%mm7\n" | |
983 "movd %%mm2, %%ebp\n" | |
984 "movd %%mm5, %%edi\n" | |
985 "cmp %%ebp, 4(%%edi)\n" | |
986 "jne 25f\n" // exit process | |
987 | |
988 // clone() has unusual calling conventions and must be handled specially | |
989 "cmp $120, %%eax\n" // NR_clone | |
990 "jz 19f\n" | |
991 | |
992 // exit() terminates trusted thread | |
993 "cmp $1, %%eax\n" // NR_exit | |
994 "jz 18f\n" | |
995 | |
996 // Perform requested system call | |
997 "movd %%mm4, %%edi\n" | |
998 "movd %%mm7, %%ebp\n" | |
999 "int $0x80\n" | |
1000 | |
1001 // Unlock mutex | |
1002 "7:movd %%mm2, %%ebp\n" | |
1003 "movd %%mm5, %%edi\n" | |
1004 "cmp %%ebp, 4(%%edi)\n" | |
1005 "jne 25f\n" // exit process | |
1006 "add $2, %%ebp\n" | |
1007 "movd %%ebp, %%mm2\n" | |
1008 "mov %%eax, %%ebp\n" | |
1009 "mov $120, %%eax\n" // NR_clone | |
1010 "mov $17, %%ebx\n" // flags = SIGCHLD | |
1011 "mov $1, %%ecx\n" // stack = 1 | |
1012 "int $0x80\n" | |
1013 "test %%eax, %%eax\n" | |
1014 "js 25f\n" // exit process | |
1015 "jz 22f\n" // unlock and exit | |
1016 "mov %%eax, %%ebx\n" | |
1017 "8:xor %%ecx, %%ecx\n" | |
1018 "xor %%edx, %%edx\n" | |
1019 "mov $7, %%eax\n" // NR_waitpid | |
1020 "int $0x80\n" | |
1021 "cmp $-4, %%eax\n" // EINTR | |
1022 "jz 8b\n" | |
1023 "mov %%ebp, %%eax\n" | |
1024 "jmp 15f\n" // return result | |
1025 | |
1026 // If syscall number is -3, read the time stamp counter | |
1027 "9:cmp $-3, %%eax\n" | |
1028 "jnz 10f\n" | |
1029 "rdtsc\n" // sets %edx:%eax | |
1030 "xor %%ecx, %%ecx\n" | |
1031 "jmp 11f\n" | |
1032 "10:cmp $-4, %%eax\n" | |
1033 "jnz 12f\n" | |
1034 "rdtscp\n" // sets %edx:%eax and %ecx | |
1035 "11:movd %%mm5, %%ebx\n" | |
1036 "add $0x1020, %%ebx\n" | |
1037 "mov %%eax, 0(%%ebx)\n" | |
1038 "mov %%edx, 4(%%ebx)\n" | |
1039 "mov %%ecx, 8(%%ebx)\n" | |
1040 "mov %%ebx, %%ecx\n" | |
1041 "mov $12, %%edx\n" | |
1042 "jmp 16f\n" // return result | |
1043 | |
1044 // Check in syscallTable whether this system call is unrestricted | |
1045 "12:mov %%eax, %%ebp\n" | |
1046 #ifndef NDEBUG | |
1047 "cmpw $0, 0x50-0x1000(%%ecx)\n" | |
1048 "jnz 13f\n" // debug mode | |
1049 #endif | |
1050 "cmp playground$maxSyscall, %%eax\n" | |
1051 "ja 25f\n" // exit process | |
1052 "shl $3, %%eax\n" | |
1053 "add $playground$syscallTable, %%eax\n" | |
1054 "mov 0(%%eax), %%eax\n" | |
1055 "cmp $1, %%eax\n" | |
1056 "jne 25f\n" // exit process | |
1057 | |
1058 // Default behavior for unrestricted system calls is to just execute | |
1059 // them. Read the remaining arguments first. | |
1060 "13:mov $3, %%eax\n" // NR_read | |
1061 "movd %%mm0, %%ebx\n" // fd = threadFd | |
1062 "add $4, %%ecx\n" // buf = &scratch + 4 | |
1063 "mov $24, %%edx\n" // len = 6*sizeof(void *) | |
1064 "14:int $0x80\n" | |
1065 "cmp $-4, %%eax\n" // EINTR | |
1066 "jz 14b\n" | |
1067 "cmp %%edx, %%eax\n" | |
1068 "jnz 25f\n" // exit process | |
1069 "mov %%ebp, %%eax\n" | |
1070 "mov 0x00(%%ecx), %%ebx\n" | |
1071 "mov 0x08(%%ecx), %%edx\n" | |
1072 "mov 0x0C(%%ecx), %%esi\n" | |
1073 "mov 0x10(%%ecx), %%edi\n" | |
1074 "mov 0x14(%%ecx), %%ebp\n" | |
1075 "mov 0x04(%%ecx), %%ecx\n" | |
1076 "cmp $252, %%eax\n" // NR_exit_group | |
1077 "jz 27f\n" // exit program, no message | |
1078 "int $0x80\n" | |
1079 | |
1080 // Return result of system call to sandboxed thread | |
1081 "15:movd %%mm5, %%ecx\n" // secure_mem | |
1082 "add $0x101C, %%ecx\n" // buf = &scratch + 28 | |
1083 "mov %%eax, (%%ecx)\n" | |
1084 "mov $4, %%edx\n" // len = 4 | |
1085 "16:movd %%mm0, %%ebx\n" // fd = threadFd | |
1086 "mov $4, %%eax\n" // NR_write | |
1087 "17:int $0x80\n" | |
1088 "cmp %%edx, %%eax\n" | |
1089 "jz 1b\n" | |
1090 "cmp $-4, %%eax\n" // EINTR | |
1091 "jz 17b\n" | |
1092 "jmp 25f\n" // exit process | |
1093 | |
1094 // NR_exit: | |
1095 // Exit trusted thread after cleaning up resources | |
1096 "18:mov %%edi, %%ecx\n" // secure_mem | |
1097 "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub | |
1098 "mov $6, %%eax\n" // NR_close | |
1099 "int $0x80\n" | |
1100 "mov %%ecx, %%ebx\n" // start = secure_mem | |
1101 "mov $8192, %%ecx\n" // length = 8192 | |
1102 "xor %%edx, %%edx\n" // prot = PROT_NONE | |
1103 "mov $125, %%eax\n" // NR_mprotect | |
1104 "int $0x80\n" | |
1105 "movd %%mm0, %%ebx\n" // fd = threadFd | |
1106 "mov $6, %%eax\n" // NR_close | |
1107 "int $0x80\n" | |
1108 "mov $120, %%eax\n" // NR_clone | |
1109 "mov $17, %%ebx\n" // flags = SIGCHLD | |
1110 "mov $1, %%ecx\n" // stack = 1 | |
1111 "int $0x80\n" | |
1112 "mov %%eax, %%ebx\n" | |
1113 "test %%eax, %%eax\n" | |
1114 "js 25f\n" // exit process | |
1115 "jne 21f\n" // reap helper, exit thread | |
1116 "jmp 22f\n" // unlock mutex | |
1117 | |
1118 // NR_clone: | |
1119 // Original trusted thread calls clone() to create new nascent | |
1120 // thread. This thread is (typically) fully privileged and shares all | |
1121 // resources with the caller (i.e. the previous trusted thread), | |
1122 // and by extension it shares all resources with the sandbox'd | |
1123 // threads. | |
1124 "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem | |
1125 "movd %%mm4, %%edi\n" // child_tidptr | |
1126 "mov %%ecx, %%ebp\n" // remember child stack | |
1127 "mov $1, %%ecx\n" // stack = 1 | |
1128 "int $0x80\n" // calls NR_clone | |
1129 "cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values | |
1130 "jae 7b\n" // unlock mutex, return result | |
1131 "movd %%mm2, %%edi\n" | |
1132 "add $2, %%edi\n" | |
1133 "movd %%edi, %%mm2\n" | |
1134 "test %%eax, %%eax\n" | |
1135 "jne 15b\n" // return result | |
1136 | |
1137 // In nascent thread, now. | |
1138 "sub $2, %%edi\n" | |
1139 "movd %%edi, %%mm2\n" | |
1140 | |
1141 // We want to maintain an invalid %esp whenever we access untrusted | |
1142 // memory. This ensures that even if an attacker can trick us into | |
1143 // triggering a SIGSEGV, we will never successfully execute a signal | |
1144 // handler. | |
1145 // Signal handlers are inherently dangerous, as an attacker could trick | |
1146 // us into returning to the wrong address by adjusting the signal stack | |
1147 // right before the handler returns. | |
1148 // N.B. While POSIX is curiously silent about this, it appears that on | |
1149 // Linux, alternate signal stacks are a per-thread property. That is | |
1150 // good. It means that this security mechanism works, even if the | |
1151 // sandboxed thread manages to set up an alternate signal stack. | |
1152 // | |
1153 // TODO(markus): We currently do not support emulating calls to | |
1154 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc | |
1155 // for a discussion on how to fix this, if this ever becomes necessary. | |
1156 "movd %%eax, %%mm3\n" // Request to return from clone() when done | |
1157 | |
1158 // Get thread id of nascent thread | |
1159 "20:mov $224, %%eax\n" // NR_gettid | |
1160 "int $0x80\n" | |
1161 "movd %%eax, %%mm4\n" | |
1162 | |
1163 // Nascent thread creates socketpair() for sending requests to | |
1164 // trusted thread. | |
1165 // We can create the filehandles on the child's stack. Filehandles are | |
1166 // always treated as untrusted. | |
1167 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) | |
1168 "mov $102, %%eax\n" // NR_socketcall | |
1169 "mov $8, %%ebx\n" // socketpair | |
1170 "sub $8, %%ebp\n" // sv = child_stack | |
1171 "mov %%ebp, -0x04(%%ebp)\n" | |
1172 "movl $0, -0x08(%%ebp)\n" // protocol = 0 | |
1173 "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM | |
1174 "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX | |
1175 "lea -0x10(%%ebp), %%ecx\n" | |
1176 "int $0x80\n" | |
1177 "test %%eax, %%eax\n" | |
1178 "jz 28f\n" | |
1179 | |
1180 // If things went wrong, we don't have an (easy) way of signaling | |
1181 // the parent. For our purposes, it is sufficient to fail with a | |
1182 // fatal error. | |
1183 "jmp 25f\n" // exit process | |
1184 "21:xor %%ecx, %%ecx\n" | |
1185 "xor %%edx, %%edx\n" | |
1186 "mov $7, %%eax\n" // NR_waitpid | |
1187 "int $0x80\n" | |
1188 "cmp $-4, %%eax\n" // EINTR | |
1189 "jz 21b\n" | |
1190 "jmp 23f\n" // exit thread (no message) | |
1191 "22:lea playground$syscall_mutex, %%ebx\n" | |
1192 "mov $4096, %%ecx\n" | |
1193 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
1194 "mov $125, %%eax\n" // NR_mprotect | |
1195 "int $0x80\n" | |
1196 "lock; addl $0x80000000, (%%ebx)\n" | |
1197 "jz 23f\n" // exit thread | |
1198 "mov $1, %%edx\n" | |
1199 "mov %%edx, %%ecx\n" // FUTEX_WAKE | |
1200 "mov $240, %%eax\n" // NR_futex | |
1201 "int $0x80\n" | |
1202 "23:mov $1, %%eax\n" // NR_exit | |
1203 "mov $1, %%ebx\n" // status = 1 | |
1204 "24:int $0x80\n" | |
1205 "25:mov $4, %%eax\n" // NR_write | |
1206 "mov $2, %%ebx\n" // fd = stderr | |
1207 "lea 100f, %%ecx\n" // "Sandbox violation detected" | |
1208 "mov $101f-100f, %%edx\n" // len = strlen(msg) | |
1209 "int $0x80\n" | |
1210 "26:mov $1, %%ebx\n" | |
1211 "27:mov $252, %%eax\n" // NR_exit_group | |
1212 "jmp 24b\n" | |
1213 | |
1214 // The first page is mapped read-only for use as securely shared memory | |
1215 "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem | |
1216 "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem | |
1217 "movd %%ebx, %%mm5\n" // %mm5 = secure_mem | |
1218 "movd %%mm2, %%esi\n" | |
1219 "cmp %%esi, 4(%%edi)\n" | |
1220 "jne 25b\n" // exit process | |
1221 "mov $125, %%eax\n" // NR_mprotect | |
1222 "mov $4096, %%ecx\n" // len = 4096 | |
1223 "mov $1, %%edx\n" // prot = PROT_READ | |
1224 "int $0x80\n" | |
1225 | |
1226 // The second page is used as scratch space by the trusted thread. | |
1227 // Make it writable. | |
1228 "mov $125, %%eax\n" // NR_mprotect | |
1229 "add $4096, %%ebx\n" // addr = secure_mem + 4096 | |
1230 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
1231 "int $0x80\n" | |
1232 | |
1233 // Call clone() to create the new trusted thread. | |
1234 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| | |
1235 // CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL) | |
1236 "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack) | |
1237 "movd %%eax, %%mm0\n" // %mm0 = threadFd | |
1238 "mov $120, %%eax\n" // NR_clone | |
1239 "mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR | |
1240 "mov $1, %%ecx\n" // stack = 1 | |
1241 "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub | |
1242 "cmp %%esi, 4(%%edi)\n" | |
1243 "jne 25b\n" // exit process | |
1244 "int $0x80\n" | |
1245 "test %%eax, %%eax\n" | |
1246 "js 25b\n" // exit process | |
1247 "jz 0b\n" // invoke trustedThreadFnc() | |
1248 | |
1249 // Set up thread local storage | |
1250 "mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable | |
1251 "mov %%eax, -0x04(%%ebp)\n" | |
1252 "mov $0xFFFFF, %%eax\n" // limit | |
1253 "mov %%eax, -0x08(%%ebp)\n" | |
1254 "movd %%mm5, %%eax\n" | |
1255 "add $0x58, %%eax\n" | |
1256 "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS | |
1257 "mov %%fs, %%eax\n" | |
1258 "shr $3, %%eax\n" | |
1259 "mov %%eax, -0x10(%%ebp)\n" // entry_number | |
1260 "mov $243, %%eax\n" // NR_set_thread_area | |
1261 "lea -0x10(%%ebp), %%ebx\n" | |
1262 "int $0x80\n" | |
1263 "test %%eax, %%eax\n" | |
1264 "jnz 25b\n" // exit process | |
1265 | |
1266 // Copy the caller's signal mask | |
1267 "movd %%mm5, %%edx\n" | |
1268 "mov 0x1038(%%edi), %%eax\n" | |
1269 "mov %%eax, 0x1038(%%edx)\n" | |
1270 "mov 0x103C(%%edi), %%eax\n" | |
1271 "mov %%eax, 0x103C(%%edx)\n" | |
1272 | |
1273 // Done creating trusted thread. We can now get ready to return to caller | |
1274 "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub | |
1275 "add $8, %%ebp\n" | |
1276 | |
1277 // Check whether this is the initial thread, or a newly created one. | |
1278 // At startup we run the same code as when we create a new thread. At | |
1279 // the very top of this function, you will find that we store 999f | |
1280 // in %%mm3. That is the signal that we should return on the same | |
1281 // stack rather than return to where clone was called. | |
1282 "movd %%mm3, %%eax\n" | |
1283 "movd %%mm2, %%edx\n" | |
1284 "test %%eax, %%eax\n" | |
1285 "jne 29f\n" | |
1286 | |
1287 // Returning from clone() into the newly created thread is special. We | |
1288 // cannot unroll the stack, as we just set up a new stack for this | |
1289 // thread. We have to explicitly restore CPU registers to the values | |
1290 // that they had when the program originally called clone(). | |
1291 // We patch the register values in the signal stack frame so that we | |
1292 // can ask sigreturn() to restore all registers for us. | |
1293 "sub $0x4, %%ebp\n" | |
1294 "mov 0x28(%%edi), %%eax\n" | |
1295 "mov %%eax, 0x00(%%ebp)\n" // return address | |
1296 "xor %%eax, %%eax\n" | |
1297 "mov %%eax, 0x30(%%ebp)\n" // %eax = 0 | |
1298 "mov 0x2C(%%edi), %%eax\n" | |
1299 "mov %%eax, 0x1C(%%ebp)\n" // %ebp | |
1300 "mov 0x30(%%edi), %%eax\n" | |
1301 "mov %%eax, 0x14(%%ebp)\n" // %edi | |
1302 "mov 0x34(%%edi), %%eax\n" | |
1303 "mov %%eax, 0x18(%%ebp)\n" // %esi | |
1304 "mov 0x38(%%edi), %%eax\n" | |
1305 "mov %%eax, 0x28(%%ebp)\n" // %edx | |
1306 "mov 0x3C(%%edi), %%eax\n" | |
1307 "mov %%eax, 0x2C(%%ebp)\n" // %ecx | |
1308 "mov 0x40(%%edi), %%eax\n" | |
1309 "mov %%eax, 0x24(%%ebp)\n" // %ebx | |
1310 "cmp %%edx, 4(%%edi)\n" | |
1311 "jne 25b\n" // exit process | |
1312 | |
1313 // Nascent thread launches a helper that doesn't share any of our | |
1314 // resources, except for pages mapped as MAP_SHARED. | |
1315 // clone(SIGCHLD, stack=1) | |
1316 "29:mov $120, %%eax\n" // NR_clone | |
1317 "mov $17, %%ebx\n" // flags = SIGCHLD | |
1318 "mov $1, %%ecx\n" // stack = 1 | |
1319 "int $0x80\n" | |
1320 "test %%eax, %%eax\n" | |
1321 "js 25b\n" // exit process | |
1322 "jne 31f\n" | |
1323 | |
1324 // Use sendmsg() to send to the trusted process the file handles for | |
1325 // communicating with the new trusted thread. We also send the address | |
1326 // of the secure memory area (for sanity checks) and the thread id. | |
1327 "cmp %%edx, 4(%%edi)\n" | |
1328 "jne 25b\n" // exit process | |
1329 | |
1330 // 0x00 socketcall: | |
1331 // 0x00 socket (0x4C(%edi)) | |
1332 // 0x04 msg (%ecx + 0x0C) | |
1333 // 0x08 flags ($0) | |
1334 // 0x0C msg: | |
1335 // 0x0C msg_name ($0) | |
1336 // 0x10 msg_namelen ($0) | |
1337 // 0x14 msg_iov (%ecx + 0x34) | |
1338 // 0x18 msg_iovlen ($1) | |
1339 // 0x1C msg_control (%ecx + 0x3C) | |
1340 // 0x20 msg_controllen ($0x14) | |
1341 // 0x24 data: | |
1342 // 0x24 msg_flags/err ($0) | |
1343 // 0x28 secure_mem (%mm5) | |
1344 // 0x2C threadId (%mm4) | |
1345 // 0x30 threadFdPub (%esi) | |
1346 // 0x34 iov: | |
1347 // 0x34 iov_base (%ecx + 0x24) | |
1348 // 0x38 iov_len ($0x10) | |
1349 // 0x3C cmsg: | |
1350 // 0x3C cmsg_len ($0x14) | |
1351 // 0x40 cmsg_level ($1, SOL_SOCKET) | |
1352 // 0x44 cmsg_type ($1, SCM_RIGHTS) | |
1353 // 0x48 threadFdPub (%esi) | |
1354 // 0x4C threadFd (%mm0) | |
1355 // 0x50 | |
1356 "lea -0x50(%%ebp), %%ecx\n" | |
1357 "xor %%eax, %%eax\n" | |
1358 "mov %%eax, 0x08(%%ecx)\n" // flags | |
1359 "mov %%eax, 0x0C(%%ecx)\n" // msg_name | |
1360 "mov %%eax, 0x10(%%ecx)\n" // msg_namelen | |
1361 "mov %%eax, 0x24(%%ecx)\n" // msg_flags | |
1362 "inc %%eax\n" | |
1363 "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen | |
1364 "mov %%eax, 0x40(%%ecx)\n" // cmsg_level | |
1365 "mov %%eax, 0x44(%%ecx)\n" // cmsg_type | |
1366 "movl $0x10, 0x38(%%ecx)\n" // iov_len | |
1367 "mov $0x14, %%eax\n" | |
1368 "mov %%eax, 0x20(%%ecx)\n" // msg_controllen | |
1369 "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len | |
1370 "mov 0x4C(%%edi), %%eax\n" // cloneFdPub | |
1371 "mov %%eax, 0x00(%%ecx)\n" // socket | |
1372 "lea 0x0C(%%ecx), %%eax\n" | |
1373 "mov %%eax, 0x04(%%ecx)\n" // msg | |
1374 "add $0x18, %%eax\n" | |
1375 "mov %%eax, 0x34(%%ecx)\n" // iov_base | |
1376 "add $0x10, %%eax\n" | |
1377 "mov %%eax, 0x14(%%ecx)\n" // msg_iov | |
1378 "add $8, %%eax\n" | |
1379 "mov %%eax, 0x1C(%%ecx)\n" // msg_control | |
1380 "mov %%esi, 0x30(%%ecx)\n" // threadFdPub | |
1381 "mov %%esi, 0x48(%%ecx)\n" // threadFdPub | |
1382 "movd %%mm5, %%eax\n" | |
1383 "mov %%eax, 0x28(%%ecx)\n" // secure_mem | |
1384 "movd %%mm4, %%eax\n" | |
1385 "mov %%eax, 0x2C(%%ecx)\n" // threadId | |
1386 "movd %%mm0, %%eax\n" | |
1387 "mov %%eax, 0x4C(%%ecx)\n" // threadFd | |
1388 "mov $16, %%ebx\n" // sendmsg() | |
1389 "mov $102, %%eax\n" // NR_socketcall | |
1390 "int $0x80\n" | |
1391 | |
1392 // Release syscall_mutex_. This signals the trusted process that | |
1393 // it can write into the original thread's secure memory again. | |
1394 "mov $125, %%eax\n" // NR_mprotect | |
1395 "lea playground$syscall_mutex, %%ebx\n" | |
1396 "mov $4096, %%ecx\n" | |
1397 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE | |
1398 "int $0x80\n" | |
1399 "movd %%mm2, %%edx\n" | |
1400 "cmp %%edx, 0x4(%%edi)\n" | |
1401 "jnz 25b\n" // exit process | |
1402 "lock; addl $0x80000000, (%%ebx)\n" | |
1403 "jz 30f\n" // exit process (no error message) | |
1404 "mov $1, %%edx\n" | |
1405 "mov %%edx, %%ecx\n" // FUTEX_WAKE | |
1406 "mov $240, %%eax\n" // NR_futex | |
1407 "int $0x80\n" | |
1408 "30:xor %%ebx, %%ebx\n" | |
1409 "jmp 27b\n" // exit process (no error message) | |
1410 | |
1411 // Reap helper | |
1412 "31:mov %%eax, %%ebx\n" | |
1413 "32:lea -4(%%ebp), %%ecx\n" | |
1414 "xor %%edx, %%edx\n" | |
1415 "mov $7, %%eax\n" // NR_waitpid | |
1416 "int $0x80\n" | |
1417 "cmp $-4, %%eax\n" // EINTR | |
1418 "jz 32b\n" | |
1419 "mov -4(%%ebp), %%eax\n" | |
1420 "test %%eax, %%eax\n" | |
1421 "jnz 26b\n" // exit process (no error message) | |
1422 | |
1423 // Release privileges by entering seccomp mode. | |
1424 "33:mov $172, %%eax\n" // NR_prctl | |
1425 "mov $22, %%ebx\n" // PR_SET_SECCOMP | |
1426 "mov $1, %%ecx\n" | |
1427 "int $0x80\n" | |
1428 "test %%eax, %%eax\n" | |
1429 "jnz 25b\n" // exit process | |
1430 | |
1431 // We can finally start using the stack. Signal handlers no longer pose | |
1432 // a threat to us. | |
1433 "mov %%ebp, %%esp\n" | |
1434 | |
1435 // Back in the newly created sandboxed thread, wait for trusted process | |
1436 // to receive request. It is possible for an attacker to make us | |
1437 // continue even before the trusted process is done. This is OK. It'll | |
1438 // result in us putting stale values into the new thread's TLS. But that | |
1439 // data is considered untrusted anyway. | |
1440 "push %%eax\n" | |
1441 "mov $1, %%edx\n" // len = 1 | |
1442 "mov %%esp, %%ecx\n" // buf = %esp | |
1443 "mov %%esi, %%ebx\n" // fd = threadFdPub | |
1444 "34:mov $3, %%eax\n" // NR_read | |
1445 "int $0x80\n" | |
1446 "cmp $-4, %%eax\n" // EINTR | |
1447 "jz 34b\n" | |
1448 "cmp %%edx, %%eax\n" | |
1449 "jne 25b\n" // exit process | |
1450 "pop %%eax\n" | |
1451 | |
1452 // Return to caller. We are in the new thread, now. | |
1453 "movd %%mm3, %%ebx\n" | |
1454 "test %%ebx, %%ebx\n" | |
1455 "jnz 35f\n" // Returning to createTrustedThread() | |
1456 | |
1457 // Returning to the place where clone() had been called. We rely on | |
1458 // using sigreturn() for restoring our registers. The caller already | |
1459 // created a signal stack frame, and we patched the register values | |
1460 // with the ones that were in effect prior to calling sandbox_clone(). | |
1461 "pop %%ebx\n" | |
1462 "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip | |
1463 "mov $119, %%eax\n" // NR_sigreturn | |
1464 "int $0x80\n" | |
1465 | |
1466 ".pushsection \".rodata\"\n" | |
1467 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" | |
1468 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" | |
1469 "102:\n" | |
1470 ".popsection\n" | |
1471 | |
1472 "999:pop %%ebp\n" | |
1473 "pop %%ebx\n" | |
1474 : | |
1475 : "g"(&args) | |
1476 : "eax", "ecx", "edx", "edi", "esi", "esp", "memory" | |
1477 #else | |
1478 #error Unsupported target platform | |
1479 #endif | |
1480 ); | |
1481 } | |
1482 | |
1483 } // namespace | |
OLD | NEW |