Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Side by Side Diff: sandbox/linux/seccomp/trusted_thread.cc

Issue 3225010: Pull seccomp-sandbox in via DEPS rather than using an in-tree copy... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sandbox/linux/seccomp/trusted_process.cc ('k') | sandbox/linux/seccomp/x86_decode.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox_impl.h"
6 #include "syscall_table.h"
7
8 namespace playground {
9
10 void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
11 SecureMem::Args* secureMem) {
12 SecureMem::Args args = { { { { { 0 } } } } };
13 args.self = &args;
14 args.newSecureMem = secureMem;
15 args.processFdPub = processFdPub;
16 args.cloneFdPub = cloneFdPub;
17 #if defined(__x86_64__)
18 asm volatile(
19 "push %%rbx\n"
20 "push %%rbp\n"
21 "mov %0, %%rbp\n" // %rbp = args
22 "xor %%rbx, %%rbx\n" // initial sequence number
23 "lea 999f(%%rip), %%r15\n" // continue in same thread
24
25 // Signal handlers are process-wide. This means that for security
26 // reasons, we cannot allow that the trusted thread ever executes any
27 // signal handlers.
28 // We prevent the execution of signal handlers by setting a signal
29 // mask that blocks all signals. In addition, we make sure that the
30 // stack pointer is invalid.
31 // We cannot reset the signal mask until after we have enabled
32 // Seccomp mode. Our sigprocmask() wrapper would normally do this by
33 // raising a signal, modifying the signal mask in the kernel-generated
34 // signal frame, and then calling sigreturn(). This presents a bit of
35 // a Catch-22, as all signals are masked and we can therefore not
36 // raise any signal that would allow us to generate the signal stack
37 // frame.
38 // Instead, we have to create the signal stack frame prior to entering
39 // Seccomp mode. This incidentally also helps us to restore the
40 // signal mask to the same value that it had prior to entering the
41 // sandbox.
42 // The signal wrapper for clone() is the second entry point into this
43 // code (by means of sending an IPC to its trusted thread). It goes
44 // through the same steps of creating a signal stack frame on the
45 // newly created thread's stacks prior to cloning. See clone.cc for
46 // details.
47 "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000
48 "mov %%rsp, %%rcx\n"
49 "int $0\n" // push a signal stack frame (see clone.cc)
50 "mov %%rcx, 0xA0(%%rsp)\n" // pop stack upon call to sigreturn()
51 "mov %%rsp, %%r9\n"
52 "mov $2, %%rdi\n" // how = SIG_SETMASK
53 "pushq $-1\n"
54 "mov %%rsp, %%rsi\n" // set = full mask
55 "xor %%rdx, %%rdx\n" // old_set = NULL
56 "mov $8, %%r10\n" // mask all 64 signals
57 "mov $14, %%eax\n" // NR_rt_sigprocmask
58 "syscall\n"
59 "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code
60 "jmp 20f\n" // create trusted thread
61
62 // TODO(markus): Coalesce the read() operations by reading into a bigger
63 // buffer.
64
65 // Parameters:
66 // *%fs: secure memory region
67 // the page following this one contains the scratch space
68 // %r13: thread's side of threadFd
69 // %r15: processFdPub
70
71 // Local variables:
72 // %rbx: sequence number for trusted calls
73
74 // Temporary variables:
75 // %r8: child stack
76 // %r9: system call number, child stack
77 // %rbp: secure memory of previous thread
78
79 // Layout of secure shared memory region (c.f. securemem.h):
80 // 0x00: pointer to the secure shared memory region (i.e. self)
81 // 0x08: sequence number; must match %rbx
82 // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2
83 // 0x18: system call number; passed to syscall in %rax
84 // 0x20: first argument; passed to syscall in %rdi
85 // 0x28: second argument; passed to syscall in %rsi
86 // 0x30: third argument; passed to syscall in %rdx
87 // 0x38: fourth argument; passed to syscall in %r10
88 // 0x40: fifth argument; passed to syscall in %r8
89 // 0x48: sixth argument; passed to syscall in %r9
90 // 0x50: stored return address for clone() system call
91 // 0x58: stored %rbp value for clone() system call
92 // 0x60: stored %rbx value for clone() system call
93 // 0x68: stored %rcx value for clone() system call
94 // 0x70: stored %rdx value for clone() system call
95 // 0x78: stored %rsi value for clone() system call
96 // 0x80: stored %rdi value for clone() system call
97 // 0x88: stored %r8 value for clone() system call
98 // 0x90: stored %r9 value for clone() system call
99 // 0x98: stored %r10 value for clone() system call
100 // 0xA0: stored %r11 value for clone() system call
101 // 0xA8: stored %r12 value for clone() system call
102 // 0xB0: stored %r13 value for clone() system call
103 // 0xB8: stored %r14 value for clone() system call
104 // 0xC0: stored %r15 value for clone() system call
105 // 0xC8: new shared memory for clone()
106 // 0xD0: processFdPub for talking to trusted process
107 // 0xD4: cloneFdPub for talking to trusted process
108 // 0xD8: set to non-zero, if in debugging mode
109 // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE)
110 // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE)
111 // 0xE8: thread id (TLS_TID)
112 // 0xF0: threadFdPub (TLS_THREAD_FD)
113 // 0x200-0x1000: securely passed verified file name(s)
114
115 // Layout of (untrusted) scratch space:
116 // 0x00: syscall number; passed in %rax
117 // 0x04: first argument; passed in %rdi
118 // 0x0C: second argument; passed in %rsi
119 // 0x14: third argument; passed in %rdx
120 // 0x1C: fourth argument; passed in %r10
121 // 0x24: fifth argument; passed in %r8
122 // 0x2C: sixth argument; passed in %r9
123 // 0x34: return value
124 // 0x3C: RDTSCP result (%eax)
125 // 0x40: RDTSCP result (%edx)
126 // 0x44: RDTSCP result (%ecx)
127 // 0x48: last system call (not used on x86-64)
128 // 0x4C: number of consecutive calls to a time fnc (not used on x86-64)
129 // 0x50: nesting level of system calls (for debugging purposes only)
130 // 0x54: signal mask
131 // 0x5C: in SEGV handler
132
133 // We use the %fs register for accessing the secure read-only page, and
134 // the untrusted scratch space immediately following it. The segment
135 // register and the local descriptor table are set up by passing
136 // appropriate arguments to clone().
137
138 "0:xor %%rsp, %%rsp\n"
139 "mov $2, %%ebx\n" // %rbx = initial sequence number
140
141 // Read request from untrusted thread, or from trusted process. In either
142 // case, the data that we read has to be considered untrusted.
143 // read(threadFd, &scratch, 4)
144 "1:xor %%rax, %%rax\n" // NR_read
145 "mov %%r13, %%rdi\n" // fd = threadFd
146 "mov %%fs:0x0, %%rsi\n" // secure_mem
147 "add $0x1000, %%rsi\n" // buf = &scratch
148 "mov $4, %%edx\n" // len = 4
149 "2:syscall\n"
150 "cmp $-4, %%rax\n" // EINTR
151 "jz 2b\n"
152 "cmp %%rdx, %%rax\n"
153 "jnz 25f\n" // exit process
154
155 // Retrieve system call number. It is crucial that we only dereference
156 // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and
157 // we must use the value that we have read the first time.
158 "mov 0(%%rsi), %%eax\n"
159
160 // If syscall number is -1, execute an unlocked system call from the
161 // secure memory area
162 "cmp $-1, %%eax\n"
163 "jnz 5f\n"
164 "3:cmp %%rbx, %%fs:0x8\n"
165 "jne 25f\n" // exit process
166 "cmp %%fs:0x10, %%eax\n"
167 "jne 25f\n" // exit process
168 "mov %%fs:0x18, %%rax\n"
169 "mov %%fs:0x20, %%rdi\n"
170 "mov %%fs:0x28, %%rsi\n"
171 "mov %%fs:0x30, %%rdx\n"
172 "mov %%fs:0x38, %%r10\n"
173 "mov %%fs:0x40, %%r8\n"
174 "mov %%fs:0x48, %%r9\n"
175 "cmp %%rbx, %%fs:0x8\n"
176 "jne 25f\n" // exit process
177 "add $2, %%rbx\n"
178
179 // shmget() gets some special treatment. Whenever we return from this
180 // system call, we remember the most recently returned SysV shm id.
181 "cmp $29, %%eax\n" // NR_shmget
182 "jnz 4f\n"
183 "syscall\n"
184 "mov %%rax, %%r8\n"
185 "mov $56, %%eax\n" // NR_clone
186 "mov $17, %%edi\n" // flags = SIGCHLD
187 "mov $1, %%esi\n" // stack = 1
188 "syscall\n"
189 "test %%rax, %%rax\n"
190 "js 25f\n" // exit process
191 "mov %%rax, %%rdi\n"
192 "jnz 8f\n" // wait for child, then return result
193 "mov %%fs:0x0, %%rdi\n" // start = secure_mem
194 "mov $4096, %%esi\n" // len = 4096
195 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
196 "mov $10, %%eax\n" // NR_mprotect
197 "syscall\n"
198 "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id
199 "xor %%rdi, %%rdi\n"
200
201 // When debugging messages are enabled, warn about expensive system calls
202 #ifndef NDEBUG
203 "cmpw $0, %%fs:0xD8\n" // debug mode
204 "jz 27f\n"
205 "mov $1, %%eax\n" // NR_write
206 "mov $2, %%edi\n" // fd = stderr
207 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call"
208 "mov $102f-101f, %%edx\n" // len = strlen(msg)
209 "syscall\n"
210 "xor %%rdi, %%rdi\n"
211 #endif
212
213 "jmp 27f\n" // exit program, no message
214 "4:syscall\n"
215 "jmp 15f\n" // return result
216
217 // If syscall number is -2, execute locked system call from the
218 // secure memory area
219 "5:jg 12f\n"
220 "cmp $-2, %%eax\n"
221 "jnz 9f\n"
222 "cmp %%rbx, %%fs:0x8\n"
223 "jne 25f\n" // exit process
224 "cmp %%eax, %%fs:0x10\n"
225 "jne 25f\n" // exit process
226
227 // When debugging messages are enabled, warn about expensive system calls
228 #ifndef NDEBUG
229 "cmpw $0, %%fs:0xD8\n" // debug mode
230 "jz 6f\n"
231 "mov $1, %%eax\n" // NR_write
232 "mov $2, %%edi\n" // fd = stderr
233 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call"
234 "mov $102f-101f, %%edx\n" // len = strlen(msg)
235 "syscall\n"
236 "6:"
237 #endif
238
239 "mov %%fs:0x18, %%rax\n"
240 "mov %%fs:0x20, %%rdi\n"
241 "mov %%fs:0x28, %%rsi\n"
242 "mov %%fs:0x30, %%rdx\n"
243 "mov %%fs:0x38, %%r10\n"
244 "mov %%fs:0x40, %%r8\n"
245 "mov %%fs:0x48, %%r9\n"
246 "cmp %%rbx, %%fs:0x8\n"
247 "jne 25f\n" // exit process
248
249 // clone() has unusual calling conventions and must be handled specially
250 "cmp $56, %%rax\n" // NR_clone
251 "jz 19f\n"
252
253 // exit() terminates trusted thread
254 "cmp $60, %%eax\n" // NR_exit
255 "jz 18f\n"
256
257 // Perform requested system call
258 "syscall\n"
259
260 // Unlock mutex
261 "7:cmp %%rbx, %%fs:0x8\n"
262 "jne 25f\n" // exit process
263 "add $2, %%rbx\n"
264 "mov %%rax, %%r8\n"
265 "mov $56, %%eax\n" // NR_clone
266 "mov $17, %%rdi\n" // flags = SIGCHLD
267 "mov $1, %%rsi\n" // stack = 1
268 "syscall\n"
269 "test %%rax, %%rax\n"
270 "js 25f\n" // exit process
271 "jz 22f\n" // unlock and exit
272 "mov %%rax, %%rdi\n"
273 "8:xor %%rsi, %%rsi\n"
274 "xor %%rdx, %%rdx\n"
275 "xor %%r10, %%r10\n"
276 "mov $61, %%eax\n" // NR_wait4
277 "syscall\n"
278 "cmp $-4, %%eax\n" // EINTR
279 "jz 8b\n"
280 "mov %%r8, %%rax\n"
281 "jmp 15f\n" // return result
282
283 // If syscall number is -3, read the time stamp counter
284 "9:cmp $-3, %%eax\n"
285 "jnz 10f\n"
286 "rdtsc\n" // sets %edx:%eax
287 "xor %%rcx, %%rcx\n"
288 "jmp 11f\n"
289 "10:cmp $-4, %%eax\n"
290 "jnz 12f\n"
291 "rdtscp\n" // sets %edx:%eax and %ecx
292 "11:add $0x3C, %%rsi\n"
293 "mov %%eax, 0(%%rsi)\n"
294 "mov %%edx, 4(%%rsi)\n"
295 "mov %%ecx, 8(%%rsi)\n"
296 "mov $12, %%edx\n"
297 "jmp 16f\n" // return result
298
299 // Check in syscallTable whether this system call is unrestricted
300 "12:mov %%rax, %%r9\n"
301 #ifndef NDEBUG
302 "cmpw $0, %%fs:0xD8\n" // debug mode
303 "jnz 13f\n"
304 #endif
305 "cmp playground$maxSyscall(%%rip), %%eax\n"
306 "ja 25f\n" // exit process
307 "shl $4, %%rax\n"
308 "lea playground$syscallTable(%%rip), %%rdi\n"
309 "add %%rdi, %%rax\n"
310 "mov 0(%%rax), %%rax\n"
311 "cmp $1, %%rax\n"
312 "jne 25f\n" // exit process
313
314 // Default behavior for unrestricted system calls is to just execute
315 // them. Read the remaining arguments first.
316 "13:mov %%rsi, %%r8\n"
317 "xor %%rax, %%rax\n" // NR_read
318 "mov %%r13, %%rdi\n" // fd = threadFd
319 "add $4, %%rsi\n" // buf = &scratch + 4
320 "mov $48, %%edx\n" // len = 6*sizeof(void *)
321 "14:syscall\n"
322 "cmp $-4, %%rax\n" // EINTR
323 "jz 14b\n"
324 "cmp %%rdx, %%rax\n"
325 "jnz 25f\n" // exit process
326 "mov %%r9, %%rax\n"
327 "mov 0x04(%%r8), %%rdi\n"
328 "mov 0x0C(%%r8), %%rsi\n"
329 "mov 0x14(%%r8), %%rdx\n"
330 "mov 0x1C(%%r8), %%r10\n"
331 "mov 0x2C(%%r8), %%r9\n"
332 "mov 0x24(%%r8), %%r8\n"
333 "cmp $231, %%rax\n" // NR_exit_group
334 "jz 27f\n" // exit program, no message
335 "syscall\n"
336
337 // Return result of system call to sandboxed thread
338 "15:mov %%fs:0x0, %%rsi\n" // secure_mem
339 "add $0x1034, %%rsi\n" // buf = &scratch + 52
340 "mov %%rax, (%%rsi)\n"
341 "mov $8, %%edx\n" // len = 8
342 "16:mov %%r13, %%rdi\n" // fd = threadFd
343 "mov $1, %%eax\n" // NR_write
344 "17:syscall\n"
345 "cmp %%rdx, %%rax\n"
346 "jz 1b\n"
347 "cmp $-4, %%rax\n" // EINTR
348 "jz 17b\n"
349 "jmp 25f\n" // exit process
350
351 // NR_exit:
352 // Exit trusted thread after cleaning up resources
353 "18:mov %%fs:0x0, %%rsi\n" // secure_mem
354 "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub
355 "mov $3, %%eax\n" // NR_close
356 "syscall\n"
357 "mov %%rsi, %%rdi\n" // start = secure_mem
358 "mov $8192, %%esi\n" // length = 8192
359 "xor %%rdx, %%rdx\n" // prot = PROT_NONE
360 "mov $10, %%eax\n" // NR_mprotect
361 "syscall\n"
362 "mov %%r13, %%rdi\n" // fd = threadFd
363 "mov $3, %%eax\n" // NR_close
364 "syscall\n"
365 "mov $56, %%eax\n" // NR_clone
366 "mov $17, %%rdi\n" // flags = SIGCHLD
367 "mov $1, %%rsi\n" // stack = 1
368 "syscall\n"
369 "mov %%rax, %%rdi\n"
370 "test %%rax, %%rax\n"
371 "js 27f\n" // exit process
372 "jne 21f\n" // reap helper, exit thread
373 "jmp 22f\n" // unlock mutex
374
375 // NR_clone:
376 // Original trusted thread calls clone() to create new nascent
377 // thread. This thread is (typically) fully privileged and shares all
378 // resources with the caller (i.e. the previous trusted thread),
379 // and by extension it shares all resources with the sandbox'd
380 // threads.
381 "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem
382 "mov %%rsi, %%r15\n" // remember child stack
383 "mov $1, %%rsi\n" // stack = 1
384 "syscall\n" // calls NR_clone
385 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values
386 "jae 7b\n" // unlock mutex, return result
387 "add $2, %%rbx\n"
388 "test %%rax, %%rax\n"
389 "jne 15b\n" // return result
390
391 // In nascent thread, now.
392 "sub $2, %%rbx\n"
393
394 // We want to maintain an invalid %rsp whenever we access untrusted
395 // memory. This ensures that even if an attacker can trick us into
396 // triggering a SIGSEGV, we will never successfully execute a signal
397 // handler.
398 // Signal handlers are inherently dangerous, as an attacker could trick
399 // us into returning to the wrong address by adjusting the signal stack
400 // right before the handler returns.
401 // N.B. While POSIX is curiously silent about this, it appears that on
402 // Linux, alternate signal stacks are a per-thread property. That is
403 // good. It means that this security mechanism works, even if the
404 // sandboxed thread manages to set up an alternate signal stack.
405 //
406 // TODO(markus): We currently do not support emulating calls to
407 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
408 // for a discussion on how to fix this, if this ever becomes necessary.
409 "mov %%r15, %%r9\n" // %r9 = child_stack
410 "xor %%r15, %%r15\n" // Request to return from clone() when done
411
412 // Get thread id of nascent thread
413 "20:mov $186, %%eax\n" // NR_gettid
414 "syscall\n"
415 "mov %%rax, %%r14\n"
416
417 // Nascent thread creates socketpair() for sending requests to
418 // trusted thread.
419 // We can create the filehandles on the child's stack. Filehandles are
420 // always treated as untrusted.
421 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
422 "sub $0x10, %%r9\n"
423 "mov %%r15, 8(%%r9)\n" // preserve return address on child stack
424 "mov $53, %%eax\n" // NR_socketpair
425 "mov $1, %%edi\n" // domain = AF_UNIX
426 "mov $1, %%esi\n" // type = SOCK_STREAM
427 "xor %%rdx, %%rdx\n" // protocol = 0
428 "mov %%r9, %%r10\n" // sv = child_stack
429 "syscall\n"
430 "test %%rax, %%rax\n"
431 "jz 28f\n"
432
433 // If things went wrong, we don't have an (easy) way of signaling
434 // the parent. For our purposes, it is sufficient to fail with a
435 // fatal error.
436 "jmp 25f\n" // exit process
437 "21:xor %%rsi, %%rsi\n"
438 "xor %%rdx, %%rdx\n"
439 "xor %%r10, %%r10\n"
440 "mov $61, %%eax\n" // NR_wait4
441 "syscall\n"
442 "cmp $-4, %%eax\n" // EINTR
443 "jz 21b\n"
444 "jmp 23f\n" // exit thread (no message)
445 "22:lea playground$syscall_mutex(%%rip), %%rdi\n"
446 "mov $4096, %%esi\n"
447 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
448 "mov $10, %%eax\n" // NR_mprotect
449 "syscall\n"
450 "lock; addl $0x80000000, (%%rdi)\n"
451 "jz 23f\n" // exit thread
452 "mov $1, %%edx\n"
453 "mov %%rdx, %%rsi\n" // FUTEX_WAKE
454 "mov $202, %%eax\n" // NR_futex
455 "syscall\n"
456 "23:mov $60, %%eax\n" // NR_exit
457 "mov $1, %%edi\n" // status = 1
458 "24:syscall\n"
459 "25:mov $1, %%eax\n" // NR_write
460 "mov $2, %%edi\n" // fd = stderr
461 "lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected"
462 "mov $101f-100f, %%edx\n" // len = strlen(msg)
463 "syscall\n"
464 "26:mov $1, %%edi\n"
465 "27:mov $231, %%eax\n" // NR_exit_group
466 "jmp 24b\n"
467
468 // The first page is mapped read-only for use as securely shared memory
469 "28:mov 0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory
470 "cmp %%rbx, 8(%%rbp)\n"
471 "jne 25b\n" // exit process
472 "mov $10, %%eax\n" // NR_mprotect
473 "mov %%r12, %%rdi\n" // addr = secure_mem
474 "mov $4096, %%esi\n" // len = 4096
475 "mov $1, %%edx\n" // prot = PROT_READ
476 "syscall\n"
477
478 // The second page is used as scratch space by the trusted thread.
479 // Make it writable.
480 "mov $10, %%eax\n" // NR_mprotect
481 "add $4096, %%rdi\n" // addr = secure_mem + 4096
482 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
483 "syscall\n"
484
485 // Call clone() to create new trusted thread().
486 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
487 // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
488 // tls)
489 "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack)
490 "mov $56, %%eax\n" // NR_clone
491 "mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
492 "mov $1, %%rsi\n" // stack = 1
493 "mov %%r12, %%r8\n" // tls = new_secure_mem
494 "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub
495 "cmp %%rbx, 8(%%rbp)\n"
496 "jne 25b\n" // exit process
497 "syscall\n"
498 "test %%rax, %%rax\n"
499 "js 25b\n" // exit process
500 "jz 0b\n" // invoke trustedThreadFnc()
501
502 // Copy the caller's signal mask
503 "mov 0x1054(%%rbp), %%rax\n"
504 "mov %%rax, 0x1054(%%r12)\n"
505
506 // Done creating trusted thread. We can now get ready to return to caller
507 "mov %%r9, %%r8\n" // %r8 = child_stack
508 "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub
509
510 // Set up thread local storage with information on how to talk to
511 // trusted thread and trusted process.
512 "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS;
513 "mov $158, %%eax\n" // NR_arch_prctl
514 "mov $0x1001, %%edi\n" // option = ARCH_SET_GS
515 "syscall\n"
516 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values
517 "jae 25b\n" // exit process
518
519 // Check whether this is the initial thread, or a newly created one.
520 // At startup we run the same code as when we create a new thread. At
521 // the very top of this function, you will find that we push 999(%rip)
522 // on the stack. That is the signal that we should return on the same
523 // stack rather than return to where clone was called.
524 "mov 8(%%r8), %%r15\n"
525 "add $0x10, %%r8\n"
526 "test %%r15, %%r15\n"
527 "jne 29f\n"
528
529 // Returning from clone() into the newly created thread is special. We
530 // cannot unroll the stack, as we just set up a new stack for this
531 // thread. We have to explicitly restore CPU registers to the values
532 // that they had when the program originally called clone().
533 // We patch the register values in the signal stack frame so that we
534 // can ask sigreturn() to restore all registers for us.
535 "sub $0x8, %%r8\n"
536 "mov 0x50(%%rbp), %%rax\n"
537 "mov %%rax, 0x00(%%r8)\n" // return address
538 "xor %%rax, %%rax\n"
539 "mov %%rax, 0x98(%%r8)\n" // %rax = 0
540 "mov 0x58(%%rbp), %%rax\n"
541 "mov %%rax, 0x80(%%r8)\n" // %rbp
542 "mov 0x60(%%rbp), %%rax\n"
543 "mov %%rax, 0x88(%%r8)\n" // %rbx
544 "mov 0x68(%%rbp), %%rax\n"
545 "mov %%rax, 0xA0(%%r8)\n" // %rcx
546 "mov 0x70(%%rbp), %%rax\n"
547 "mov %%rax, 0x90(%%r8)\n" // %rdx
548 "mov 0x78(%%rbp), %%rax\n"
549 "mov %%rax, 0x78(%%r8)\n" // %rsi
550 "mov 0x80(%%rbp), %%rax\n"
551 "mov %%rax, 0x70(%%r8)\n" // %rdi
552 "mov 0x88(%%rbp), %%rax\n"
553 "mov %%rax, 0x30(%%r8)\n" // %r8
554 "mov 0x90(%%rbp), %%rax\n"
555 "mov %%rax, 0x38(%%r8)\n" // %r9
556 "mov 0x98(%%rbp), %%rax\n"
557 "mov %%rax, 0x40(%%r8)\n" // %r10
558 "mov 0xA0(%%rbp), %%rax\n"
559 "mov %%rax, 0x48(%%r8)\n" // %r11
560 "mov 0xA8(%%rbp), %%rax\n"
561 "mov %%rax, 0x50(%%r8)\n" // %r12
562 "mov 0xB0(%%rbp), %%rax\n"
563 "mov %%rax, 0x58(%%r8)\n" // %r13
564 "mov 0xB8(%%rbp), %%rax\n"
565 "mov %%rax, 0x60(%%r8)\n" // %r14
566 "mov 0xC0(%%rbp), %%rax\n"
567 "mov %%rax, 0x68(%%r8)\n" // %r15
568 "cmp %%rbx, 8(%%rbp)\n"
569 "jne 25b\n" // exit process
570
571 // Nascent thread launches a helper that doesn't share any of our
572 // resources, except for pages mapped as MAP_SHARED.
573 // clone(SIGCHLD, stack=1)
574 "29:mov $56, %%eax\n" // NR_clone
575 "mov $17, %%rdi\n" // flags = SIGCHLD
576 "mov $1, %%rsi\n" // stack = 1
577 "syscall\n"
578 "test %%rax, %%rax\n"
579 "js 25b\n" // exit process
580 "jne 31f\n"
581
582 // Use sendmsg() to send to the trusted process the file handles for
583 // communicating with the new trusted thread. We also send the address
584 // of the secure memory area (for sanity checks) and the thread id.
585 "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub()
586 "cmp %%rbx, 8(%%rbp)\n"
587 "jne 25b\n" // exit process
588
589 // 0x00 msg:
590 // 0x00 msg_name ($0)
591 // 0x08 msg_namelen ($0)
592 // 0x10 msg_iov (%r8 + 0x44)
593 // 0x18 msg_iovlen ($1)
594 // 0x20 msg_control (%r8 + 0x54)
595 // 0x28 msg_controllen ($0x18)
596 // 0x30 data:
597 // 0x30 msg_flags/err ($0)
598 // 0x34 secure_mem (%r12)
599 // 0x3C threadId (%r14d)
600 // 0x40 threadFdPub (%r9d)
601 // 0x44 iov:
602 // 0x44 iov_base (%r8 + 0x30)
603 // 0x4C iov_len ($0x14)
604 // 0x54 cmsg:
605 // 0x54 cmsg_len ($0x18)
606 // 0x5C cmsg_level ($1, SOL_SOCKET)
607 // 0x60 cmsg_type ($1, SCM_RIGHTS)
608 // 0x64 threadFdPub (%r9d)
609 // 0x68 threadFd (%r13d)
610 // 0x6C
611 "sub $0x6C, %%r8\n"
612 "xor %%rdx, %%rdx\n" // flags = 0
613 "mov %%rdx, 0x00(%%r8)\n" // msg_name
614 "mov %%edx, 0x08(%%r8)\n" // msg_namelen
615 "mov %%edx, 0x30(%%r8)\n" // msg_flags
616 "mov $1, %%r11d\n"
617 "mov %%r11, 0x18(%%r8)\n" // msg_iovlen
618 "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level
619 "mov %%r11d, 0x60(%%r8)\n" // cmsg_type
620 "lea 0x30(%%r8), %%r11\n"
621 "mov %%r11, 0x44(%%r8)\n" // iov_base
622 "add $0x14, %%r11\n"
623 "mov %%r11, 0x10(%%r8)\n" // msg_iov
624 "add $0x10, %%r11\n"
625 "mov %%r11, 0x20(%%r8)\n" // msg_control
626 "mov $0x14, %%r11d\n"
627 "mov %%r11, 0x4C(%%r8)\n" // iov_len
628 "add $4, %%r11d\n"
629 "mov %%r11, 0x28(%%r8)\n" // msg_controllen
630 "mov %%r11, 0x54(%%r8)\n" // cmsg_len
631 "mov %%r12, 0x34(%%r8)\n" // secure_mem
632 "mov %%r14d, 0x3C(%%r8)\n" // threadId
633 "mov %%r9d, 0x40(%%r8)\n" // threadFdPub
634 "mov %%r9d, 0x64(%%r8)\n" // threadFdPub
635 "mov %%r13d, 0x68(%%r8)\n" // threadFd
636 "mov $46, %%eax\n" // NR_sendmsg
637 "mov %%r8, %%rsi\n" // msg
638 "syscall\n"
639
640 // Release syscall_mutex_. This signals the trusted process that
641 // it can write into the original thread's secure memory again.
642 "mov $10, %%eax\n" // NR_mprotect
643 "lea playground$syscall_mutex(%%rip), %%rdi\n"
644 "mov $4096, %%esi\n"
645 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE
646 "syscall\n"
647 "cmp %%rbx, 8(%%rbp)\n"
648 "jne 25b\n" // exit process
649 "lock; addl $0x80000000, (%%rdi)\n"
650 "jz 30f\n" // exit process (no error message)
651 "mov $1, %%edx\n"
652 "mov %%rdx, %%rsi\n" // FUTEX_WAKE
653 "mov $202, %%eax\n" // NR_futex
654 "syscall\n"
655 "30:xor %%rdi, %%rdi\n"
656 "jmp 27b\n" // exit process (no error message)
657
658 // Reap helper
659 "31:mov %%rax, %%rdi\n"
660 "32:lea -4(%%r8), %%rsi\n"
661 "xor %%rdx, %%rdx\n"
662 "xor %%r10, %%r10\n"
663 "mov $61, %%eax\n" // NR_wait4
664 "syscall\n"
665 "cmp $-4, %%eax\n" // EINTR
666 "jz 32b\n"
667 "mov -4(%%r8), %%eax\n"
668 "test %%rax, %%rax\n"
669 "jnz 26b\n" // exit process (no error message)
670
671 // Release privileges by entering seccomp mode.
672 "mov $157, %%eax\n" // NR_prctl
673 "mov $22, %%edi\n" // PR_SET_SECCOMP
674 "mov $1, %%esi\n"
675 "syscall\n"
676 "test %%rax, %%rax\n"
677 "jnz 25b\n" // exit process
678
679 // We can finally start using the stack. Signal handlers no longer pose
680 // a threat to us.
681 "mov %%r8, %%rsp\n"
682
683 // Back in the newly created sandboxed thread, wait for trusted process
684 // to receive request. It is possible for an attacker to make us
685 // continue even before the trusted process is done. This is OK. It'll
686 // result in us putting stale values into the new thread's TLS. But that
687 // data is considered untrusted anyway.
688 "push %%rax\n"
689 "mov $1, %%edx\n" // len = 1
690 "mov %%rsp, %%rsi\n" // buf = %rsp
691 "mov %%r9, %%rdi\n" // fd = threadFdPub
692 "33:xor %%rax, %%rax\n" // NR_read
693 "syscall\n"
694 "cmp $-4, %%rax\n" // EINTR
695 "jz 33b\n"
696 "cmp %%rdx, %%rax\n"
697 "jne 25b\n" // exit process
698 "pop %%rax\n"
699
700 // Return to caller. We are in the new thread, now.
701 "test %%r15, %%r15\n"
702 "jnz 34f\n" // Returning to createTrustedThread()
703
704 // Returning to the place where clone() had been called. We rely on
705 // using rt_sigreturn() for restoring our registers. The caller already
706 // created a signal stack frame, and we patched the register values
707 // with the ones that were in effect prior to calling sandbox_clone().
708 "pop %%r15\n"
709 "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip
710 "mov $15, %%eax\n" // NR_rt_sigreturn
711 "syscall\n"
712
713 ".pushsection \".rodata\"\n"
714 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
715 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
716 "102:\n"
717 ".popsection\n"
718
719 "999:pop %%rbp\n"
720 "pop %%rbx\n"
721 :
722 : "g"(&args)
723 : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12",
724 "r13", "r14", "r15", "rsp", "memory"
725 #elif defined(__i386__)
726 struct user_desc u;
727 u.entry_number = (typeof u.entry_number)-1;
728 u.base_addr = 0;
729 u.limit = 0xfffff;
730 u.seg_32bit = 1;
731 u.contents = 0;
732 u.read_exec_only = 0;
733 u.limit_in_pages = 1;
734 u.seg_not_present = 0;
735 u.useable = 1;
736 SysCalls sys;
737 if (sys.set_thread_area(&u) < 0) {
738 die("Cannot set up thread local storage");
739 }
740 asm volatile("movw %w0, %%fs"
741 :
742 : "q"(8*u.entry_number+3));
743 asm volatile(
744 "push %%ebx\n"
745 "push %%ebp\n"
746
747 // Signal handlers are process-wide. This means that for security
748 // reasons, we cannot allow that the trusted thread ever executes any
749 // signal handlers.
750 // We prevent the execution of signal handlers by setting a signal
751 // mask that blocks all signals. In addition, we make sure that the
752 // stack pointer is invalid.
753 // We cannot reset the signal mask until after we have enabled
754 // Seccomp mode. Our sigprocmask() wrapper would normally do this by
755 // raising a signal, modifying the signal mask in the kernel-generated
756 // signal frame, and then calling sigreturn(). This presents a bit of
757 // a Catch-22, as all signals are masked and we can therefore not
758 // raise any signal that would allow us to generate the signal stack
759 // frame.
760 // Instead, we have to create the signal stack frame prior to entering
761 // Seccomp mode. This incidentally also helps us to restore the
762 // signal mask to the same value that it had prior to entering the
763 // sandbox.
764 // The signal wrapper for clone() is the second entry point into this
765 // code (by means of sending an IPC to its trusted thread). It goes
766 // through the same steps of creating a signal stack frame on the
767 // newly created thread's stacks prior to cloning. See clone.cc for
768 // details.
769 "mov %0, %%edi\n" // create signal stack before accessing MMX
770 "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000
771 "mov %%esp, %%ebp\n"
772 "int $0\n" // push a signal stack frame (see clone.cc)
773 "mov %%ebp, 0x1C(%%esp)\n" // pop stack upon call to sigreturn()
774 "mov %%esp, %%ebp\n"
775 "mov $2, %%ebx\n" // how = SIG_SETMASK
776 "pushl $-1\n"
777 "pushl $-1\n"
778 "mov %%esp, %%ecx\n" // set = full mask
779 "xor %%edx, %%edx\n" // old_set = NULL
780 "mov $8, %%esi\n" // mask all 64 signals
781 "mov $175, %%eax\n" // NR_rt_sigprocmask
782 "int $0x80\n"
783 "mov $126, %%eax\n" // NR_sigprocmask
784 "int $0x80\n"
785 "xor %%esp, %%esp\n" // invalidate the stack in all trusted code
786 "movd %%edi, %%mm6\n" // %mm6 = args
787 "lea 999f, %%edi\n" // continue in same thread
788 "movd %%edi, %%mm3\n"
789 "xor %%edi, %%edi\n" // initial sequence number
790 "movd %%edi, %%mm2\n"
791 "jmp 20f\n" // create trusted thread
792
793 // TODO(markus): Coalesce the read() operations by reading into a bigger
794 // buffer.
795
796 // Parameters:
797 // %mm0: thread's side of threadFd
798 // %mm1: processFdPub
799 // %mm3: return address after creation of new trusted thread
800 // %mm5: secure memory region
801 // the page following this one contains the scratch space
802
803 // Local variables:
804 // %mm2: sequence number for trusted calls
805 // %mm4: thread id
806
807 // Temporary variables:
808 // %ebp: system call number
809 // %mm6: secure memory of previous thread
810 // %mm7: temporary variable for spilling data
811
812 // Layout of secure shared memory region (c.f. securemem.h):
813 // 0x00: pointer to the secure shared memory region (i.e. self)
814 // 0x04: sequence number; must match %mm2
815 // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2
816 // 0x0C: system call number; passed to syscall in %eax
817 // 0x10: first argument; passed to syscall in %ebx
818 // 0x14: second argument; passed to syscall in %ecx
819 // 0x18: third argument; passed to syscall in %edx
820 // 0x1C: fourth argument; passed to syscall in %esi
821 // 0x20: fifth argument; passed to syscall in %edi
822 // 0x24: sixth argument; passed to syscall in %ebp
823 // 0x28: stored return address for clone() system call
824 // 0x2C: stored %ebp value for clone() system call
825 // 0x30: stored %edi value for clone() system call
826 // 0x34: stored %esi value for clone() system call
827 // 0x38: stored %edx value for clone() system call
828 // 0x3C: stored %ecx value for clone() system call
829 // 0x40: stored %ebx value for clone() system call
830 // 0x44: new shared memory for clone()
831 // 0x48: processFdPub for talking to trusted process
832 // 0x4C: cloneFdPub for talking to trusted process
833 // 0x50: set to non-zero, if in debugging mode
834 // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE)
835 // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE)
836 // 0x60: thread id (TLS_TID)
837 // 0x68: threadFdPub (TLS_THREAD_FD)
838 // 0x200-0x1000: securely passed verified file name(s)
839
840 // Layout of (untrusted) scratch space:
841 // 0x00: syscall number; passed in %eax
842 // 0x04: first argument; passed in %ebx
843 // 0x08: second argument; passed in %ecx
844 // 0x0C: third argument; passed in %edx
845 // 0x10: fourth argument; passed in %esi
846 // 0x14: fifth argument; passed in %edi
847 // 0x18: sixth argument; passed in %ebp
848 // 0x1C: return value
849 // 0x20: RDTSCP result (%eax)
850 // 0x24: RDTSCP result (%edx)
851 // 0x28: RDTSCP result (%ecx)
852 // 0x2C: last system call (updated in syscall.cc)
853 // 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday)
854 // 0x34: nesting level of system calls (for debugging purposes only)
855 // 0x38: signal mask
856 // 0x40: in SEGV handler
857
858 "0:xor %%esp, %%esp\n"
859 "mov $2, %%eax\n" // %mm2 = initial sequence number
860 "movd %%eax, %%mm2\n"
861
862 // Read request from untrusted thread, or from trusted process. In either
863 // case, the data that we read has to be considered untrusted.
864 // read(threadFd, &scratch, 4)
865 "1:mov $3, %%eax\n" // NR_read
866 "movd %%mm0, %%ebx\n" // fd = threadFd
867 "movd %%mm5, %%ecx\n" // secure_mem
868 "add $0x1000, %%ecx\n" // buf = &scratch
869 "mov $4, %%edx\n" // len = 4
870 "2:int $0x80\n"
871 "cmp $-4, %%eax\n" // EINTR
872 "jz 2b\n"
873 "cmp %%edx, %%eax\n"
874 "jnz 25f\n" // exit process
875
876 // Retrieve system call number. It is crucial that we only dereference
877 // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and
878 // we must use the value that we have read the first time.
879 "mov 0(%%ecx), %%eax\n"
880
881 // If syscall number is -1, execute an unlocked system call from the
882 // secure memory area
883 "cmp $-1, %%eax\n"
884 "jnz 5f\n"
885 "3:movd %%mm2, %%ebp\n"
886 "cmp %%ebp, 0x4-0x1000(%%ecx)\n"
887 "jne 25f\n" // exit process
888 "cmp 0x08-0x1000(%%ecx), %%eax\n"
889 "jne 25f\n" // exit process
890 "mov 0x0C-0x1000(%%ecx), %%eax\n"
891 "mov 0x10-0x1000(%%ecx), %%ebx\n"
892 "mov 0x18-0x1000(%%ecx), %%edx\n"
893 "mov 0x1C-0x1000(%%ecx), %%esi\n"
894 "mov 0x20-0x1000(%%ecx), %%edi\n"
895 "mov 0x24-0x1000(%%ecx), %%ebp\n"
896 "mov 0x14-0x1000(%%ecx), %%ecx\n"
897 "movd %%edi, %%mm4\n"
898 "movd %%ebp, %%mm7\n"
899 "movd %%mm2, %%ebp\n"
900 "movd %%mm5, %%edi\n"
901 "cmp %%ebp, 4(%%edi)\n"
902 "jne 25f\n" // exit process
903 "add $2, %%ebp\n"
904 "movd %%ebp, %%mm2\n"
905 "movd %%mm4, %%edi\n"
906 "movd %%mm7, %%ebp\n"
907
908 // shmget() gets some special treatment. Whenever we return from this
909 // system call, we remember the most recently returned SysV shm id.
910 "cmp $117, %%eax\n" // NR_ipc
911 "jnz 4f\n"
912 "cmp $23, %%ebx\n" // shmget()
913 "jnz 4f\n"
914 "int $0x80\n"
915 "mov %%eax, %%ebp\n"
916 "mov $120, %%eax\n" // NR_clone
917 "mov $17, %%ebx\n" // flags = SIGCHLD
918 "mov $1, %%ecx\n" // stack = 1
919 "int $0x80\n"
920 "test %%eax, %%eax\n"
921 "js 25f\n" // exit process
922 "mov %%eax, %%ebx\n"
923 "jnz 8f\n" // wait for child, then return result
924 "movd %%mm5, %%ebx\n" // start = secure_mem
925 "mov $4096, %%ecx\n" // len = 4096
926 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
927 "mov $125, %%eax\n" // NR_mprotect
928 "int $0x80\n"
929 "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id
930 "xor %%ebx, %%ebx\n"
931
932 // When debugging messages are enabled, warn about expensive system calls
933 #ifndef NDEBUG
934 "movd %%mm5, %%ecx\n"
935 "cmpw $0, 0x50(%%ecx)\n" // debug mode
936 "jz 27f\n"
937 "mov $4, %%eax\n" // NR_write
938 "mov $2, %%ebx\n" // fd = stderr
939 "lea 101f, %%ecx\n" // "This is an expensive system call"
940 "mov $102f-101f, %%edx\n" // len = strlen(msg)
941 "int $0x80\n"
942 "xor %%ebx, %%ebx\n"
943 #endif
944
945 "jmp 27f\n" // exit program, no message
946 "4:int $0x80\n"
947 "jmp 15f\n" // return result
948
949 // If syscall number is -2, execute locked system call from the
950 // secure memory area
951 "5:jg 12f\n"
952 "cmp $-2, %%eax\n"
953 "jnz 9f\n"
954 "movd %%mm2, %%ebp\n"
955 "cmp %%ebp, 0x4-0x1000(%%ecx)\n"
956 "jne 25f\n" // exit process
957 "cmp %%eax, 0x8-0x1000(%%ecx)\n"
958 "jne 25f\n" // exit process
959
960 // When debugging messages are enabled, warn about expensive system calls
961 #ifndef NDEBUG
962 "cmpw $0, 0x50-0x1000(%%ecx)\n"
963 "jz 6f\n" // debug mode
964 "mov %%ecx, %%ebp\n"
965 "mov $4, %%eax\n" // NR_write
966 "mov $2, %%ebx\n" // fd = stderr
967 "lea 101f, %%ecx\n" // "This is an expensive system call"
968 "mov $102f-101f, %%edx\n" // len = strlen(msg)
969 "int $0x80\n"
970 "mov %%ebp, %%ecx\n"
971 "6:"
972 #endif
973
974 "mov 0x0C-0x1000(%%ecx), %%eax\n"
975 "mov 0x10-0x1000(%%ecx), %%ebx\n"
976 "mov 0x18-0x1000(%%ecx), %%edx\n"
977 "mov 0x1C-0x1000(%%ecx), %%esi\n"
978 "mov 0x20-0x1000(%%ecx), %%edi\n"
979 "mov 0x24-0x1000(%%ecx), %%ebp\n"
980 "mov 0x14-0x1000(%%ecx), %%ecx\n"
981 "movd %%edi, %%mm4\n"
982 "movd %%ebp, %%mm7\n"
983 "movd %%mm2, %%ebp\n"
984 "movd %%mm5, %%edi\n"
985 "cmp %%ebp, 4(%%edi)\n"
986 "jne 25f\n" // exit process
987
988 // clone() has unusual calling conventions and must be handled specially
989 "cmp $120, %%eax\n" // NR_clone
990 "jz 19f\n"
991
992 // exit() terminates trusted thread
993 "cmp $1, %%eax\n" // NR_exit
994 "jz 18f\n"
995
996 // Perform requested system call
997 "movd %%mm4, %%edi\n"
998 "movd %%mm7, %%ebp\n"
999 "int $0x80\n"
1000
1001 // Unlock mutex
1002 "7:movd %%mm2, %%ebp\n"
1003 "movd %%mm5, %%edi\n"
1004 "cmp %%ebp, 4(%%edi)\n"
1005 "jne 25f\n" // exit process
1006 "add $2, %%ebp\n"
1007 "movd %%ebp, %%mm2\n"
1008 "mov %%eax, %%ebp\n"
1009 "mov $120, %%eax\n" // NR_clone
1010 "mov $17, %%ebx\n" // flags = SIGCHLD
1011 "mov $1, %%ecx\n" // stack = 1
1012 "int $0x80\n"
1013 "test %%eax, %%eax\n"
1014 "js 25f\n" // exit process
1015 "jz 22f\n" // unlock and exit
1016 "mov %%eax, %%ebx\n"
1017 "8:xor %%ecx, %%ecx\n"
1018 "xor %%edx, %%edx\n"
1019 "mov $7, %%eax\n" // NR_waitpid
1020 "int $0x80\n"
1021 "cmp $-4, %%eax\n" // EINTR
1022 "jz 8b\n"
1023 "mov %%ebp, %%eax\n"
1024 "jmp 15f\n" // return result
1025
1026 // If syscall number is -3, read the time stamp counter
1027 "9:cmp $-3, %%eax\n"
1028 "jnz 10f\n"
1029 "rdtsc\n" // sets %edx:%eax
1030 "xor %%ecx, %%ecx\n"
1031 "jmp 11f\n"
1032 "10:cmp $-4, %%eax\n"
1033 "jnz 12f\n"
1034 "rdtscp\n" // sets %edx:%eax and %ecx
1035 "11:movd %%mm5, %%ebx\n"
1036 "add $0x1020, %%ebx\n"
1037 "mov %%eax, 0(%%ebx)\n"
1038 "mov %%edx, 4(%%ebx)\n"
1039 "mov %%ecx, 8(%%ebx)\n"
1040 "mov %%ebx, %%ecx\n"
1041 "mov $12, %%edx\n"
1042 "jmp 16f\n" // return result
1043
1044 // Check in syscallTable whether this system call is unrestricted
1045 "12:mov %%eax, %%ebp\n"
1046 #ifndef NDEBUG
1047 "cmpw $0, 0x50-0x1000(%%ecx)\n"
1048 "jnz 13f\n" // debug mode
1049 #endif
1050 "cmp playground$maxSyscall, %%eax\n"
1051 "ja 25f\n" // exit process
1052 "shl $3, %%eax\n"
1053 "add $playground$syscallTable, %%eax\n"
1054 "mov 0(%%eax), %%eax\n"
1055 "cmp $1, %%eax\n"
1056 "jne 25f\n" // exit process
1057
1058 // Default behavior for unrestricted system calls is to just execute
1059 // them. Read the remaining arguments first.
1060 "13:mov $3, %%eax\n" // NR_read
1061 "movd %%mm0, %%ebx\n" // fd = threadFd
1062 "add $4, %%ecx\n" // buf = &scratch + 4
1063 "mov $24, %%edx\n" // len = 6*sizeof(void *)
1064 "14:int $0x80\n"
1065 "cmp $-4, %%eax\n" // EINTR
1066 "jz 14b\n"
1067 "cmp %%edx, %%eax\n"
1068 "jnz 25f\n" // exit process
1069 "mov %%ebp, %%eax\n"
1070 "mov 0x00(%%ecx), %%ebx\n"
1071 "mov 0x08(%%ecx), %%edx\n"
1072 "mov 0x0C(%%ecx), %%esi\n"
1073 "mov 0x10(%%ecx), %%edi\n"
1074 "mov 0x14(%%ecx), %%ebp\n"
1075 "mov 0x04(%%ecx), %%ecx\n"
1076 "cmp $252, %%eax\n" // NR_exit_group
1077 "jz 27f\n" // exit program, no message
1078 "int $0x80\n"
1079
1080 // Return result of system call to sandboxed thread
1081 "15:movd %%mm5, %%ecx\n" // secure_mem
1082 "add $0x101C, %%ecx\n" // buf = &scratch + 28
1083 "mov %%eax, (%%ecx)\n"
1084 "mov $4, %%edx\n" // len = 4
1085 "16:movd %%mm0, %%ebx\n" // fd = threadFd
1086 "mov $4, %%eax\n" // NR_write
1087 "17:int $0x80\n"
1088 "cmp %%edx, %%eax\n"
1089 "jz 1b\n"
1090 "cmp $-4, %%eax\n" // EINTR
1091 "jz 17b\n"
1092 "jmp 25f\n" // exit process
1093
1094 // NR_exit:
1095 // Exit trusted thread after cleaning up resources
1096 "18:mov %%edi, %%ecx\n" // secure_mem
1097 "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub
1098 "mov $6, %%eax\n" // NR_close
1099 "int $0x80\n"
1100 "mov %%ecx, %%ebx\n" // start = secure_mem
1101 "mov $8192, %%ecx\n" // length = 8192
1102 "xor %%edx, %%edx\n" // prot = PROT_NONE
1103 "mov $125, %%eax\n" // NR_mprotect
1104 "int $0x80\n"
1105 "movd %%mm0, %%ebx\n" // fd = threadFd
1106 "mov $6, %%eax\n" // NR_close
1107 "int $0x80\n"
1108 "mov $120, %%eax\n" // NR_clone
1109 "mov $17, %%ebx\n" // flags = SIGCHLD
1110 "mov $1, %%ecx\n" // stack = 1
1111 "int $0x80\n"
1112 "mov %%eax, %%ebx\n"
1113 "test %%eax, %%eax\n"
1114 "js 25f\n" // exit process
1115 "jne 21f\n" // reap helper, exit thread
1116 "jmp 22f\n" // unlock mutex
1117
1118 // NR_clone:
1119 // Original trusted thread calls clone() to create new nascent
1120 // thread. This thread is (typically) fully privileged and shares all
1121 // resources with the caller (i.e. the previous trusted thread),
1122 // and by extension it shares all resources with the sandbox'd
1123 // threads.
1124 "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem
1125 "movd %%mm4, %%edi\n" // child_tidptr
1126 "mov %%ecx, %%ebp\n" // remember child stack
1127 "mov $1, %%ecx\n" // stack = 1
1128 "int $0x80\n" // calls NR_clone
1129 "cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values
1130 "jae 7b\n" // unlock mutex, return result
1131 "movd %%mm2, %%edi\n"
1132 "add $2, %%edi\n"
1133 "movd %%edi, %%mm2\n"
1134 "test %%eax, %%eax\n"
1135 "jne 15b\n" // return result
1136
1137 // In nascent thread, now.
1138 "sub $2, %%edi\n"
1139 "movd %%edi, %%mm2\n"
1140
1141 // We want to maintain an invalid %esp whenever we access untrusted
1142 // memory. This ensures that even if an attacker can trick us into
1143 // triggering a SIGSEGV, we will never successfully execute a signal
1144 // handler.
1145 // Signal handlers are inherently dangerous, as an attacker could trick
1146 // us into returning to the wrong address by adjusting the signal stack
1147 // right before the handler returns.
1148 // N.B. While POSIX is curiously silent about this, it appears that on
1149 // Linux, alternate signal stacks are a per-thread property. That is
1150 // good. It means that this security mechanism works, even if the
1151 // sandboxed thread manages to set up an alternate signal stack.
1152 //
1153 // TODO(markus): We currently do not support emulating calls to
1154 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
1155 // for a discussion on how to fix this, if this ever becomes necessary.
1156 "movd %%eax, %%mm3\n" // Request to return from clone() when done
1157
1158 // Get thread id of nascent thread
1159 "20:mov $224, %%eax\n" // NR_gettid
1160 "int $0x80\n"
1161 "movd %%eax, %%mm4\n"
1162
1163 // Nascent thread creates socketpair() for sending requests to
1164 // trusted thread.
1165 // We can create the filehandles on the child's stack. Filehandles are
1166 // always treated as untrusted.
1167 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
1168 "mov $102, %%eax\n" // NR_socketcall
1169 "mov $8, %%ebx\n" // socketpair
1170 "sub $8, %%ebp\n" // sv = child_stack
1171 "mov %%ebp, -0x04(%%ebp)\n"
1172 "movl $0, -0x08(%%ebp)\n" // protocol = 0
1173 "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM
1174 "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX
1175 "lea -0x10(%%ebp), %%ecx\n"
1176 "int $0x80\n"
1177 "test %%eax, %%eax\n"
1178 "jz 28f\n"
1179
1180 // If things went wrong, we don't have an (easy) way of signaling
1181 // the parent. For our purposes, it is sufficient to fail with a
1182 // fatal error.
1183 "jmp 25f\n" // exit process
1184 "21:xor %%ecx, %%ecx\n"
1185 "xor %%edx, %%edx\n"
1186 "mov $7, %%eax\n" // NR_waitpid
1187 "int $0x80\n"
1188 "cmp $-4, %%eax\n" // EINTR
1189 "jz 21b\n"
1190 "jmp 23f\n" // exit thread (no message)
1191 "22:lea playground$syscall_mutex, %%ebx\n"
1192 "mov $4096, %%ecx\n"
1193 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
1194 "mov $125, %%eax\n" // NR_mprotect
1195 "int $0x80\n"
1196 "lock; addl $0x80000000, (%%ebx)\n"
1197 "jz 23f\n" // exit thread
1198 "mov $1, %%edx\n"
1199 "mov %%edx, %%ecx\n" // FUTEX_WAKE
1200 "mov $240, %%eax\n" // NR_futex
1201 "int $0x80\n"
1202 "23:mov $1, %%eax\n" // NR_exit
1203 "mov $1, %%ebx\n" // status = 1
1204 "24:int $0x80\n"
1205 "25:mov $4, %%eax\n" // NR_write
1206 "mov $2, %%ebx\n" // fd = stderr
1207 "lea 100f, %%ecx\n" // "Sandbox violation detected"
1208 "mov $101f-100f, %%edx\n" // len = strlen(msg)
1209 "int $0x80\n"
1210 "26:mov $1, %%ebx\n"
1211 "27:mov $252, %%eax\n" // NR_exit_group
1212 "jmp 24b\n"
1213
1214 // The first page is mapped read-only for use as securely shared memory
1215 "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem
1216 "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem
1217 "movd %%ebx, %%mm5\n" // %mm5 = secure_mem
1218 "movd %%mm2, %%esi\n"
1219 "cmp %%esi, 4(%%edi)\n"
1220 "jne 25b\n" // exit process
1221 "mov $125, %%eax\n" // NR_mprotect
1222 "mov $4096, %%ecx\n" // len = 4096
1223 "mov $1, %%edx\n" // prot = PROT_READ
1224 "int $0x80\n"
1225
1226 // The second page is used as scratch space by the trusted thread.
1227 // Make it writable.
1228 "mov $125, %%eax\n" // NR_mprotect
1229 "add $4096, %%ebx\n" // addr = secure_mem + 4096
1230 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
1231 "int $0x80\n"
1232
1233 // Call clone() to create new trusted thread().
1234 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
1235 // CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL)
1236 "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack)
1237 "movd %%eax, %%mm0\n" // %mm0 = threadFd
1238 "mov $120, %%eax\n" // NR_clone
1239 "mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR
1240 "mov $1, %%ecx\n" // stack = 1
1241 "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub
1242 "cmp %%esi, 4(%%edi)\n"
1243 "jne 25b\n" // exit process
1244 "int $0x80\n"
1245 "test %%eax, %%eax\n"
1246 "js 25b\n" // exit process
1247 "jz 0b\n" // invoke trustedThreadFnc()
1248
1249 // Set up thread local storage
1250 "mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable
1251 "mov %%eax, -0x04(%%ebp)\n"
1252 "mov $0xFFFFF, %%eax\n" // limit
1253 "mov %%eax, -0x08(%%ebp)\n"
1254 "movd %%mm5, %%eax\n"
1255 "add $0x58, %%eax\n"
1256 "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS
1257 "mov %%fs, %%eax\n"
1258 "shr $3, %%eax\n"
1259 "mov %%eax, -0x10(%%ebp)\n" // entry_number
1260 "mov $243, %%eax\n" // NR_set_thread_area
1261 "lea -0x10(%%ebp), %%ebx\n"
1262 "int $0x80\n"
1263 "test %%eax, %%eax\n"
1264 "jnz 25b\n" // exit process
1265
1266 // Copy the caller's signal mask
1267 "movd %%mm5, %%edx\n"
1268 "mov 0x1038(%%edi), %%eax\n"
1269 "mov %%eax, 0x1038(%%edx)\n"
1270 "mov 0x103C(%%edi), %%eax\n"
1271 "mov %%eax, 0x103C(%%edx)\n"
1272
1273 // Done creating trusted thread. We can now get ready to return to caller
1274 "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub
1275 "add $8, %%ebp\n"
1276
1277 // Check whether this is the initial thread, or a newly created one.
1278 // At startup we run the same code as when we create a new thread. At
1279 // the very top of this function, you will find that we store 999f
1280 // in %%mm3. That is the signal that we should return on the same
1281 // stack rather than return to where clone was called.
1282 "movd %%mm3, %%eax\n"
1283 "movd %%mm2, %%edx\n"
1284 "test %%eax, %%eax\n"
1285 "jne 29f\n"
1286
1287 // Returning from clone() into the newly created thread is special. We
1288 // cannot unroll the stack, as we just set up a new stack for this
1289 // thread. We have to explicitly restore CPU registers to the values
1290 // that they had when the program originally called clone().
1291 // We patch the register values in the signal stack frame so that we
1292 // can ask sigreturn() to restore all registers for us.
1293 "sub $0x4, %%ebp\n"
1294 "mov 0x28(%%edi), %%eax\n"
1295 "mov %%eax, 0x00(%%ebp)\n" // return address
1296 "xor %%eax, %%eax\n"
1297 "mov %%eax, 0x30(%%ebp)\n" // %eax = 0
1298 "mov 0x2C(%%edi), %%eax\n"
1299 "mov %%eax, 0x1C(%%ebp)\n" // %ebp
1300 "mov 0x30(%%edi), %%eax\n"
1301 "mov %%eax, 0x14(%%ebp)\n" // %edi
1302 "mov 0x34(%%edi), %%eax\n"
1303 "mov %%eax, 0x18(%%ebp)\n" // %esi
1304 "mov 0x38(%%edi), %%eax\n"
1305 "mov %%eax, 0x28(%%ebp)\n" // %edx
1306 "mov 0x3C(%%edi), %%eax\n"
1307 "mov %%eax, 0x2C(%%ebp)\n" // %ecx
1308 "mov 0x40(%%edi), %%eax\n"
1309 "mov %%eax, 0x24(%%ebp)\n" // %ebx
1310 "cmp %%edx, 4(%%edi)\n"
1311 "jne 25b\n" // exit process
1312
1313 // Nascent thread launches a helper that doesn't share any of our
1314 // resources, except for pages mapped as MAP_SHARED.
1315 // clone(SIGCHLD, stack=1)
1316 "29:mov $120, %%eax\n" // NR_clone
1317 "mov $17, %%ebx\n" // flags = SIGCHLD
1318 "mov $1, %%ecx\n" // stack = 1
1319 "int $0x80\n"
1320 "test %%eax, %%eax\n"
1321 "js 25b\n" // exit process
1322 "jne 31f\n"
1323
1324 // Use sendmsg() to send to the trusted process the file handles for
1325 // communicating with the new trusted thread. We also send the address
1326 // of the secure memory area (for sanity checks) and the thread id.
1327 "cmp %%edx, 4(%%edi)\n"
1328 "jne 25b\n" // exit process
1329
1330 // 0x00 socketcall:
1331 // 0x00 socket (0x4C(%edi))
1332 // 0x04 msg (%ecx + 0x0C)
1333 // 0x08 flags ($0)
1334 // 0x0C msg:
1335 // 0x0C msg_name ($0)
1336 // 0x10 msg_namelen ($0)
1337 // 0x14 msg_iov (%ecx + 0x34)
1338 // 0x18 msg_iovlen ($1)
1339 // 0x1C msg_control (%ecx + 0x3C)
1340 // 0x20 msg_controllen ($0x14)
1341 // 0x24 data:
1342 // 0x24 msg_flags/err ($0)
1343 // 0x28 secure_mem (%mm5)
1344 // 0x2C threadId (%mm4)
1345 // 0x30 threadFdPub (%esi)
1346 // 0x34 iov:
1347 // 0x34 iov_base (%ecx + 0x24)
1348 // 0x38 iov_len ($0x10)
1349 // 0x3C cmsg:
1350 // 0x3C cmsg_len ($0x14)
1351 // 0x40 cmsg_level ($1, SOL_SOCKET)
1352 // 0x44 cmsg_type ($1, SCM_RIGHTS)
1353 // 0x48 threadFdPub (%esi)
1354 // 0x4C threadFd (%mm0)
1355 // 0x50
1356 "lea -0x50(%%ebp), %%ecx\n"
1357 "xor %%eax, %%eax\n"
1358 "mov %%eax, 0x08(%%ecx)\n" // flags
1359 "mov %%eax, 0x0C(%%ecx)\n" // msg_name
1360 "mov %%eax, 0x10(%%ecx)\n" // msg_namelen
1361 "mov %%eax, 0x24(%%ecx)\n" // msg_flags
1362 "inc %%eax\n"
1363 "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen
1364 "mov %%eax, 0x40(%%ecx)\n" // cmsg_level
1365 "mov %%eax, 0x44(%%ecx)\n" // cmsg_type
1366 "movl $0x10, 0x38(%%ecx)\n" // iov_len
1367 "mov $0x14, %%eax\n"
1368 "mov %%eax, 0x20(%%ecx)\n" // msg_controllen
1369 "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len
1370 "mov 0x4C(%%edi), %%eax\n" // cloneFdPub
1371 "mov %%eax, 0x00(%%ecx)\n" // socket
1372 "lea 0x0C(%%ecx), %%eax\n"
1373 "mov %%eax, 0x04(%%ecx)\n" // msg
1374 "add $0x18, %%eax\n"
1375 "mov %%eax, 0x34(%%ecx)\n" // iov_base
1376 "add $0x10, %%eax\n"
1377 "mov %%eax, 0x14(%%ecx)\n" // msg_iov
1378 "add $8, %%eax\n"
1379 "mov %%eax, 0x1C(%%ecx)\n" // msg_control
1380 "mov %%esi, 0x30(%%ecx)\n" // threadFdPub
1381 "mov %%esi, 0x48(%%ecx)\n" // threadFdPub
1382 "movd %%mm5, %%eax\n"
1383 "mov %%eax, 0x28(%%ecx)\n" // secure_mem
1384 "movd %%mm4, %%eax\n"
1385 "mov %%eax, 0x2C(%%ecx)\n" // threadId
1386 "movd %%mm0, %%eax\n"
1387 "mov %%eax, 0x4C(%%ecx)\n" // threadFd
1388 "mov $16, %%ebx\n" // sendmsg()
1389 "mov $102, %%eax\n" // NR_socketcall
1390 "int $0x80\n"
1391
1392 // Release syscall_mutex_. This signals the trusted process that
1393 // it can write into the original thread's secure memory again.
1394 "mov $125, %%eax\n" // NR_mprotect
1395 "lea playground$syscall_mutex, %%ebx\n"
1396 "mov $4096, %%ecx\n"
1397 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE
1398 "int $0x80\n"
1399 "movd %%mm2, %%edx\n"
1400 "cmp %%edx, 0x4(%%edi)\n"
1401 "jnz 25b\n" // exit process
1402 "lock; addl $0x80000000, (%%ebx)\n"
1403 "jz 30f\n" // exit process (no error message)
1404 "mov $1, %%edx\n"
1405 "mov %%edx, %%ecx\n" // FUTEX_WAKE
1406 "mov $240, %%eax\n" // NR_futex
1407 "int $0x80\n"
1408 "30:xor %%ebx, %%ebx\n"
1409 "jmp 27b\n" // exit process (no error message)
1410
1411 // Reap helper
1412 "31:mov %%eax, %%ebx\n"
1413 "32:lea -4(%%ebp), %%ecx\n"
1414 "xor %%edx, %%edx\n"
1415 "mov $7, %%eax\n" // NR_waitpid
1416 "int $0x80\n"
1417 "cmp $-4, %%eax\n" // EINTR
1418 "jz 32b\n"
1419 "mov -4(%%ebp), %%eax\n"
1420 "test %%eax, %%eax\n"
1421 "jnz 26b\n" // exit process (no error message)
1422
1423 // Release privileges by entering seccomp mode.
1424 "33:mov $172, %%eax\n" // NR_prctl
1425 "mov $22, %%ebx\n" // PR_SET_SECCOMP
1426 "mov $1, %%ecx\n"
1427 "int $0x80\n"
1428 "test %%eax, %%eax\n"
1429 "jnz 25b\n" // exit process
1430
1431 // We can finally start using the stack. Signal handlers no longer pose
1432 // a threat to us.
1433 "mov %%ebp, %%esp\n"
1434
1435 // Back in the newly created sandboxed thread, wait for trusted process
1436 // to receive request. It is possible for an attacker to make us
1437 // continue even before the trusted process is done. This is OK. It'll
1438 // result in us putting stale values into the new thread's TLS. But that
1439 // data is considered untrusted anyway.
1440 "push %%eax\n"
1441 "mov $1, %%edx\n" // len = 1
1442 "mov %%esp, %%ecx\n" // buf = %esp
1443 "mov %%esi, %%ebx\n" // fd = threadFdPub
1444 "34:mov $3, %%eax\n" // NR_read
1445 "int $0x80\n"
1446 "cmp $-4, %%eax\n" // EINTR
1447 "jz 34b\n"
1448 "cmp %%edx, %%eax\n"
1449 "jne 25b\n" // exit process
1450 "pop %%eax\n"
1451
1452 // Return to caller. We are in the new thread, now.
1453 "movd %%mm3, %%ebx\n"
1454 "test %%ebx, %%ebx\n"
1455 "jnz 35f\n" // Returning to createTrustedThread()
1456
1457 // Returning to the place where clone() had been called. We rely on
1458 // using sigreturn() for restoring our registers. The caller already
1459 // created a signal stack frame, and we patched the register values
1460 // with the ones that were in effect prior to calling sandbox_clone().
1461 "pop %%ebx\n"
1462 "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip
1463 "mov $119, %%eax\n" // NR_sigreturn
1464 "int $0x80\n"
1465
1466 ".pushsection \".rodata\"\n"
1467 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
1468 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
1469 "102:\n"
1470 ".popsection\n"
1471
1472 "999:pop %%ebp\n"
1473 "pop %%ebx\n"
1474 :
1475 : "g"(&args)
1476 : "eax", "ecx", "edx", "edi", "esi", "esp", "memory"
1477 #else
1478 #error Unsupported target platform
1479 #endif
1480 );
1481 }
1482
1483 } // namespace
OLDNEW
« no previous file with comments | « sandbox/linux/seccomp/trusted_process.cc ('k') | sandbox/linux/seccomp/x86_decode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698