| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "sandbox_impl.h" | |
| 6 #include "syscall_table.h" | |
| 7 | |
| 8 namespace playground { | |
| 9 | |
| 10 void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub, | |
| 11 SecureMem::Args* secureMem) { | |
| 12 SecureMem::Args args = { { { { { 0 } } } } }; | |
| 13 args.self = &args; | |
| 14 args.newSecureMem = secureMem; | |
| 15 args.processFdPub = processFdPub; | |
| 16 args.cloneFdPub = cloneFdPub; | |
| 17 #if defined(__x86_64__) | |
| 18 asm volatile( | |
| 19 "push %%rbx\n" | |
| 20 "push %%rbp\n" | |
| 21 "mov %0, %%rbp\n" // %rbp = args | |
| 22 "xor %%rbx, %%rbx\n" // initial sequence number | |
| 23 "lea 999f(%%rip), %%r15\n" // continue in same thread | |
| 24 | |
| 25 // Signal handlers are process-wide. This means that for security | |
| 26 // reasons, we cannot allow that the trusted thread ever executes any | |
| 27 // signal handlers. | |
| 28 // We prevent the execution of signal handlers by setting a signal | |
| 29 // mask that blocks all signals. In addition, we make sure that the | |
| 30 // stack pointer is invalid. | |
| 31 // We cannot reset the signal mask until after we have enabled | |
| 32 // Seccomp mode. Our sigprocmask() wrapper would normally do this by | |
| 33 // raising a signal, modifying the signal mask in the kernel-generated | |
| 34 // signal frame, and then calling sigreturn(). This presents a bit of | |
| 35 // a Catch-22, as all signals are masked and we can therefore not | |
| 36 // raise any signal that would allow us to generate the signal stack | |
| 37 // frame. | |
| 38 // Instead, we have to create the signal stack frame prior to entering | |
| 39 // Seccomp mode. This incidentally also helps us to restore the | |
| 40 // signal mask to the same value that it had prior to entering the | |
| 41 // sandbox. | |
| 42 // The signal wrapper for clone() is the second entry point into this | |
| 43 // code (by means of sending an IPC to its trusted thread). It goes | |
| 44 // through the same steps of creating a signal stack frame on the | |
| 45 // newly created thread's stacks prior to cloning. See clone.cc for | |
| 46 // details. | |
| 47 "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000 | |
| 48 "mov %%rsp, %%rcx\n" | |
| 49 "int $0\n" // push a signal stack frame (see clone.cc) | |
| 50 "mov %%rcx, 0xA0(%%rsp)\n" // pop stack upon call to sigreturn() | |
| 51 "mov %%rsp, %%r9\n" | |
| 52 "mov $2, %%rdi\n" // how = SIG_SETMASK | |
| 53 "pushq $-1\n" | |
| 54 "mov %%rsp, %%rsi\n" // set = full mask | |
| 55 "xor %%rdx, %%rdx\n" // old_set = NULL | |
| 56 "mov $8, %%r10\n" // mask all 64 signals | |
| 57 "mov $14, %%eax\n" // NR_rt_sigprocmask | |
| 58 "syscall\n" | |
| 59 "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code | |
| 60 "jmp 20f\n" // create trusted thread | |
| 61 | |
| 62 // TODO(markus): Coalesce the read() operations by reading into a bigger | |
| 63 // buffer. | |
| 64 | |
| 65 // Parameters: | |
| 66 // *%fs: secure memory region | |
| 67 // the page following this one contains the scratch space | |
| 68 // %r13: thread's side of threadFd | |
| 69 // %r15: processFdPub | |
| 70 | |
| 71 // Local variables: | |
| 72 // %rbx: sequence number for trusted calls | |
| 73 | |
| 74 // Temporary variables: | |
| 75 // %r8: child stack | |
| 76 // %r9: system call number, child stack | |
| 77 // %rbp: secure memory of previous thread | |
| 78 | |
| 79 // Layout of secure shared memory region (c.f. securemem.h): | |
| 80 // 0x00: pointer to the secure shared memory region (i.e. self) | |
| 81 // 0x08: sequence number; must match %rbx | |
| 82 // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2 | |
| 83 // 0x18: system call number; passed to syscall in %rax | |
| 84 // 0x20: first argument; passed to syscall in %rdi | |
| 85 // 0x28: second argument; passed to syscall in %rsi | |
| 86 // 0x30: third argument; passed to syscall in %rdx | |
| 87 // 0x38: fourth argument; passed to syscall in %r10 | |
| 88 // 0x40: fifth argument; passed to syscall in %r8 | |
| 89 // 0x48: sixth argument; passed to syscall in %r9 | |
| 90 // 0x50: stored return address for clone() system call | |
| 91 // 0x58: stored %rbp value for clone() system call | |
| 92 // 0x60: stored %rbx value for clone() system call | |
| 93 // 0x68: stored %rcx value for clone() system call | |
| 94 // 0x70: stored %rdx value for clone() system call | |
| 95 // 0x78: stored %rsi value for clone() system call | |
| 96 // 0x80: stored %rdi value for clone() system call | |
| 97 // 0x88: stored %r8 value for clone() system call | |
| 98 // 0x90: stored %r9 value for clone() system call | |
| 99 // 0x98: stored %r10 value for clone() system call | |
| 100 // 0xA0: stored %r11 value for clone() system call | |
| 101 // 0xA8: stored %r12 value for clone() system call | |
| 102 // 0xB0: stored %r13 value for clone() system call | |
| 103 // 0xB8: stored %r14 value for clone() system call | |
| 104 // 0xC0: stored %r15 value for clone() system call | |
| 105 // 0xC8: new shared memory for clone() | |
| 106 // 0xD0: processFdPub for talking to trusted process | |
| 107 // 0xD4: cloneFdPub for talking to trusted process | |
| 108 // 0xD8: set to non-zero, if in debugging mode | |
| 109 // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE) | |
| 110 // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE) | |
| 111 // 0xE8: thread id (TLS_TID) | |
| 112 // 0xF0: threadFdPub (TLS_THREAD_FD) | |
| 113 // 0x200-0x1000: securely passed verified file name(s) | |
| 114 | |
| 115 // Layout of (untrusted) scratch space: | |
| 116 // 0x00: syscall number; passed in %rax | |
| 117 // 0x04: first argument; passed in %rdi | |
| 118 // 0x0C: second argument; passed in %rsi | |
| 119 // 0x14: third argument; passed in %rdx | |
| 120 // 0x1C: fourth argument; passed in %r10 | |
| 121 // 0x24: fifth argument; passed in %r8 | |
| 122 // 0x2C: sixth argument; passed in %r9 | |
| 123 // 0x34: return value | |
| 124 // 0x3C: RDTSCP result (%eax) | |
| 125 // 0x40: RDTSCP result (%edx) | |
| 126 // 0x44: RDTSCP result (%ecx) | |
| 127 // 0x48: last system call (not used on x86-64) | |
| 128 // 0x4C: number of consecutive calls to a time fnc (not used on x86-64) | |
| 129 // 0x50: nesting level of system calls (for debugging purposes only) | |
| 130 // 0x54: signal mask | |
| 131 // 0x5C: in SEGV handler | |
| 132 | |
| 133 // We use the %fs register for accessing the secure read-only page, and | |
| 134 // the untrusted scratch space immediately following it. The segment | |
| 135 // register and the local descriptor table is set up by passing | |
| 136 // appropriate arguments to clone(). | |
| 137 | |
| 138 "0:xor %%rsp, %%rsp\n" | |
| 139 "mov $2, %%ebx\n" // %rbx = initial sequence number | |
| 140 | |
| 141 // Read request from untrusted thread, or from trusted process. In either | |
| 142 // case, the data that we read has to be considered untrusted. | |
| 143 // read(threadFd, &scratch, 4) | |
| 144 "1:xor %%rax, %%rax\n" // NR_read | |
| 145 "mov %%r13, %%rdi\n" // fd = threadFd | |
| 146 "mov %%fs:0x0, %%rsi\n" // secure_mem | |
| 147 "add $0x1000, %%rsi\n" // buf = &scratch | |
| 148 "mov $4, %%edx\n" // len = 4 | |
| 149 "2:syscall\n" | |
| 150 "cmp $-4, %%rax\n" // EINTR | |
| 151 "jz 2b\n" | |
| 152 "cmp %%rdx, %%rax\n" | |
| 153 "jnz 25f\n" // exit process | |
| 154 | |
| 155 // Retrieve system call number. It is crucial that we only dereference | |
| 156 // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and | |
| 157 // we must use the value that we have read the first time. | |
| 158 "mov 0(%%rsi), %%eax\n" | |
| 159 | |
| 160 // If syscall number is -1, execute an unlocked system call from the | |
| 161 // secure memory area | |
| 162 "cmp $-1, %%eax\n" | |
| 163 "jnz 5f\n" | |
| 164 "3:cmp %%rbx, %%fs:0x8\n" | |
| 165 "jne 25f\n" // exit process | |
| 166 "cmp %%fs:0x10, %%eax\n" | |
| 167 "jne 25f\n" // exit process | |
| 168 "mov %%fs:0x18, %%rax\n" | |
| 169 "mov %%fs:0x20, %%rdi\n" | |
| 170 "mov %%fs:0x28, %%rsi\n" | |
| 171 "mov %%fs:0x30, %%rdx\n" | |
| 172 "mov %%fs:0x38, %%r10\n" | |
| 173 "mov %%fs:0x40, %%r8\n" | |
| 174 "mov %%fs:0x48, %%r9\n" | |
| 175 "cmp %%rbx, %%fs:0x8\n" | |
| 176 "jne 25f\n" // exit process | |
| 177 "add $2, %%rbx\n" | |
| 178 | |
| 179 // shmget() gets some special treatment. Whenever we return from this | |
| 180 // system call, we remember the most recently returned SysV shm id. | |
| 181 "cmp $29, %%eax\n" // NR_shmget | |
| 182 "jnz 4f\n" | |
| 183 "syscall\n" | |
| 184 "mov %%rax, %%r8\n" | |
| 185 "mov $56, %%eax\n" // NR_clone | |
| 186 "mov $17, %%edi\n" // flags = SIGCHLD | |
| 187 "mov $1, %%esi\n" // stack = 1 | |
| 188 "syscall\n" | |
| 189 "test %%rax, %%rax\n" | |
| 190 "js 25f\n" // exit process | |
| 191 "mov %%rax, %%rdi\n" | |
| 192 "jnz 8f\n" // wait for child, then return result | |
| 193 "mov %%fs:0x0, %%rdi\n" // start = secure_mem | |
| 194 "mov $4096, %%esi\n" // len = 4096 | |
| 195 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 196 "mov $10, %%eax\n" // NR_mprotect | |
| 197 "syscall\n" | |
| 198 "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id | |
| 199 "xor %%rdi, %%rdi\n" | |
| 200 | |
| 201 // When debugging messages are enabled, warn about expensive system calls | |
| 202 #ifndef NDEBUG | |
| 203 "cmpw $0, %%fs:0xD8\n" // debug mode | |
| 204 "jz 27f\n" | |
| 205 "mov $1, %%eax\n" // NR_write | |
| 206 "mov $2, %%edi\n" // fd = stderr | |
| 207 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" | |
| 208 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
| 209 "syscall\n" | |
| 210 "xor %%rdi, %%rdi\n" | |
| 211 #endif | |
| 212 | |
| 213 "jmp 27f\n" // exit program, no message | |
| 214 "4:syscall\n" | |
| 215 "jmp 15f\n" // return result | |
| 216 | |
| 217 // If syscall number is -2, execute locked system call from the | |
| 218 // secure memory area | |
| 219 "5:jg 12f\n" | |
| 220 "cmp $-2, %%eax\n" | |
| 221 "jnz 9f\n" | |
| 222 "cmp %%rbx, %%fs:0x8\n" | |
| 223 "jne 25f\n" // exit process | |
| 224 "cmp %%eax, %%fs:0x10\n" | |
| 225 "jne 25f\n" // exit process | |
| 226 | |
| 227 // When debugging messages are enabled, warn about expensive system calls | |
| 228 #ifndef NDEBUG | |
| 229 "cmpw $0, %%fs:0xD8\n" // debug mode | |
| 230 "jz 6f\n" | |
| 231 "mov $1, %%eax\n" // NR_write | |
| 232 "mov $2, %%edi\n" // fd = stderr | |
| 233 "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" | |
| 234 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
| 235 "syscall\n" | |
| 236 "6:" | |
| 237 #endif | |
| 238 | |
| 239 "mov %%fs:0x18, %%rax\n" | |
| 240 "mov %%fs:0x20, %%rdi\n" | |
| 241 "mov %%fs:0x28, %%rsi\n" | |
| 242 "mov %%fs:0x30, %%rdx\n" | |
| 243 "mov %%fs:0x38, %%r10\n" | |
| 244 "mov %%fs:0x40, %%r8\n" | |
| 245 "mov %%fs:0x48, %%r9\n" | |
| 246 "cmp %%rbx, %%fs:0x8\n" | |
| 247 "jne 25f\n" // exit process | |
| 248 | |
| 249 // clone() has unusual calling conventions and must be handled specially | |
| 250 "cmp $56, %%rax\n" // NR_clone | |
| 251 "jz 19f\n" | |
| 252 | |
| 253 // exit() terminates trusted thread | |
| 254 "cmp $60, %%eax\n" // NR_exit | |
| 255 "jz 18f\n" | |
| 256 | |
| 257 // Perform requested system call | |
| 258 "syscall\n" | |
| 259 | |
| 260 // Unlock mutex | |
| 261 "7:cmp %%rbx, %%fs:0x8\n" | |
| 262 "jne 25f\n" // exit process | |
| 263 "add $2, %%rbx\n" | |
| 264 "mov %%rax, %%r8\n" | |
| 265 "mov $56, %%eax\n" // NR_clone | |
| 266 "mov $17, %%rdi\n" // flags = SIGCHLD | |
| 267 "mov $1, %%rsi\n" // stack = 1 | |
| 268 "syscall\n" | |
| 269 "test %%rax, %%rax\n" | |
| 270 "js 25f\n" // exit process | |
| 271 "jz 22f\n" // unlock and exit | |
| 272 "mov %%rax, %%rdi\n" | |
| 273 "8:xor %%rsi, %%rsi\n" | |
| 274 "xor %%rdx, %%rdx\n" | |
| 275 "xor %%r10, %%r10\n" | |
| 276 "mov $61, %%eax\n" // NR_wait4 | |
| 277 "syscall\n" | |
| 278 "cmp $-4, %%eax\n" // EINTR | |
| 279 "jz 8b\n" | |
| 280 "mov %%r8, %%rax\n" | |
| 281 "jmp 15f\n" // return result | |
| 282 | |
| 283 // If syscall number is -3, read the time stamp counter | |
| 284 "9:cmp $-3, %%eax\n" | |
| 285 "jnz 10f\n" | |
| 286 "rdtsc\n" // sets %edx:%eax | |
| 287 "xor %%rcx, %%rcx\n" | |
| 288 "jmp 11f\n" | |
| 289 "10:cmp $-4, %%eax\n" | |
| 290 "jnz 12f\n" | |
| 291 "rdtscp\n" // sets %edx:%eax and %ecx | |
| 292 "11:add $0x3C, %%rsi\n" | |
| 293 "mov %%eax, 0(%%rsi)\n" | |
| 294 "mov %%edx, 4(%%rsi)\n" | |
| 295 "mov %%ecx, 8(%%rsi)\n" | |
| 296 "mov $12, %%edx\n" | |
| 297 "jmp 16f\n" // return result | |
| 298 | |
| 299 // Check in syscallTable whether this system call is unrestricted | |
| 300 "12:mov %%rax, %%r9\n" | |
| 301 #ifndef NDEBUG | |
| 302 "cmpw $0, %%fs:0xD8\n" // debug mode | |
| 303 "jnz 13f\n" | |
| 304 #endif | |
| 305 "cmp playground$maxSyscall(%%rip), %%eax\n" | |
| 306 "ja 25f\n" // exit process | |
| 307 "shl $4, %%rax\n" | |
| 308 "lea playground$syscallTable(%%rip), %%rdi\n" | |
| 309 "add %%rdi, %%rax\n" | |
| 310 "mov 0(%%rax), %%rax\n" | |
| 311 "cmp $1, %%rax\n" | |
| 312 "jne 25f\n" // exit process | |
| 313 | |
| 314 // Default behavior for unrestricted system calls is to just execute | |
| 315 // them. Read the remaining arguments first. | |
| 316 "13:mov %%rsi, %%r8\n" | |
| 317 "xor %%rax, %%rax\n" // NR_read | |
| 318 "mov %%r13, %%rdi\n" // fd = threadFd | |
| 319 "add $4, %%rsi\n" // buf = &scratch + 4 | |
| 320 "mov $48, %%edx\n" // len = 6*sizeof(void *) | |
| 321 "14:syscall\n" | |
| 322 "cmp $-4, %%rax\n" // EINTR | |
| 323 "jz 14b\n" | |
| 324 "cmp %%rdx, %%rax\n" | |
| 325 "jnz 25f\n" // exit process | |
| 326 "mov %%r9, %%rax\n" | |
| 327 "mov 0x04(%%r8), %%rdi\n" | |
| 328 "mov 0x0C(%%r8), %%rsi\n" | |
| 329 "mov 0x14(%%r8), %%rdx\n" | |
| 330 "mov 0x1C(%%r8), %%r10\n" | |
| 331 "mov 0x2C(%%r8), %%r9\n" | |
| 332 "mov 0x24(%%r8), %%r8\n" | |
| 333 "cmp $231, %%rax\n" // NR_exit_group | |
| 334 "jz 27f\n" // exit program, no message | |
| 335 "syscall\n" | |
| 336 | |
| 337 // Return result of system call to sandboxed thread | |
| 338 "15:mov %%fs:0x0, %%rsi\n" // secure_mem | |
| 339 "add $0x1034, %%rsi\n" // buf = &scratch + 52 | |
| 340 "mov %%rax, (%%rsi)\n" | |
| 341 "mov $8, %%edx\n" // len = 8 | |
| 342 "16:mov %%r13, %%rdi\n" // fd = threadFd | |
| 343 "mov $1, %%eax\n" // NR_write | |
| 344 "17:syscall\n" | |
| 345 "cmp %%rdx, %%rax\n" | |
| 346 "jz 1b\n" | |
| 347 "cmp $-4, %%rax\n" // EINTR | |
| 348 "jz 17b\n" | |
| 349 "jmp 25f\n" // exit process | |
| 350 | |
| 351 // NR_exit: | |
| 352 // Exit trusted thread after cleaning up resources | |
| 353 "18:mov %%fs:0x0, %%rsi\n" // secure_mem | |
| 354 "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub | |
| 355 "mov $3, %%eax\n" // NR_close | |
| 356 "syscall\n" | |
| 357 "mov %%rsi, %%rdi\n" // start = secure_mem | |
| 358 "mov $8192, %%esi\n" // length = 8192 | |
| 359 "xor %%rdx, %%rdx\n" // prot = PROT_NONE | |
| 360 "mov $10, %%eax\n" // NR_mprotect | |
| 361 "syscall\n" | |
| 362 "mov %%r13, %%rdi\n" // fd = threadFd | |
| 363 "mov $3, %%eax\n" // NR_close | |
| 364 "syscall\n" | |
| 365 "mov $56, %%eax\n" // NR_clone | |
| 366 "mov $17, %%rdi\n" // flags = SIGCHLD | |
| 367 "mov $1, %%rsi\n" // stack = 1 | |
| 368 "syscall\n" | |
| 369 "mov %%rax, %%rdi\n" | |
| 370 "test %%rax, %%rax\n" | |
| 371 "js 27f\n" // exit process | |
| 372 "jne 21f\n" // reap helper, exit thread | |
| 373 "jmp 22f\n" // unlock mutex | |
| 374 | |
| 375 // NR_clone: | |
| 376 // Original trusted thread calls clone() to create new nascent | |
| 377 // thread. This thread is (typically) fully privileged and shares all | |
| 378 // resources with the caller (i.e. the previous trusted thread), | |
| 379 // and by extension it shares all resources with the sandbox'd | |
| 380 // threads. | |
| 381 "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem | |
| 382 "mov %%rsi, %%r15\n" // remember child stack | |
| 383 "mov $1, %%rsi\n" // stack = 1 | |
| 384 "syscall\n" // calls NR_clone | |
| 385 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values | |
| 386 "jae 7b\n" // unlock mutex, return result | |
| 387 "add $2, %%rbx\n" | |
| 388 "test %%rax, %%rax\n" | |
| 389 "jne 15b\n" // return result | |
| 390 | |
| 391 // In nascent thread, now. | |
| 392 "sub $2, %%rbx\n" | |
| 393 | |
| 394 // We want to maintain an invalid %rsp whenever we access untrusted | |
| 395 // memory. This ensures that even if an attacker can trick us into | |
| 396 // triggering a SIGSEGV, we will never successfully execute a signal | |
| 397 // handler. | |
| 398 // Signal handlers are inherently dangerous, as an attacker could trick | |
| 399 // us into returning to the wrong address by adjusting the signal stack | |
| 400 // right before the handler returns. | |
| 401 // N.B. While POSIX is curiously silent about this, it appears that on | |
| 402 // Linux, alternate signal stacks are a per-thread property. That is | |
| 403 // good. It means that this security mechanism works, even if the | |
| 404 // sandboxed thread manages to set up an alternate signal stack. | |
| 405 // | |
| 406 // TODO(markus): We currently do not support emulating calls to | |
| 407 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc | |
| 408 // for a discussion on how to fix this, if this ever becomes necessary. | |
| 409 "mov %%r15, %%r9\n" // %r9 = child_stack | |
| 410 "xor %%r15, %%r15\n" // Request to return from clone() when done | |
| 411 | |
| 412 // Get thread id of nascent thread | |
| 413 "20:mov $186, %%eax\n" // NR_gettid | |
| 414 "syscall\n" | |
| 415 "mov %%rax, %%r14\n" | |
| 416 | |
| 417 // Nascent thread creates socketpair() for sending requests to | |
| 418 // trusted thread. | |
| 419 // We can create the filehandles on the child's stack. Filehandles are | |
| 420 // always treated as untrusted. | |
| 421 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) | |
| 422 "sub $0x10, %%r9\n" | |
| 423 "mov %%r15, 8(%%r9)\n" // preserve return address on child stack | |
| 424 "mov $53, %%eax\n" // NR_socketpair | |
| 425 "mov $1, %%edi\n" // domain = AF_UNIX | |
| 426 "mov $1, %%esi\n" // type = SOCK_STREAM | |
| 427 "xor %%rdx, %%rdx\n" // protocol = 0 | |
| 428 "mov %%r9, %%r10\n" // sv = child_stack | |
| 429 "syscall\n" | |
| 430 "test %%rax, %%rax\n" | |
| 431 "jz 28f\n" | |
| 432 | |
| 433 // If things went wrong, we don't have an (easy) way of signaling | |
| 434 // the parent. For our purposes, it is sufficient to fail with a | |
| 435 // fatal error. | |
| 436 "jmp 25f\n" // exit process | |
| 437 "21:xor %%rsi, %%rsi\n" | |
| 438 "xor %%rdx, %%rdx\n" | |
| 439 "xor %%r10, %%r10\n" | |
| 440 "mov $61, %%eax\n" // NR_wait4 | |
| 441 "syscall\n" | |
| 442 "cmp $-4, %%eax\n" // EINTR | |
| 443 "jz 21b\n" | |
| 444 "jmp 23f\n" // exit thread (no message) | |
| 445 "22:lea playground$syscall_mutex(%%rip), %%rdi\n" | |
| 446 "mov $4096, %%esi\n" | |
| 447 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 448 "mov $10, %%eax\n" // NR_mprotect | |
| 449 "syscall\n" | |
| 450 "lock; addl $0x80000000, (%%rdi)\n" | |
| 451 "jz 23f\n" // exit thread | |
| 452 "mov $1, %%edx\n" | |
| 453 "mov %%rdx, %%rsi\n" // FUTEX_WAKE | |
| 454 "mov $202, %%eax\n" // NR_futex | |
| 455 "syscall\n" | |
| 456 "23:mov $60, %%eax\n" // NR_exit | |
| 457 "mov $1, %%edi\n" // status = 1 | |
| 458 "24:syscall\n" | |
| 459 "25:mov $1, %%eax\n" // NR_write | |
| 460 "mov $2, %%edi\n" // fd = stderr | |
| 461 "lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected" | |
| 462 "mov $101f-100f, %%edx\n" // len = strlen(msg) | |
| 463 "syscall\n" | |
| 464 "26:mov $1, %%edi\n" | |
| 465 "27:mov $231, %%eax\n" // NR_exit_group | |
| 466 "jmp 24b\n" | |
| 467 | |
| 468 // The first page is mapped read-only for use as securely shared memory | |
| 469 "28:mov 0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory | |
| 470 "cmp %%rbx, 8(%%rbp)\n" | |
| 471 "jne 25b\n" // exit process | |
| 472 "mov $10, %%eax\n" // NR_mprotect | |
| 473 "mov %%r12, %%rdi\n" // addr = secure_mem | |
| 474 "mov $4096, %%esi\n" // len = 4096 | |
| 475 "mov $1, %%edx\n" // prot = PROT_READ | |
| 476 "syscall\n" | |
| 477 | |
| 478 // The second page is used as scratch space by the trusted thread. | |
| 479 // Make it writable. | |
| 480 "mov $10, %%eax\n" // NR_mprotect | |
| 481 "add $4096, %%rdi\n" // addr = secure_mem + 4096 | |
| 482 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 483 "syscall\n" | |
| 484 | |
| 485 // Call clone() to create new trusted thread(). | |
| 486 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| | |
| 487 // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL, | |
| 488 // tls) | |
| 489 "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack) | |
| 490 "mov $56, %%eax\n" // NR_clone | |
| 491 "mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS | |
| 492 "mov $1, %%rsi\n" // stack = 1 | |
| 493 "mov %%r12, %%r8\n" // tls = new_secure_mem | |
| 494 "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub | |
| 495 "cmp %%rbx, 8(%%rbp)\n" | |
| 496 "jne 25b\n" // exit process | |
| 497 "syscall\n" | |
| 498 "test %%rax, %%rax\n" | |
| 499 "js 25b\n" // exit process | |
| 500 "jz 0b\n" // invoke trustedThreadFnc() | |
| 501 | |
| 502 // Copy the caller's signal mask | |
| 503 "mov 0x1054(%%rbp), %%rax\n" | |
| 504 "mov %%rax, 0x1054(%%r12)\n" | |
| 505 | |
| 506 // Done creating trusted thread. We can now get ready to return to caller | |
| 507 "mov %%r9, %%r8\n" // %r8 = child_stack | |
| 508 "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub | |
| 509 | |
| 510 // Set up thread local storage with information on how to talk to | |
| 511 // trusted thread and trusted process. | |
| 512 "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS; | |
| 513 "mov $158, %%eax\n" // NR_arch_prctl | |
| 514 "mov $0x1001, %%edi\n" // option = ARCH_SET_GS | |
| 515 "syscall\n" | |
| 516 "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values | |
| 517 "jae 25b\n" // exit process | |
| 518 | |
| 519 // Check whether this is the initial thread, or a newly created one. | |
| 520 // At startup we run the same code as when we create a new thread. At | |
| 521 // the very top of this function, you will find that we load the address | |
| 522 // of label 999 into %r15, which later gets saved on the stack. A non-zero | |
| 523 // value means we return on the same stack, not to where clone was called. | |
| 524 "mov 8(%%r8), %%r15\n" | |
| 525 "add $0x10, %%r8\n" | |
| 526 "test %%r15, %%r15\n" | |
| 527 "jne 29f\n" | |
| 528 | |
| 529 // Returning from clone() into the newly created thread is special. We | |
| 530 // cannot unroll the stack, as we just set up a new stack for this | |
| 531 // thread. We have to explicitly restore CPU registers to the values | |
| 532 // that they had when the program originally called clone(). | |
| 533 // We patch the register values in the signal stack frame so that we | |
| 534 // can ask sigreturn() to restore all registers for us. | |
| 535 "sub $0x8, %%r8\n" | |
| 536 "mov 0x50(%%rbp), %%rax\n" | |
| 537 "mov %%rax, 0x00(%%r8)\n" // return address | |
| 538 "xor %%rax, %%rax\n" | |
| 539 "mov %%rax, 0x98(%%r8)\n" // %rax = 0 | |
| 540 "mov 0x58(%%rbp), %%rax\n" | |
| 541 "mov %%rax, 0x80(%%r8)\n" // %rbp | |
| 542 "mov 0x60(%%rbp), %%rax\n" | |
| 543 "mov %%rax, 0x88(%%r8)\n" // %rbx | |
| 544 "mov 0x68(%%rbp), %%rax\n" | |
| 545 "mov %%rax, 0xA0(%%r8)\n" // %rcx | |
| 546 "mov 0x70(%%rbp), %%rax\n" | |
| 547 "mov %%rax, 0x90(%%r8)\n" // %rdx | |
| 548 "mov 0x78(%%rbp), %%rax\n" | |
| 549 "mov %%rax, 0x78(%%r8)\n" // %rsi | |
| 550 "mov 0x80(%%rbp), %%rax\n" | |
| 551 "mov %%rax, 0x70(%%r8)\n" // %rdi | |
| 552 "mov 0x88(%%rbp), %%rax\n" | |
| 553 "mov %%rax, 0x30(%%r8)\n" // %r8 | |
| 554 "mov 0x90(%%rbp), %%rax\n" | |
| 555 "mov %%rax, 0x38(%%r8)\n" // %r9 | |
| 556 "mov 0x98(%%rbp), %%rax\n" | |
| 557 "mov %%rax, 0x40(%%r8)\n" // %r10 | |
| 558 "mov 0xA0(%%rbp), %%rax\n" | |
| 559 "mov %%rax, 0x48(%%r8)\n" // %r11 | |
| 560 "mov 0xA8(%%rbp), %%rax\n" | |
| 561 "mov %%rax, 0x50(%%r8)\n" // %r12 | |
| 562 "mov 0xB0(%%rbp), %%rax\n" | |
| 563 "mov %%rax, 0x58(%%r8)\n" // %r13 | |
| 564 "mov 0xB8(%%rbp), %%rax\n" | |
| 565 "mov %%rax, 0x60(%%r8)\n" // %r14 | |
| 566 "mov 0xC0(%%rbp), %%rax\n" | |
| 567 "mov %%rax, 0x68(%%r8)\n" // %r15 | |
| 568 "cmp %%rbx, 8(%%rbp)\n" | |
| 569 "jne 25b\n" // exit process | |
| 570 | |
| 571 // Nascent thread launches a helper that doesn't share any of our | |
| 572 // resources, except for pages mapped as MAP_SHARED. | |
| 573 // clone(SIGCHLD, stack=1) | |
| 574 "29:mov $56, %%eax\n" // NR_clone | |
| 575 "mov $17, %%rdi\n" // flags = SIGCHLD | |
| 576 "mov $1, %%rsi\n" // stack = 1 | |
| 577 "syscall\n" | |
| 578 "test %%rax, %%rax\n" | |
| 579 "js 25b\n" // exit process | |
| 580 "jne 31f\n" | |
| 581 | |
| 582 // Use sendmsg() to send to the trusted process the file handles for | |
| 583 // communicating with the new trusted thread. We also send the address | |
| 584 // of the secure memory area (for sanity checks) and the thread id. | |
| 585 "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub() | |
| 586 "cmp %%rbx, 8(%%rbp)\n" | |
| 587 "jne 25b\n" // exit process | |
| 588 | |
| 589 // 0x00 msg: | |
| 590 // 0x00 msg_name ($0) | |
| 591 // 0x08 msg_namelen ($0) | |
| 592 // 0x10 msg_iov (%r8 + 0x44) | |
| 593 // 0x18 msg_iovlen ($1) | |
| 594 // 0x20 msg_control (%r8 + 0x54) | |
| 595 // 0x28 msg_controllen ($0x18) | |
| 596 // 0x30 data: | |
| 597 // 0x30 msg_flags/err ($0) | |
| 598 // 0x34 secure_mem (%r12) | |
| 599 // 0x3C threadId (%r14d) | |
| 600 // 0x40 threadFdPub (%r9d) | |
| 601 // 0x44 iov: | |
| 602 // 0x44 iov_base (%r8 + 0x30) | |
| 603 // 0x4C iov_len ($0x14) | |
| 604 // 0x54 cmsg: | |
| 605 // 0x54 cmsg_len ($0x18) | |
| 606 // 0x5C cmsg_level ($1, SOL_SOCKET) | |
| 607 // 0x60 cmsg_type ($1, SCM_RIGHTS) | |
| 608 // 0x64 threadFdPub (%r9d) | |
| 609 // 0x68 threadFd (%r13d) | |
| 610 // 0x6C | |
| 611 "sub $0x6C, %%r8\n" | |
| 612 "xor %%rdx, %%rdx\n" // flags = 0 | |
| 613 "mov %%rdx, 0x00(%%r8)\n" // msg_name | |
| 614 "mov %%edx, 0x08(%%r8)\n" // msg_namelen | |
| 615 "mov %%edx, 0x30(%%r8)\n" // msg_flags | |
| 616 "mov $1, %%r11d\n" | |
| 617 "mov %%r11, 0x18(%%r8)\n" // msg_iovlen | |
| 618 "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level | |
| 619 "mov %%r11d, 0x60(%%r8)\n" // cmsg_type | |
| 620 "lea 0x30(%%r8), %%r11\n" | |
| 621 "mov %%r11, 0x44(%%r8)\n" // iov_base | |
| 622 "add $0x14, %%r11\n" | |
| 623 "mov %%r11, 0x10(%%r8)\n" // msg_iov | |
| 624 "add $0x10, %%r11\n" | |
| 625 "mov %%r11, 0x20(%%r8)\n" // msg_control | |
| 626 "mov $0x14, %%r11d\n" | |
| 627 "mov %%r11, 0x4C(%%r8)\n" // iov_len | |
| 628 "add $4, %%r11d\n" | |
| 629 "mov %%r11, 0x28(%%r8)\n" // msg_controllen | |
| 630 "mov %%r11, 0x54(%%r8)\n" // cmsg_len | |
| 631 "mov %%r12, 0x34(%%r8)\n" // secure_mem | |
| 632 "mov %%r14d, 0x3C(%%r8)\n" // threadId | |
| 633 "mov %%r9d, 0x40(%%r8)\n" // threadFdPub | |
| 634 "mov %%r9d, 0x64(%%r8)\n" // threadFdPub | |
| 635 "mov %%r13d, 0x68(%%r8)\n" // threadFd | |
| 636 "mov $46, %%eax\n" // NR_sendmsg | |
| 637 "mov %%r8, %%rsi\n" // msg | |
| 638 "syscall\n" | |
| 639 | |
| 640 // Release syscall_mutex_. This signals the trusted process that | |
| 641 // it can write into the original thread's secure memory again. | |
| 642 "mov $10, %%eax\n" // NR_mprotect | |
| 643 "lea playground$syscall_mutex(%%rip), %%rdi\n" | |
| 644 "mov $4096, %%esi\n" | |
| 645 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE | |
| 646 "syscall\n" | |
| 647 "cmp %%rbx, 8(%%rbp)\n" | |
| 648 "jne 25b\n" // exit process | |
| 649 "lock; addl $0x80000000, (%%rdi)\n" | |
| 650 "jz 30f\n" // exit process (no error message) | |
| 651 "mov $1, %%edx\n" | |
| 652 "mov %%rdx, %%rsi\n" // FUTEX_WAKE | |
| 653 "mov $202, %%eax\n" // NR_futex | |
| 654 "syscall\n" | |
| 655 "30:xor %%rdi, %%rdi\n" | |
| 656 "jmp 27b\n" // exit process (no error message) | |
| 657 | |
| 658 // Reap helper | |
| 659 "31:mov %%rax, %%rdi\n" | |
| 660 "32:lea -4(%%r8), %%rsi\n" | |
| 661 "xor %%rdx, %%rdx\n" | |
| 662 "xor %%r10, %%r10\n" | |
| 663 "mov $61, %%eax\n" // NR_wait4 | |
| 664 "syscall\n" | |
| 665 "cmp $-4, %%eax\n" // EINTR | |
| 666 "jz 32b\n" | |
| 667 "mov -4(%%r8), %%eax\n" | |
| 668 "test %%rax, %%rax\n" | |
| 669 "jnz 26b\n" // exit process (no error message) | |
| 670 | |
| 671 // Release privileges by entering seccomp mode. | |
| 672 "mov $157, %%eax\n" // NR_prctl | |
| 673 "mov $22, %%edi\n" // PR_SET_SECCOMP | |
| 674 "mov $1, %%esi\n" | |
| 675 "syscall\n" | |
| 676 "test %%rax, %%rax\n" | |
| 677 "jnz 25b\n" // exit process | |
| 678 | |
| 679 // We can finally start using the stack. Signal handlers no longer pose | |
| 680 // a threat to us. | |
| 681 "mov %%r8, %%rsp\n" | |
| 682 | |
| 683 // Back in the newly created sandboxed thread, wait for trusted process | |
| 684 // to receive request. It is possible for an attacker to make us | |
| 685 // continue even before the trusted process is done. This is OK. It'll | |
| 686 // result in us putting stale values into the new thread's TLS. But that | |
| 687 // data is considered untrusted anyway. | |
| 688 "push %%rax\n" | |
| 689 "mov $1, %%edx\n" // len = 1 | |
| 690 "mov %%rsp, %%rsi\n" // buf = %rsp | |
| 691 "mov %%r9, %%rdi\n" // fd = threadFdPub | |
| 692 "33:xor %%rax, %%rax\n" // NR_read | |
| 693 "syscall\n" | |
| 694 "cmp $-4, %%rax\n" // EINTR | |
| 695 "jz 33b\n" | |
| 696 "cmp %%rdx, %%rax\n" | |
| 697 "jne 25b\n" // exit process | |
| 698 "pop %%rax\n" | |
| 699 | |
| 700 // Return to caller. We are in the new thread, now. | |
| 701 "test %%r15, %%r15\n" | |
| 702 "jnz 34f\n" // Returning to createTrustedThread() | |
| 703 | |
| 704 // Returning to the place where clone() had been called. We rely on | |
| 705 // using rt_sigreturn() for restoring our registers. The caller already | |
| 706 // created a signal stack frame, and we patched the register values | |
| 707 // with the ones that were in effect prior to calling sandbox_clone(). | |
| 708 "pop %%r15\n" | |
| 709 "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip | |
| 710 "mov $15, %%eax\n" // NR_rt_sigreturn | |
| 711 "syscall\n" | |
| 712 | |
| 713 ".pushsection \".rodata\"\n" | |
| 714 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" | |
| 715 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" | |
| 716 "102:\n" | |
| 717 ".popsection\n" | |
| 718 | |
| 719 "999:pop %%rbp\n" | |
| 720 "pop %%rbx\n" | |
| 721 : | |
| 722 : "g"(&args) | |
| 723 : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", | |
| 724 "r13", "r14", "r15", "rsp", "memory" | |
| 725 #elif defined(__i386__) | |
| 726 struct user_desc u; | |
| 727 u.entry_number = (typeof u.entry_number)-1; | |
| 728 u.base_addr = 0; | |
| 729 u.limit = 0xfffff; | |
| 730 u.seg_32bit = 1; | |
| 731 u.contents = 0; | |
| 732 u.read_exec_only = 0; | |
| 733 u.limit_in_pages = 1; | |
| 734 u.seg_not_present = 0; | |
| 735 u.useable = 1; | |
| 736 SysCalls sys; | |
| 737 if (sys.set_thread_area(&u) < 0) { | |
| 738 die("Cannot set up thread local storage"); | |
| 739 } | |
| 740 asm volatile("movw %w0, %%fs" | |
| 741 : | |
| 742 : "q"(8*u.entry_number+3)); | |
| 743 asm volatile( | |
| 744 "push %%ebx\n" | |
| 745 "push %%ebp\n" | |
| 746 | |
| 747 // Signal handlers are process-wide. This means that for security | |
| 748 // reasons, we cannot allow that the trusted thread ever executes any | |
| 749 // signal handlers. | |
| 750 // We prevent the execution of signal handlers by setting a signal | |
| 751 // mask that blocks all signals. In addition, we make sure that the | |
| 752 // stack pointer is invalid. | |
| 753 // We cannot reset the signal mask until after we have enabled | |
| 754 // Seccomp mode. Our sigprocmask() wrapper would normally do this by | |
| 755 // raising a signal, modifying the signal mask in the kernel-generated | |
| 756 // signal frame, and then calling sigreturn(). This presents a bit of | |
| 757 // a Catch-22, as all signals are masked and we can therefore not | |
| 758 // raise any signal that would allow us to generate the signal stack | |
| 759 // frame. | |
| 760 // Instead, we have to create the signal stack frame prior to entering | |
| 761 // Seccomp mode. This incidentally also helps us to restore the | |
| 762 // signal mask to the same value that it had prior to entering the | |
| 763 // sandbox. | |
| 764 // The signal wrapper for clone() is the second entry point into this | |
| 765 // code (by means of sending an IPC to its trusted thread). It goes | |
| 766 // through the same steps of creating a signal stack frame on the | |
| 767 // newly created thread's stacks prior to cloning. See clone.cc for | |
| 768 // details. | |
| 769 "mov %0, %%edi\n" // create signal stack before accessing MMX | |
| 770 "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000 | |
| 771 "mov %%esp, %%ebp\n" | |
| 772 "int $0\n" // push a signal stack frame (see clone.cc) | |
| 773 "mov %%ebp, 0x1C(%%esp)\n" // pop stack upon call to sigreturn() | |
| 774 "mov %%esp, %%ebp\n" | |
| 775 "mov $2, %%ebx\n" // how = SIG_SETMASK | |
| 776 "pushl $-1\n" | |
| 777 "pushl $-1\n" | |
| 778 "mov %%esp, %%ecx\n" // set = full mask | |
| 779 "xor %%edx, %%edx\n" // old_set = NULL | |
| 780 "mov $8, %%esi\n" // mask all 64 signals | |
| 781 "mov $175, %%eax\n" // NR_rt_sigprocmask | |
| 782 "int $0x80\n" | |
| 783 "mov $126, %%eax\n" // NR_sigprocmask | |
| 784 "int $0x80\n" | |
| 785 "xor %%esp, %%esp\n" // invalidate the stack in all trusted code | |
| 786 "movd %%edi, %%mm6\n" // %mm6 = args | |
| 787 "lea 999f, %%edi\n" // continue in same thread | |
| 788 "movd %%edi, %%mm3\n" | |
| 789 "xor %%edi, %%edi\n" // initial sequence number | |
| 790 "movd %%edi, %%mm2\n" | |
| 791 "jmp 20f\n" // create trusted thread | |
| 792 | |
| 793 // TODO(markus): Coalesce the read() operations by reading into a bigger | |
| 794 // buffer. | |
| 795 | |
| 796 // Parameters: | |
| 797 // %mm0: thread's side of threadFd | |
| 798 // %mm1: processFdPub | |
| 799 // %mm3: return address after creation of new trusted thread | |
| 800 // %mm5: secure memory region | |
| 801 // the page following this one contains the scratch space | |
| 802 | |
| 803 // Local variables: | |
| 804 // %mm2: sequence number for trusted calls | |
| 805 // %mm4: thread id | |
| 806 | |
| 807 // Temporary variables: | |
| 808 // %ebp: system call number | |
| 809 // %mm6: secure memory of previous thread | |
| 810 // %mm7: temporary variable for spilling data | |
| 811 | |
| 812 // Layout of secure shared memory region (c.f. securemem.h): | |
| 813 // 0x00: pointer to the secure shared memory region (i.e. self) | |
| 814 // 0x04: sequence number; must match %mm2 | |
| 815 // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2 | |
| 816 // 0x0C: system call number; passed to syscall in %eax | |
| 817 // 0x10: first argument; passed to syscall in %ebx | |
| 818 // 0x14: second argument; passed to syscall in %ecx | |
| 819 // 0x18: third argument; passed to syscall in %edx | |
| 820 // 0x1C: fourth argument; passed to syscall in %esi | |
| 821 // 0x20: fifth argument; passed to syscall in %edi | |
| 822 // 0x24: sixth argument; passed to syscall in %ebp | |
| 823 // 0x28: stored return address for clone() system call | |
| 824 // 0x2C: stored %ebp value for clone() system call | |
| 825 // 0x30: stored %edi value for clone() system call | |
| 826 // 0x34: stored %esi value for clone() system call | |
| 827 // 0x38: stored %edx value for clone() system call | |
| 828 // 0x3C: stored %ecx value for clone() system call | |
| 829 // 0x40: stored %ebx value for clone() system call | |
| 830 // 0x44: new shared memory for clone() | |
| 831 // 0x48: processFdPub for talking to trusted process | |
| 832 // 0x4C: cloneFdPub for talking to trusted process | |
| 833 // 0x50: set to non-zero, if in debugging mode | |
| 834 // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE) | |
| 835 // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE) | |
| 836 // 0x60: thread id (TLS_TID) | |
| 837 // 0x68: threadFdPub (TLS_THREAD_FD) | |
| 838 // 0x200-0x1000: securely passed verified file name(s) | |
| 839 | |
| 840 // Layout of (untrusted) scratch space: | |
| 841 // 0x00: syscall number; passed in %eax | |
| 842 // 0x04: first argument; passed in %ebx | |
| 843 // 0x08: second argument; passed in %ecx | |
| 844 // 0x0C: third argument; passed in %edx | |
| 845 // 0x10: fourth argument; passed in %esi | |
| 846 // 0x14: fifth argument; passed in %edi | |
| 847 // 0x18: sixth argument; passed in %ebp | |
| 848 // 0x1C: return value | |
| 849 // 0x20: RDTSCP result (%eax) | |
| 850 // 0x24: RDTSCP result (%edx) | |
| 851 // 0x28: RDTSCP result (%ecx) | |
| 852 // 0x2C: last system call (updated in syscall.cc) | |
| 853 // 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday) | |
| 854 // 0x34: nesting level of system calls (for debugging purposes only) | |
| 855 // 0x38: signal mask | |
| 856 // 0x40: in SEGV handler | |
| 857 | |
| 858 "0:xor %%esp, %%esp\n" | |
| 859 "mov $2, %%eax\n" // %mm2 = initial sequence number | |
| 860 "movd %%eax, %%mm2\n" | |
| 861 | |
| 862 // Read request from untrusted thread, or from trusted process. In either | |
| 863 // case, the data that we read has to be considered untrusted. | |
| 864 // read(threadFd, &scratch, 4) | |
| 865 "1:mov $3, %%eax\n" // NR_read | |
| 866 "movd %%mm0, %%ebx\n" // fd = threadFd | |
| 867 "movd %%mm5, %%ecx\n" // secure_mem | |
| 868 "add $0x1000, %%ecx\n" // buf = &scratch | |
| 869 "mov $4, %%edx\n" // len = 4 | |
| 870 "2:int $0x80\n" | |
| 871 "cmp $-4, %%eax\n" // EINTR | |
| 872 "jz 2b\n" | |
| 873 "cmp %%edx, %%eax\n" | |
| 874 "jnz 25f\n" // exit process | |
| 875 | |
| 876 // Retrieve system call number. It is crucial that we only dereference | |
| 877 // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and | |
| 878 // we must use the value that we have read the first time. | |
| 879 "mov 0(%%ecx), %%eax\n" | |
| 880 | |
| 881 // If syscall number is -1, execute an unlocked system call from the | |
| 882 // secure memory area | |
| 883 "cmp $-1, %%eax\n" | |
| 884 "jnz 5f\n" | |
| 885 "3:movd %%mm2, %%ebp\n" | |
| 886 "cmp %%ebp, 0x4-0x1000(%%ecx)\n" | |
| 887 "jne 25f\n" // exit process | |
| 888 "cmp 0x08-0x1000(%%ecx), %%eax\n" | |
| 889 "jne 25f\n" // exit process | |
| 890 "mov 0x0C-0x1000(%%ecx), %%eax\n" | |
| 891 "mov 0x10-0x1000(%%ecx), %%ebx\n" | |
| 892 "mov 0x18-0x1000(%%ecx), %%edx\n" | |
| 893 "mov 0x1C-0x1000(%%ecx), %%esi\n" | |
| 894 "mov 0x20-0x1000(%%ecx), %%edi\n" | |
| 895 "mov 0x24-0x1000(%%ecx), %%ebp\n" | |
| 896 "mov 0x14-0x1000(%%ecx), %%ecx\n" | |
| 897 "movd %%edi, %%mm4\n" | |
| 898 "movd %%ebp, %%mm7\n" | |
| 899 "movd %%mm2, %%ebp\n" | |
| 900 "movd %%mm5, %%edi\n" | |
| 901 "cmp %%ebp, 4(%%edi)\n" | |
| 902 "jne 25f\n" // exit process | |
| 903 "add $2, %%ebp\n" | |
| 904 "movd %%ebp, %%mm2\n" | |
| 905 "movd %%mm4, %%edi\n" | |
| 906 "movd %%mm7, %%ebp\n" | |
| 907 | |
| 908 // shmget() gets some special treatment. Whenever we return from this | |
| 909 // system call, we remember the most recently returned SysV shm id. | |
| 910 "cmp $117, %%eax\n" // NR_ipc | |
| 911 "jnz 4f\n" | |
| 912 "cmp $23, %%ebx\n" // shmget() | |
| 913 "jnz 4f\n" | |
| 914 "int $0x80\n" | |
| 915 "mov %%eax, %%ebp\n" | |
| 916 "mov $120, %%eax\n" // NR_clone | |
| 917 "mov $17, %%ebx\n" // flags = SIGCHLD | |
| 918 "mov $1, %%ecx\n" // stack = 1 | |
| 919 "int $0x80\n" | |
| 920 "test %%eax, %%eax\n" | |
| 921 "js 25f\n" // exit process | |
| 922 "mov %%eax, %%ebx\n" | |
| 923 "jnz 8f\n" // wait for child, then return result | |
| 924 "movd %%mm5, %%ebx\n" // start = secure_mem | |
| 925 "mov $4096, %%ecx\n" // len = 4096 | |
| 926 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 927 "mov $125, %%eax\n" // NR_mprotect | |
| 928 "int $0x80\n" | |
| 929 "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id | |
| 930 "xor %%ebx, %%ebx\n" | |
| 931 | |
| 932 // When debugging messages are enabled, warn about expensive system calls | |
| 933 #ifndef NDEBUG | |
| 934 "movd %%mm5, %%ecx\n" | |
| 935 "cmpw $0, 0x50(%%ecx)\n" // debug mode | |
| 936 "jz 27f\n" | |
| 937 "mov $4, %%eax\n" // NR_write | |
| 938 "mov $2, %%ebx\n" // fd = stderr | |
| 939 "lea 101f, %%ecx\n" // "This is an expensive system call" | |
| 940 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
| 941 "int $0x80\n" | |
| 942 "xor %%ebx, %%ebx\n" | |
| 943 #endif | |
| 944 | |
| 945 "jmp 27f\n" // exit program, no message | |
| 946 "4:int $0x80\n" | |
| 947 "jmp 15f\n" // return result | |
| 948 | |
| 949 // If syscall number is -2, execute locked system call from the | |
| 950 // secure memory area | |
| 951 "5:jg 12f\n" | |
| 952 "cmp $-2, %%eax\n" | |
| 953 "jnz 9f\n" | |
| 954 "movd %%mm2, %%ebp\n" | |
| 955 "cmp %%ebp, 0x4-0x1000(%%ecx)\n" | |
| 956 "jne 25f\n" // exit process | |
| 957 "cmp %%eax, 0x8-0x1000(%%ecx)\n" | |
| 958 "jne 25f\n" // exit process | |
| 959 | |
| 960 // When debugging messages are enabled, warn about expensive system calls | |
| 961 #ifndef NDEBUG | |
| 962 "cmpw $0, 0x50-0x1000(%%ecx)\n" | |
| 963 "jz 6f\n" // debug mode | |
| 964 "mov %%ecx, %%ebp\n" | |
| 965 "mov $4, %%eax\n" // NR_write | |
| 966 "mov $2, %%ebx\n" // fd = stderr | |
| 967 "lea 101f, %%ecx\n" // "This is an expensive system call" | |
| 968 "mov $102f-101f, %%edx\n" // len = strlen(msg) | |
| 969 "int $0x80\n" | |
| 970 "mov %%ebp, %%ecx\n" | |
| 971 "6:" | |
| 972 #endif | |
| 973 | |
| 974 "mov 0x0C-0x1000(%%ecx), %%eax\n" | |
| 975 "mov 0x10-0x1000(%%ecx), %%ebx\n" | |
| 976 "mov 0x18-0x1000(%%ecx), %%edx\n" | |
| 977 "mov 0x1C-0x1000(%%ecx), %%esi\n" | |
| 978 "mov 0x20-0x1000(%%ecx), %%edi\n" | |
| 979 "mov 0x24-0x1000(%%ecx), %%ebp\n" | |
| 980 "mov 0x14-0x1000(%%ecx), %%ecx\n" | |
| 981 "movd %%edi, %%mm4\n" | |
| 982 "movd %%ebp, %%mm7\n" | |
| 983 "movd %%mm2, %%ebp\n" | |
| 984 "movd %%mm5, %%edi\n" | |
| 985 "cmp %%ebp, 4(%%edi)\n" | |
| 986 "jne 25f\n" // exit process | |
| 987 | |
| 988 // clone() has unusual calling conventions and must be handled specially | |
| 989 "cmp $120, %%eax\n" // NR_clone | |
| 990 "jz 19f\n" | |
| 991 | |
| 992 // exit() terminates trusted thread | |
| 993 "cmp $1, %%eax\n" // NR_exit | |
| 994 "jz 18f\n" | |
| 995 | |
| 996 // Perform requested system call | |
| 997 "movd %%mm4, %%edi\n" | |
| 998 "movd %%mm7, %%ebp\n" | |
| 999 "int $0x80\n" | |
| 1000 | |
| 1001 // Unlock mutex | |
| 1002 "7:movd %%mm2, %%ebp\n" | |
| 1003 "movd %%mm5, %%edi\n" | |
| 1004 "cmp %%ebp, 4(%%edi)\n" | |
| 1005 "jne 25f\n" // exit process | |
| 1006 "add $2, %%ebp\n" | |
| 1007 "movd %%ebp, %%mm2\n" | |
| 1008 "mov %%eax, %%ebp\n" | |
| 1009 "mov $120, %%eax\n" // NR_clone | |
| 1010 "mov $17, %%ebx\n" // flags = SIGCHLD | |
| 1011 "mov $1, %%ecx\n" // stack = 1 | |
| 1012 "int $0x80\n" | |
| 1013 "test %%eax, %%eax\n" | |
| 1014 "js 25f\n" // exit process | |
| 1015 "jz 22f\n" // unlock and exit | |
| 1016 "mov %%eax, %%ebx\n" | |
| 1017 "8:xor %%ecx, %%ecx\n" | |
| 1018 "xor %%edx, %%edx\n" | |
| 1019 "mov $7, %%eax\n" // NR_waitpid | |
| 1020 "int $0x80\n" | |
| 1021 "cmp $-4, %%eax\n" // EINTR | |
| 1022 "jz 8b\n" | |
| 1023 "mov %%ebp, %%eax\n" | |
| 1024 "jmp 15f\n" // return result | |
| 1025 | |
| 1026 // If syscall number is -3, read the time stamp counter | |
| 1027 "9:cmp $-3, %%eax\n" | |
| 1028 "jnz 10f\n" | |
| 1029 "rdtsc\n" // sets %edx:%eax | |
| 1030 "xor %%ecx, %%ecx\n" | |
| 1031 "jmp 11f\n" | |
| 1032 "10:cmp $-4, %%eax\n" | |
| 1033 "jnz 12f\n" | |
| 1034 "rdtscp\n" // sets %edx:%eax and %ecx | |
| 1035 "11:movd %%mm5, %%ebx\n" | |
| 1036 "add $0x1020, %%ebx\n" | |
| 1037 "mov %%eax, 0(%%ebx)\n" | |
| 1038 "mov %%edx, 4(%%ebx)\n" | |
| 1039 "mov %%ecx, 8(%%ebx)\n" | |
| 1040 "mov %%ebx, %%ecx\n" | |
| 1041 "mov $12, %%edx\n" | |
| 1042 "jmp 16f\n" // return result | |
| 1043 | |
| 1044 // Check in syscallTable whether this system call is unrestricted | |
| 1045 "12:mov %%eax, %%ebp\n" | |
| 1046 #ifndef NDEBUG | |
| 1047 "cmpw $0, 0x50-0x1000(%%ecx)\n" | |
| 1048 "jnz 13f\n" // debug mode | |
| 1049 #endif | |
| 1050 "cmp playground$maxSyscall, %%eax\n" | |
| 1051 "ja 25f\n" // exit process | |
| 1052 "shl $3, %%eax\n" | |
| 1053 "add $playground$syscallTable, %%eax\n" | |
| 1054 "mov 0(%%eax), %%eax\n" | |
| 1055 "cmp $1, %%eax\n" | |
| 1056 "jne 25f\n" // exit process | |
| 1057 | |
| 1058 // Default behavior for unrestricted system calls is to just execute | |
| 1059 // them. Read the remaining arguments first. | |
| 1060 "13:mov $3, %%eax\n" // NR_read | |
| 1061 "movd %%mm0, %%ebx\n" // fd = threadFd | |
| 1062 "add $4, %%ecx\n" // buf = &scratch + 4 | |
| 1063 "mov $24, %%edx\n" // len = 6*sizeof(void *) | |
| 1064 "14:int $0x80\n" | |
| 1065 "cmp $-4, %%eax\n" // EINTR | |
| 1066 "jz 14b\n" | |
| 1067 "cmp %%edx, %%eax\n" | |
| 1068 "jnz 25f\n" // exit process | |
| 1069 "mov %%ebp, %%eax\n" | |
| 1070 "mov 0x00(%%ecx), %%ebx\n" | |
| 1071 "mov 0x08(%%ecx), %%edx\n" | |
| 1072 "mov 0x0C(%%ecx), %%esi\n" | |
| 1073 "mov 0x10(%%ecx), %%edi\n" | |
| 1074 "mov 0x14(%%ecx), %%ebp\n" | |
| 1075 "mov 0x04(%%ecx), %%ecx\n" | |
| 1076 "cmp $252, %%eax\n" // NR_exit_group | |
| 1077 "jz 27f\n" // exit program, no message | |
| 1078 "int $0x80\n" | |
| 1079 | |
| 1080 // Return result of system call to sandboxed thread | |
| 1081 "15:movd %%mm5, %%ecx\n" // secure_mem | |
| 1082 "add $0x101C, %%ecx\n" // buf = &scratch + 28 | |
| 1083 "mov %%eax, (%%ecx)\n" | |
| 1084 "mov $4, %%edx\n" // len = 4 | |
| 1085 "16:movd %%mm0, %%ebx\n" // fd = threadFd | |
| 1086 "mov $4, %%eax\n" // NR_write | |
| 1087 "17:int $0x80\n" | |
| 1088 "cmp %%edx, %%eax\n" | |
| 1089 "jz 1b\n" | |
| 1090 "cmp $-4, %%eax\n" // EINTR | |
| 1091 "jz 17b\n" | |
| 1092 "jmp 25f\n" // exit process | |
| 1093 | |
| 1094 // NR_exit: | |
| 1095 // Exit trusted thread after cleaning up resources | |
| 1096 "18:mov %%edi, %%ecx\n" // secure_mem | |
| 1097 "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub | |
| 1098 "mov $6, %%eax\n" // NR_close | |
| 1099 "int $0x80\n" | |
| 1100 "mov %%ecx, %%ebx\n" // start = secure_mem | |
| 1101 "mov $8192, %%ecx\n" // length = 8192 | |
| 1102 "xor %%edx, %%edx\n" // prot = PROT_NONE | |
| 1103 "mov $125, %%eax\n" // NR_mprotect | |
| 1104 "int $0x80\n" | |
| 1105 "movd %%mm0, %%ebx\n" // fd = threadFd | |
| 1106 "mov $6, %%eax\n" // NR_close | |
| 1107 "int $0x80\n" | |
| 1108 "mov $120, %%eax\n" // NR_clone | |
| 1109 "mov $17, %%ebx\n" // flags = SIGCHLD | |
| 1110 "mov $1, %%ecx\n" // stack = 1 | |
| 1111 "int $0x80\n" | |
| 1112 "mov %%eax, %%ebx\n" | |
| 1113 "test %%eax, %%eax\n" | |
| 1114 "js 25f\n" // exit process | |
| 1115 "jne 21f\n" // reap helper, exit thread | |
| 1116 "jmp 22f\n" // unlock mutex | |
| 1117 | |
| 1118 // NR_clone: | |
| 1119 // Original trusted thread calls clone() to create new nascent | |
| 1120 // thread. This thread is (typically) fully privileged and shares all | |
| 1121 // resources with the caller (i.e. the previous trusted thread), | |
| 1122 // and by extension it shares all resources with the sandbox'd | |
| 1123 // threads. | |
| 1124 "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem | |
| 1125 "movd %%mm4, %%edi\n" // child_tidptr | |
| 1126 "mov %%ecx, %%ebp\n" // remember child stack | |
| 1127 "mov $1, %%ecx\n" // stack = 1 | |
| 1128 "int $0x80\n" // calls NR_clone | |
| 1129 "cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values | |
| 1130 "jae 7b\n" // unlock mutex, return result | |
| 1131 "movd %%mm2, %%edi\n" | |
| 1132 "add $2, %%edi\n" | |
| 1133 "movd %%edi, %%mm2\n" | |
| 1134 "test %%eax, %%eax\n" | |
| 1135 "jne 15b\n" // return result | |
| 1136 | |
| 1137 // In nascent thread, now. | |
| 1138 "sub $2, %%edi\n" | |
| 1139 "movd %%edi, %%mm2\n" | |
| 1140 | |
| 1141 // We want to maintain an invalid %esp whenever we access untrusted | |
| 1142 // memory. This ensures that even if an attacker can trick us into | |
| 1143 // triggering a SIGSEGV, we will never successfully execute a signal | |
| 1144 // handler. | |
| 1145 // Signal handlers are inherently dangerous, as an attacker could trick | |
| 1146 // us into returning to the wrong address by adjusting the signal stack | |
| 1147 // right before the handler returns. | |
| 1148 // N.B. While POSIX is curiously silent about this, it appears that on | |
| 1149 // Linux, alternate signal stacks are a per-thread property. That is | |
| 1150 // good. It means that this security mechanism works, even if the | |
| 1151 // sandboxed thread manages to set up an alternate signal stack. | |
| 1152 // | |
| 1153 // TODO(markus): We currently do not support emulating calls to | |
| 1154 // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc | |
| 1155 // for a discussion on how to fix this, if this ever becomes necessary. | |
| 1156 "movd %%eax, %%mm3\n" // Request to return from clone() when done | |
| 1157 | |
| 1158 // Get thread id of nascent thread | |
| 1159 "20:mov $224, %%eax\n" // NR_gettid | |
| 1160 "int $0x80\n" | |
| 1161 "movd %%eax, %%mm4\n" | |
| 1162 | |
| 1163 // Nascent thread creates socketpair() for sending requests to | |
| 1164 // trusted thread. | |
| 1165 // We can create the filehandles on the child's stack. Filehandles are | |
| 1166 // always treated as untrusted. | |
| 1167 // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) | |
| 1168 "mov $102, %%eax\n" // NR_socketcall | |
| 1169 "mov $8, %%ebx\n" // socketpair | |
| 1170 "sub $8, %%ebp\n" // sv = child_stack | |
| 1171 "mov %%ebp, -0x04(%%ebp)\n" | |
| 1172 "movl $0, -0x08(%%ebp)\n" // protocol = 0 | |
| 1173 "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM | |
| 1174 "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX | |
| 1175 "lea -0x10(%%ebp), %%ecx\n" | |
| 1176 "int $0x80\n" | |
| 1177 "test %%eax, %%eax\n" | |
| 1178 "jz 28f\n" | |
| 1179 | |
| 1180 // If things went wrong, we don't have an (easy) way of signaling | |
| 1181 // the parent. For our purposes, it is sufficient to fail with a | |
| 1182 // fatal error. | |
| 1183 "jmp 25f\n" // exit process | |
| 1184 "21:xor %%ecx, %%ecx\n" | |
| 1185 "xor %%edx, %%edx\n" | |
| 1186 "mov $7, %%eax\n" // NR_waitpid | |
| 1187 "int $0x80\n" | |
| 1188 "cmp $-4, %%eax\n" // EINTR | |
| 1189 "jz 21b\n" | |
| 1190 "jmp 23f\n" // exit thread (no message) | |
| 1191 "22:lea playground$syscall_mutex, %%ebx\n" | |
| 1192 "mov $4096, %%ecx\n" | |
| 1193 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 1194 "mov $125, %%eax\n" // NR_mprotect | |
| 1195 "int $0x80\n" | |
| 1196 "lock; addl $0x80000000, (%%ebx)\n" | |
| 1197 "jz 23f\n" // exit thread | |
| 1198 "mov $1, %%edx\n" | |
| 1199 "mov %%edx, %%ecx\n" // FUTEX_WAKE | |
| 1200 "mov $240, %%eax\n" // NR_futex | |
| 1201 "int $0x80\n" | |
| 1202 "23:mov $1, %%eax\n" // NR_exit | |
| 1203 "mov $1, %%ebx\n" // status = 1 | |
| 1204 "24:int $0x80\n" | |
| 1205 "25:mov $4, %%eax\n" // NR_write | |
| 1206 "mov $2, %%ebx\n" // fd = stderr | |
| 1207 "lea 100f, %%ecx\n" // "Sandbox violation detected" | |
| 1208 "mov $101f-100f, %%edx\n" // len = strlen(msg) | |
| 1209 "int $0x80\n" | |
| 1210 "26:mov $1, %%ebx\n" | |
| 1211 "27:mov $252, %%eax\n" // NR_exit_group | |
| 1212 "jmp 24b\n" | |
| 1213 | |
| 1214 // The first page is mapped read-only for use as securely shared memory | |
| 1215 "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem | |
| 1216 "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem | |
| 1217 "movd %%ebx, %%mm5\n" // %mm5 = secure_mem | |
| 1218 "movd %%mm2, %%esi\n" | |
| 1219 "cmp %%esi, 4(%%edi)\n" | |
| 1220 "jne 25b\n" // exit process | |
| 1221 "mov $125, %%eax\n" // NR_mprotect | |
| 1222 "mov $4096, %%ecx\n" // len = 4096 | |
| 1223 "mov $1, %%edx\n" // prot = PROT_READ | |
| 1224 "int $0x80\n" | |
| 1225 | |
| 1226 // The second page is used as scratch space by the trusted thread. | |
| 1227 // Make it writable. | |
| 1228 "mov $125, %%eax\n" // NR_mprotect | |
| 1229 "add $4096, %%ebx\n" // addr = secure_mem + 4096 | |
| 1230 "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE | |
| 1231 "int $0x80\n" | |
| 1232 | |
| 1233 // Call clone() to create new trusted thread(). | |
| 1234 // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| | |
| 1235 // CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL) | |
| 1236 "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack) | |
| 1237 "movd %%eax, %%mm0\n" // %mm0 = threadFd | |
| 1238 "mov $120, %%eax\n" // NR_clone | |
| 1239 "mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR | |
| 1240 "mov $1, %%ecx\n" // stack = 1 | |
| 1241 "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub | |
| 1242 "cmp %%esi, 4(%%edi)\n" | |
| 1243 "jne 25b\n" // exit process | |
| 1244 "int $0x80\n" | |
| 1245 "test %%eax, %%eax\n" | |
| 1246 "js 25b\n" // exit process | |
| 1247 "jz 0b\n" // invoke trustedThreadFnc() | |
| 1248 | |
| 1249 // Set up thread local storage | |
| 1250 "mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable | |
| 1251 "mov %%eax, -0x04(%%ebp)\n" | |
| 1252 "mov $0xFFFFF, %%eax\n" // limit | |
| 1253 "mov %%eax, -0x08(%%ebp)\n" | |
| 1254 "movd %%mm5, %%eax\n" | |
| 1255 "add $0x58, %%eax\n" | |
| 1256 "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS | |
| 1257 "mov %%fs, %%eax\n" | |
| 1258 "shr $3, %%eax\n" | |
| 1259 "mov %%eax, -0x10(%%ebp)\n" // entry_number | |
| 1260 "mov $243, %%eax\n" // NR_set_thread_area | |
| 1261 "lea -0x10(%%ebp), %%ebx\n" | |
| 1262 "int $0x80\n" | |
| 1263 "test %%eax, %%eax\n" | |
| 1264 "jnz 25b\n" // exit process | |
| 1265 | |
| 1266 // Copy the caller's signal mask | |
| 1267 "movd %%mm5, %%edx\n" | |
| 1268 "mov 0x1038(%%edi), %%eax\n" | |
| 1269 "mov %%eax, 0x1038(%%edx)\n" | |
| 1270 "mov 0x103C(%%edi), %%eax\n" | |
| 1271 "mov %%eax, 0x103C(%%edx)\n" | |
| 1272 | |
| 1273 // Done creating trusted thread. We can now get ready to return to caller | |
| 1274 "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub | |
| 1275 "add $8, %%ebp\n" | |
| 1276 | |
| 1277 // Check whether this is the initial thread, or a newly created one. | |
| 1278 // At startup we run the same code as when we create a new thread. At | |
| 1279 // the very top of this function, you will find that we store 999f | |
| 1280 // in %%mm3. That is the signal that we should return on the same | |
| 1281 // stack rather than return to where clone was called. | |
| 1282 "movd %%mm3, %%eax\n" | |
| 1283 "movd %%mm2, %%edx\n" | |
| 1284 "test %%eax, %%eax\n" | |
| 1285 "jne 29f\n" | |
| 1286 | |
| 1287 // Returning from clone() into the newly created thread is special. We | |
| 1288 // cannot unroll the stack, as we just set up a new stack for this | |
| 1289 // thread. We have to explicitly restore CPU registers to the values | |
| 1290 // that they had when the program originally called clone(). | |
| 1291 // We patch the register values in the signal stack frame so that we | |
| 1292 // can ask sigreturn() to restore all registers for us. | |
| 1293 "sub $0x4, %%ebp\n" | |
| 1294 "mov 0x28(%%edi), %%eax\n" | |
| 1295 "mov %%eax, 0x00(%%ebp)\n" // return address | |
| 1296 "xor %%eax, %%eax\n" | |
| 1297 "mov %%eax, 0x30(%%ebp)\n" // %eax = 0 | |
| 1298 "mov 0x2C(%%edi), %%eax\n" | |
| 1299 "mov %%eax, 0x1C(%%ebp)\n" // %ebp | |
| 1300 "mov 0x30(%%edi), %%eax\n" | |
| 1301 "mov %%eax, 0x14(%%ebp)\n" // %edi | |
| 1302 "mov 0x34(%%edi), %%eax\n" | |
| 1303 "mov %%eax, 0x18(%%ebp)\n" // %esi | |
| 1304 "mov 0x38(%%edi), %%eax\n" | |
| 1305 "mov %%eax, 0x28(%%ebp)\n" // %edx | |
| 1306 "mov 0x3C(%%edi), %%eax\n" | |
| 1307 "mov %%eax, 0x2C(%%ebp)\n" // %ecx | |
| 1308 "mov 0x40(%%edi), %%eax\n" | |
| 1309 "mov %%eax, 0x24(%%ebp)\n" // %ebx | |
| 1310 "cmp %%edx, 4(%%edi)\n" | |
| 1311 "jne 25b\n" // exit process | |
| 1312 | |
| 1313 // Nascent thread launches a helper that doesn't share any of our | |
| 1314 // resources, except for pages mapped as MAP_SHARED. | |
| 1315 // clone(SIGCHLD, stack=1) | |
| 1316 "29:mov $120, %%eax\n" // NR_clone | |
| 1317 "mov $17, %%ebx\n" // flags = SIGCHLD | |
| 1318 "mov $1, %%ecx\n" // stack = 1 | |
| 1319 "int $0x80\n" | |
| 1320 "test %%eax, %%eax\n" | |
| 1321 "js 25b\n" // exit process | |
| 1322 "jne 31f\n" | |
| 1323 | |
| 1324 // Use sendmsg() to send to the trusted process the file handles for | |
| 1325 // communicating with the new trusted thread. We also send the address | |
| 1326 // of the secure memory area (for sanity checks) and the thread id. | |
| 1327 "cmp %%edx, 4(%%edi)\n" | |
| 1328 "jne 25b\n" // exit process | |
| 1329 | |
| 1330 // 0x00 socketcall: | |
| 1331 // 0x00 socket (0x4C(%edi)) | |
| 1332 // 0x04 msg (%ecx + 0x0C) | |
| 1333 // 0x08 flags ($0) | |
| 1334 // 0x0C msg: | |
| 1335 // 0x0C msg_name ($0) | |
| 1336 // 0x10 msg_namelen ($0) | |
| 1337 // 0x14 msg_iov (%ecx + 0x34) | |
| 1338 // 0x18 msg_iovlen ($1) | |
| 1339 // 0x1C msg_control (%ecx + 0x3C) | |
| 1340 // 0x20 msg_controllen ($0x14) | |
| 1341 // 0x24 data: | |
| 1342 // 0x24 msg_flags/err ($0) | |
| 1343 // 0x28 secure_mem (%mm5) | |
| 1344 // 0x2C threadId (%mm4) | |
| 1345 // 0x30 threadFdPub (%esi) | |
| 1346 // 0x34 iov: | |
| 1347 // 0x34 iov_base (%ecx + 0x24) | |
| 1348 // 0x38 iov_len ($0x10) | |
| 1349 // 0x3C cmsg: | |
| 1350 // 0x3C cmsg_len ($0x14) | |
| 1351 // 0x40 cmsg_level ($1, SOL_SOCKET) | |
| 1352 // 0x44 cmsg_type ($1, SCM_RIGHTS) | |
| 1353 // 0x48 threadFdPub (%esi) | |
| 1354 // 0x4C threadFd (%mm0) | |
| 1355 // 0x50 | |
| 1356 "lea -0x50(%%ebp), %%ecx\n" | |
| 1357 "xor %%eax, %%eax\n" | |
| 1358 "mov %%eax, 0x08(%%ecx)\n" // flags | |
| 1359 "mov %%eax, 0x0C(%%ecx)\n" // msg_name | |
| 1360 "mov %%eax, 0x10(%%ecx)\n" // msg_namelen | |
| 1361 "mov %%eax, 0x24(%%ecx)\n" // msg_flags | |
| 1362 "inc %%eax\n" | |
| 1363 "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen | |
| 1364 "mov %%eax, 0x40(%%ecx)\n" // cmsg_level | |
| 1365 "mov %%eax, 0x44(%%ecx)\n" // cmsg_type | |
| 1366 "movl $0x10, 0x38(%%ecx)\n" // iov_len | |
| 1367 "mov $0x14, %%eax\n" | |
| 1368 "mov %%eax, 0x20(%%ecx)\n" // msg_controllen | |
| 1369 "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len | |
| 1370 "mov 0x4C(%%edi), %%eax\n" // cloneFdPub | |
| 1371 "mov %%eax, 0x00(%%ecx)\n" // socket | |
| 1372 "lea 0x0C(%%ecx), %%eax\n" | |
| 1373 "mov %%eax, 0x04(%%ecx)\n" // msg | |
| 1374 "add $0x18, %%eax\n" | |
| 1375 "mov %%eax, 0x34(%%ecx)\n" // iov_base | |
| 1376 "add $0x10, %%eax\n" | |
| 1377 "mov %%eax, 0x14(%%ecx)\n" // msg_iov | |
| 1378 "add $8, %%eax\n" | |
| 1379 "mov %%eax, 0x1C(%%ecx)\n" // msg_control | |
| 1380 "mov %%esi, 0x30(%%ecx)\n" // threadFdPub | |
| 1381 "mov %%esi, 0x48(%%ecx)\n" // threadFdPub | |
| 1382 "movd %%mm5, %%eax\n" | |
| 1383 "mov %%eax, 0x28(%%ecx)\n" // secure_mem | |
| 1384 "movd %%mm4, %%eax\n" | |
| 1385 "mov %%eax, 0x2C(%%ecx)\n" // threadId | |
| 1386 "movd %%mm0, %%eax\n" | |
| 1387 "mov %%eax, 0x4C(%%ecx)\n" // threadFd | |
| 1388 "mov $16, %%ebx\n" // sendmsg() | |
| 1389 "mov $102, %%eax\n" // NR_socketcall | |
| 1390 "int $0x80\n" | |
| 1391 | |
| 1392 // Release syscall_mutex_. This signals the trusted process that | |
| 1393 // it can write into the original thread's secure memory again. | |
| 1394 "mov $125, %%eax\n" // NR_mprotect | |
| 1395 "lea playground$syscall_mutex, %%ebx\n" | |
| 1396 "mov $4096, %%ecx\n" | |
| 1397 "mov $3, %%edx\n" // PROT_READ | PROT_WRITE | |
| 1398 "int $0x80\n" | |
| 1399 "movd %%mm2, %%edx\n" | |
| 1400 "cmp %%edx, 0x4(%%edi)\n" | |
| 1401 "jnz 25b\n" // exit process | |
| 1402 "lock; addl $0x80000000, (%%ebx)\n" | |
| 1403 "jz 30f\n" // exit process (no error message) | |
| 1404 "mov $1, %%edx\n" | |
| 1405 "mov %%edx, %%ecx\n" // FUTEX_WAKE | |
| 1406 "mov $240, %%eax\n" // NR_futex | |
| 1407 "int $0x80\n" | |
| 1408 "30:xor %%ebx, %%ebx\n" | |
| 1409 "jmp 27b\n" // exit process (no error message) | |
| 1410 | |
| 1411 // Reap helper | |
| 1412 "31:mov %%eax, %%ebx\n" | |
| 1413 "32:lea -4(%%ebp), %%ecx\n" | |
| 1414 "xor %%edx, %%edx\n" | |
| 1415 "mov $7, %%eax\n" // NR_waitpid | |
| 1416 "int $0x80\n" | |
| 1417 "cmp $-4, %%eax\n" // EINTR | |
| 1418 "jz 32b\n" | |
| 1419 "mov -4(%%ebp), %%eax\n" | |
| 1420 "test %%eax, %%eax\n" | |
| 1421 "jnz 26b\n" // exit process (no error message) | |
| 1422 | |
| 1423 // Release privileges by entering seccomp mode. | |
| 1424 "33:mov $172, %%eax\n" // NR_prctl | |
| 1425 "mov $22, %%ebx\n" // PR_SET_SECCOMP | |
| 1426 "mov $1, %%ecx\n" | |
| 1427 "int $0x80\n" | |
| 1428 "test %%eax, %%eax\n" | |
| 1429 "jnz 25b\n" // exit process | |
| 1430 | |
| 1431 // We can finally start using the stack. Signal handlers no longer pose | |
| 1432 // a threat to us. | |
| 1433 "mov %%ebp, %%esp\n" | |
| 1434 | |
| 1435 // Back in the newly created sandboxed thread, wait for trusted process | |
| 1436 // to receive request. It is possible for an attacker to make us | |
| 1437 // continue even before the trusted process is done. This is OK. It'll | |
| 1438 // result in us putting stale values into the new thread's TLS. But that | |
| 1439 // data is considered untrusted anyway. | |
| 1440 "push %%eax\n" | |
| 1441 "mov $1, %%edx\n" // len = 1 | |
| 1442 "mov %%esp, %%ecx\n" // buf = %esp | |
| 1443 "mov %%esi, %%ebx\n" // fd = threadFdPub | |
| 1444 "34:mov $3, %%eax\n" // NR_read | |
| 1445 "int $0x80\n" | |
| 1446 "cmp $-4, %%eax\n" // EINTR | |
| 1447 "jz 34b\n" | |
| 1448 "cmp %%edx, %%eax\n" | |
| 1449 "jne 25b\n" // exit process | |
| 1450 "pop %%eax\n" | |
| 1451 | |
| 1452 // Return to caller. We are in the new thread, now. | |
| 1453 "movd %%mm3, %%ebx\n" | |
| 1454 "test %%ebx, %%ebx\n" | |
| 1455 "jnz 35f\n" // Returning to createTrustedThread() | |
| 1456 | |
| 1457 // Returning to the place where clone() had been called. We rely on | |
| 1458 // using sigreturn() for restoring our registers. The caller already | |
| 1459 // created a signal stack frame, and we patched the register values | |
| 1460 // with the ones that were in effect prior to calling sandbox_clone(). | |
| 1461 "pop %%ebx\n" | |
| 1462 "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip | |
| 1463 "mov $119, %%eax\n" // NR_sigreturn | |
| 1464 "int $0x80\n" | |
| 1465 | |
| 1466 ".pushsection \".rodata\"\n" | |
| 1467 "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" | |
| 1468 "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" | |
| 1469 "102:\n" | |
| 1470 ".popsection\n" | |
| 1471 | |
| 1472 "999:pop %%ebp\n" | |
| 1473 "pop %%ebx\n" | |
| 1474 : | |
| 1475 : "g"(&args) | |
| 1476 : "eax", "ecx", "edx", "edi", "esi", "esp", "memory" | |
| 1477 #else | |
| 1478 #error Unsupported target platform | |
| 1479 #endif | |
| 1480 ); | |
| 1481 } | |
| 1482 | |
| 1483 } // namespace | |
| OLD | NEW |