| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #define XOPEN_SOURCE 500 | |
| 6 #include <algorithm> | |
| 7 #include <elf.h> | |
| 8 #include <errno.h> | |
| 9 #include <errno.h> | |
| 10 #include <fcntl.h> | |
| 11 #include <linux/unistd.h> | |
| 12 #include <set> | |
| 13 #include <signal.h> | |
| 14 #include <stdarg.h> | |
| 15 #include <stdio.h> | |
| 16 #include <stdlib.h> | |
| 17 #include <sys/ptrace.h> | |
| 18 #include <sys/resource.h> | |
| 19 #include <sys/stat.h> | |
| 20 #include <sys/types.h> | |
| 21 | |
| 22 #include "allocator.h" | |
| 23 #include "debug.h" | |
| 24 #include "library.h" | |
| 25 #include "sandbox_impl.h" | |
| 26 #include "syscall.h" | |
| 27 #include "syscall_table.h" | |
| 28 #include "x86_decode.h" | |
| 29 | |
// Width-neutral aliases for the ELF types and relocation macros, so that the
// rest of this file can be written once and compiled for both x86-64 and
// x86-32. Note that on x86-64 the generic "Elf_Rel" maps to Elf64_Rela
// (relocations with explicit addend) and the PLT relocation section is
// ".rela.plt", whereas x86-32 uses implicit-addend Elf32_Rel and ".rel.plt".
#if defined(__x86_64__)
typedef Elf64_Phdr    Elf_Phdr;
typedef Elf64_Rela    Elf_Rel;      // x86-64 uses RELA (explicit addend)

typedef Elf64_Half    Elf_Half;
typedef Elf64_Word    Elf_Word;
typedef Elf64_Sword   Elf_Sword;
typedef Elf64_Xword   Elf_Xword;
typedef Elf64_Sxword  Elf_Sxword;
typedef Elf64_Off     Elf_Off;
typedef Elf64_Section Elf_Section;
typedef Elf64_Versym  Elf_Versym;

#define ELF_ST_BIND   ELF64_ST_BIND
#define ELF_ST_TYPE   ELF64_ST_TYPE
#define ELF_ST_INFO   ELF64_ST_INFO
#define ELF_R_SYM     ELF64_R_SYM
#define ELF_R_TYPE    ELF64_R_TYPE
#define ELF_R_INFO    ELF64_R_INFO

#define ELF_REL_PLT   ".rela.plt"
#define ELF_JUMP_SLOT R_X86_64_JUMP_SLOT
#elif defined(__i386__)
typedef Elf32_Phdr    Elf_Phdr;
typedef Elf32_Rel     Elf_Rel;      // x86-32 uses REL (implicit addend)

typedef Elf32_Half    Elf_Half;
typedef Elf32_Word    Elf_Word;
typedef Elf32_Sword   Elf_Sword;
typedef Elf32_Xword   Elf_Xword;
typedef Elf32_Sxword  Elf_Sxword;
typedef Elf32_Off     Elf_Off;
typedef Elf32_Section Elf_Section;
typedef Elf32_Versym  Elf_Versym;

#define ELF_ST_BIND   ELF32_ST_BIND
#define ELF_ST_TYPE   ELF32_ST_TYPE
#define ELF_ST_INFO   ELF32_ST_INFO
#define ELF_R_SYM     ELF32_R_SYM
#define ELF_R_TYPE    ELF32_R_TYPE
#define ELF_R_INFO    ELF32_R_INFO

#define ELF_REL_PLT   ".rel.plt"
#define ELF_JUMP_SLOT R_386_JMP_SLOT
#else
#error Unsupported target platform
#endif
| 77 | |
| 78 namespace playground { | |
| 79 | |
// Addresses of the kernel-provided entry points inside the VDSO.
// NOTE(review): these are populated outside of this chunk (presumably while
// parsing the VDSO's dynamic symbols) — confirm against the rest of the file.
// Zero-initialized until then, so callers must NULL-check before use.
char* Library::__kernel_vsyscall;
char* Library::__kernel_sigreturn;
char* Library::__kernel_rt_sigreturn;
| 83 | |
// Destructor: if getOriginal() previously moved and extended the mapping of
// the library's first page (image_size_ != 0), shrink it back to one page and
// move it to its original address, preserving any patches made in the interim.
Library::~Library() {
  if (image_size_) {
    // We no longer need access to a full mapping of the underlying library
    // file. Move the temporarily extended mapping back to where we originally
    // found. Make sure to preserve any changes that we might have made since.
    Sandbox::SysCalls sys;
    // Temporarily make the first page of the extended mapping writable so we
    // can merge back any modifications.
    sys.mprotect(image_, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
    if (memcmp(image_, memory_ranges_.rbegin()->second.start, 4096)) {
      // Only copy data, if we made any changes in this data. Otherwise there
      // is no need to create another modified COW mapping.
      memcpy(image_, memory_ranges_.rbegin()->second.start, 4096);
    }
    sys.mprotect(image_, 4096, PROT_READ | PROT_EXEC);
    // Shrink to a single page and force it back to the library's original
    // first-page address (rbegin() is the lowest range; see getOriginal()).
    sys.mremap(image_, image_size_, 4096, MREMAP_MAYMOVE | MREMAP_FIXED,
               memory_ranges_.rbegin()->second.start);
  }
}
| 101 | |
// Copies up to "len" bytes from a potentially unmapped/unreadable address
// "src" into "dst" without risking a segmentation fault in this process.
// Returns "dst" on (possibly partial) success, or NULL if nothing could be
// read. The trick: write() through a socketpair reports EFAULT instead of
// crashing when "src" is not readable.
char* Library::getBytes(char* dst, const char* src, ssize_t len) {
  // Some kernels don't allow accessing the VDSO from write()
  if (isVDSO_ &&
      src >= memory_ranges_.begin()->second.start &&
      src <= memory_ranges_.begin()->second.stop) {
    // The VDSO is known to be mapped, so a direct memcpy() is safe here;
    // clamp the length to the end of the VDSO range.
    ssize_t max =
        reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - src;
    if (len > max) {
      len = max;
    }
    memcpy(dst, src, len);
    return dst;
  }

  // Read up to "len" bytes from "src" and copy them to "dst". Short
  // copies are possible, if we are at the end of a mapping. Returns
  // NULL, if the operation failed completely.
  // NOTE(review): the static socket pair is created lazily and never closed,
  // and the sys.socketpair() return value is not checked; also this is not
  // thread-safe — confirm all callers run single-threaded at this point.
  static int helper_socket[2];
  Sandbox::SysCalls sys;
  if (!helper_socket[0] && !helper_socket[1]) {
    // Copy data through a socketpair, as this allows us to access it
    // without incurring a segmentation fault.
    sys.socketpair(AF_UNIX, SOCK_STREAM, 0, helper_socket);
  }
  char* ptr = dst;
  int inc = 4096;                 // probe granularity: page, then single bytes
  while (len > 0) {
    // While probing page-wise, only go up to the next page boundary; once we
    // hit an EFAULT we retry byte-by-byte to salvage a partial read.
    ssize_t l = inc == 1 ? inc : 4096 - (reinterpret_cast<long>(src) & 0xFFF);
    if (l > len) {
      l = len;
    }
    l = NOINTR_SYS(sys.write(helper_socket[0], src, l));
    if (l == -1) {
      if (sys.my_errno == EFAULT) {
        if (inc == 1) {
          // Even a single byte is unreadable: stop. Fail completely only if
          // nothing at all has been copied so far.
          if (ptr == dst) {
            return NULL;
          }
          break;
        }
        // Page-sized probe faulted; fall back to byte-wise probing.
        inc = 1;
        continue;
      } else {
        return NULL;
      }
    }
    // Drain what the kernel accepted back out of the other socket end.
    l = sys.read(helper_socket[1], ptr, l);
    if (l <= 0) {
      return NULL;
    }
    ptr += l;
    src += l;
    len -= l;
  }
  return dst;
}
| 158 | |
// Copies "len" bytes at library-relative "offset" into "buf". Returns "buf"
// on success; on any failure "buf" is zero-filled and NULL is returned.
char *Library::get(Elf_Addr offset, char *buf, size_t len) {
  if (!valid_) {
    memset(buf, 0, len);
    return NULL;
  }
  // NOTE(review): this relies on RangeMap's comparator ordering keys such
  // that lower_bound(offset) yields the range containing "offset" (i.e. the
  // map appears to be keyed in descending order — rbegin()->first == 0 in
  // getOriginal() supports this). Confirm against the RangeMap declaration.
  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
  if (iter == memory_ranges_.end()) {
    memset(buf, 0, len);
    return NULL;
  }
  offset -= iter->first;
  long size = reinterpret_cast<char *>(iter->second.stop) -
              reinterpret_cast<char *>(iter->second.start);
  // NOTE(review): if len > size, "size - len" wraps around in unsigned
  // arithmetic and this bounds check passes spuriously; getBytes() would then
  // be asked to read past the range. Confirm callers never pass len > range
  // size, or tighten this check.
  if (offset > size - len) {
    memset(buf, 0, len);
    return NULL;
  }
  char *src = reinterpret_cast<char *>(iter->second.start) + offset;
  // Pre-zero the buffer so a short read from getBytes() leaves defined data.
  memset(buf, 0, len);
  if (!getBytes(buf, src, len)) {
    return NULL;
  }
  return buf;
}
| 183 | |
// Reads a NUL-terminated string at library-relative "offset". Returns the
// empty string on failure. Strings are silently truncated to 4095 bytes.
Library::string Library::get(Elf_Addr offset) {
  if (!valid_) {
    return "";
  }
  // See the note in get(offset, buf, len) about RangeMap ordering.
  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
  if (iter == memory_ranges_.end()) {
    return "";
  }
  offset -= iter->first;
  const char *start = reinterpret_cast<char *>(iter->second.start) + offset;
  const char *stop = reinterpret_cast<char *>(iter->second.stop) + offset;
  // Copy at most one page; the buffer is pre-zeroed so the string is always
  // terminated even if getBytes() performs a short read.
  char buf[4096] = { 0 };
  getBytes(buf, start, stop - start >= (int)sizeof(buf) ?
           sizeof(buf) - 1 : stop - start);
  // Scan for the terminating NUL within the local copy.
  start = buf;
  stop = buf;
  while (*stop) {
    ++stop;
  }
  string s = stop > start ? string(start, stop - start) : "";
  return s;
}
| 206 | |
// Reads "len" bytes at "offset" from the ORIGINAL (unpatched) library file
// contents, by lazily extending the mapping of the library's first page so
// it covers the whole on-disk file. Returns "buf" on success, NULL otherwise.
// With buf == NULL this just ensures the mapping covers offset+len.
char *Library::getOriginal(Elf_Addr offset, char *buf, size_t len) {
  if (!valid_) {
    memset(buf, 0, len);
    return NULL;
  }
  Sandbox::SysCalls sys;
  if (!image_ && !isVDSO_ && !memory_ranges_.empty() &&
      memory_ranges_.rbegin()->first == 0) {
    // Extend the mapping of the very first page of the underlying library
    // file. This way, we can read the original file contents of the entire
    // library.
    // We have to be careful, because doing so temporarily removes the first
    // 4096 bytes of the library from memory. And we don't want to accidentally
    // unmap code that we are executing. So, only use functions that can be
    // inlined.
    void* start = memory_ranges_.rbegin()->second.start;
    // Total extent of the library = highest range offset + that range's size.
    image_size_ = memory_ranges_.begin()->first +
                  (reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) -
                   reinterpret_cast<char *>(memory_ranges_.begin()->second.start));
    if (image_size_ < 8192) {
      // It is possible to create a library that is only a single page in
      // size. In that case, we have to make sure that we artificially map
      // one extra page past the end of it, as our code relies on mremap()
      // actually moving the mapping.
      image_size_ = 8192;
    }
    image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
                                                 MREMAP_MAYMOVE));
    if (image_size_ == 8192 && image_ == start) {
      // We really mean it, when we say we want the memory to be moved.
      image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
                                                   MREMAP_MAYMOVE));
      sys.munmap(reinterpret_cast<char *>(start) + 4096, 4096);
    }
    if (image_ == MAP_FAILED) {
      image_ = NULL;
    } else {
      // Re-create a page at the library's original first-page address and
      // copy the (possibly patched) contents back, so the running code keeps
      // working while image_ provides a view of the whole file.
      sys.MMAP(start, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
      // NOTE(review): this loop runs i = N-1 .. 1 and never copies index 0
      // (the first word, containing the start of the ELF magic). That looks
      // like an off-by-one ("i--" as the condition would include 0) — confirm
      // whether skipping word 0 is intentional before changing it.
      for (int i = 4096 / sizeof(long); --i;
           reinterpret_cast<long *>(start)[i] =
               reinterpret_cast<long *>(image_)[i]);
    }
  }

  if (image_) {
    if (offset + len > image_size_) {
      // It is quite likely that we initially did not map the entire file as
      // we did not know how large it is. So, if necessary, try to extend the
      // mapping.
      size_t new_size = (offset + len + 4095) & ~4095;
      char* tmp =
          reinterpret_cast<char *>(sys.mremap(image_, image_size_, new_size,
                                              MREMAP_MAYMOVE));
      if (tmp != MAP_FAILED) {
        image_ = tmp;
        image_size_ = new_size;
      }
    }
    if (buf && offset + len <= image_size_) {
      return reinterpret_cast<char *>(memcpy(buf, image_ + offset, len));
    }
    return NULL;
  }
  // No file image available (e.g. VDSO): fall back to reading live memory.
  return buf ? get(offset, buf, len) : NULL;
}
| 273 | |
| 274 Library::string Library::getOriginal(Elf_Addr offset) { | |
| 275 if (!valid_) { | |
| 276 return ""; | |
| 277 } | |
| 278 // Make sure we actually have a mapping that we can access. If the string | |
| 279 // is located at the end of the image, we might not yet have extended the | |
| 280 // mapping sufficiently. | |
| 281 if (!image_ || image_size_ <= offset) { | |
| 282 getOriginal(offset, NULL, 1); | |
| 283 } | |
| 284 | |
| 285 if (image_) { | |
| 286 if (offset < image_size_) { | |
| 287 char* start = image_ + offset; | |
| 288 char* stop = start; | |
| 289 while (stop < image_ + image_size_ && *stop) { | |
| 290 ++stop; | |
| 291 if (stop >= image_ + image_size_) { | |
| 292 getOriginal(stop - image_, NULL, 1); | |
| 293 } | |
| 294 } | |
| 295 return string(start, stop - start); | |
| 296 } | |
| 297 return ""; | |
| 298 } | |
| 299 return get(offset); | |
| 300 } | |
| 301 | |
| 302 const Elf_Ehdr* Library::getEhdr() { | |
| 303 if (!valid_) { | |
| 304 return NULL; | |
| 305 } | |
| 306 return &ehdr_; | |
| 307 } | |
| 308 | |
| 309 const Elf_Shdr* Library::getSection(const string& section) { | |
| 310 if (!valid_) { | |
| 311 return NULL; | |
| 312 } | |
| 313 SectionTable::const_iterator iter = section_table_.find(section); | |
| 314 if (iter == section_table_.end()) { | |
| 315 return NULL; | |
| 316 } | |
| 317 return &iter->second.second; | |
| 318 } | |
| 319 | |
| 320 int Library::getSectionIndex(const string& section) { | |
| 321 if (!valid_) { | |
| 322 return -1; | |
| 323 } | |
| 324 SectionTable::const_iterator iter = section_table_.find(section); | |
| 325 if (iter == section_table_.end()) { | |
| 326 return -1; | |
| 327 } | |
| 328 return iter->second.first; | |
| 329 } | |
| 330 | |
| 331 void Library::makeWritable(bool state) const { | |
| 332 for (RangeMap::const_iterator iter = memory_ranges_.begin(); | |
| 333 iter != memory_ranges_.end(); ++iter) { | |
| 334 const Range& range = iter->second; | |
| 335 long length = reinterpret_cast<char *>(range.stop) - | |
| 336 reinterpret_cast<char *>(range.start); | |
| 337 Sandbox::SysCalls sys; | |
| 338 sys.mprotect(range.start, length, | |
| 339 range.prot | (state ? PROT_WRITE : 0)); | |
| 340 } | |
| 341 } | |
| 342 | |
| 343 bool Library::isSafeInsn(unsigned short insn) { | |
| 344 // Check if the instruction has no unexpected side-effects. If so, it can | |
| 345 // be safely relocated from the function that we are patching into the | |
| 346 // out-of-line scratch space that we are setting up. This is often necessary | |
| 347 // to make room for the JMP into the scratch space. | |
| 348 return ((insn & 0x7) < 0x6 && (insn & 0xF0) < 0x40 | |
| 349 /* ADD, OR, ADC, SBB, AND, SUB, XOR, CMP */) || | |
| 350 #if defined(__x86_64__) | |
| 351 insn == 0x63 /* MOVSXD */ || | |
| 352 #endif | |
| 353 (insn >= 0x80 && insn <= 0x8E /* ADD, OR, ADC, | |
| 354 SBB, AND, SUB, XOR, CMP, TEST, XCHG, MOV, LEA */) || | |
| 355 (insn == 0x90) || /* NOP */ | |
| 356 (insn >= 0xA0 && insn <= 0xA9) /* MOV, TEST */ || | |
| 357 (insn >= 0xB0 && insn <= 0xBF /* MOV */) || | |
| 358 (insn >= 0xC0 && insn <= 0xC1) || /* Bit Shift */ | |
| 359 (insn >= 0xD0 && insn <= 0xD3) || /* Bit Shift */ | |
| 360 (insn >= 0xC6 && insn <= 0xC7 /* MOV */) || | |
| 361 (insn == 0xF7) /* TEST, NOT, NEG, MUL, IMUL, DIV, IDIV */; | |
| 362 } | |
| 363 | |
| 364 char* Library::getScratchSpace(const Maps* maps, char* near, int needed, | |
| 365 char** extraSpace, int* extraLength) { | |
| 366 if (needed > *extraLength || | |
| 367 labs(*extraSpace - reinterpret_cast<char *>(near)) > (1536 << 20)) { | |
| 368 if (*extraSpace) { | |
| 369 // Start a new scratch page and mark any previous page as write-protected | |
| 370 Sandbox::SysCalls sys; | |
| 371 sys.mprotect(*extraSpace, 4096, PROT_READ|PROT_EXEC); | |
| 372 } | |
| 373 // Our new scratch space is initially executable and writable. | |
| 374 *extraLength = 4096; | |
| 375 *extraSpace = maps->allocNearAddr(near, *extraLength, | |
| 376 PROT_READ|PROT_WRITE|PROT_EXEC); | |
| 377 } | |
| 378 if (*extraSpace) { | |
| 379 *extraLength -= needed; | |
| 380 return *extraSpace + *extraLength; | |
| 381 } | |
| 382 Sandbox::die("Insufficient space to intercept system call"); | |
| 383 } | |
| 384 | |
// Scans the function [start, end) for system call instructions and rewrites
// each one into a jump to out-of-line code (in scratch space obtained from
// getScratchSpace()) that forwards to our system call wrapper. Relocatable
// instructions neighboring the system call are moved out-of-line to make room
// for the jump; instructions that are branch targets are never moved.
void Library::patchSystemCallsInFunction(const Maps* maps, char *start,
                                         char *end, char** extraSpace,
                                         int* extraLength) {
  // First pass: collect every branch target inside the function. We must not
  // relocate an instruction that some other instruction jumps to.
  std::set<char *, std::less<char *>, SystemAllocator<char *> > branch_targets;
  for (char *ptr = start; ptr < end; ) {
    unsigned short insn = next_inst((const char **)&ptr, __WORDSIZE == 64);
    char *target;
    if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || insn == 0xEB /* JMP */) {
      // 8bit relative displacement is the byte immediately preceding "ptr".
      target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
    } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
               (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
      // 32bit relative displacement immediately precedes "ptr".
      target = ptr + (reinterpret_cast<int *>(ptr))[-1];
    } else {
      continue;
    }
    branch_targets.insert(target);
  }
  // Ring buffer of the most recently decoded instructions, used to find the
  // relocatable instructions surrounding a system call.
  struct Code {
    char* addr;            // start address of the instruction
    int len;               // encoded length in bytes
    unsigned short insn;   // opcode as reported by next_inst()
    bool is_ip_relative;   // uses RIP-relative addressing (x86-64 only)
  } code[5] = { { 0 } };
  int codeIdx = 0;
  char* ptr = start;
  while (ptr < end) {
    // Keep a ring-buffer of the last few instruction in order to find the
    // correct place to patch the code.
    char *mod_rm;
    code[codeIdx].addr = ptr;
    code[codeIdx].insn = next_inst((const char **)&ptr, __WORDSIZE == 64,
                                   0, 0, &mod_rm, 0, 0);
    code[codeIdx].len = ptr - code[codeIdx].addr;
    code[codeIdx].is_ip_relative =
#if defined(__x86_64__)
        // ModR/M mod=00, r/m=101 encodes RIP-relative addressing on x86-64.
        mod_rm && (*mod_rm & 0xC7) == 0x5;
#else
        false;
#endif

    // Whenever we find a system call, we patch it with a jump to out-of-line
    // code that redirects to our system call wrapper.
    bool is_syscall = true;
#if defined(__x86_64__)
    bool is_indirect_call = false;
    if (code[codeIdx].insn == 0x0F05 /* SYSCALL */ ||
        // In addition, on x86-64, we need to redirect all CALLs between the
        // VDSO and the VSyscalls page. We want these to jump to our own
        // modified copy of the VSyscalls. As we know that the VSyscalls are
        // always more than 2GB away from the VDSO, the compiler has to
        // generate some form of indirect jumps. We can find all indirect
        // CALLs and redirect them to a separate scratch area, where we can
        // inspect the destination address. If it indeed points to the
        // VSyscall area, we then adjust the destination address accordingly.
        (is_indirect_call =
           (isVDSO_ && vsys_offset_ && code[codeIdx].insn == 0xFF &&
            !code[codeIdx].is_ip_relative &&
            mod_rm && (*mod_rm & 0x38) == 0x10 /* CALL (indirect) */))) {
      is_syscall = !is_indirect_call;
#elif defined(__i386__)
    bool is_gs_call = false;
    if (code[codeIdx].len == 7 &&
        code[codeIdx].insn == 0xFF &&
        code[codeIdx].addr[2] == '\x15' /* CALL (indirect) */ &&
        code[codeIdx].addr[0] == '\x65' /* %gs prefix */) {
      // CALL *%gs:0x10 — dereference the %gs-relative pointer to see whether
      // this is the glibc-style call into the VDSO's __kernel_vsyscall.
      char* target;
      asm volatile("mov %%gs:(%1), %0\n"
                   : "=a"(target)
                   : "c"(*reinterpret_cast<int *>(code[codeIdx].addr+3)));
      if (target == __kernel_vsyscall) {
        is_gs_call = true;
        // TODO(markus): also handle the other vsyscalls
      }
    }
    if (is_gs_call ||
        (code[codeIdx].insn == 0xCD &&
         code[codeIdx].addr[1] == '\x80' /* INT $0x80 */)) {
#else
#error Unsupported target platform
#endif
      // Found a system call. Search backwards to figure out how to redirect
      // the code. We will need to overwrite a couple of instructions and,
      // of course, move these instructions somewhere else.
      int startIdx = codeIdx;
      int endIdx = codeIdx;
      int length = code[codeIdx].len;
      // Walk backwards through the ring buffer, extending the relocatable
      // window while instructions are safe and not branch targets.
      for (int idx = codeIdx;
           (idx = (idx + (sizeof(code) / sizeof(struct Code)) - 1) %
                  (sizeof(code) / sizeof(struct Code))) != codeIdx; ) {
        std::set<char *>::const_iterator iter =
            std::upper_bound(branch_targets.begin(), branch_targets.end(),
                             code[idx].addr);
        if (iter != branch_targets.end() && *iter < ptr) {
          // Found a branch pointing to somewhere past our instruction. This
          // instruction cannot be moved safely. Leave it in place.
          break;
        }
        if (code[idx].addr && !code[idx].is_ip_relative &&
            isSafeInsn(code[idx].insn)) {
          // These are all benign instructions with no side-effects and no
          // dependency on the program counter. We should be able to safely
          // relocate them.
          startIdx = idx;
          length = ptr - code[startIdx].addr;
        } else {
          break;
        }
      }
      // Search forward past the system call, too. Sometimes, we can only
      // find relocatable instructions following the system call.
#if defined(__i386__)
   findEndIdx:
#endif
      char *next = ptr;
      for (int i = codeIdx;
           next < end &&
           (i = (i + 1) % (sizeof(code) / sizeof(struct Code))) != startIdx;
           ) {
        std::set<char *>::const_iterator iter =
            std::lower_bound(branch_targets.begin(), branch_targets.end(),
                             next);
        if (iter != branch_targets.end() && *iter == next) {
          // Found branch target pointing to our instruction
          break;
        }
        char *tmp_rm;
        code[i].addr = next;
        code[i].insn = next_inst((const char **)&next, __WORDSIZE == 64,
                                 0, 0, &tmp_rm, 0, 0);
        code[i].len = next - code[i].addr;
        code[i].is_ip_relative = tmp_rm && (*tmp_rm & 0xC7) == 0x5;
        if (!code[i].is_ip_relative && isSafeInsn(code[i].insn)) {
          endIdx = i;
          length = next - code[startIdx].addr;
        } else {
          break;
        }
      }
      // We now know, how many instructions neighboring the system call we
      // can safely overwrite. On x86-32 we need six bytes, and on x86-64
      // We need five bytes to insert a JMPQ and a 32bit address. We then
      // jump to a code fragment that safely forwards to our system call
      // wrapper.
      // On x86-64, this is complicated by the fact that the API allows up
      // to 128 bytes of red-zones below the current stack pointer. So, we
      // cannot write to the stack until we have adjusted the stack
      // pointer.
      // On both x86-32 and x86-64 we take care to leave the stack unchanged
      // while we are executing the preamble and postamble. This allows us
      // to treat instructions that reference %esp/%rsp as safe for
      // relocation.
      // In particular, this means that on x86-32 we cannot use CALL, but
      // have to use a PUSH/RET combination to change the instruction pointer.
      // On x86-64, we can instead use a 32bit JMPQ.
      //
      // .. .. .. .. ; any leading instructions copied from original code
      // 48 81 EC 80 00 00 00        SUB  $0x80, %rsp
      // 50                          PUSH %rax
      // 48 8D 05 .. .. .. ..        LEA  ...(%rip), %rax
      // 50                          PUSH %rax
      // 48 B8 .. .. .. ..           MOV  $syscallWrapper, %rax
      // .. .. .. ..
      // 50                          PUSH %rax
      // 48 8D 05 06 00 00 00        LEA  6(%rip), %rax
      // 48 87 44 24 10              XCHG %rax, 16(%rsp)
      // C3                          RETQ
      // 48 81 C4 80 00 00 00        ADD  $0x80, %rsp
      // .. .. .. .. ; any trailing instructions copied from original code
      // E9 .. .. .. ..              JMPQ ...
      //
      // Total: 52 bytes + any bytes that were copied
      //
      // On x86-32, the stack is available and we can do:
      //
      // TODO(markus): Try to maintain frame pointers on x86-32
      //
      // .. .. .. .. ; any leading instructions copied from original code
      // 68 .. .. .. ..              PUSH return_addr
      // 68 .. .. .. ..              PUSH $syscallWrapper
      // C3                          RET
      // .. .. .. .. ; any trailing instructions copied from original code
      // 68 .. .. .. ..              PUSH return_addr
      // C3                          RET
      //
      // Total: 17 bytes + any bytes that were copied
      //
      // For indirect jumps from the VDSO to the VSyscall page, we instead
      // replace the following code (this is only necessary on x86-64). This
      // time, we don't have to worry about red zones:
      //
      // .. .. .. .. ; any leading instructions copied from original code
      // E8 00 00 00 00              CALL .
      // 48 83 04 24 ..              ADDQ $.., (%rsp)
      // FF .. .. .. .. ..           PUSH .. ; from original CALL instruction
      // 48 81 3C 24 00 00 00 FF     CMPQ $0xFFFFFFFFFF000000, 0(%rsp)
      // 72 10                       JB   . + 16
      // 81 2C 24 .. .. .. ..        SUBL ..., 0(%rsp)
      // C7 44 24 04 00 00 00 00     MOVL $0, 4(%rsp)
      // C3                          RETQ
      // 48 87 04 24                 XCHG %rax,(%rsp)
      // 48 89 44 24 08              MOV  %rax,0x8(%rsp)
      // 58                          POP  %rax
      // C3                          RETQ
      // .. .. .. .. ; any trailing instructions copied from original code
      // E9 .. .. .. ..              JMPQ ...
      //
      // Total: 52 bytes + any bytes that were copied

      if (length < (__WORDSIZE == 32 ? 6 : 5)) {
        // There are a very small number of instruction sequences that we
        // cannot easily intercept, and that have been observed in real world
        // examples. Handle them here:
#if defined(__i386__)
        int diff;
        if (!memcmp(code[codeIdx].addr, "\xCD\x80\xEB", 3) &&
            (diff = *reinterpret_cast<signed char *>(
                 code[codeIdx].addr + 3)) < 0 && diff >= -6) {
          // We have seen...
          //     for (;;) {
          //       _exit(0);
          //     }
          // ..get compiled to:
          //     B8 01 00 00 00      MOV  $__NR_exit, %eax
          //     66 90               XCHG %ax, %ax
          //     31 DB          0:   XOR  %ebx, %ebx
          //     CD 80               INT  $0x80
          //     EB FA               JMP  0b
          // The JMP is really superfluous as the system call never returns.
          // And there are in fact no returning system calls that need to be
          // unconditionally repeated in an infinite loop.
          // If we replace the JMP with NOPs, the system call can successfully
          // be intercepted.
          *reinterpret_cast<unsigned short *>(code[codeIdx].addr + 2) = 0x9090;
          goto findEndIdx;
        }
#elif defined(__x86_64__)
        std::set<char *>::const_iterator iter;
#endif
        // If we cannot figure out any other way to intercept this system call,
        // we replace it with a call to INT0. This causes a SEGV which we then
        // handle in the signal handler. That's a lot slower than rewriting the
        // instruction with a jump, but it should only happen very rarely.
        if (is_syscall) {
          // Copies {0xCD, 0x00}: the second byte is the string literal's
          // terminating NUL, yielding INT $0x00.
          memcpy(code[codeIdx].addr, "\xCD", 2);
          if (code[codeIdx].len > 2) {
            memset(code[codeIdx].addr + 2, 0x90, code[codeIdx].len - 2);
          }
          goto replaced;
        }
#if defined(__x86_64__)
        // On x86-64, we occasionally see code like this in the VDSO:
        //   48 8B 05 CF FE FF FF    MOV    -0x131(%rip),%rax
        //   FF 50 20                CALLQ  *0x20(%rax)
        // By default, we would not replace the MOV instruction, as it is
        // IP relative. But if the following instruction is also IP relative,
        // we are left with only three bytes which is not enough to insert a
        // jump.
        // We recognize this particular situation, and as long as the CALLQ
        // is not a branch target, we decide to still relocate the entire
        // sequence. We just have to make sure that we then patch up the
        // IP relative addressing.
        else if (is_indirect_call && startIdx == codeIdx &&
                 code[startIdx = (startIdx + (sizeof(code) /
                                              sizeof(struct Code)) - 1) %
                      (sizeof(code) / sizeof(struct Code))].addr &&
                 ptr - code[startIdx].addr >= 5 &&
                 code[startIdx].is_ip_relative &&
                 isSafeInsn(code[startIdx].insn) &&
                 ((iter = std::upper_bound(branch_targets.begin(),
                                           branch_targets.end(),
                                           code[startIdx].addr)) ==
                  branch_targets.end() || *iter >= ptr)) {
          // We changed startIdx to include the IP relative instruction.
          // When copying this preamble, we make sure to patch up the
          // offset.
        }
#endif
        else {
          Sandbox::die("Cannot intercept system call");
        }
      }
      // Figure out how many surrounding instructions ("first" .. "second" in
      // the ring buffer) must be relocated to free up jump-sized space.
      int needed = (__WORDSIZE == 32 ? 6 : 5) - code[codeIdx].len;
      int first = codeIdx;
      while (needed > 0 && first != startIdx) {
        first = (first + (sizeof(code) / sizeof(struct Code)) - 1) %
                (sizeof(code) / sizeof(struct Code));
        needed -= code[first].len;
      }
      int second = codeIdx;
      while (needed > 0) {
        second = (second + 1) % (sizeof(code) / sizeof(struct Code));
        needed -= code[second].len;
      }
      int preamble = code[codeIdx].addr - code[first].addr;
      int postamble = code[second].addr + code[second].len -
                      code[codeIdx].addr - code[codeIdx].len;

      // The following is all the code that construct the various bits of
      // assembly code.
#if defined(__x86_64__)
      if (is_indirect_call) {
        needed = 52 + preamble + code[codeIdx].len + postamble;
      } else {
        needed = 52 + preamble + postamble;
      }
#elif defined(__i386__)
      needed = 17 + preamble + postamble;
#else
#error Unsupported target platform
#endif

      // Allocate scratch space and copy the preamble of code that was moved
      // from the function that we are patching.
      char* dest = getScratchSpace(maps, code[first].addr, needed,
                                   extraSpace, extraLength);
      memcpy(dest, code[first].addr, preamble);

      // For jumps from the VDSO to the VSyscalls we sometimes allow exactly
      // one IP relative instruction in the preamble.
      if (code[first].is_ip_relative) {
        // Adjust the 32bit RIP-relative displacement (the last four bytes of
        // the relocated instruction) for the move to "dest".
        *reinterpret_cast<int *>(dest + (code[codeIdx].addr -
                                         code[first].addr) - 4)
            -= dest - code[first].addr;
      }

      // For indirect calls, we need to copy the actual CALL instruction and
      // turn it into a PUSH instruction.
#if defined(__x86_64__)
      if (is_indirect_call) {
        memcpy(dest + preamble, "\xE8\x00\x00\x00\x00\x48\x83\x04\x24", 9);
        dest[preamble + 9] = code[codeIdx].len + 42;
        memcpy(dest + preamble + 10, code[codeIdx].addr, code[codeIdx].len);

        // Convert CALL -> PUSH (ModR/M reg field /2 -> /6)
        dest[preamble + 10 + (mod_rm - code[codeIdx].addr)] |= 0x20;
        preamble += 10 + code[codeIdx].len;
      }
#endif

      // Copy the static body of the assembly code.
      memcpy(dest + preamble,
#if defined(__x86_64__)
             is_indirect_call ?
             "\x48\x81\x3C\x24\x00\x00\x00\xFF\x72\x10\x81\x2C\x24\x00\x00\x00"
             "\x00\xC7\x44\x24\x04\x00\x00\x00\x00\xC3\x48\x87\x04\x24\x48\x89"
             "\x44\x24\x08\x58\xC3" :
             "\x48\x81\xEC\x80\x00\x00\x00\x50\x48\x8D\x05\x00\x00\x00\x00\x50"
             "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x00\x50\x48\x8D\x05\x06\x00"
             "\x00\x00\x48\x87\x44\x24\x10\xC3\x48\x81\xC4\x80\x00\x00",
             is_indirect_call ? 37 : 47
#elif defined(__i386__)
             "\x68\x00\x00\x00\x00\x68\x00\x00\x00\x00\xC3", 11
#else
#error Unsupported target platform
#endif
             );

      // Copy the postamble that was moved from the function that we are
      // patching.
      memcpy(dest + preamble +
#if defined(__x86_64__)
             (is_indirect_call ? 37 : 47),
#elif defined(__i386__)
             11,
#else
#error Unsupported target platform
#endif
             code[codeIdx].addr + code[codeIdx].len,
             postamble);

      // Patch up the various computed values
#if defined(__x86_64__)
      int post = preamble + (is_indirect_call ? 37 : 47) + postamble;
      dest[post] = '\xE9';  // JMPQ back to the patched function
      *reinterpret_cast<int *>(dest + post + 1) =
          (code[second].addr + code[second].len) - (dest + post + 5);
      if (is_indirect_call) {
        *reinterpret_cast<int *>(dest + preamble + 13) = vsys_offset_;
      } else {
        *reinterpret_cast<int *>(dest + preamble + 11) =
            (code[second].addr + code[second].len) - (dest + preamble + 15);
        *reinterpret_cast<void **>(dest + preamble + 18) =
            reinterpret_cast<void *>(&syscallWrapper);
      }
#elif defined(__i386__)
      *(dest + preamble + 11 + postamble) = '\x68';  // PUSH
      *reinterpret_cast<char **>(dest + preamble + 12 + postamble) =
          code[second].addr + code[second].len;
      *(dest + preamble + 16 + postamble) = '\xC3';  // RET
      *reinterpret_cast<char **>(dest + preamble + 1) =
          dest + preamble + 11;
      *reinterpret_cast<void (**)()>(dest + preamble + 6) = syscallWrapper;
#else
#error Unsupported target platform
#endif

      // Pad unused space in the original function with NOPs
      memset(code[first].addr, 0x90 /* NOP */,
             code[second].addr + code[second].len - code[first].addr);

      // Replace the system call with an unconditional jump to our new code.
#if defined(__x86_64__)
      *code[first].addr = '\xE9';  // JMPQ
      *reinterpret_cast<int *>(code[first].addr + 1) =
          dest - (code[first].addr + 5);
#elif defined(__i386__)
      code[first].addr[0] = '\x68';  // PUSH
      *reinterpret_cast<char **>(code[first].addr + 1) = dest;
      code[first].addr[5] = '\xC3';  // RET
#else
#error Unsupported target platform
#endif
    }
   replaced:
    codeIdx = (codeIdx + 1) % (sizeof(code) / sizeof(struct Code));
  }
}
| 802 | |
| 803 void Library::patchVDSO(char** extraSpace, int* extraLength){ | |
| 804 #if defined(__i386__) | |
| 805 Sandbox::SysCalls sys; | |
| 806 if (!__kernel_vsyscall || | |
| 807 sys.mprotect(reinterpret_cast<void *>( | |
| 808 reinterpret_cast<long>(__kernel_vsyscall) & ~0xFFF), | |
| 809 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) { | |
| 810 return; | |
| 811 } | |
| 812 | |
| 813 // x86-32 has a small number of well-defined functions in the VDSO library. | |
| 814 // These functions do not easily lend themselves to be rewritten by the | |
| 815 // automatic code. Instead, we explicitly find new definitions for them. | |
| 816 // | |
| 817 // We don't bother with optimizing the syscall instruction instead always | |
| 818 // use INT $0x80, no matter whether the hardware supports more modern | |
| 819 // calling conventions. | |
| 820 // | |
| 821 // TODO(markus): Investigate whether it is worthwhile to optimize this | |
| 822 // code path and use the platform-specific entry code. | |
| 823 if (__kernel_vsyscall) { | |
| 824 // Replace the kernel entry point with: | |
| 825 // | |
| 826 // E9 .. .. .. .. JMP syscallWrapper | |
| 827 *__kernel_vsyscall = '\xE9'; | |
| 828 *reinterpret_cast<long *>(__kernel_vsyscall + 1) = | |
| 829 reinterpret_cast<char *>(&syscallWrapper) - | |
| 830 reinterpret_cast<char *>(__kernel_vsyscall + 5); | |
| 831 } | |
| 832 if (__kernel_sigreturn) { | |
| 833 // Replace the sigreturn() system call with a jump to code that does: | |
| 834 // | |
| 835 // 58 POP %eax | |
| 836 // B8 77 00 00 00 MOV $0x77, %eax | |
| 837 // E8 .. .. .. .. CALL syscallWrapper | |
| 838 char* dest = getScratchSpace(maps_, __kernel_sigreturn, 11, extraSpace, | |
| 839 extraLength); | |
| 840 memcpy(dest, "\x58\xB8\x77\x00\x00\x00\xE8", 7); | |
| 841 *reinterpret_cast<long *>(dest + 7) = | |
| 842 reinterpret_cast<char *>(&syscallWrapper) - dest - 11;; | |
| 843 *__kernel_sigreturn = '\xE9'; | |
| 844 *reinterpret_cast<long *>(__kernel_sigreturn + 1) = | |
| 845 dest - reinterpret_cast<char *>(__kernel_sigreturn) - 5; | |
| 846 } | |
| 847 if (__kernel_rt_sigreturn) { | |
| 848 // Replace the rt_sigreturn() system call with a jump to code that does: | |
| 849 // | |
| 850 // B8 AD 00 00 00 MOV $0xAD, %eax | |
| 851 // E8 .. .. .. .. CALL syscallWrapper | |
| 852 char* dest = getScratchSpace(maps_, __kernel_rt_sigreturn, 10, extraSpace, | |
| 853 extraLength); | |
| 854 memcpy(dest, "\xB8\xAD\x00\x00\x00\xE8", 6); | |
| 855 *reinterpret_cast<long *>(dest + 6) = | |
| 856 reinterpret_cast<char *>(&syscallWrapper) - dest - 10; | |
| 857 *__kernel_rt_sigreturn = '\xE9'; | |
| 858 *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) = | |
| 859 dest - reinterpret_cast<char *>(__kernel_rt_sigreturn) - 5; | |
| 860 } | |
| 861 #endif | |
| 862 } | |
| 863 | |
// Copies the (x86-64 only) vsyscall page into mappable address space and
// patches the copy so that system calls in it get redirected into the
// sandbox. Returns the delta between the original page and the copy (the
// caller stores it as vsys_offset_), or 0 if there is nothing to patch.
int Library::patchVSystemCalls() {
#if defined(__x86_64__)
  // VSyscalls live in a shared 4kB page at the top of the address space. This
  // page cannot be unmapped nor remapped. We have to create a copy within
  // 2GB of the page, and rewrite all IP-relative accesses to shared variables.
  // As the top of the address space is not accessible by mmap(), this means
  // that we need to wrap around addresses to the bottom 2GB of the address
  // space.
  // Only x86-64 has VSyscalls.
  if (maps_->vsyscall()) {
    char* copy = maps_->allocNearAddr(maps_->vsyscall(), 0x1000,
                                      PROT_READ|PROT_WRITE|PROT_EXEC);
    char* extraSpace = copy;
    int extraLength = 0x1000;
    memcpy(copy, maps_->vsyscall(), 0x1000);
    // Delta between the original page and our copy. IP-relative data
    // references inside the copied code are rebased by this amount so that
    // they keep pointing at the kernel-maintained variables in the original
    // page.
    long adjust = (long)maps_->vsyscall() - (long)copy;
    for (int vsys = 0; vsys < 0x1000; vsys += 0x400) {
      char* start = copy + vsys;
      char* end = start + 0x400;

      // There can only be up to four VSyscalls starting at an offset of
      // n*0x400, each. VSyscalls are invoked by functions in the VDSO
      // and provide fast implementations of a time source. We don't exactly
      // know where the code and where the data is in the VSyscalls page.
      // So, we disassemble the code for each function and find all branch
      // targets within the function in order to find the last address of
      // function.
      for (char *last = start, *vars = end, *ptr = start; ptr < end; ) {
    new_function:
        char* mod_rm;
        unsigned short insn = next_inst((const char **)&ptr, true, 0, 0,
                                        &mod_rm, 0, 0);
        if (mod_rm && (*mod_rm & 0xC7) == 0x5) {
          // Instruction has IP relative addressing mode. Adjust to reference
          // the variables in the original VSyscall segment.
          long offset = *reinterpret_cast<int *>(mod_rm + 1);
          char* var = ptr + offset;
          if (var >= ptr && var < vars) {
            // Variables are stored somewhere past all the functions. Remember
            // the first variable in the VSyscall slot, so that we stop
            // scanning for instructions once we reach that address.
            vars = var;
          }
          offset += adjust;
          // The rebased displacement must still fit into the instruction's
          // signed 32-bit field; otherwise the copy cannot be patched.
          if ((offset >> 32) && (offset >> 32) != -1) {
            Sandbox::die("Cannot patch [vsystemcall]");
          }
          *reinterpret_cast<int *>(mod_rm + 1) = offset;
        }

        // Check for jump targets to higher addresses (but within our own
        // VSyscall slot). They extend the possible end-address of this
        // function.
        char *target = 0;
        if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ ||
            insn == 0xEB /* JMP */) {
          // Opcode is followed by an 8-bit signed displacement.
          target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
        } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
                   (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
          // Opcode is followed by a 32-bit signed displacement.
          target = ptr + (reinterpret_cast<int *>(ptr))[-1];
        }

        // The function end is found, once the loop reaches the last valid
        // address in the VSyscall slot, or once it finds a RET instruction
        // that is not followed by any jump targets. Unconditional jumps that
        // point backwards are treated the same as a RET instruction.
        if (insn == 0xC3 /* RET */ ||
            (target < ptr &&
             (insn == 0xEB /* JMP */ || insn == 0xE9 /* JMP */))) {
          if (last >= ptr) {
            // A previously recorded jump target lies at or past this
            // instruction, so the function continues; keep disassembling.
            continue;
          } else {
            // The function can optionally be followed by more functions in
            // the same VSyscall slot. Allow for alignment to a 16 byte
            // boundary. If we then find more non-zero bytes, and if this is
            // not the known start of the variables, assume a new function
            // started.
            for (; ptr < vars; ++ptr) {
              if ((long)ptr & 0xF) {
                if (*ptr && *ptr != '\x90' /* NOP */) {
                  goto new_function;
                }
                *ptr = '\x90'; // NOP
              } else {
                if (*ptr && *ptr != '\x90' /* NOP */) {
                  goto new_function;
                }
                break;
              }
            }

            // Translate all SYSCALLs to jumps into our system call handler.
            patchSystemCallsInFunction(NULL, start, ptr,
                                       &extraSpace, &extraLength);
            break;
          }
        }

        // Adjust assumed end address for this function, if a valid jump
        // target has been found that originates from the current instruction.
        // NOTE(review): this assumes no vsyscall function exceeds 0x100
        // bytes -- confirm against the kernel's vsyscall page layout.
        if (target > last && target < start + 0x100) {
          last = target;
        }
      }
    }

    // We are done. Write-protect our code and make it executable.
    Sandbox::SysCalls sys;
    sys.mprotect(copy, 0x1000, PROT_READ|PROT_EXEC);
    // Callers use this delta to redirect vsyscall invocations to the copy.
    return maps_->vsyscall() - copy;
  }
#endif
  return 0;
}
| 978 | |
// Scans this library's .text section for system call instructions and
// rewrites each function that contains one, redirecting those system calls
// into the sandbox's handler.
void Library::patchSystemCalls() {
  if (!valid_) {
    return;
  }
  int extraLength = 0;
  char* extraSpace = NULL;
  if (isVDSO_) {
    // patchVDSO() calls patchSystemCallsInFunction() which needs vsys_offset_
    // iff processing the VDSO library. So, make sure we call
    // patchVSystemCalls() first.
    vsys_offset_ = patchVSystemCalls();
#if defined(__i386__)
    patchVDSO(&extraSpace, &extraLength);
    // NOTE(review): this early return skips the final mprotect() at the
    // bottom of this function, so scratch space allocated by patchVDSO()
    // appears to stay writable and executable -- confirm this is intended.
    return;
#endif
  }
  SectionTable::const_iterator iter;
  if ((iter = section_table_.find(".text")) == section_table_.end()) {
    // Without a .text section there is nothing to patch.
    return;
  }
  const Elf_Shdr& shdr = iter->second.second;
  char* start = reinterpret_cast<char *>(shdr.sh_addr + asr_offset_);
  char* stop = start + shdr.sh_size;
  char* func = start;
  int nopcount = 0;
  bool has_syscall = false;
  for (char *ptr = start; ptr < stop; ptr++) {
#if defined(__x86_64__)
    // 0F 05 is SYSCALL. 0xFF is the opcode byte of the indirect CALL/JMP
    // group, which in the VDSO may reach the vsyscall page.
    if ((*ptr == '\x0F' && ptr[1] == '\x05' /* SYSCALL */) ||
        (isVDSO_ && *ptr == '\xFF')) {
#elif defined(__i386__)
    if ((*ptr == '\xCD' && ptr[1] == '\x80' /* INT $0x80 */) ||
        (*ptr == '\x65' && ptr[1] == '\xFF' &&
         ptr[2] == '\x15' /* CALL %gs:.. */)) {
#else
#error Unsupported target platform
#endif
      ptr++;
      has_syscall = true;
      nopcount = 0;
    } else if (*ptr == '\x90' /* NOP */) {
      nopcount++;
    } else if (!(reinterpret_cast<long>(ptr) & 0xF)) {
      if (nopcount > 2) {
        // This is very likely the beginning of a new function. Functions
        // are aligned on 16 byte boundaries and the preceding function is
        // padded out with NOPs.
        //
        // For performance reasons, we quickly scan the entire text segment
        // for potential SYSCALLs, and then patch the code in increments of
        // individual functions.
        if (has_syscall) {
          has_syscall = false;
          // Our quick scan of the function found a potential system call.
          // Do a more thorough scan, now.
          patchSystemCallsInFunction(maps_, func, ptr, &extraSpace,
                                     &extraLength);
        }
        func = ptr;
      }
      nopcount = 0;
    } else {
      nopcount = 0;
    }
  }
  if (has_syscall) {
    // Patch any remaining system calls that were in the last function before
    // the loop terminated.
    patchSystemCallsInFunction(maps_, func, stop, &extraSpace, &extraLength);
  }

  // Mark our scratch space as write-protected and executable.
  if (extraSpace) {
    Sandbox::SysCalls sys;
    sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC);
  }
}
| 1056 | |
| 1057 bool Library::parseElf() { | |
| 1058 valid_ = true; | |
| 1059 | |
| 1060 // Verify ELF header | |
| 1061 Elf_Shdr str_shdr; | |
| 1062 if (!getOriginal(0, &ehdr_) || | |
| 1063 ehdr_.e_ehsize < sizeof(Elf_Ehdr) || | |
| 1064 ehdr_.e_phentsize < sizeof(Elf_Phdr) || | |
| 1065 ehdr_.e_shentsize < sizeof(Elf_Shdr) || | |
| 1066 !getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, | |
| 1067 &str_shdr)) { | |
| 1068 // Not all memory mappings are necessarily ELF files. Skip memory | |
| 1069 // mappings that we cannot identify. | |
| 1070 error: | |
| 1071 valid_ = false; | |
| 1072 return false; | |
| 1073 } | |
| 1074 | |
| 1075 // Parse section table and find all sections in this ELF file | |
| 1076 for (int i = 0; i < ehdr_.e_shnum; i++) { | |
| 1077 Elf_Shdr shdr; | |
| 1078 if (!getOriginal(ehdr_.e_shoff + i*ehdr_.e_shentsize, &shdr)) { | |
| 1079 continue; | |
| 1080 } | |
| 1081 section_table_.insert( | |
| 1082 std::make_pair(getOriginal(str_shdr.sh_offset + shdr.sh_name), | |
| 1083 std::make_pair(i, shdr))); | |
| 1084 } | |
| 1085 | |
| 1086 // Compute the offset of entries in the .text segment | |
| 1087 const Elf_Shdr* text = getSection(".text"); | |
| 1088 if (text == NULL) { | |
| 1089 // On x86-32, the VDSO is unusual in as much as it does not have a single | |
| 1090 // ".text" section. Instead, it has one section per function. Each | |
| 1091 // section name starts with ".text". We just need to pick an arbitrary | |
| 1092 // one in order to find the asr_offset_ -- which would typically be zero | |
| 1093 // for the VDSO. | |
| 1094 for (SectionTable::const_iterator iter = section_table_.begin(); | |
| 1095 iter != section_table_.end(); ++iter) { | |
| 1096 if (!strncmp(iter->first.c_str(), ".text", 5)) { | |
| 1097 text = &iter->second.second; | |
| 1098 break; | |
| 1099 } | |
| 1100 } | |
| 1101 } | |
| 1102 | |
| 1103 // Now that we know where the .text segment is located, we can compute the | |
| 1104 // asr_offset_. | |
| 1105 if (text) { | |
| 1106 RangeMap::const_iterator iter = | |
| 1107 memory_ranges_.lower_bound(text->sh_offset); | |
| 1108 if (iter != memory_ranges_.end()) { | |
| 1109 asr_offset_ = reinterpret_cast<char *>(iter->second.start) - | |
| 1110 (text->sh_addr - (text->sh_offset - iter->first)); | |
| 1111 } else { | |
| 1112 goto error; | |
| 1113 } | |
| 1114 } else { | |
| 1115 goto error; | |
| 1116 } | |
| 1117 | |
| 1118 return !isVDSO_ || parseSymbols(); | |
| 1119 } | |
| 1120 | |
| 1121 bool Library::parseSymbols() { | |
| 1122 if (!valid_) { | |
| 1123 return false; | |
| 1124 } | |
| 1125 | |
| 1126 Elf_Shdr str_shdr; | |
| 1127 getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, &str_shdr); | |
| 1128 | |
| 1129 // Find PLT and symbol tables | |
| 1130 const Elf_Shdr* plt = getSection(ELF_REL_PLT); | |
| 1131 const Elf_Shdr* symtab = getSection(".dynsym"); | |
| 1132 Elf_Shdr strtab = { 0 }; | |
| 1133 if (symtab) { | |
| 1134 if (symtab->sh_link >= ehdr_.e_shnum || | |
| 1135 !getOriginal(ehdr_.e_shoff + symtab->sh_link * ehdr_.e_shentsize, | |
| 1136 &strtab)) { | |
| 1137 Debug::message("Cannot find valid symbol table\n"); | |
| 1138 valid_ = false; | |
| 1139 return false; | |
| 1140 } | |
| 1141 } | |
| 1142 | |
| 1143 if (plt && symtab) { | |
| 1144 // Parse PLT table and add its entries | |
| 1145 for (int i = plt->sh_size/sizeof(Elf_Rel); --i >= 0; ) { | |
| 1146 Elf_Rel rel; | |
| 1147 if (!getOriginal(plt->sh_offset + i * sizeof(Elf_Rel), &rel) || | |
| 1148 ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym) >= symtab->sh_size) { | |
| 1149 Debug::message("Encountered invalid plt entry\n"); | |
| 1150 valid_ = false; | |
| 1151 return false; | |
| 1152 } | |
| 1153 | |
| 1154 if (ELF_R_TYPE(rel.r_info) != ELF_JUMP_SLOT) { | |
| 1155 continue; | |
| 1156 } | |
| 1157 Elf_Sym sym; | |
| 1158 if (!getOriginal(symtab->sh_offset + | |
| 1159 ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym), &sym) || | |
| 1160 sym.st_shndx >= ehdr_.e_shnum) { | |
| 1161 Debug::message("Encountered invalid symbol for plt entry\n"); | |
| 1162 valid_ = false; | |
| 1163 return false; | |
| 1164 } | |
| 1165 string name = getOriginal(strtab.sh_offset + sym.st_name); | |
| 1166 if (name.empty()) { | |
| 1167 continue; | |
| 1168 } | |
| 1169 plt_entries_.insert(std::make_pair(name, rel.r_offset)); | |
| 1170 } | |
| 1171 } | |
| 1172 | |
| 1173 if (symtab) { | |
| 1174 // Parse symbol table and add its entries | |
| 1175 for (Elf_Addr addr = 0; addr < symtab->sh_size; addr += sizeof(Elf_Sym)) { | |
| 1176 Elf_Sym sym; | |
| 1177 if (!getOriginal(symtab->sh_offset + addr, &sym) || | |
| 1178 (sym.st_shndx >= ehdr_.e_shnum && | |
| 1179 sym.st_shndx < SHN_LORESERVE)) { | |
| 1180 Debug::message("Encountered invalid symbol\n"); | |
| 1181 valid_ = false; | |
| 1182 return false; | |
| 1183 } | |
| 1184 string name = getOriginal(strtab.sh_offset + sym.st_name); | |
| 1185 if (name.empty()) { | |
| 1186 continue; | |
| 1187 } | |
| 1188 symbols_.insert(std::make_pair(name, sym)); | |
| 1189 } | |
| 1190 } | |
| 1191 | |
| 1192 SymbolTable::const_iterator iter = symbols_.find("__kernel_vsyscall"); | |
| 1193 if (iter != symbols_.end() && iter->second.st_value) { | |
| 1194 __kernel_vsyscall = asr_offset_ + iter->second.st_value; | |
| 1195 } | |
| 1196 iter = symbols_.find("__kernel_sigreturn"); | |
| 1197 if (iter != symbols_.end() && iter->second.st_value) { | |
| 1198 __kernel_sigreturn = asr_offset_ + iter->second.st_value; | |
| 1199 } | |
| 1200 iter = symbols_.find("__kernel_rt_sigreturn"); | |
| 1201 if (iter != symbols_.end() && iter->second.st_value) { | |
| 1202 __kernel_rt_sigreturn = asr_offset_ + iter->second.st_value; | |
| 1203 } | |
| 1204 | |
| 1205 return true; | |
| 1206 } | |
| 1207 | |
| 1208 } // namespace | |
| OLD | NEW |