| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006, Google Inc. | |
| 2 // All rights reserved. | |
| 3 // | |
| 4 // Redistribution and use in source and binary forms, with or without | |
| 5 // modification, are permitted provided that the following conditions are | |
| 6 // met: | |
| 7 // | |
| 8 // * Redistributions of source code must retain the above copyright | |
| 9 // notice, this list of conditions and the following disclaimer. | |
| 10 // * Redistributions in binary form must reproduce the above | |
| 11 // copyright notice, this list of conditions and the following disclaimer | |
| 12 // in the documentation and/or other materials provided with the | |
| 13 // distribution. | |
| 14 // * Neither the name of Google Inc. nor the names of its | |
| 15 // contributors may be used to endorse or promote products derived from | |
| 16 // this software without specific prior written permission. | |
| 17 // | |
| 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 // | |
| 30 // Author: Satoru Takabayashi | |
| 31 // Stack-footprint reduction work done by Raksit Ashok | |
| 32 // | |
| 33 // Implementation note: | |
| 34 // | |
| 35 // We don't use heaps but only use stacks. We want to reduce the | |
| 36 // stack consumption so that the symbolizer can run on small stacks. | |
| 37 // | |
| 38 // Here are some numbers collected with GCC 4.1.0 on x86: | |
| 39 // - sizeof(Elf32_Sym) = 16 | |
| 40 // - sizeof(Elf32_Shdr) = 40 | |
| 41 // - sizeof(Elf64_Sym) = 24 | |
| 42 // - sizeof(Elf64_Shdr) = 64 | |
| 43 // | |
| 44 // This implementation is intended to be async-signal-safe but uses | |
| 45 // some functions which are not guaranteed to be so, such as memchr() | |
| 46 // and memmove(). We assume they are async-signal-safe. | |
| 47 // | |
| 48 // Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE | |
| 49 // macro to add platform specific defines (e.g. OS_OPENBSD). | |
| 50 | |
| 51 #ifdef GLOG_BUILD_CONFIG_INCLUDE | |
| 52 #include GLOG_BUILD_CONFIG_INCLUDE | |
| 53 #endif // GLOG_BUILD_CONFIG_INCLUDE | |
| 54 | |
| 55 #include "utilities.h" | |
| 56 | |
| 57 #if defined(HAVE_SYMBOLIZE) | |
| 58 | |
| 59 #include <limits> | |
| 60 | |
| 61 #include "symbolize.h" | |
| 62 #include "demangle.h" | |
| 63 | |
| 64 _START_GOOGLE_NAMESPACE_ | |
| 65 | |
| 66 // We don't use assert() since it's not guaranteed to be | |
| 67 // async-signal-safe. Instead we define a minimal assertion | |
| 68 // macro. So far, we don't need pretty printing for __FILE__, etc. | |
| 69 | |
// abort() returns void and therefore cannot be an operand of ?:.  This
// int-returning shim terminates the process on its behalf.
static int AssertFail() {
  abort();
  // Not reached; present only to satisfy the declared return type.
  return 0;
}

// Minimal assertion: evaluates to 0 when |expr| holds and aborts the
// process otherwise.  |expr| is evaluated exactly once.
#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
| 77 | |
| 78 static SymbolizeCallback g_symbolize_callback = NULL; | |
| 79 void InstallSymbolizeCallback(SymbolizeCallback callback) { | |
| 80 g_symbolize_callback = callback; | |
| 81 } | |
| 82 | |
| 83 static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback = | |
| 84 NULL; | |
| 85 void InstallSymbolizeOpenObjectFileCallback( | |
| 86 SymbolizeOpenObjectFileCallback callback) { | |
| 87 g_symbolize_open_object_file_callback = callback; | |
| 88 } | |
| 89 | |
| 90 // This function wraps the Demangle function to provide an interface | |
| 91 // where the input symbol is demangled in-place. | |
| 92 // To keep stack consumption low, we would like this function to not | |
| 93 // get inlined. | |
| 94 static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) { | |
| 95 char demangled[256]; // Big enough for sane demangled symbols. | |
| 96 if (Demangle(out, demangled, sizeof(demangled))) { | |
| 97 // Demangling succeeded. Copy to out if the space allows. | |
| 98 size_t len = strlen(demangled); | |
| 99 if (len + 1 <= (size_t)out_size) { // +1 for '\0'. | |
| 100 SAFE_ASSERT(len < sizeof(demangled)); | |
| 101 memmove(out, demangled, len + 1); | |
| 102 } | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 _END_GOOGLE_NAMESPACE_ | |
| 107 | |
| 108 #if defined(__ELF__) | |
| 109 | |
| 110 #include <dlfcn.h> | |
| 111 #if defined(OS_OPENBSD) | |
| 112 #include <sys/exec_elf.h> | |
| 113 #else | |
| 114 #include <elf.h> | |
| 115 #endif | |
| 116 #include <errno.h> | |
| 117 #include <fcntl.h> | |
| 118 #include <limits.h> | |
| 119 #include <stdint.h> | |
| 120 #include <stdio.h> | |
| 121 #include <stdlib.h> | |
| 122 #include <stddef.h> | |
| 123 #include <string.h> | |
| 124 #include <sys/stat.h> | |
| 125 #include <sys/types.h> | |
| 126 #include <unistd.h> | |
| 127 | |
| 128 #include "symbolize.h" | |
| 129 #include "config.h" | |
| 130 #include "glog/raw_logging.h" | |
| 131 | |
// Evaluates |fn| repeatedly for as long as it yields a negative value with
// errno set to EINTR.  |fn| is always evaluated at least once.
#define NO_INTR(fn) \
  do {              \
  } while ((fn) < 0 && errno == EINTR)
| 134 | |
| 135 _START_GOOGLE_NAMESPACE_ | |
| 136 | |
| 137 // Read up to "count" bytes from file descriptor "fd" into the buffer | |
| 138 // starting at "buf" while handling short reads and EINTR. On | |
| 139 // success, return the number of bytes read. Otherwise, return -1. | |
| 140 static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { | |
| 141 SAFE_ASSERT(fd >= 0); | |
| 142 SAFE_ASSERT(count <= std::numeric_limits<ssize_t>::max()); | |
| 143 char *buf0 = reinterpret_cast<char *>(buf); | |
| 144 ssize_t num_bytes = 0; | |
| 145 while (num_bytes < count) { | |
| 146 ssize_t len; | |
| 147 NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); | |
| 148 if (len < 0) { // There was an error other than EINTR. | |
| 149 return -1; | |
| 150 } | |
| 151 if (len == 0) { // Reached EOF. | |
| 152 break; | |
| 153 } | |
| 154 num_bytes += len; | |
| 155 } | |
| 156 SAFE_ASSERT(num_bytes <= count); | |
| 157 return num_bytes; | |
| 158 } | |
| 159 | |
| 160 // Read up to "count" bytes from "offset" in the file pointed by file | |
| 161 // descriptor "fd" into the buffer starting at "buf". On success, | |
| 162 // return the number of bytes read. Otherwise, return -1. | |
| 163 static ssize_t ReadFromOffset(const int fd, void *buf, | |
| 164 const size_t count, const off_t offset) { | |
| 165 off_t off = lseek(fd, offset, SEEK_SET); | |
| 166 if (off == (off_t)-1) { | |
| 167 return -1; | |
| 168 } | |
| 169 return ReadPersistent(fd, buf, count); | |
| 170 } | |
| 171 | |
| 172 // Try reading exactly "count" bytes from "offset" bytes in a file | |
| 173 // pointed by "fd" into the buffer starting at "buf" while handling | |
| 174 // short reads and EINTR. On success, return true. Otherwise, return | |
| 175 // false. | |
| 176 static bool ReadFromOffsetExact(const int fd, void *buf, | |
| 177 const size_t count, const off_t offset) { | |
| 178 ssize_t len = ReadFromOffset(fd, buf, count, offset); | |
| 179 return len == count; | |
| 180 } | |
| 181 | |
| 182 // Returns elf_header.e_type if the file pointed by fd is an ELF binary. | |
| 183 static int FileGetElfType(const int fd) { | |
| 184 ElfW(Ehdr) elf_header; | |
| 185 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { | |
| 186 return -1; | |
| 187 } | |
| 188 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { | |
| 189 return -1; | |
| 190 } | |
| 191 return elf_header.e_type; | |
| 192 } | |
| 193 | |
| 194 // Read the section headers in the given ELF binary, and if a section | |
| 195 // of the specified type is found, set the output to this section header | |
| 196 // and return true. Otherwise, return false. | |
| 197 // To keep stack consumption low, we would like this function to not get | |
| 198 // inlined. | |
| 199 static ATTRIBUTE_NOINLINE bool | |
| 200 GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset, | |
| 201 ElfW(Word) type, ElfW(Shdr) *out) { | |
| 202 // Read at most 16 section headers at a time to save read calls. | |
| 203 ElfW(Shdr) buf[16]; | |
| 204 for (int i = 0; i < sh_num;) { | |
| 205 const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]); | |
| 206 const ssize_t num_bytes_to_read = | |
| 207 (sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf); | |
| 208 const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, | |
| 209 sh_offset + i * sizeof(buf[0])); | |
| 210 SAFE_ASSERT(len % sizeof(buf[0]) == 0); | |
| 211 const ssize_t num_headers_in_buf = len / sizeof(buf[0]); | |
| 212 SAFE_ASSERT(num_headers_in_buf <= sizeof(buf) / sizeof(buf[0])); | |
| 213 for (int j = 0; j < num_headers_in_buf; ++j) { | |
| 214 if (buf[j].sh_type == type) { | |
| 215 *out = buf[j]; | |
| 216 return true; | |
| 217 } | |
| 218 } | |
| 219 i += num_headers_in_buf; | |
| 220 } | |
| 221 return false; | |
| 222 } | |
| 223 | |
| 224 // There is no particular reason to limit section name to 63 characters, | |
| 225 // but there has (as yet) been no need for anything longer either. | |
| 226 const int kMaxSectionNameLen = 64; | |
| 227 | |
| 228 // name_len should include terminating '\0'. | |
| 229 bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, | |
| 230 ElfW(Shdr) *out) { | |
| 231 ElfW(Ehdr) elf_header; | |
| 232 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { | |
| 233 return false; | |
| 234 } | |
| 235 | |
| 236 ElfW(Shdr) shstrtab; | |
| 237 off_t shstrtab_offset = (elf_header.e_shoff + | |
| 238 elf_header.e_shentsize * elf_header.e_shstrndx); | |
| 239 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { | |
| 240 return false; | |
| 241 } | |
| 242 | |
| 243 for (int i = 0; i < elf_header.e_shnum; ++i) { | |
| 244 off_t section_header_offset = (elf_header.e_shoff + | |
| 245 elf_header.e_shentsize * i); | |
| 246 if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { | |
| 247 return false; | |
| 248 } | |
| 249 char header_name[kMaxSectionNameLen]; | |
| 250 if (sizeof(header_name) < name_len) { | |
| 251 RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); " | |
| 252 "section will not be found (even if present).", name, name_len); | |
| 253 // No point in even trying. | |
| 254 return false; | |
| 255 } | |
| 256 off_t name_offset = shstrtab.sh_offset + out->sh_name; | |
| 257 ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); | |
| 258 if (n_read == -1) { | |
| 259 return false; | |
| 260 } else if (n_read != name_len) { | |
| 261 // Short read -- name could be at end of file. | |
| 262 continue; | |
| 263 } | |
| 264 if (memcmp(header_name, name, name_len) == 0) { | |
| 265 return true; | |
| 266 } | |
| 267 } | |
| 268 return false; | |
| 269 } | |
| 270 | |
| 271 // Read a symbol table and look for the symbol containing the | |
| 272 // pc. Iterate over symbols in a symbol table and look for the symbol | |
| 273 // containing "pc". On success, return true and write the symbol name | |
| 274 // to out. Otherwise, return false. | |
| 275 // To keep stack consumption low, we would like this function to not get | |
| 276 // inlined. | |
| 277 static ATTRIBUTE_NOINLINE bool | |
| 278 FindSymbol(uint64_t pc, const int fd, char *out, int out_size, | |
| 279 uint64_t symbol_offset, const ElfW(Shdr) *strtab, | |
| 280 const ElfW(Shdr) *symtab) { | |
| 281 if (symtab == NULL) { | |
| 282 return false; | |
| 283 } | |
| 284 const int num_symbols = symtab->sh_size / symtab->sh_entsize; | |
| 285 for (int i = 0; i < num_symbols;) { | |
| 286 off_t offset = symtab->sh_offset + i * symtab->sh_entsize; | |
| 287 | |
| 288 // If we are reading Elf64_Sym's, we want to limit this array to | |
| 289 // 32 elements (to keep stack consumption low), otherwise we can | |
| 290 // have a 64 element Elf32_Sym array. | |
| 291 #if __WORDSIZE == 64 | |
| 292 #define NUM_SYMBOLS 32 | |
| 293 #else | |
| 294 #define NUM_SYMBOLS 64 | |
| 295 #endif | |
| 296 | |
| 297 // Read at most NUM_SYMBOLS symbols at once to save read() calls. | |
| 298 ElfW(Sym) buf[NUM_SYMBOLS]; | |
| 299 const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset); | |
| 300 SAFE_ASSERT(len % sizeof(buf[0]) == 0); | |
| 301 const ssize_t num_symbols_in_buf = len / sizeof(buf[0]); | |
| 302 SAFE_ASSERT(num_symbols_in_buf <= sizeof(buf)/sizeof(buf[0])); | |
| 303 for (int j = 0; j < num_symbols_in_buf; ++j) { | |
| 304 const ElfW(Sym)& symbol = buf[j]; | |
| 305 uint64_t start_address = symbol.st_value; | |
| 306 start_address += symbol_offset; | |
| 307 uint64_t end_address = start_address + symbol.st_size; | |
| 308 if (symbol.st_value != 0 && // Skip null value symbols. | |
| 309 symbol.st_shndx != 0 && // Skip undefined symbols. | |
| 310 start_address <= pc && pc < end_address) { | |
| 311 ssize_t len1 = ReadFromOffset(fd, out, out_size, | |
| 312 strtab->sh_offset + symbol.st_name); | |
| 313 if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) { | |
| 314 return false; | |
| 315 } | |
| 316 return true; // Obtained the symbol name. | |
| 317 } | |
| 318 } | |
| 319 i += num_symbols_in_buf; | |
| 320 } | |
| 321 return false; | |
| 322 } | |
| 323 | |
| 324 // Get the symbol name of "pc" from the file pointed by "fd". Process | |
| 325 // both regular and dynamic symbol tables if necessary. On success, | |
| 326 // write the symbol name to "out" and return true. Otherwise, return | |
| 327 // false. | |
| 328 static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, | |
| 329 char *out, int out_size, | |
| 330 uint64_t map_start_address) { | |
| 331 // Read the ELF header. | |
| 332 ElfW(Ehdr) elf_header; | |
| 333 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { | |
| 334 return false; | |
| 335 } | |
| 336 | |
| 337 uint64_t symbol_offset = 0; | |
| 338 if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. | |
| 339 symbol_offset = map_start_address; | |
| 340 } | |
| 341 | |
| 342 ElfW(Shdr) symtab, strtab; | |
| 343 | |
| 344 // Consult a regular symbol table first. | |
| 345 if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, | |
| 346 SHT_SYMTAB, &symtab)) { | |
| 347 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + | |
| 348 symtab.sh_link * sizeof(symtab))) { | |
| 349 return false; | |
| 350 } | |
| 351 if (FindSymbol(pc, fd, out, out_size, symbol_offset, | |
| 352 &strtab, &symtab)) { | |
| 353 return true; // Found the symbol in a regular symbol table. | |
| 354 } | |
| 355 } | |
| 356 | |
| 357 // If the symbol is not found, then consult a dynamic symbol table. | |
| 358 if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, | |
| 359 SHT_DYNSYM, &symtab)) { | |
| 360 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + | |
| 361 symtab.sh_link * sizeof(symtab))) { | |
| 362 return false; | |
| 363 } | |
| 364 if (FindSymbol(pc, fd, out, out_size, symbol_offset, | |
| 365 &strtab, &symtab)) { | |
| 366 return true; // Found the symbol in a dynamic symbol table. | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 return false; | |
| 371 } | |
| 372 | |
| 373 namespace { | |
// Thin wrapper around a file descriptor so that the file descriptor
// gets closed for sure.  A negative descriptor is accepted and simply
// never closed.  Not copyable.
struct FileDescriptor {
  const int fd_;
  explicit FileDescriptor(int fd) : fd_(fd) {}
  ~FileDescriptor() {
    if (fd_ >= 0) {
      // close() is deliberately not retried on EINTR.  On Linux the
      // descriptor is released even when close() is interrupted, so a
      // second close() could hit a descriptor that another thread has
      // obtained in the meantime.
      close(fd_);
    }
  }
  // Returns the wrapped descriptor (possibly negative).
  int get() const { return fd_; }

 private:
  explicit FileDescriptor(const FileDescriptor&);
  void operator=(const FileDescriptor&);
};
| 390 | |
| 391 // Helper class for reading lines from file. | |
| 392 // | |
| 393 // Note: we don't use ProcMapsIterator since the object is big (it has | |
| 394 // a 5k array member) and uses async-unsafe functions such as sscanf() | |
| 395 // and snprintf(). | |
| 396 class LineReader { | |
| 397 public: | |
| 398 explicit LineReader(int fd, char *buf, int buf_len) : fd_(fd), | |
| 399 buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) { | |
| 400 } | |
| 401 | |
| 402 // Read '\n'-terminated line from file. On success, modify "bol" | |
| 403 // and "eol", then return true. Otherwise, return false. | |
| 404 // | |
| 405 // Note: if the last line doesn't end with '\n', the line will be | |
| 406 // dropped. It's an intentional behavior to make the code simple. | |
| 407 bool ReadLine(const char **bol, const char **eol) { | |
| 408 if (BufferIsEmpty()) { // First time. | |
| 409 const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_); | |
| 410 if (num_bytes <= 0) { // EOF or error. | |
| 411 return false; | |
| 412 } | |
| 413 eod_ = buf_ + num_bytes; | |
| 414 bol_ = buf_; | |
| 415 } else { | |
| 416 bol_ = eol_ + 1; // Advance to the next line in the buffer. | |
| 417 SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". | |
| 418 if (!HasCompleteLine()) { | |
| 419 const int incomplete_line_length = eod_ - bol_; | |
| 420 // Move the trailing incomplete line to the beginning. | |
| 421 memmove(buf_, bol_, incomplete_line_length); | |
| 422 // Read text from file and append it. | |
| 423 char * const append_pos = buf_ + incomplete_line_length; | |
| 424 const int capacity_left = buf_len_ - incomplete_line_length; | |
| 425 const ssize_t num_bytes = ReadPersistent(fd_, append_pos, | |
| 426 capacity_left); | |
| 427 if (num_bytes <= 0) { // EOF or error. | |
| 428 return false; | |
| 429 } | |
| 430 eod_ = append_pos + num_bytes; | |
| 431 bol_ = buf_; | |
| 432 } | |
| 433 } | |
| 434 eol_ = FindLineFeed(); | |
| 435 if (eol_ == NULL) { // '\n' not found. Malformed line. | |
| 436 return false; | |
| 437 } | |
| 438 *eol_ = '\0'; // Replace '\n' with '\0'. | |
| 439 | |
| 440 *bol = bol_; | |
| 441 *eol = eol_; | |
| 442 return true; | |
| 443 } | |
| 444 | |
| 445 // Beginning of line. | |
| 446 const char *bol() { | |
| 447 return bol_; | |
| 448 } | |
| 449 | |
| 450 // End of line. | |
| 451 const char *eol() { | |
| 452 return eol_; | |
| 453 } | |
| 454 | |
| 455 private: | |
| 456 explicit LineReader(const LineReader&); | |
| 457 void operator=(const LineReader&); | |
| 458 | |
| 459 char *FindLineFeed() { | |
| 460 return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_)); | |
| 461 } | |
| 462 | |
| 463 bool BufferIsEmpty() { | |
| 464 return buf_ == eod_; | |
| 465 } | |
| 466 | |
| 467 bool HasCompleteLine() { | |
| 468 return !BufferIsEmpty() && FindLineFeed() != NULL; | |
| 469 } | |
| 470 | |
| 471 const int fd_; | |
| 472 char * const buf_; | |
| 473 const int buf_len_; | |
| 474 char *bol_; | |
| 475 char *eol_; | |
| 476 const char *eod_; // End of data in "buf_". | |
| 477 }; | |
| 478 } // namespace | |
| 479 | |
| 480 // Place the hex number read from "start" into "*hex". The pointer to | |
| 481 // the first non-hex character or "end" is returned. | |
| 482 static char *GetHex(const char *start, const char *end, uint64_t *hex) { | |
| 483 *hex = 0; | |
| 484 const char *p; | |
| 485 for (p = start; p < end; ++p) { | |
| 486 int ch = *p; | |
| 487 if ((ch >= '0' && ch <= '9') || | |
| 488 (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { | |
| 489 *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9); | |
| 490 } else { // Encountered the first non-hex character. | |
| 491 break; | |
| 492 } | |
| 493 } | |
| 494 SAFE_ASSERT(p <= end); | |
| 495 return const_cast<char *>(p); | |
| 496 } | |
| 497 | |
| 498 // Searches for the object file (from /proc/self/maps) that contains | |
| 499 // the specified pc. If found, sets |start_address| to the start address | |
| 500 // of where this object file is mapped in memory, sets the module base | |
| 501 // address into |base_address|, copies the object file name into | |
| 502 // |out_file_name|, and attempts to open the object file. If the object | |
| 503 // file is opened successfully, returns the file descriptor. Otherwise, | |
| 504 // returns -1. |out_file_name_size| is the size of the file name buffer | |
| 505 // (including the null-terminator). | |
| 506 static ATTRIBUTE_NOINLINE int | |
| 507 OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, | |
| 508 uint64_t &start_address, | |
| 509 uint64_t &base_address, | |
| 510 char *out_file_name, | |
| 511 int out_file_name_size) { | |
| 512 int object_fd; | |
| 513 | |
| 514 // Open /proc/self/maps. | |
| 515 int maps_fd; | |
| 516 NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); | |
| 517 FileDescriptor wrapped_maps_fd(maps_fd); | |
| 518 if (wrapped_maps_fd.get() < 0) { | |
| 519 return -1; | |
| 520 } | |
| 521 | |
| 522 // Iterate over maps and look for the map containing the pc. Then | |
| 523 // look into the symbol tables inside. | |
| 524 char buf[1024]; // Big enough for line of sane /proc/self/maps | |
| 525 int num_maps = 0; | |
| 526 LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf)); | |
| 527 while (true) { | |
| 528 num_maps++; | |
| 529 const char *cursor; | |
| 530 const char *eol; | |
| 531 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. | |
| 532 return -1; | |
| 533 } | |
| 534 | |
| 535 // Start parsing line in /proc/self/maps. Here is an example: | |
| 536 // | |
| 537 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat | |
| 538 // | |
| 539 // We want start address (08048000), end address (0804c000), flags | |
| 540 // (r-xp) and file name (/bin/cat). | |
| 541 | |
| 542 // Read start address. | |
| 543 cursor = GetHex(cursor, eol, &start_address); | |
| 544 if (cursor == eol || *cursor != '-') { | |
| 545 return -1; // Malformed line. | |
| 546 } | |
| 547 ++cursor; // Skip '-'. | |
| 548 | |
| 549 // Read end address. | |
| 550 uint64_t end_address; | |
| 551 cursor = GetHex(cursor, eol, &end_address); | |
| 552 if (cursor == eol || *cursor != ' ') { | |
| 553 return -1; // Malformed line. | |
| 554 } | |
| 555 ++cursor; // Skip ' '. | |
| 556 | |
| 557 // Check start and end addresses. | |
| 558 if (!(start_address <= pc && pc < end_address)) { | |
| 559 continue; // We skip this map. PC isn't in this map. | |
| 560 } | |
| 561 | |
| 562 // Read flags. Skip flags until we encounter a space or eol. | |
| 563 const char * const flags_start = cursor; | |
| 564 while (cursor < eol && *cursor != ' ') { | |
| 565 ++cursor; | |
| 566 } | |
| 567 // We expect at least four letters for flags (ex. "r-xp"). | |
| 568 if (cursor == eol || cursor < flags_start + 4) { | |
| 569 return -1; // Malformed line. | |
| 570 } | |
| 571 | |
| 572 // Check flags. We are only interested in "r-x" maps. | |
| 573 if (memcmp(flags_start, "r-x", 3) != 0) { // Not a "r-x" map. | |
| 574 continue; // We skip this map. | |
| 575 } | |
| 576 ++cursor; // Skip ' '. | |
| 577 | |
| 578 // Read file offset. | |
| 579 uint64_t file_offset; | |
| 580 cursor = GetHex(cursor, eol, &file_offset); | |
| 581 if (cursor == eol || *cursor != ' ') { | |
| 582 return -1; // Malformed line. | |
| 583 } | |
| 584 ++cursor; // Skip ' '. | |
| 585 | |
| 586 // Don't subtract 'start_address' from the first entry: | |
| 587 // * If a binary is compiled w/o -pie, then the first entry in | |
| 588 // process maps is likely the binary itself (all dynamic libs | |
| 589 // are mapped higher in address space). For such a binary, | |
| 590 // instruction offset in binary coincides with the actual | |
| 591 // instruction address in virtual memory (as code section | |
| 592 // is mapped to a fixed memory range). | |
| 593 // * If a binary is compiled with -pie, all the modules are | |
| 594 // mapped high at address space (in particular, higher than | |
| 595 // shadow memory of the tool), so the module can't be the | |
| 596 // first entry. | |
| 597 base_address = ((num_maps == 1) ? 0U : start_address) - file_offset; | |
| 598 | |
| 599 // Skip to file name. "cursor" now points to dev. We need to | |
| 600 // skip at least two spaces for dev and inode. | |
| 601 int num_spaces = 0; | |
| 602 while (cursor < eol) { | |
| 603 if (*cursor == ' ') { | |
| 604 ++num_spaces; | |
| 605 } else if (num_spaces >= 2) { | |
| 606 // The first non-space character after skipping two spaces | |
| 607 // is the beginning of the file name. | |
| 608 break; | |
| 609 } | |
| 610 ++cursor; | |
| 611 } | |
| 612 if (cursor == eol) { | |
| 613 return -1; // Malformed line. | |
| 614 } | |
| 615 | |
| 616 // Finally, "cursor" now points to file name of our interest. | |
| 617 NO_INTR(object_fd = open(cursor, O_RDONLY)); | |
| 618 if (object_fd < 0) { | |
| 619 // Failed to open object file. Copy the object file name to | |
| 620 // |out_file_name|. | |
| 621 strncpy(out_file_name, cursor, out_file_name_size); | |
| 622 // Making sure |out_file_name| is always null-terminated. | |
| 623 out_file_name[out_file_name_size - 1] = '\0'; | |
| 624 return -1; | |
| 625 } | |
| 626 return object_fd; | |
| 627 } | |
| 628 } | |
| 629 | |
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII, so we provide our own.
//
// itoa_r() writes the textual form of "i" in the given "base" (2..16,
// lowercase digits) to "buf", left-padded with '0' to at least "padding"
// digits.  A leading '-' is produced only for negative values in base 10;
// in any other base the value is rendered as its unsigned bit pattern.
// At most "sz" bytes are written, including the terminating NUL.
//
// Returns "buf" on success.  Returns NULL if "sz" is 0, if "base" is out
// of range, or if the output does not fit; in the latter two cases "buf"
// is set to the empty string.
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
  // Without room for even the terminating NUL there is nothing to do.
  if (sz < 1)
    return NULL;

  if (base < 2 || base > 16) {
    buf[0] = '\000';
    return NULL;
  }

  const char *const digits = "0123456789abcdef";
  size_t used = 1;  // Bytes accounted for so far; starts with the NUL.
  char *first_digit = buf;
  uintptr_t magnitude = i;

  // Handle negative numbers (only for base 10).
  if (i < 0 && base == 10) {
    // Equivalent to "magnitude = -i" but without overflowing for the most
    // negative value.
    magnitude = (uintptr_t)(-(i + 1)) + 1;

    // Make sure we can write the '-' character.
    if (++used > sz) {
      buf[0] = '\000';
      return NULL;
    }
    *first_digit++ = '-';
  }

  // Emit digits, least significant first, until both the value and the
  // requested padding are exhausted.  At least one digit is always
  // produced, so zero comes out as "0".
  char *cursor = first_digit;
  for (;;) {
    // Make sure there is still enough space left in our output buffer.
    if (++used > sz) {
      buf[0] = '\000';
      return NULL;
    }

    *cursor++ = digits[magnitude % base];
    magnitude /= base;

    if (padding > 0)
      padding--;

    if (magnitude == 0 && padding == 0)
      break;
  }

  // Terminate the output with a NUL character.
  *cursor = '\000';

  // The digits were generated in reverse order because the number of
  // characters needed is not known up front.  Swap them into place,
  // leaving a possible '-' sign where it is.
  for (char *lo = first_digit, *hi = cursor - 1; lo < hi; ++lo, --hi) {
    const char tmp = *hi;
    *hi = *lo;
    *lo = tmp;
  }
  return buf;
}
| 697 | |
| 698 // Safely appends string |source| to string |dest|. Never writes past the | |
| 699 // buffer size |dest_size| and guarantees that |dest| is null-terminated. | |
| 700 void SafeAppendString(const char* source, char* dest, int dest_size) { | |
| 701 int dest_string_length = strlen(dest); | |
| 702 SAFE_ASSERT(dest_string_length < dest_size); | |
| 703 dest += dest_string_length; | |
| 704 dest_size -= dest_string_length; | |
| 705 strncpy(dest, source, dest_size); | |
| 706 // Making sure |dest| is always null-terminated. | |
| 707 dest[dest_size - 1] = '\0'; | |
| 708 } | |
| 709 | |
| 710 // Converts a 64-bit value into a hex string, and safely appends it to |dest|. | |
| 711 // Never writes past the buffer size |dest_size| and guarantees that |dest| is | |
| 712 // null-terminated. | |
| 713 void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) { | |
| 714 // 64-bit numbers in hex can have up to 16 digits. | |
| 715 char buf[17] = {'\0'}; | |
| 716 SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size); | |
| 717 } | |
| 718 | |
| 719 // The implementation of our symbolization routine. If it | |
| 720 // successfully finds the symbol containing "pc" and obtains the | |
| 721 // symbol name, returns true and write the symbol name to "out". | |
| 722 // Otherwise, returns false. If Callback function is installed via | |
| 723 // InstallSymbolizeCallback(), the function is also called in this function, | |
| 724 // and "out" is used as its output. | |
| 725 // To keep stack consumption low, we would like this function to not | |
| 726 // get inlined. | |
| 727 static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, | |
| 728 int out_size) { | |
| 729 uint64_t pc0 = reinterpret_cast<uintptr_t>(pc); | |
| 730 uint64_t start_address = 0; | |
| 731 uint64_t base_address = 0; | |
| 732 int object_fd = -1; | |
| 733 | |
| 734 if (out_size < 1) { | |
| 735 return false; | |
| 736 } | |
| 737 out[0] = '\0'; | |
| 738 SafeAppendString("(", out, out_size); | |
| 739 | |
| 740 if (g_symbolize_open_object_file_callback) { | |
| 741 object_fd = g_symbolize_open_object_file_callback(pc0, start_address, | |
| 742 base_address, out + 1, | |
| 743 out_size - 1); | |
| 744 } else { | |
| 745 object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address, | |
| 746 base_address, | |
| 747 out + 1, | |
| 748 out_size - 1); | |
| 749 } | |
| 750 | |
| 751 // Check whether a file name was returned. | |
| 752 if (object_fd < 0) { | |
| 753 if (out[1]) { | |
| 754 // The object file containing PC was determined successfully however the | |
| 755 // object file was not opened successfully. This is still considered | |
| 756 // success because the object file name and offset are known and tools | |
| 757 // like asan_symbolize.py can be used for the symbolization. | |
| 758 out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated. | |
| 759 SafeAppendString("+0x", out, out_size); | |
| 760 SafeAppendHexNumber(pc0 - base_address, out, out_size); | |
| 761 SafeAppendString(")", out, out_size); | |
| 762 return true; | |
| 763 } | |
| 764 // Failed to determine the object file containing PC. Bail out. | |
| 765 return false; | |
| 766 } | |
| 767 FileDescriptor wrapped_object_fd(object_fd); | |
| 768 int elf_type = FileGetElfType(wrapped_object_fd.get()); | |
| 769 if (elf_type == -1) { | |
| 770 return false; | |
| 771 } | |
| 772 if (g_symbolize_callback) { | |
| 773 // Run the call back if it's installed. | |
| 774 // Note: relocation (and much of the rest of this code) will be | |
| 775 // wrong for prelinked shared libraries and PIE executables. | |
| 776 uint64 relocation = (elf_type == ET_DYN) ? start_address : 0; | |
| 777 int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(), | |
| 778 pc, out, out_size, | |
| 779 relocation); | |
| 780 if (num_bytes_written > 0) { | |
| 781 out += num_bytes_written; | |
| 782 out_size -= num_bytes_written; | |
| 783 } | |
| 784 } | |
| 785 if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0, | |
| 786 out, out_size, start_address)) { | |
| 787 return false; | |
| 788 } | |
| 789 | |
| 790 // Symbolization succeeded. Now we try to demangle the symbol. | |
| 791 DemangleInplace(out, out_size); | |
| 792 return true; | |
| 793 } | |
| 794 | |
| 795 _END_GOOGLE_NAMESPACE_ | |
| 796 | |
| 797 #elif defined(OS_MACOSX) && defined(HAVE_DLADDR) | |
| 798 | |
| 799 #include <dlfcn.h> | |
| 800 #include <string.h> | |
| 801 | |
| 802 _START_GOOGLE_NAMESPACE_ | |
| 803 | |
| 804 static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, | |
| 805 int out_size) { | |
| 806 Dl_info info; | |
| 807 if (dladdr(pc, &info)) { | |
| 808 if ((int)strlen(info.dli_sname) < out_size) { | |
| 809 strcpy(out, info.dli_sname); | |
| 810 // Symbolization succeeded. Now we try to demangle the symbol. | |
| 811 DemangleInplace(out, out_size); | |
| 812 return true; | |
| 813 } | |
| 814 } | |
| 815 return false; | |
| 816 } | |
| 817 | |
| 818 _END_GOOGLE_NAMESPACE_ | |
| 819 | |
| 820 #else | |
| 821 # error BUG: HAVE_SYMBOLIZE was wrongly set | |
| 822 #endif | |
| 823 | |
| 824 _START_GOOGLE_NAMESPACE_ | |
| 825 | |
| 826 bool Symbolize(void *pc, char *out, int out_size) { | |
| 827 SAFE_ASSERT(out_size >= 0); | |
| 828 return SymbolizeAndDemangle(pc, out, out_size); | |
| 829 } | |
| 830 | |
| 831 _END_GOOGLE_NAMESPACE_ | |
| 832 | |
| 833 #else /* HAVE_SYMBOLIZE */ | |
| 834 | |
| 835 #include <assert.h> | |
| 836 | |
| 837 #include "config.h" | |
| 838 | |
| 839 _START_GOOGLE_NAMESPACE_ | |
| 840 | |
// Fallback for builds without HAVE_SYMBOLIZE: symbolization is not
// available.  Trips an assertion when assertions are enabled and reports
// failure otherwise; none of the arguments is used.
// TODO: Support other environments.
bool Symbolize(void *pc, char *out, int out_size) {
  assert(0);
  return false;
}
| 846 | |
| 847 _END_GOOGLE_NAMESPACE_ | |
| 848 | |
| 849 #endif | |
| OLD | NEW |