OLD | NEW |
(Empty) | |
| 1 // Copyright 2005 Google Inc. All Rights Reserved. |
| 2 // Author: chatham@google.com (Andrew Chatham) |
| 3 // Author: satorux@google.com (Satoru Takabayashi) |
| 4 // |
| 5 // Code for reading in ELF files. |
| 6 // |
| 7 // For information on the ELF format, see |
| 8 // http://www.x86.org/ftp/manuals/tools/elf.pdf |
| 9 // |
| 10 // I also liked: |
| 11 // http://www.caldera.com/developers/gabi/1998-04-29/contents.html |
| 12 // |
| 13 // A note about types: When dealing with the file format, we use types |
| 14 // like Elf32_Word, but in the public interfaces we treat all |
| 15 // addresses as uint64. As a result, we should be able to symbolize |
| 16 // 64-bit binaries from a 32-bit process (which we don't do, |
| 17 // anyway). size_t should therefore be avoided, except where required |
| 18 // by things like mmap(). |
| 19 // |
| 20 // Although most of this code can deal with arbitrary ELF files of |
| 21 // either word size, the public ElfReader interface only examines |
| 22 // files loaded into the current address space, which must all match |
| 23 // __WORDSIZE. This code cannot handle ELF files with a non-native |
| 24 // byte ordering. |
| 25 // |
| 26 // TODO(chatham): It would be nice if we could accomplish this task |
| 27 // without using malloc(), so we could use it as the process is dying. |
| 28 |
| 29 #ifndef _GNU_SOURCE |
| 30 #define _GNU_SOURCE // needed for pread() |
| 31 #endif |
| 32 |
| 33 #include <sys/types.h> |
| 34 #include <sys/stat.h> |
| 35 #include <sys/mman.h> |
| 36 #include <unistd.h> |
| 37 #include <fcntl.h> |
| 38 #include <elf.h> |
| 39 #include <string.h> |
| 40 |
| 41 #include <algorithm> |
| 42 #include <map> |
| 43 #include <string> |
| 44 #include <vector> |
| 45 #include "zlib.h" |
| 46 |
| 47 #include "elf_reader.h" |
| 48 //#include "using_std_string.h" |
| 49 // EM_AARCH64 is not defined by elf.h of GRTE v3 on x86. |
| 50 // TODO(dougkwan): Remove this when v17 is retired. |
| 51 #if !defined(EM_AARCH64) |
| 52 #define EM_AARCH64 183 /* ARM AARCH64 */ |
| 53 #endif |
| 54 |
| 55 // TODO(dthomson): Can be removed once all Java code is using the Google3 |
| 56 // launcher. We need to avoid processing PLT functions as it causes memory |
| 57 // fragmentation in malloc, which is fixed in tcmalloc - and if the Google3 |
| 58 // launcher is used the JVM will then use tcmalloc. b/13735638 |
| 59 //DEFINE_bool(elfreader_process_dynsyms, true, |
| 60 // "Activate PLT function processing"); |
| 61 |
| 62 using std::string; |
| 63 using std::vector; |
| 64 |
| 65 namespace { |
| 66 |
// Bit position, inside an ARM symbol value, of the flag that marks the
// symbol as a Thumb address (the least significant bit).
const int kARMThumbBitOffset = 0;

// Returns |symbol_table_value| with the Thumb marker bit cleared, which
// yields the aligned address the symbol actually refers to.
template <typename T>
T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
  const int thumb_bit = 1 << kARMThumbBitOffset;
  return symbol_table_value & ~thumb_bit;
}
| 75 |
// Section names involved in PLT processing. The two relocation sections
// differ in record layout: ".rel.plt" holds Rel records, ".rela.plt" holds
// Rela records.
const char kElfPLTSectionName[] = ".plt";
const char kElfPLTRelSectionName[] = ".rel.plt";
const char kElfPLTRelaSectionName[] = ".rela.plt";
const char kElfDynSymSectionName[] = ".dynsym";

// Size in bytes of one regular PLT function, per architecture.
const int kX86PLTCodeSize = 0x10;
const int kARMPLTCodeSize = 0xc;
const int kAARCH64PLTCodeSize = 0x10;

// Size in bytes of the special leading PLT0 entry, per architecture.
const int kX86PLT0Size = 0x10;
const int kARMPLT0Size = 0x14;
const int kAARCH64PLT0Size = 0x20;

// Appended to a symbol name to mark the synthesized PLT function symbol.
const char kPLTFunctionSuffix[] = "@plt";
| 92 |
| 93 } // namespace |
| 94 |
| 95 namespace dwarf2reader { |
| 96 |
| 97 template <class ElfArch> class ElfReaderImpl; |
| 98 |
// Elf32 and Elf64 are traits classes. 32-bit and 64-bit ELF files are
// handled by the same code; everything that differs between the two
// formats (structure types, field widths, accessor macros) is captured
// by one of these classes, and the format-specific code in this file is
// templated on it.
class Elf32 {
 public:
  typedef Elf32_Ehdr Ehdr;
  typedef Elf32_Shdr Shdr;
  typedef Elf32_Phdr Phdr;
  typedef Elf32_Word Word;
  typedef Elf32_Sym Sym;
  typedef Elf32_Rel Rel;
  typedef Elf32_Rela Rela;

  // Value expected in the EI_CLASS byte of the ELF identification.
  static const int kElfClass = ELFCLASS32;

  // Binding (e.g. STB_WEAK) encoded in the symbol's st_info.
  static char Bind(const Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_BIND(info);
  }

  // Type (e.g. STT_FUNC) encoded in the symbol's st_info.
  static char Type(const Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_TYPE(info);
  }

  // Symbol table index encoded in a relocation's r_info field.
  static int r_sym(const Word r_info) {
    return ELF32_R_SYM(r_info);
  }
};
| 130 |
| 131 |
// Traits for 64-bit ELF files: the 64-bit structure types plus the
// accessors that decode symbol and relocation info fields.
class Elf64 {
 public:
  typedef Elf64_Ehdr Ehdr;
  typedef Elf64_Shdr Shdr;
  typedef Elf64_Phdr Phdr;
  typedef Elf64_Word Word;
  typedef Elf64_Sym Sym;
  typedef Elf64_Rel Rel;
  typedef Elf64_Rela Rela;

  // Value expected in the EI_CLASS byte of the ELF identification.
  static const int kElfClass = ELFCLASS64;

  // Binding (e.g. STB_WEAK) encoded in the symbol's st_info.
  static char Bind(const Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_BIND(info);
  }

  // Type (e.g. STT_FUNC) encoded in the symbol's st_info.
  static char Type(const Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_TYPE(info);
  }

  // Symbol table index encoded in a relocation's r_info field.
  static int r_sym(const Elf64_Xword r_info) {
    return ELF64_R_SYM(r_info);
  }
};
| 155 |
| 156 |
// ElfSectionReader mmaps a single section of an ELF file ("section" is
// ELF terminology) and exposes its bytes.
//
// Sections are mapped individually, rather than mapping the whole file,
// because unstripped executables can be large enough that virtual
// address space becomes a concern.
//
// contents() is NULL when the section has no bytes in the file
// (SHT_NOBITS or zero size) or when the mapping could not be created.
// Callers must check for NULL before dereferencing.
template<class ElfArch>
class ElfSectionReader {
 public:
  // name:           section name (currently unused; kept for the
  //                 disabled ".zdebug_" decompression path).
  // path:           path of the ELF file (currently unused).
  // fd:             readable descriptor for the ELF file; not owned.
  // section_header: header describing the section to map; copied.
  ElfSectionReader(const char *name, const std::string &path, int fd,
                   const typename ElfArch::Shdr &section_header)
      : contents_aligned_(NULL),
        contents_(NULL),
        header_(section_header) {
    // mmap offsets must be page aligned, so back up to the start of the
    // page that contains the first byte of the section.
    const size_t additional = header_.sh_offset % getpagesize();
    const size_t offset_aligned = header_.sh_offset - additional;
    section_size_ = header_.sh_size;
    size_aligned_ = section_size_ + additional;
    // If the section has been stripped or is empty, do not attempt
    // to process its contents.
    if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
      return;
    void *mapping = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
                         fd, offset_aligned);
    // mmap reports failure with MAP_FAILED, not NULL. Leave both
    // pointers NULL in that case so clients see "no contents" and the
    // destructor does not try to unmap an invalid address.
    if (mapping == MAP_FAILED)
      return;
    contents_aligned_ = mapping;
    // Skip the alignment padding to reach the real start of the section.
    contents_ = reinterpret_cast<char *>(contents_aligned_) +
                (header_.sh_offset - offset_aligned);

    // Check for and handle any compressed contents.
    //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0)
    //  DecompressZlibContents();
    // TODO(saugustine): Add support for proposed elf-section flag
    // "SHF_COMPRESS".
  }

  ~ElfSectionReader() {
    if (contents_aligned_ != NULL)
      munmap(contents_aligned_, size_aligned_);
    else
      delete[] contents_;  // Heap buffer of a decompressed section, or NULL.
  }

  // Return the section header for this section.
  typename ElfArch::Shdr const &header() const { return header_; }

  // Return memory at the given byte offset within this section. No
  // bounds check is performed.
  const char *GetOffset(typename ElfArch::Word bytes) const {
    return contents_ + bytes;
  }

  // Start of the section data, or NULL if nothing is mapped.
  const char *contents() const { return contents_; }
  // Size of the section in bytes, as recorded in the section header.
  size_t section_size() const { return section_size_; }

 private:
  // Page-aligned mapping of the file, or NULL if nothing is mapped.
  void *contents_aligned_;
  // Contents as usable by the client. For non-compressed sections,
  // pointer within contents_aligned_ to where the section data
  // begins; for compressed sections, pointer to the decompressed
  // data.
  char *contents_;
  // Size of the mapping at contents_aligned_.
  size_t size_aligned_;
  // Size of the section contents.
  size_t section_size_;
  const typename ElfArch::Shdr header_;
};
| 228 |
| 229 // An iterator over symbols in a given section. It handles walking |
| 230 // through the entries in the specified section and mapping symbol |
| 231 // entries to their names in the appropriate string table (in |
| 232 // another section). |
| 233 template<class ElfArch> |
| 234 class SymbolIterator { |
| 235 public: |
| 236 SymbolIterator(ElfReaderImpl<ElfArch> *reader, |
| 237 typename ElfArch::Word section_type) |
| 238 : symbol_section_(reader->GetSectionByType(section_type)), |
| 239 string_section_(NULL), |
| 240 num_symbols_in_section_(0), |
| 241 symbol_within_section_(0) { |
| 242 |
| 243 // If this section type doesn't exist, leave |
| 244 // num_symbols_in_section_ as zero, so this iterator is already |
| 245 // done(). |
| 246 if (symbol_section_ != NULL) { |
| 247 num_symbols_in_section_ = symbol_section_->header().sh_size / |
| 248 symbol_section_->header().sh_entsize; |
| 249 |
| 250 // Symbol sections have sh_link set to the section number of |
| 251 // the string section containing the symbol names. |
| 252 string_section_ = reader->GetSection(symbol_section_->header().sh_link); |
| 253 } |
| 254 } |
| 255 |
| 256 // Return true iff we have passed all symbols in this section. |
| 257 bool done() const { |
| 258 return symbol_within_section_ >= num_symbols_in_section_; |
| 259 } |
| 260 |
| 261 // Advance to the next symbol in this section. |
| 262 // REQUIRES: !done() |
| 263 void Next() { ++symbol_within_section_; } |
| 264 |
| 265 // Return a pointer to the current symbol. |
| 266 // REQUIRES: !done() |
| 267 const typename ElfArch::Sym *GetSymbol() const { |
| 268 return reinterpret_cast<const typename ElfArch::Sym*>( |
| 269 symbol_section_->GetOffset(symbol_within_section_ * |
| 270 symbol_section_->header().sh_entsize)); |
| 271 } |
| 272 |
| 273 // Return the name of the current symbol, NULL if it has none. |
| 274 // REQUIRES: !done() |
| 275 const char *GetSymbolName() const { |
| 276 int name_offset = GetSymbol()->st_name; |
| 277 if (name_offset == 0) |
| 278 return NULL; |
| 279 return string_section_->GetOffset(name_offset); |
| 280 } |
| 281 |
| 282 int GetCurrentSymbolIndex() const { |
| 283 return symbol_within_section_; |
| 284 } |
| 285 |
| 286 private: |
| 287 const ElfSectionReader<ElfArch> *const symbol_section_; |
| 288 const ElfSectionReader<ElfArch> *string_section_; |
| 289 int num_symbols_in_section_; |
| 290 int symbol_within_section_; |
| 291 }; |
| 292 |
| 293 |
// Local copy of the helper from strings/strutil.h; per chatham, this
// library should not depend on strings.
//
// Returns true iff |str| ends with |suffix|. An empty suffix matches
// every string.
static inline bool MyHasSuffixString(const std::string& str,
                                     const std::string& suffix) {
  const int str_len = str.length();
  const int suffix_len = suffix.length();
  if (suffix_len > str_len)
    return false;
  const int tail_start = str_len - suffix_len;
  return str.compare(tail_start, suffix_len, suffix) == 0;
}
| 302 |
| 303 |
| 304 // ElfReader loads an ELF binary and can provide information about its |
| 305 // contents. It is most useful for matching addresses to function |
| 306 // names. It does not understand debugging formats (eg dwarf2), so it |
| 307 // can't print line numbers. It takes a path to an elf file and a |
| 308 // readable file descriptor for that file, which it does not assume |
| 309 // ownership of. |
| 310 template<class ElfArch> |
| 311 class ElfReaderImpl { |
| 312 public: |
| 313 explicit ElfReaderImpl(const string &path, int fd) |
| 314 : path_(path), |
| 315 fd_(fd), |
| 316 section_headers_(NULL), |
| 317 program_headers_(NULL), |
| 318 opd_section_(NULL), |
| 319 base_for_text_(0), |
| 320 plts_supported_(false), |
| 321 plt_code_size_(0), |
| 322 plt0_size_(0), |
| 323 visited_relocation_entries_(false) { |
| 324 string error; |
| 325 is_dwp_ = MyHasSuffixString(path, ".dwp"); |
| 326 ParseHeaders(fd, path); |
| 327 // Currently we need some extra information for PowerPC64 binaries |
| 328 // including a way to read the .opd section for function descriptors and a |
| 329 // way to find the linked base for function symbols. |
| 330 if (header_.e_machine == EM_PPC64) { |
| 331 // "opd_section_" must always be checked for NULL before use. |
| 332 opd_section_ = GetSectionInfoByName(".opd", &opd_info_); |
| 333 for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
| 334 const char *name = GetSectionName(section_headers_[k].sh_name); |
| 335 if (strncmp(name, ".text", strlen(".text")) == 0) { |
| 336 base_for_text_ = |
| 337 section_headers_[k].sh_addr - section_headers_[k].sh_offset; |
| 338 break; |
| 339 } |
| 340 } |
| 341 } |
| 342 // Turn on PLTs. |
| 343 if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) { |
| 344 plt_code_size_ = kX86PLTCodeSize; |
| 345 plt0_size_ = kX86PLT0Size; |
| 346 plts_supported_ = true; |
| 347 } else if (header_.e_machine == EM_ARM) { |
| 348 plt_code_size_ = kARMPLTCodeSize; |
| 349 plt0_size_ = kARMPLT0Size; |
| 350 plts_supported_ = true; |
| 351 } else if (header_.e_machine == EM_AARCH64) { |
| 352 plt_code_size_ = kAARCH64PLTCodeSize; |
| 353 plt0_size_ = kAARCH64PLT0Size; |
| 354 plts_supported_ = true; |
| 355 } |
| 356 } |
| 357 |
| 358 ~ElfReaderImpl() { |
| 359 for (unsigned int i = 0u; i < sections_.size(); ++i) |
| 360 delete sections_[i]; |
| 361 delete [] section_headers_; |
| 362 delete [] program_headers_; |
| 363 } |
| 364 |
| 365 // Examine the headers of the file and return whether the file looks |
| 366 // like an ELF file for this architecture. Takes an already-open |
| 367 // file descriptor for the candidate file, reading in the prologue |
| 368 // to see if the ELF file appears to match the current |
| 369 // architecture. If error is non-NULL, it will be set with a reason |
| 370 // in case of failure. |
| 371 static bool IsArchElfFile(int fd, string *error) { |
| 372 unsigned char header[EI_NIDENT]; |
| 373 if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { |
| 374 if (error != NULL) *error = "Could not read header"; |
| 375 return false; |
| 376 } |
| 377 |
| 378 if (memcmp(header, ELFMAG, SELFMAG) != 0) { |
| 379 if (error != NULL) *error = "Missing ELF magic"; |
| 380 return false; |
| 381 } |
| 382 |
| 383 if (header[EI_CLASS] != ElfArch::kElfClass) { |
| 384 if (error != NULL) *error = "Different word size"; |
| 385 return false; |
| 386 } |
| 387 |
| 388 int endian = 0; |
| 389 if (header[EI_DATA] == ELFDATA2LSB) |
| 390 endian = __LITTLE_ENDIAN; |
| 391 else if (header[EI_DATA] == ELFDATA2MSB) |
| 392 endian = __BIG_ENDIAN; |
| 393 if (endian != __BYTE_ORDER) { |
| 394 if (error != NULL) *error = "Different byte order"; |
| 395 return false; |
| 396 } |
| 397 |
| 398 return true; |
| 399 } |
| 400 |
| 401 // Return true if we can use this symbol in Address-to-Symbol map. |
| 402 bool CanUseSymbol(const char *name, const typename ElfArch::Sym *sym) { |
| 403 // For now we only save FUNC and NOTYPE symbols. For now we just |
| 404 // care about functions, but some functions written in assembler |
| 405 // don't have a proper ELF type attached to them, so we store |
| 406 // NOTYPE symbols as well. The remaining significant type is |
| 407 // OBJECT (eg global variables), which represent about 25% of |
| 408 // the symbols in a typical google3 binary. |
| 409 if (ElfArch::Type(sym) != STT_FUNC && |
| 410 ElfArch::Type(sym) != STT_NOTYPE) { |
| 411 return false; |
| 412 } |
| 413 |
| 414 // Target specific filtering. |
| 415 switch (header_.e_machine) { |
| 416 case EM_AARCH64: |
| 417 case EM_ARM: |
| 418 // Filter out '$x' special local symbols used by tools |
| 419 return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL; |
| 420 case EM_X86_64: |
| 421 // Filter out read-only constants like .LC123. |
| 422 return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL; |
| 423 default: |
| 424 return true; |
| 425 } |
| 426 } |
| 427 |
| 428 // Iterate over the symbols in a section, either SHT_DYNSYM or |
| 429 // SHT_SYMTAB. Add all symbols to the given SymbolMap. |
| 430 /* |
| 431 void GetSymbolPositions(SymbolMap *symbols, |
| 432 typename ElfArch::Word section_type, |
| 433 uint64 mem_offset, |
| 434 uint64 file_offset) { |
| 435 // This map is used to filter out "nested" functions. |
| 436 // See comment below. |
| 437 AddrToSymMap addr_to_sym_map; |
| 438 for (SymbolIterator<ElfArch> it(this, section_type); |
| 439 !it.done(); it.Next()) { |
| 440 const char *name = it.GetSymbolName(); |
| 441 if (name == NULL) |
| 442 continue; |
| 443 const typename ElfArch::Sym *sym = it.GetSymbol(); |
| 444 if (CanUseSymbol(name, sym)) { |
| 445 const int sec = sym->st_shndx; |
| 446 |
| 447 // We don't support special section indices. The most common |
| 448 // is SHN_ABS, for absolute symbols used deep in the bowels of |
| 449 // glibc. Also ignore any undefined symbols. |
| 450 if (sec == SHN_UNDEF || |
| 451 (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) { |
| 452 continue; |
| 453 } |
| 454 |
| 455 const typename ElfArch::Shdr& hdr = section_headers_[sec]; |
| 456 |
| 457 // Adjust for difference between where we expected to mmap |
| 458 // this section, and where it was actually mmapped. |
| 459 const int64 expected_base = hdr.sh_addr - hdr.sh_offset; |
| 460 const int64 real_base = mem_offset - file_offset; |
| 461 const int64 adjust = real_base - expected_base; |
| 462 |
| 463 uint64 start = sym->st_value + adjust; |
| 464 |
| 465 // Adjust function symbols for PowerPC64 by dereferencing and adjusting |
| 466 // the function descriptor to get the function address. |
| 467 if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) { |
| 468 const uint64 opd_addr = |
| 469 AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); |
| 470 // Only adjust the returned value if the function address was found. |
| 471 if (opd_addr != sym->st_value) { |
| 472 const int64 adjust_function_symbols = |
| 473 real_base - base_for_text_; |
| 474 start = opd_addr + adjust_function_symbols; |
| 475 } |
| 476 } |
| 477 |
| 478 addr_to_sym_map.push_back(std::make_pair(start, sym)); |
| 479 } |
| 480 } |
| 481 std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter); |
| 482 addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(), |
| 483 addr_to_sym_map.end(), &AddrToSymEquals), |
| 484 addr_to_sym_map.end()); |
| 485 |
| 486 // Squeeze out any "nested functions". |
| 487 // Nested functions are not allowed in C, but libc plays tricks. |
| 488 // |
| 489 // For example, here is disassembly of /lib64/tls/libc-2.3.5.so: |
| 490 // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip) |
| 491 // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27> |
| 492 // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax |
| 493 // 0x00000000000aa390 <__read_nocancel+7>: syscall |
| 494 // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax |
| 495 // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111> |
| 496 // 0x00000000000aa39a <__read_nocancel+17>: retq |
| 497 // 0x00000000000aa39b <read+27>: sub $0x28,%rsp |
| 498 // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp) |
| 499 // ... |
| 500 // Without removing __read_nocancel, symbolizer will return NULL |
| 501 // given e.g. 0xaa39f (because the lower bound is __read_nocancel, |
| 502 // but 0xaa39f is beyond its end. |
| 503 if (addr_to_sym_map.empty()) { |
| 504 return; |
| 505 } |
| 506 const ElfSectionReader<ElfArch> *const symbol_section = |
| 507 this->GetSectionByType(section_type); |
| 508 const ElfSectionReader<ElfArch> *const string_section = |
| 509 this->GetSection(symbol_section->header().sh_link); |
| 510 |
| 511 typename AddrToSymMap::iterator curr = addr_to_sym_map.begin(); |
| 512 // Always insert the first symbol. |
| 513 symbols->AddSymbol(string_section->GetOffset(curr->second->st_name), |
| 514 curr->first, curr->second->st_size); |
| 515 typename AddrToSymMap::iterator prev = curr++; |
| 516 for (; curr != addr_to_sym_map.end(); ++curr) { |
| 517 const uint64 prev_addr = prev->first; |
| 518 const uint64 curr_addr = curr->first; |
| 519 const typename ElfArch::Sym *const prev_sym = prev->second; |
| 520 const typename ElfArch::Sym *const curr_sym = curr->second; |
| 521 if (prev_addr + prev_sym->st_size <= curr_addr || |
| 522 // The next condition is true if two symbols overlap like this: |
| 523 // |
| 524 // Previous symbol |----------------------------| |
| 525 // Current symbol |-------------------------------| |
| 526 // |
| 527 // These symbols are not found in google3 codebase, but in |
| 528 // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so. |
| 529 // |
| 530 // 0619e040 00000046 t CardTableModRefBS::write_region_work() |
| 531 // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work() |
| 532 // |
| 533 // We allow overlapped symbols rather than ignore these. |
| 534 // Due to the way SymbolMap::GetSymbolAtPosition() works, |
| 535 // lookup for any address in [curr_addr, curr_addr + its size) |
| 536 // (e.g. 0619e071) will produce the current symbol, |
| 537 // which is the desired outcome. |
| 538 prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) { |
| 539 const char *name = string_section->GetOffset(curr_sym->st_name); |
| 540 symbols->AddSymbol(name, curr_addr, curr_sym->st_size); |
| 541 prev = curr; |
| 542 } else { |
| 543 // Current symbol is "nested" inside previous one like this: |
| 544 // |
| 545 // Previous symbol |----------------------------| |
| 546 // Current symbol |---------------------| |
| 547 // |
| 548 // This happens within glibc, e.g. __read_nocancel is nested |
| 549 // "inside" __read. Ignore "inner" symbol. |
| 550 //DCHECK_LE(curr_addr + curr_sym->st_size, |
| 551 // prev_addr + prev_sym->st_size); |
| 552 ; |
| 553 } |
| 554 } |
| 555 } |
| 556 */ |
| 557 |
| 558 void VisitSymbols(typename ElfArch::Word section_type, |
| 559 ElfReader::SymbolSink *sink) { |
| 560 VisitSymbols(section_type, sink, -1, -1, false); |
| 561 } |
| 562 |
| 563 void VisitSymbols(typename ElfArch::Word section_type, |
| 564 ElfReader::SymbolSink *sink, |
| 565 int symbol_binding, |
| 566 int symbol_type, |
| 567 bool get_raw_symbol_values) { |
| 568 for (SymbolIterator<ElfArch> it(this, section_type); |
| 569 !it.done(); it.Next()) { |
| 570 const char *name = it.GetSymbolName(); |
| 571 if (!name) continue; |
| 572 const typename ElfArch::Sym *sym = it.GetSymbol(); |
| 573 if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && |
| 574 (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { |
| 575 typename ElfArch::Sym symbol = *sym; |
| 576 // Add a PLT symbol in addition to the main undefined symbol. |
| 577 // Only do this for SHT_DYNSYM, because PLT symbols are dynamic. |
| 578 int symbol_index = it.GetCurrentSymbolIndex(); |
| 579 // TODO(dthomson): Can be removed once all Java code is using the |
| 580 // Google3 launcher. |
| 581 if (section_type == SHT_DYNSYM && |
| 582 static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size(
) && |
| 583 symbols_plt_offsets_[symbol_index] != 0) { |
| 584 string plt_name = string(name) + kPLTFunctionSuffix; |
| 585 if (plt_function_names_[symbol_index].empty()) { |
| 586 plt_function_names_[symbol_index] = plt_name; |
| 587 } else if (plt_function_names_[symbol_index] != plt_name) { |
| 588 ; |
| 589 } |
| 590 sink->AddSymbol(plt_function_names_[symbol_index].c_str(), |
| 591 symbols_plt_offsets_[it.GetCurrentSymbolIndex()], |
| 592 plt_code_size_); |
| 593 } |
| 594 if (!get_raw_symbol_values) |
| 595 AdjustSymbolValue(&symbol); |
| 596 sink->AddSymbol(name, symbol.st_value, symbol.st_size); |
| 597 } |
| 598 } |
| 599 } |
| 600 |
| 601 void VisitRelocationEntries() { |
| 602 if (visited_relocation_entries_) { |
| 603 return; |
| 604 } |
| 605 visited_relocation_entries_ = true; |
| 606 |
| 607 if (!plts_supported_) { |
| 608 return; |
| 609 } |
| 610 // First determine if PLTs exist. If not, then there is nothing to do. |
| 611 ElfReader::SectionInfo plt_section_info; |
| 612 const char* plt_section = |
| 613 GetSectionInfoByName(kElfPLTSectionName, &plt_section_info); |
| 614 if (!plt_section) { |
| 615 return; |
| 616 } |
| 617 if (plt_section_info.size == 0) { |
| 618 return; |
| 619 } |
| 620 |
| 621 // The PLTs could be referenced by either a Rel or Rela (Rel with Addend) |
| 622 // section. |
| 623 ElfReader::SectionInfo rel_section_info; |
| 624 ElfReader::SectionInfo rela_section_info; |
| 625 const char* rel_section = |
| 626 GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info); |
| 627 const char* rela_section = |
| 628 GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info); |
| 629 |
| 630 const typename ElfArch::Rel* rel = |
| 631 reinterpret_cast<const typename ElfArch::Rel*>(rel_section); |
| 632 const typename ElfArch::Rela* rela = |
| 633 reinterpret_cast<const typename ElfArch::Rela*>(rela_section); |
| 634 |
| 635 if (!rel_section && !rela_section) { |
| 636 return; |
| 637 } |
| 638 |
| 639 // Use either Rel or Rela section, depending on which one exists. |
| 640 size_t section_size = rel_section ? rel_section_info.size |
| 641 : rela_section_info.size; |
| 642 size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel) |
| 643 : sizeof(typename ElfArch::Rela); |
| 644 |
| 645 // Determine the number of entries in the dynamic symbol table. |
| 646 ElfReader::SectionInfo dynsym_section_info; |
| 647 const char* dynsym_section = |
| 648 GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info); |
| 649 // The dynsym section might not exist, or it might be empty. In either case |
| 650 // there is nothing to be done so return. |
| 651 if (!dynsym_section || dynsym_section_info.size == 0) { |
| 652 return; |
| 653 } |
| 654 size_t num_dynamic_symbols = |
| 655 dynsym_section_info.size / dynsym_section_info.entsize; |
| 656 symbols_plt_offsets_.resize(num_dynamic_symbols, 0); |
| 657 |
| 658 // TODO(dthomson): Can be removed once all Java code is using the |
| 659 // Google3 launcher. |
| 660 // Make storage room for PLT function name strings. |
| 661 plt_function_names_.resize(num_dynamic_symbols); |
| 662 |
| 663 for (size_t i = 0; i < section_size / entry_size; ++i) { |
| 664 // Determine symbol index from the |r_info| field. |
| 665 int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info |
| 666 : rela[i].r_info); |
| 667 if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) { |
| 668 continue; |
| 669 } |
| 670 symbols_plt_offsets_[sym_index] = |
| 671 plt_section_info.addr + plt0_size_ + i * plt_code_size_; |
| 672 } |
| 673 } |
| 674 |
| 675 // Return an ElfSectionReader for the first section of the given |
| 676 // type by iterating through all section headers. Returns NULL if |
| 677 // the section type is not found. |
| 678 const ElfSectionReader<ElfArch> *GetSectionByType( |
| 679 typename ElfArch::Word section_type) { |
| 680 for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
| 681 if (section_headers_[k].sh_type == section_type) { |
| 682 return GetSection(k); |
| 683 } |
| 684 } |
| 685 return NULL; |
| 686 } |
| 687 |
| 688 // Return the name of section "shndx". Returns NULL if the section |
| 689 // is not found. |
| 690 const char *GetSectionNameByIndex(int shndx) { |
| 691 return GetSectionName(section_headers_[shndx].sh_name); |
| 692 } |
| 693 |
| 694 // Return a pointer to section "shndx", and store the size in |
| 695 // "size". Returns NULL if the section is not found. |
| 696 const char *GetSectionContentsByIndex(int shndx, size_t *size) { |
| 697 const ElfSectionReader<ElfArch> *section = GetSection(shndx); |
| 698 if (section != NULL) { |
| 699 *size = section->section_size(); |
| 700 return section->contents(); |
| 701 } |
| 702 return NULL; |
| 703 } |
| 704 |
| 705 // Return a pointer to the first section of the given name by |
| 706 // iterating through all section headers, and store the size in |
| 707 // "size". Returns NULL if the section name is not found. |
| 708 const char *GetSectionContentsByName(const string §ion_name, |
| 709 size_t *size) { |
| 710 for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
| 711 // When searching for sections in a .dwp file, the sections |
| 712 // we're looking for will always be at the end of the section |
| 713 // table, so reverse the direction of iteration. |
| 714 int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; |
| 715 const char *name = GetSectionName(section_headers_[shndx].sh_name); |
| 716 if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { |
| 717 const ElfSectionReader<ElfArch> *section = GetSection(shndx); |
| 718 if (section == NULL) { |
| 719 return NULL; |
| 720 } else { |
| 721 *size = section->section_size(); |
| 722 return section->contents(); |
| 723 } |
| 724 } |
| 725 } |
| 726 return NULL; |
| 727 } |
| 728 |
| 729 // This is like GetSectionContentsByName() but it returns a lot of extra |
| 730 // information about the section. |
| 731 const char *GetSectionInfoByName(const string §ion_name, |
| 732 ElfReader::SectionInfo *info) { |
| 733 for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
| 734 // When searching for sections in a .dwp file, the sections |
| 735 // we're looking for will always be at the end of the section |
| 736 // table, so reverse the direction of iteration. |
| 737 int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; |
| 738 const char *name = GetSectionName(section_headers_[shndx].sh_name); |
| 739 if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { |
| 740 const ElfSectionReader<ElfArch> *section = GetSection(shndx); |
| 741 if (section == NULL) { |
| 742 return NULL; |
| 743 } else { |
| 744 info->type = section->header().sh_type; |
| 745 info->flags = section->header().sh_flags; |
| 746 info->addr = section->header().sh_addr; |
| 747 info->offset = section->header().sh_offset; |
| 748 info->size = section->header().sh_size; |
| 749 info->link = section->header().sh_link; |
| 750 info->info = section->header().sh_info; |
| 751 info->addralign = section->header().sh_addralign; |
| 752 info->entsize = section->header().sh_entsize; |
| 753 return section->contents(); |
| 754 } |
| 755 } |
| 756 } |
| 757 return NULL; |
| 758 } |
| 759 |
| 760 // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD |
| 761 // segments are present. This is the address an ELF image was linked |
| 762 // (by static linker) to be loaded at. Usually (but not always) 0 for |
| 763 // shared libraries and position-independent executables. |
| 764 uint64 VaddrOfFirstLoadSegment() const { |
| 765 // Relocatable objects (of type ET_REL) do not have LOAD segments. |
| 766 if (header_.e_type == ET_REL) { |
| 767 return 0; |
| 768 } |
| 769 for (int i = 0; i < GetNumProgramHeaders(); ++i) { |
| 770 if (program_headers_[i].p_type == PT_LOAD) { |
| 771 return program_headers_[i].p_vaddr; |
| 772 } |
| 773 } |
| 774 return 0; |
| 775 } |
| 776 |
| 777 // According to the LSB ("ELF special sections"), sections with debug |
| 778 // info are prefixed by ".debug". The names are not specified, but they |
| 779 // look like ".debug_line", ".debug_info", etc. |
| 780 bool HasDebugSections() { |
| 781 // Debug sections are likely to be near the end, so reverse the |
| 782 // direction of iteration. |
| 783 for (int k = GetNumSections() - 1; k >= 0; --k) { |
| 784 const char *name = GetSectionName(section_headers_[k].sh_name); |
| 785 if (strncmp(name, ".debug", strlen(".debug")) == 0) return true; |
| 786 if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true; |
| 787 } |
| 788 return false; |
| 789 } |
| 790 |
| 791 bool IsDynamicSharedObject() const { |
| 792 return header_.e_type == ET_DYN; |
| 793 } |
| 794 |
| 795 // Return the number of sections. |
| 796 uint64_t GetNumSections() const { |
| 797 if (HasManySections()) |
| 798 return first_section_header_.sh_size; |
| 799 return header_.e_shnum; |
| 800 } |
| 801 |
| 802 private: |
| 803 typedef vector<pair<uint64, const typename ElfArch::Sym *> > AddrToSymMap; |
| 804 |
| 805 static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs, |
| 806 const typename AddrToSymMap::value_type& rhs) { |
| 807 return lhs.first < rhs.first; |
| 808 } |
| 809 |
| 810 static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs, |
| 811 const typename AddrToSymMap::value_type& rhs) { |
| 812 return lhs.first == rhs.first; |
| 813 } |
| 814 |
| 815 // Does this ELF file have too many sections to fit in the program header? |
| 816 bool HasManySections() const { |
| 817 return header_.e_shnum == SHN_UNDEF; |
| 818 } |
| 819 |
| 820 // Return the number of program headers. |
| 821 int GetNumProgramHeaders() const { |
| 822 if (HasManySections() && header_.e_phnum == 0xffff && |
| 823 first_section_header_.sh_info != 0) |
| 824 return first_section_header_.sh_info; |
| 825 return header_.e_phnum; |
| 826 } |
| 827 |
| 828 // Return the index of the string table. |
| 829 int GetStringTableIndex() const { |
| 830 if (HasManySections()) { |
| 831 if (header_.e_shstrndx == 0xffff) |
| 832 return first_section_header_.sh_link; |
| 833 else if (header_.e_shstrndx >= GetNumSections()) |
| 834 return 0; |
| 835 } |
| 836 return header_.e_shstrndx; |
| 837 } |
| 838 |
| 839 // Given an offset into the section header string table, return the |
| 840 // section name. |
| 841 const char *GetSectionName(typename ElfArch::Word sh_name) { |
| 842 const ElfSectionReader<ElfArch> *shstrtab = |
| 843 GetSection(GetStringTableIndex()); |
| 844 if (shstrtab != NULL) { |
| 845 return shstrtab->GetOffset(sh_name); |
| 846 } |
| 847 return NULL; |
| 848 } |
| 849 |
| 850 // Return an ElfSectionReader for the given section. The reader will |
| 851 // be freed when this object is destroyed. |
| 852 const ElfSectionReader<ElfArch> *GetSection(int num) { |
| 853 const char *name; |
| 854 // Hard-coding the name for the section-name string table prevents |
| 855 // infinite recursion. |
| 856 if (num == GetStringTableIndex()) |
| 857 name = ".shstrtab"; |
| 858 else |
| 859 name = GetSectionNameByIndex(num); |
| 860 ElfSectionReader<ElfArch> *& reader = sections_[num]; |
| 861 if (reader == NULL) |
| 862 reader = new ElfSectionReader<ElfArch>(name, path_, fd_, |
| 863 section_headers_[num]); |
| 864 return reader; |
| 865 } |
| 866 |
| 867 // Parse out the overall header information from the file and assert |
| 868 // that it looks sane. This contains information like the magic |
| 869 // number and target architecture. |
| 870 bool ParseHeaders(int fd, const string &path) { |
| 871 // Read in the global ELF header. |
| 872 if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) { |
| 873 return false; |
| 874 } |
| 875 |
| 876 // Must be an executable, dynamic shared object or relocatable object |
| 877 if (header_.e_type != ET_EXEC && |
| 878 header_.e_type != ET_DYN && |
| 879 header_.e_type != ET_REL) { |
| 880 return false; |
| 881 } |
| 882 // Need a section header. |
| 883 if (header_.e_shoff == 0) { |
| 884 return false; |
| 885 } |
| 886 |
| 887 if (header_.e_shnum == SHN_UNDEF) { |
| 888 // The number of sections in the program header is only a 16-bit value. In |
| 889 // the event of overflow (greater than SHN_LORESERVE sections), e_shnum |
| 890 // will read SHN_UNDEF and the true number of section header table entries |
| 891 // is found in the sh_size field of the first section header. |
| 892 // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html |
| 893 if (pread(fd, &first_section_header_, sizeof(first_section_header_), |
| 894 header_.e_shoff) != sizeof(first_section_header_)) { |
| 895 return false; |
| 896 } |
| 897 } |
| 898 |
| 899 // Dynamically allocate enough space to store the section headers |
| 900 // and read them out of the file. |
| 901 const int section_headers_size = |
| 902 GetNumSections() * sizeof(*section_headers_); |
| 903 section_headers_ = new typename ElfArch::Shdr[section_headers_size]; |
| 904 if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) != |
| 905 section_headers_size) { |
| 906 return false; |
| 907 } |
| 908 |
| 909 // Dynamically allocate enough space to store the program headers |
| 910 // and read them out of the file. |
| 911 //const int program_headers_size = |
| 912 // GetNumProgramHeaders() * sizeof(*program_headers_); |
| 913 program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()]; |
| 914 |
| 915 // Presize the sections array for efficiency. |
| 916 sections_.resize(GetNumSections(), NULL); |
| 917 return true; |
| 918 } |
| 919 |
| 920 // Given the "value" of a function descriptor return the address of the |
| 921 // function (i.e. the dereferenced value). Otherwise return "value". |
| 922 uint64 AdjustPPC64FunctionDescriptorSymbolValue(uint64 value) { |
| 923 if (opd_section_ != NULL && |
| 924 opd_info_.addr <= value && |
| 925 value < opd_info_.addr + opd_info_.size) { |
| 926 uint64 offset = value - opd_info_.addr; |
| 927 return (*reinterpret_cast<const uint64*>(opd_section_ + offset)); |
| 928 } |
| 929 return value; |
| 930 } |
| 931 |
| 932 void AdjustSymbolValue(typename ElfArch::Sym* sym) { |
| 933 switch (header_.e_machine) { |
| 934 case EM_ARM: |
| 935 // For ARM architecture, if the LSB of the function symbol offset is set, |
| 936 // it indicates a Thumb function. This bit should not be taken literally. |
| 937 // Clear it. |
| 938 if (ElfArch::Type(sym) == STT_FUNC) |
| 939 sym->st_value = AdjustARMThumbSymbolValue(sym->st_value); |
| 940 break; |
| 941 case EM_386: |
| 942 // No adjustment needed for Intel x86 architecture. However, explicitly |
| 943 // define this case as we use it quite often. |
| 944 break; |
| 945 case EM_PPC64: |
| 946 // PowerPC64 currently has function descriptors as part of the ABI. |
| 947 // Function symbols need to be adjusted accordingly. |
| 948 if (ElfArch::Type(sym) == STT_FUNC) |
| 949 sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); |
| 950 break; |
| 951 default: |
| 952 break; |
| 953 } |
| 954 } |
| 955 |
| 956 friend class SymbolIterator<ElfArch>; |
| 957 |
| 958 // The file we're reading. |
| 959 const string path_; |
| 960 // Open file descriptor for path_. Not owned by this object. |
| 961 const int fd_; |
| 962 |
| 963 // The global header of the ELF file. |
| 964 typename ElfArch::Ehdr header_; |
| 965 |
| 966 // The header of the first section. This may be used to supplement the ELF |
| 967 // file header. |
| 968 typename ElfArch::Shdr first_section_header_; |
| 969 |
| 970 // Array of GetNumSections() section headers, allocated when we read |
| 971 // in the global header. |
| 972 typename ElfArch::Shdr *section_headers_; |
| 973 |
| 974 // Array of GetNumProgramHeaders() program headers, allocated when we read |
| 975 // in the global header. |
| 976 typename ElfArch::Phdr *program_headers_; |
| 977 |
| 978 // An array of pointers to ElfSectionReaders. Sections are |
| 979 // mmaped as they're needed and not released until this object is |
| 980 // destroyed. |
| 981 vector<ElfSectionReader<ElfArch>*> sections_; |
| 982 |
| 983 // For PowerPC64 we need to keep track of function descriptors when looking up |
| 984 // values for function symbols. Function descriptors are kept in the |
| 985 // .opd section and are dereferenced to find the function address. |
| 986 ElfReader::SectionInfo opd_info_; |
| 987 const char *opd_section_; // Must be checked for NULL before use. |
| 988 int64 base_for_text_; |
| 989 |
| 990 // Read PLT-related sections for the current architecture. |
| 991 bool plts_supported_; |
| 992 // Code size of each PLT function for the current architecture. |
| 993 size_t plt_code_size_; |
| 994 // Size of the special first entry in the .plt section that calls the runtime |
| 995 // loader resolution routine, and that all other entries jump to when doing |
| 996 // lazy symbol binding. |
| 997 size_t plt0_size_; |
| 998 |
| 999 // Maps a dynamic symbol index to a PLT offset. |
| 1000 // The vector entry index is the dynamic symbol index. |
| 1001 std::vector<uint64> symbols_plt_offsets_; |
| 1002 |
| 1003 // Container for PLT function name strings. These strings are passed by |
| 1004 // reference to SymbolSink::AddSymbol() so they need to be stored somewhere. |
| 1005 std::vector<string> plt_function_names_; |
| 1006 |
| 1007 bool visited_relocation_entries_; |
| 1008 |
| 1009 // True if this is a .dwp file. |
| 1010 bool is_dwp_; |
| 1011 }; |
| 1012 |
| 1013 ElfReader::ElfReader(const string &path) |
| 1014 : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { |
| 1015 // linux 2.6.XX kernel can show deleted files like this: |
| 1016 // /var/run/nscd/dbYLJYaE (deleted) |
| 1017 // and the kernel-supplied vdso and vsyscall mappings like this: |
| 1018 // [vdso] |
| 1019 // [vsyscall] |
| 1020 if (MyHasSuffixString(path, " (deleted)")) |
| 1021 return; |
| 1022 if (path == "[vdso]") |
| 1023 return; |
| 1024 if (path == "[vsyscall]") |
| 1025 return; |
| 1026 |
| 1027 fd_ = open(path.c_str(), O_RDONLY); |
| 1028 } |
| 1029 |
| 1030 ElfReader::~ElfReader() { |
| 1031 if (fd_ != -1) |
| 1032 close(fd_); |
| 1033 if (impl32_ != NULL) |
| 1034 delete impl32_; |
| 1035 if (impl64_ != NULL) |
| 1036 delete impl64_; |
| 1037 } |
| 1038 |
| 1039 |
| 1040 // The only word-size specific part of this file is IsNativeElfFile(). |
| 1041 #if __WORDSIZE == 32 |
| 1042 #define NATIVE_ELF_ARCH Elf32 |
| 1043 #elif __WORDSIZE == 64 |
| 1044 #define NATIVE_ELF_ARCH Elf64 |
| 1045 #else |
| 1046 #error "Invalid word size" |
| 1047 #endif |
| 1048 |
| 1049 template <typename ElfArch> |
| 1050 static bool IsElfFile(const int fd, const string &path) { |
| 1051 if (fd < 0) |
| 1052 return false; |
| 1053 if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) { |
| 1054 // No error message here. IsElfFile gets called many times. |
| 1055 return false; |
| 1056 } |
| 1057 return true; |
| 1058 } |
| 1059 |
| 1060 bool ElfReader::IsNativeElfFile() const { |
| 1061 return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_); |
| 1062 } |
| 1063 |
| 1064 bool ElfReader::IsElf32File() const { |
| 1065 return IsElfFile<Elf32>(fd_, path_); |
| 1066 } |
| 1067 |
| 1068 bool ElfReader::IsElf64File() const { |
| 1069 return IsElfFile<Elf64>(fd_, path_); |
| 1070 } |
| 1071 |
| 1072 /* |
| 1073 void ElfReader::AddSymbols(SymbolMap *symbols, |
| 1074 uint64 mem_offset, uint64 file_offset, |
| 1075 uint64 length) { |
| 1076 if (fd_ < 0) |
| 1077 return; |
| 1078 // TODO(chatham): Actually use the information about file offset and |
| 1079 // the length of the mapped section. On some machines the data |
| 1080 // section gets mapped as executable, and we'll end up reading the |
| 1081 // file twice and getting some of the offsets wrong. |
| 1082 if (IsElf32File()) { |
| 1083 GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB, |
| 1084 mem_offset, file_offset); |
| 1085 GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM, |
| 1086 mem_offset, file_offset); |
| 1087 } else if (IsElf64File()) { |
| 1088 GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB, |
| 1089 mem_offset, file_offset); |
| 1090 GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM, |
| 1091 mem_offset, file_offset); |
| 1092 } |
| 1093 } |
| 1094 */ |
| 1095 |
| 1096 void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) { |
| 1097 VisitSymbols(sink, -1, -1); |
| 1098 } |
| 1099 |
| 1100 void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, |
| 1101 int symbol_binding, |
| 1102 int symbol_type) { |
| 1103 VisitSymbols(sink, symbol_binding, symbol_type, false); |
| 1104 } |
| 1105 |
| 1106 void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink, |
| 1107 int symbol_binding, |
| 1108 int symbol_type, |
| 1109 bool get_raw_symbol_values) { |
| 1110 if (IsElf32File()) { |
| 1111 GetImpl32()->VisitRelocationEntries(); |
| 1112 GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, |
| 1113 get_raw_symbol_values); |
| 1114 GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, |
| 1115 get_raw_symbol_values); |
| 1116 } else if (IsElf64File()) { |
| 1117 GetImpl64()->VisitRelocationEntries(); |
| 1118 GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, |
| 1119 get_raw_symbol_values); |
| 1120 GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, |
| 1121 get_raw_symbol_values); |
| 1122 } |
| 1123 } |
| 1124 |
| 1125 uint64 ElfReader::VaddrOfFirstLoadSegment() { |
| 1126 if (IsElf32File()) { |
| 1127 return GetImpl32()->VaddrOfFirstLoadSegment(); |
| 1128 } else if (IsElf64File()) { |
| 1129 return GetImpl64()->VaddrOfFirstLoadSegment(); |
| 1130 } else { |
| 1131 return 0; |
| 1132 } |
| 1133 } |
| 1134 |
| 1135 const char *ElfReader::GetSectionName(int shndx) { |
| 1136 if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return
NULL; |
| 1137 if (IsElf32File()) { |
| 1138 return GetImpl32()->GetSectionNameByIndex(shndx); |
| 1139 } else if (IsElf64File()) { |
| 1140 return GetImpl64()->GetSectionNameByIndex(shndx); |
| 1141 } else { |
| 1142 return NULL; |
| 1143 } |
| 1144 } |
| 1145 |
| 1146 uint64 ElfReader::GetNumSections() { |
| 1147 if (IsElf32File()) { |
| 1148 return GetImpl32()->GetNumSections(); |
| 1149 } else if (IsElf64File()) { |
| 1150 return GetImpl64()->GetNumSections(); |
| 1151 } else { |
| 1152 return 0; |
| 1153 } |
| 1154 } |
| 1155 |
| 1156 const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) { |
| 1157 if (IsElf32File()) { |
| 1158 return GetImpl32()->GetSectionContentsByIndex(shndx, size); |
| 1159 } else if (IsElf64File()) { |
| 1160 return GetImpl64()->GetSectionContentsByIndex(shndx, size); |
| 1161 } else { |
| 1162 return NULL; |
| 1163 } |
| 1164 } |
| 1165 |
| 1166 const char *ElfReader::GetSectionByName(const string §ion_name, |
| 1167 size_t *size) { |
| 1168 if (IsElf32File()) { |
| 1169 return GetImpl32()->GetSectionContentsByName(section_name, size); |
| 1170 } else if (IsElf64File()) { |
| 1171 return GetImpl64()->GetSectionContentsByName(section_name, size); |
| 1172 } else { |
| 1173 return NULL; |
| 1174 } |
| 1175 } |
| 1176 |
| 1177 const char *ElfReader::GetSectionInfoByName(const string §ion_name, |
| 1178 SectionInfo *info) { |
| 1179 if (IsElf32File()) { |
| 1180 return GetImpl32()->GetSectionInfoByName(section_name, info); |
| 1181 } else if (IsElf64File()) { |
| 1182 return GetImpl64()->GetSectionInfoByName(section_name, info); |
| 1183 } else { |
| 1184 return NULL; |
| 1185 } |
| 1186 } |
| 1187 |
| 1188 bool ElfReader::SectionNamesMatch(const string &name, const string &sh_name) { |
| 1189 if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) { |
| 1190 const string name_suffix(name, strlen(".debug_")); |
| 1191 const string sh_name_suffix(sh_name, strlen(".zdebug_")); |
| 1192 return name_suffix == sh_name_suffix; |
| 1193 } |
| 1194 return name == sh_name; |
| 1195 } |
| 1196 |
| 1197 bool ElfReader::IsDynamicSharedObject() { |
| 1198 if (IsElf32File()) { |
| 1199 return GetImpl32()->IsDynamicSharedObject(); |
| 1200 } else if (IsElf64File()) { |
| 1201 return GetImpl64()->IsDynamicSharedObject(); |
| 1202 } else { |
| 1203 return false; |
| 1204 } |
| 1205 } |
| 1206 |
| 1207 ElfReaderImpl<Elf32> *ElfReader::GetImpl32() { |
| 1208 if (impl32_ == NULL) { |
| 1209 impl32_ = new ElfReaderImpl<Elf32>(path_, fd_); |
| 1210 } |
| 1211 return impl32_; |
| 1212 } |
| 1213 |
| 1214 ElfReaderImpl<Elf64> *ElfReader::GetImpl64() { |
| 1215 if (impl64_ == NULL) { |
| 1216 impl64_ = new ElfReaderImpl<Elf64>(path_, fd_); |
| 1217 } |
| 1218 return impl64_; |
| 1219 } |
| 1220 |
| 1221 // Return true if file is an ELF binary of ElfArch, with unstripped |
| 1222 // debug info (debug_only=true) or symbol table (debug_only=false). |
| 1223 // Otherwise, return false. |
| 1224 template <typename ElfArch> |
| 1225 static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd, |
| 1226 bool debug_only) { |
| 1227 if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false; |
| 1228 ElfReaderImpl<ElfArch> elf_reader(path, fd); |
| 1229 return debug_only ? |
| 1230 elf_reader.HasDebugSections() |
| 1231 : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); |
| 1232 } |
| 1233 |
| 1234 // Helper for the IsNon[Debug]StrippedELFBinary functions. |
| 1235 static bool IsNonStrippedELFBinaryHelper(const string &path, |
| 1236 bool debug_only) { |
| 1237 const int fd = open(path.c_str(), O_RDONLY); |
| 1238 if (fd == -1) { |
| 1239 return false; |
| 1240 } |
| 1241 |
| 1242 if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) || |
| 1243 IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) { |
| 1244 close(fd); |
| 1245 return true; |
| 1246 } |
| 1247 close(fd); |
| 1248 return false; |
| 1249 } |
| 1250 |
| 1251 bool ElfReader::IsNonStrippedELFBinary(const string &path) { |
| 1252 return IsNonStrippedELFBinaryHelper(path, false); |
| 1253 } |
| 1254 |
| 1255 bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) { |
| 1256 return IsNonStrippedELFBinaryHelper(path, true); |
| 1257 } |
| 1258 } // namespace dwarf2reader |
OLD | NEW |