src/common/dwarf/elf_reader.cc - Issue 1884283002: Add debug fission support.

Unified Diff: src/common/dwarf/elf_reader.cc

Issue 1884283002: Add debug fission support. (Closed) Base URL: https://chromium.googlesource.com/breakpad/breakpad.git@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/common/dwarf/elf_reader.cc

diff --git a/src/common/dwarf/elf_reader.cc b/src/common/dwarf/elf_reader.cc

new file mode 100644

index 0000000000000000000000000000000000000000..81683141aed105b50f5088355f736bbb0d8566dd

--- /dev/null

+++ b/src/common/dwarf/elf_reader.cc

@@ -0,0 +1,1258 @@

+// Author: chatham@google.com (Andrew Chatham)

ivanpe 2016/04/27 01:11:45 Where does this file come from? Has it been revie

yunlian 2016/04/27 16:35:29 It comes from Google source tree and it has been r

ivanpe 2016/04/27 18:02:07 Great. Can you please, update the CL description

+// Author: satorux@google.com (Satoru Takabayashi)

+//

+// Code for reading in ELF files.

+//

+// For information on the ELF format, see

+// http://www.x86.org/ftp/manuals/tools/elf.pdf

+//

+// I also liked:

+// http://www.caldera.com/developers/gabi/1998-04-29/contents.html

+//

+// A note about types: When dealing with the file format, we use types

+// like Elf32_Word, but in the public interfaces we treat all

+// addresses as uint64. As a result, we should be able to symbolize

+// 64-bit binaries from a 32-bit process (which we don't do,

+// anyway). size_t should therefore be avoided, except where required

+// by things like mmap().

+//

+// Although most of this code can deal with arbitrary ELF files of

+// either word size, the public ElfReader interface only examines

+// files loaded into the current address space, which must all match

+// __WORDSIZE. This code cannot handle ELF files with a non-native

+// byte ordering.

+//

+// TODO(chatham): It would be nice if we could accomplish this task

+// without using malloc(), so we could use it as the process is dying.

+#ifndef _GNU_SOURCE

+#define _GNU_SOURCE // needed for pread()

+#endif

+#include <sys/types.h>

+#include <sys/stat.h>

+#include <sys/mman.h>

+#include <unistd.h>

+#include <fcntl.h>

+#include <elf.h>

+#include <string.h>

+#include <algorithm>

+#include <map>

+#include <string>

+#include <vector>

+#include "zlib.h"

+#include "elf_reader.h"

+//#include "using_std_string.h"

+// EM_AARCH64 is not defined by elf.h of GRTE v3 on x86.

+// TODO(dougkwan): Remove this when v17 is retired.

+#if !defined(EM_AARCH64)

+#define EM_AARCH64 183 /* ARM AARCH64 */

+#endif

+// TODO(dthomson): Can be removed once all Java code is using the Google3

+// launcher. We need to avoid processing PLT functions as it causes memory

+// fragmentation in malloc, which is fixed in tcmalloc - and if the Google3

+// launcher is used the JVM will then use tcmalloc. b/13735638

+//DEFINE_bool(elfreader_process_dynsyms, true,

+// "Activate PLT function processing");

+using std::string;

+using std::vector;

+namespace {

// The lowest bit of an ARM symbol value is used to indicate a Thumb address.
const int kARMThumbBitOffset = 0;

// Converts an ARM Thumb symbol value to a true aligned address value by
// clearing the Thumb indicator bit. All other bits are returned unchanged.
//
// The mask is built in type T rather than in int, so that no signed int
// mask has to be sign-extended into a (possibly wider) symbol value type.
template <typename T>
T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
  const T thumb_bit = static_cast<T>(1) << kARMThumbBitOffset;
  return symbol_table_value & ~thumb_bit;
}

+// Names of PLT-related sections.

+const char kElfPLTRelSectionName[] = ".rel.plt"; // Use Rel struct.

+const char kElfPLTRelaSectionName[] = ".rela.plt"; // Use Rela struct.

+const char kElfPLTSectionName[] = ".plt"; // Section looked up to find out whether PLTs exist.

+const char kElfDynSymSectionName[] = ".dynsym"; // Dynamic symbol table section.

+const int kX86PLTCodeSize = 0x10; // Size of one x86 PLT function in bytes.

+const int kARMPLTCodeSize = 0xc; // Size of one ARM PLT function in bytes.

+const int kAARCH64PLTCodeSize = 0x10; // Size of one AArch64 PLT function in bytes.

+const int kX86PLT0Size = 0x10; // Size of the special PLT0 entry.

+const int kARMPLT0Size = 0x14; // Size of the special PLT0 entry on ARM.

+const int kAARCH64PLT0Size = 0x20; // Size of the special PLT0 entry on AArch64.

+// Suffix for PLT functions when it needs to be explicitly identified as such.

+const char kPLTFunctionSuffix[] = "@plt";

+} // namespace

+namespace dwarf2reader {

+template <class ElfArch> class ElfReaderImpl;

+// 32-bit and 64-bit ELF files are processed exactly the same, except

+// for various field sizes. Elf32 and Elf64 encompass all of the

+// differences between the two formats, and all format-specific code

+// in this file is templated on one of them.

// Traits bundle for the 32-bit flavour of the ELF format. Code templated on
// an "ElfArch" parameter picks up every 32-bit specific structure and helper
// from here.
class Elf32 {
 public:
  // On-disk structures of a 32-bit ELF file.
  typedef Elf32_Ehdr Ehdr;
  typedef Elf32_Shdr Shdr;
  typedef Elf32_Phdr Phdr;
  typedef Elf32_Word Word;
  typedef Elf32_Sym Sym;
  typedef Elf32_Rel Rel;
  typedef Elf32_Rela Rela;

  // Value the EI_CLASS identification byte must have for this format.
  static const int kElfClass = ELFCLASS32;

  // Binding of the symbol |sym| points at (eg STB_WEAK).
  static char Bind(const Elf32_Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_BIND(info);
  }

  // Type of the symbol |sym| points at (eg STT_FUNC).
  static char Type(const Elf32_Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_TYPE(info);
  }

  // Symbol table index encoded in the r_info field of a relocation.
  static int r_sym(const Elf32_Word r_info) {
    return ELF32_R_SYM(r_info);
  }
};

// Traits bundle for the 64-bit flavour of the ELF format; the counterpart of
// Elf32 with the same set of member names.
class Elf64 {
 public:
  // On-disk structures of a 64-bit ELF file.
  typedef Elf64_Ehdr Ehdr;
  typedef Elf64_Shdr Shdr;
  typedef Elf64_Phdr Phdr;
  typedef Elf64_Word Word;
  typedef Elf64_Sym Sym;
  typedef Elf64_Rel Rel;
  typedef Elf64_Rela Rela;

  // Value the EI_CLASS identification byte must have for this format.
  static const int kElfClass = ELFCLASS64;

  // Binding of the symbol |sym| points at (eg STB_WEAK).
  static char Bind(const Elf64_Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_BIND(info);
  }

  // Type of the symbol |sym| points at (eg STT_FUNC).
  static char Type(const Elf64_Sym *sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_TYPE(info);
  }

  // Symbol table index encoded in the r_info field of a relocation.
  static int r_sym(const Elf64_Xword r_info) {
    return ELF64_R_SYM(r_info);
  }
};

// ElfSectionReader mmaps a section of an ELF file ("section" is ELF
// terminology). The ElfReaderImpl object providing the section header
// must exist for the lifetime of this object.
//
// The motivation for mmaping individual sections of the file is that
// many Google executables are large enough when unstripped that we
// have to worry about running out of virtual address space.
//
// For compressed sections we have no choice but to allocate memory.
//
// contents() is NULL when there is nothing to read: the section is
// SHT_NOBITS, it is empty, or the mapping could not be established.
template<class ElfArch>
class ElfSectionReader {
 public:
  // Maps the section described by |section_header| from the file open as
  // |fd|. |name| and |path| are accepted for interface compatibility and
  // are not used while decompression support is disabled.
  ElfSectionReader(const char *name, const std::string &path, int fd,
                   const typename ElfArch::Shdr &section_header)
      : contents_aligned_(NULL),
        contents_(NULL),
        header_(section_header) {
    // Back up to the beginning of the page we're interested in; mmap()
    // needs a page-aligned file offset.
    const size_t additional = header_.sh_offset % getpagesize();
    const size_t offset_aligned = header_.sh_offset - additional;
    section_size_ = header_.sh_size;
    size_aligned_ = section_size_ + additional;

    // If the section has been stripped or is empty, do not attempt
    // to process its contents.
    if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
      return;

    void *mapping = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
                         fd, offset_aligned);
    // mmap() reports failure with MAP_FAILED, not NULL. Keep both pointers
    // NULL in that case so that nobody reads through, or unmaps, a bogus
    // address.
    if (mapping == MAP_FAILED)
      return;
    contents_aligned_ = mapping;

    // Set where the offset really should begin.
    contents_ = reinterpret_cast<char *>(contents_aligned_) + additional;

    // Check for and handle any compressed contents.
    //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0)
    //  DecompressZlibContents();
    // TODO(saugustine): Add support for proposed elf-section flag
    // "SHF_COMPRESS".
  }

  ~ElfSectionReader() {
    if (contents_aligned_ != NULL)
      munmap(contents_aligned_, size_aligned_);
    else
      delete[] contents_;  // Heap buffer (decompressed data) or NULL.
  }

  // Return the section header for this section.
  typename ElfArch::Shdr const &header() const { return header_; }

  // Return memory at the given offset within this section. No bounds
  // checking is performed.
  const char *GetOffset(typename ElfArch::Word bytes) const {
    return contents_ + bytes;
  }

  // Start of the section data, or NULL if no data is available.
  const char *contents() const { return contents_; }

  // Size of the section in bytes, as recorded in the section header.
  size_t section_size() const { return section_size_; }

 private:
  // page-aligned file contents
  void *contents_aligned_;
  // contents as usable by the client. For non-compressed sections,
  // pointer within contents_aligned_ to where the section data
  // begins; for compressed sections, pointer to the decompressed
  // data.
  char *contents_;
  // size of contents_aligned_
  size_t size_aligned_;
  // size of contents.
  size_t section_size_;
  const typename ElfArch::Shdr header_;
};

+// An iterator over symbols in a given section. It handles walking

+// through the entries in the specified section and mapping symbol

+// entries to their names in the appropriate string table (in

+// another section).

+template<class ElfArch>

+class SymbolIterator {

+ public:

+ SymbolIterator(ElfReaderImpl<ElfArch> *reader,

+ typename ElfArch::Word section_type)

+ : symbol_section_(reader->GetSectionByType(section_type)),

+ string_section_(NULL),

+ num_symbols_in_section_(0),

+ symbol_within_section_(0) {

+ // If this section type doesn't exist, leave

+ // num_symbols_in_section_ as zero, so this iterator is already

+ // done().

+ if (symbol_section_ != NULL) {

+ num_symbols_in_section_ = symbol_section_->header().sh_size /

+ symbol_section_->header().sh_entsize;

+ // Symbol sections have sh_link set to the section number of

+ // the string section containing the symbol names.

+ string_section_ = reader->GetSection(symbol_section_->header().sh_link);

+ }

+ // Return true iff we have passed all symbols in this section.

+ bool done() const {

+ return symbol_within_section_ >= num_symbols_in_section_;

+ }

+ // Advance to the next symbol in this section.

+ // REQUIRES: !done()

+ void Next() { ++symbol_within_section_; }

+ // Return a pointer to the current symbol.

+ // REQUIRES: !done()

+ const typename ElfArch::Sym *GetSymbol() const {

+ return reinterpret_cast<const typename ElfArch::Sym*>(

+ symbol_section_->GetOffset(symbol_within_section_ *

+ symbol_section_->header().sh_entsize));

+ }

+ // Return the name of the current symbol, NULL if it has none.

+ // REQUIRES: !done()

+ const char *GetSymbolName() const {

+ int name_offset = GetSymbol()->st_name;

+ if (name_offset == 0)

+ return NULL;

+ return string_section_->GetOffset(name_offset);

+ }

+ int GetCurrentSymbolIndex() const {

+ return symbol_within_section_;

+ }

+ private:

+ const ElfSectionReader<ElfArch> *const symbol_section_;

+ const ElfSectionReader<ElfArch> *string_section_;

+ int num_symbols_in_section_;

+ int symbol_within_section_;

+};

// Copied from strings/strutil.h. Per chatham,
// this library should not depend on strings.
//
// Returns true iff |str| ends with |suffix|. An empty suffix matches every
// string. Lengths are kept in the string's own size type rather than being
// narrowed to int.
static inline bool MyHasSuffixString(const std::string& str,
                                     const std::string& suffix) {
  const std::string::size_type len = str.length();
  const std::string::size_type suflen = suffix.length();
  return suflen <= len && str.compare(len - suflen, suflen, suffix) == 0;
}

+// ElfReader loads an ELF binary and can provide information about its

+// contents. It is most useful for matching addresses to function

+// names. It does not understand debugging formats (eg dwarf2), so it

+// can't print line numbers. It takes a path to an elf file and a

+// readable file descriptor for that file, which it does not assume

+// ownership of.

+template<class ElfArch>

+class ElfReaderImpl {

+ public:

+ explicit ElfReaderImpl(const string &path, int fd)

+ : path_(path),

+ fd_(fd),

+ section_headers_(NULL),

+ program_headers_(NULL),

+ opd_section_(NULL),

+ base_for_text_(0),

+ plts_supported_(false),

+ plt_code_size_(0),

+ plt0_size_(0),

+ visited_relocation_entries_(false) {

+ string error;

+ is_dwp_ = MyHasSuffixString(path, ".dwp");

+ ParseHeaders(fd, path);

+ // Currently we need some extra information for PowerPC64 binaries

+ // including a way to read the .opd section for function descriptors and a

+ // way to find the linked base for function symbols.

+ if (header_.e_machine == EM_PPC64) {

+ // "opd_section_" must always be checked for NULL before use.

+ opd_section_ = GetSectionInfoByName(".opd", &opd_info_);

+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {

+ const char *name = GetSectionName(section_headers_[k].sh_name);

+ if (strncmp(name, ".text", strlen(".text")) == 0) {

+ base_for_text_ =

+ section_headers_[k].sh_addr - section_headers_[k].sh_offset;

+ break;

+ }

+ // Turn on PLTs.

+ if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) {

+ plt_code_size_ = kX86PLTCodeSize;

+ plt0_size_ = kX86PLT0Size;

+ plts_supported_ = true;

+ } else if (header_.e_machine == EM_ARM) {

+ plt_code_size_ = kARMPLTCodeSize;

+ plt0_size_ = kARMPLT0Size;

+ plts_supported_ = true;

+ } else if (header_.e_machine == EM_AARCH64) {

+ plt_code_size_ = kAARCH64PLTCodeSize;

+ plt0_size_ = kAARCH64PLT0Size;

+ plts_supported_ = true;

+ }

+ ~ElfReaderImpl() {

+ for (unsigned int i = 0u; i < sections_.size(); ++i)

+ delete sections_[i];

+ delete [] section_headers_;

+ delete [] program_headers_;

+ }

+ // Examine the headers of the file and return whether the file looks

+ // like an ELF file for this architecture. Takes an already-open

+ // file descriptor for the candidate file, reading in the prologue

+ // to see if the ELF file appears to match the current

+ // architecture. If error is non-NULL, it will be set with a reason

+ // in case of failure.

+ static bool IsArchElfFile(int fd, string *error) {

+ unsigned char header[EI_NIDENT];

+ if (pread(fd, header, sizeof(header), 0) != sizeof(header)) {

+ if (error != NULL) *error = "Could not read header";

+ return false;

+ }

+ if (memcmp(header, ELFMAG, SELFMAG) != 0) {

+ if (error != NULL) *error = "Missing ELF magic";

+ return false;

+ }

+ if (header[EI_CLASS] != ElfArch::kElfClass) {

+ if (error != NULL) *error = "Different word size";

+ return false;

+ }

+ int endian = 0;

+ if (header[EI_DATA] == ELFDATA2LSB)

+ endian = __LITTLE_ENDIAN;

+ else if (header[EI_DATA] == ELFDATA2MSB)

+ endian = __BIG_ENDIAN;

+ if (endian != __BYTE_ORDER) {

+ if (error != NULL) *error = "Different byte order";

+ return false;

+ }

+ return true;

+ }

+ // Return true if we can use this symbol in Address-to-Symbol map.

+ bool CanUseSymbol(const char *name, const typename ElfArch::Sym *sym) {

+ // For now we only save FUNC and NOTYPE symbols. For now we just

+ // care about functions, but some functions written in assembler

+ // don't have a proper ELF type attached to them, so we store

+ // NOTYPE symbols as well. The remaining significant type is

+ // OBJECT (eg global variables), which represent about 25% of

+ // the symbols in a typical google3 binary.

+ if (ElfArch::Type(sym) != STT_FUNC &&

+ ElfArch::Type(sym) != STT_NOTYPE) {

+ return false;

+ }

+ // Target specific filtering.

+ switch (header_.e_machine) {

+ case EM_AARCH64:

+ case EM_ARM:

+ // Filter out '$x' special local symbols used by tools

+ return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL;

+ case EM_X86_64:

+ // Filter out read-only constants like .LC123.

+ return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL;

+ default:

+ return true;

+ }

+ // Iterate over the symbols in a section, either SHT_DYNSYM or

+ // SHT_SYMTAB. Add all symbols to the given SymbolMap.

+ /*

+ void GetSymbolPositions(SymbolMap *symbols,

+ typename ElfArch::Word section_type,

+ uint64 mem_offset,

+ uint64 file_offset) {

+ // This map is used to filter out "nested" functions.

+ // See comment below.

+ AddrToSymMap addr_to_sym_map;

+ for (SymbolIterator<ElfArch> it(this, section_type);

+ !it.done(); it.Next()) {

+ const char *name = it.GetSymbolName();

+ if (name == NULL)

+ continue;

+ const typename ElfArch::Sym *sym = it.GetSymbol();

+ if (CanUseSymbol(name, sym)) {

+ const int sec = sym->st_shndx;

+ // We don't support special section indices. The most common

+ // is SHN_ABS, for absolute symbols used deep in the bowels of

+ // glibc. Also ignore any undefined symbols.

+ if (sec == SHN_UNDEF ||

+ (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) {

+ continue;

+ }

+ const typename ElfArch::Shdr& hdr = section_headers_[sec];

+ // Adjust for difference between where we expected to mmap

+ // this section, and where it was actually mmapped.

+ const int64 expected_base = hdr.sh_addr - hdr.sh_offset;

+ const int64 real_base = mem_offset - file_offset;

+ const int64 adjust = real_base - expected_base;

+ uint64 start = sym->st_value + adjust;

+ // Adjust function symbols for PowerPC64 by dereferencing and adjusting

+ // the function descriptor to get the function address.

+ if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) {

+ const uint64 opd_addr =

+ AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);

+ // Only adjust the returned value if the function address was found.

+ if (opd_addr != sym->st_value) {

+ const int64 adjust_function_symbols =

+ real_base - base_for_text_;

+ start = opd_addr + adjust_function_symbols;

+ }

+ addr_to_sym_map.push_back(std::make_pair(start, sym));

+ }

+ std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter);

+ addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(),

+ addr_to_sym_map.end(), &AddrToSymEquals),

+ addr_to_sym_map.end());

+ // Squeeze out any "nested functions".

+ // Nested functions are not allowed in C, but libc plays tricks.

+ //

+ // For example, here is disassembly of /lib64/tls/libc-2.3.5.so:

+ // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip)

+ // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27>

+ // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax

+ // 0x00000000000aa390 <__read_nocancel+7>: syscall

+ // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax

+ // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111>

+ // 0x00000000000aa39a <__read_nocancel+17>: retq

+ // 0x00000000000aa39b <read+27>: sub $0x28,%rsp

+ // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp)

+ // ...

+ // Without removing __read_nocancel, symbolizer will return NULL

+ // given e.g. 0xaa39f (because the lower bound is __read_nocancel,

+ // but 0xaa39f is beyond its end).

+ if (addr_to_sym_map.empty()) {

+ return;

+ }

+ const ElfSectionReader<ElfArch> *const symbol_section =

+ this->GetSectionByType(section_type);

+ const ElfSectionReader<ElfArch> *const string_section =

+ this->GetSection(symbol_section->header().sh_link);

+ typename AddrToSymMap::iterator curr = addr_to_sym_map.begin();

+ // Always insert the first symbol.

+ symbols->AddSymbol(string_section->GetOffset(curr->second->st_name),

+ curr->first, curr->second->st_size);

+ typename AddrToSymMap::iterator prev = curr++;

+ for (; curr != addr_to_sym_map.end(); ++curr) {

+ const uint64 prev_addr = prev->first;

+ const uint64 curr_addr = curr->first;

+ const typename ElfArch::Sym *const prev_sym = prev->second;

+ const typename ElfArch::Sym *const curr_sym = curr->second;

+ if (prev_addr + prev_sym->st_size <= curr_addr ||

+ // The next condition is true if two symbols overlap like this:

+ //

+ // Previous symbol |----------------------------|

+ // Current symbol |-------------------------------|

+ //

+ // These symbols are not found in google3 codebase, but in

+ // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so.

+ //

+ // 0619e040 00000046 t CardTableModRefBS::write_region_work()

+ // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work()

+ //

+ // We allow overlapped symbols rather than ignore these.

+ // Due to the way SymbolMap::GetSymbolAtPosition() works,

+ // lookup for any address in [curr_addr, curr_addr + its size)

+ // (e.g. 0619e071) will produce the current symbol,

+ // which is the desired outcome.

+ prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) {

+ const char *name = string_section->GetOffset(curr_sym->st_name);

+ symbols->AddSymbol(name, curr_addr, curr_sym->st_size);

+ prev = curr;

+ } else {

+ // Current symbol is "nested" inside previous one like this:

+ //

+ // Previous symbol |----------------------------|

+ // Current symbol |---------------------|

+ //

+ // This happens within glibc, e.g. __read_nocancel is nested

+ // "inside" __read. Ignore "inner" symbol.

+ //DCHECK_LE(curr_addr + curr_sym->st_size,

+ // prev_addr + prev_sym->st_size);

+ ;

+ }

+*/

+ void VisitSymbols(typename ElfArch::Word section_type,

+ ElfReader::SymbolSink *sink) {

+ VisitSymbols(section_type, sink, -1, -1, false);

+ }

+ void VisitSymbols(typename ElfArch::Word section_type,

+ ElfReader::SymbolSink *sink,

+ int symbol_binding,

+ int symbol_type,

+ bool get_raw_symbol_values) {

+ for (SymbolIterator<ElfArch> it(this, section_type);

+ !it.done(); it.Next()) {

+ const char *name = it.GetSymbolName();

+ if (!name) continue;

+ const typename ElfArch::Sym *sym = it.GetSymbol();

+ if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) &&

+ (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) {

+ typename ElfArch::Sym symbol = *sym;

+ // Add a PLT symbol in addition to the main undefined symbol.

+ // Only do this for SHT_DYNSYM, because PLT symbols are dynamic.

+ int symbol_index = it.GetCurrentSymbolIndex();

+ // TODO(dthomson): Can be removed once all Java code is using the

+ // Google3 launcher.

+ if (section_type == SHT_DYNSYM &&

+ static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() &&

+ symbols_plt_offsets_[symbol_index] != 0) {

+ string plt_name = string(name) + kPLTFunctionSuffix;

+ if (plt_function_names_[symbol_index].empty()) {

+ plt_function_names_[symbol_index] = plt_name;

+ } else if (plt_function_names_[symbol_index] != plt_name) {

+ ;

+ }

+ sink->AddSymbol(plt_function_names_[symbol_index].c_str(),

+ symbols_plt_offsets_[it.GetCurrentSymbolIndex()],

+ plt_code_size_);

+ }

+ if (!get_raw_symbol_values)

+ AdjustSymbolValue(&symbol);

+ sink->AddSymbol(name, symbol.st_value, symbol.st_size);

+ }

+ void VisitRelocationEntries() {

+ if (visited_relocation_entries_) {

+ return;

+ }

+ visited_relocation_entries_ = true;

+ if (!plts_supported_) {

+ return;

+ }

+ // First determine if PLTs exist. If not, then there is nothing to do.

+ ElfReader::SectionInfo plt_section_info;

+ const char* plt_section =

+ GetSectionInfoByName(kElfPLTSectionName, &plt_section_info);

+ if (!plt_section) {

+ return;

+ }

+ if (plt_section_info.size == 0) {

+ return;

+ }

+ // The PLTs could be referenced by either a Rel or Rela (Rel with Addend)

+ // section.

+ ElfReader::SectionInfo rel_section_info;

+ ElfReader::SectionInfo rela_section_info;

+ const char* rel_section =

+ GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info);

+ const char* rela_section =

+ GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info);

+ const typename ElfArch::Rel* rel =

+ reinterpret_cast<const typename ElfArch::Rel*>(rel_section);

+ const typename ElfArch::Rela* rela =

+ reinterpret_cast<const typename ElfArch::Rela*>(rela_section);

+ if (!rel_section && !rela_section) {

+ return;

+ }

+ // Use either Rel or Rela section, depending on which one exists.

+ size_t section_size = rel_section ? rel_section_info.size

+ : rela_section_info.size;

+ size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel)

+ : sizeof(typename ElfArch::Rela);

+ // Determine the number of entries in the dynamic symbol table.

+ ElfReader::SectionInfo dynsym_section_info;

+ const char* dynsym_section =

+ GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info);

+ // The dynsym section might not exist, or it might be empty. In either case

+ // there is nothing to be done so return.

+ if (!dynsym_section || dynsym_section_info.size == 0) {

+ return;

+ }

+ size_t num_dynamic_symbols =

+ dynsym_section_info.size / dynsym_section_info.entsize;

+ symbols_plt_offsets_.resize(num_dynamic_symbols, 0);

+ // TODO(dthomson): Can be removed once all Java code is using the

+ // Google3 launcher.

+ // Make storage room for PLT function name strings.

+ plt_function_names_.resize(num_dynamic_symbols);

+ for (size_t i = 0; i < section_size / entry_size; ++i) {

+ // Determine symbol index from the |r_info| field.

+ int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info

+ : rela[i].r_info);

+ if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) {

+ continue;

+ }

+ symbols_plt_offsets_[sym_index] =

+ plt_section_info.addr + plt0_size_ + i * plt_code_size_;

+ }

+ // Return an ElfSectionReader for the first section of the given

+ // type by iterating through all section headers. Returns NULL if

+ // the section type is not found.

+ const ElfSectionReader<ElfArch> *GetSectionByType(

+ typename ElfArch::Word section_type) {

+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {

+ if (section_headers_[k].sh_type == section_type) {

+ return GetSection(k);

+ }

+ return NULL;

+ }

+ // Return the name of section "shndx". Returns NULL if the section

+ // is not found.

+ const char *GetSectionNameByIndex(int shndx) {

+ return GetSectionName(section_headers_[shndx].sh_name);

+ }

+ // Return a pointer to section "shndx", and store the size in

+ // "size". Returns NULL if the section is not found.

+ const char *GetSectionContentsByIndex(int shndx, size_t *size) {

+ const ElfSectionReader<ElfArch> *section = GetSection(shndx);

+ if (section != NULL) {

+ *size = section->section_size();

+ return section->contents();

+ }

+ return NULL;

+ }

+ // Return a pointer to the first section of the given name by

+ // iterating through all section headers, and store the size in

+ // "size". Returns NULL if the section name is not found.

+ const char *GetSectionContentsByName(const string &section_name,

+ size_t *size) {

+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {

+ // When searching for sections in a .dwp file, the sections

+ // we're looking for will always be at the end of the section

+ // table, so reverse the direction of iteration.

+ int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;

+ const char *name = GetSectionName(section_headers_[shndx].sh_name);

+ if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {

+ const ElfSectionReader<ElfArch> *section = GetSection(shndx);

+ if (section == NULL) {

+ return NULL;

+ } else {

+ *size = section->section_size();

+ return section->contents();

+ }

+ return NULL;

+ }

+ // This is like GetSectionContentsByName() but it returns a lot of extra

+ // information about the section.

+ const char *GetSectionInfoByName(const string &section_name,

+ ElfReader::SectionInfo *info) {

+ for (unsigned int k = 0u; k < GetNumSections(); ++k) {

+ // When searching for sections in a .dwp file, the sections

+ // we're looking for will always be at the end of the section

+ // table, so reverse the direction of iteration.

+ int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;

+ const char *name = GetSectionName(section_headers_[shndx].sh_name);

+ if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {

+ const ElfSectionReader<ElfArch> *section = GetSection(shndx);

+ if (section == NULL) {

+ return NULL;

+ } else {

+ info->type = section->header().sh_type;

+ info->flags = section->header().sh_flags;

+ info->addr = section->header().sh_addr;

+ info->offset = section->header().sh_offset;

+ info->size = section->header().sh_size;

+ info->link = section->header().sh_link;

+ info->info = section->header().sh_info;

+ info->addralign = section->header().sh_addralign;

+ info->entsize = section->header().sh_entsize;

+ return section->contents();

+ }

+ return NULL;

+ }

+ // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD

+ // segments are present. This is the address an ELF image was linked

+ // (by static linker) to be loaded at. Usually (but not always) 0 for

+ // shared libraries and position-independent executables.

+ uint64 VaddrOfFirstLoadSegment() const {

+ // Relocatable objects (of type ET_REL) do not have LOAD segments.

+ if (header_.e_type == ET_REL) {

+ return 0;

+ }

+ for (int i = 0; i < GetNumProgramHeaders(); ++i) {

+ if (program_headers_[i].p_type == PT_LOAD) {

+ return program_headers_[i].p_vaddr;

+ }

+ return 0;

+ }

+ // According to the LSB ("ELF special sections"), sections with debug

+ // info are prefixed by ".debug". The names are not specified, but they

+ // look like ".debug_line", ".debug_info", etc.

+ bool HasDebugSections() {

+ // Debug sections are likely to be near the end, so reverse the

+ // direction of iteration.

+ for (int k = GetNumSections() - 1; k >= 0; --k) {

+ const char *name = GetSectionName(section_headers_[k].sh_name);

+ if (strncmp(name, ".debug", strlen(".debug")) == 0) return true;

+ if (strncmp(name, ".zdebug", strlen(".zdebug")) == 0) return true;

+ }

+ return false;

+ }

+ bool IsDynamicSharedObject() const {

+ return header_.e_type == ET_DYN;

+ }

+ // Return the number of sections.

+ uint64_t GetNumSections() const {

+ if (HasManySections())

+ return first_section_header_.sh_size;

+ return header_.e_shnum;

+ }

+ private:

+ typedef vector<pair<uint64, const typename ElfArch::Sym *> > AddrToSymMap;

+ static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs,

+ const typename AddrToSymMap::value_type& rhs) {

+ return lhs.first < rhs.first;

+ }

+ static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs,

+ const typename AddrToSymMap::value_type& rhs) {

+ return lhs.first == rhs.first;

+ }

+ // Does this ELF file have too many sections to fit in the program header?

+ bool HasManySections() const {

+ return header_.e_shnum == SHN_UNDEF;

+ }

+ // Return the number of program headers.

+ int GetNumProgramHeaders() const {

+ if (HasManySections() && header_.e_phnum == 0xffff &&

+ first_section_header_.sh_info != 0)

+ return first_section_header_.sh_info;

+ return header_.e_phnum;

+ }

+ // Return the index of the string table.

+ int GetStringTableIndex() const {

+ if (HasManySections()) {

+ if (header_.e_shstrndx == 0xffff)

+ return first_section_header_.sh_link;

+ else if (header_.e_shstrndx >= GetNumSections())

+ return 0;

+ }

+ return header_.e_shstrndx;

+ }

+ // Given an offset into the section header string table, return the

+ // section name.

+ const char *GetSectionName(typename ElfArch::Word sh_name) {

+ const ElfSectionReader<ElfArch> *shstrtab =

+ GetSection(GetStringTableIndex());

+ if (shstrtab != NULL) {

+ return shstrtab->GetOffset(sh_name);

+ }

+ return NULL;

+ }

+ // Return an ElfSectionReader for the given section. The reader will

+ // be freed when this object is destroyed.

+ const ElfSectionReader<ElfArch> *GetSection(int num) {

+ const char *name;

+ // Hard-coding the name for the section-name string table prevents

+ // infinite recursion.

+ if (num == GetStringTableIndex())

+ name = ".shstrtab";

+ else

+ name = GetSectionNameByIndex(num);

+ ElfSectionReader<ElfArch> *& reader = sections_[num];

+ if (reader == NULL)

+ reader = new ElfSectionReader<ElfArch>(name, path_, fd_,

+ section_headers_[num]);

+ return reader;

+ }

+ // Parse out the overall header information from the file and assert

+ // that it looks sane. This contains information like the magic

+ // number and target architecture.

+ bool ParseHeaders(int fd, const string &path) {

+ // Read in the global ELF header.

+ if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) {

+ return false;

+ }

+ // Must be an executable, dynamic shared object or relocatable object

+ if (header_.e_type != ET_EXEC &&

+ header_.e_type != ET_DYN &&

+ header_.e_type != ET_REL) {

+ return false;

+ }

+ // Need a section header.

+ if (header_.e_shoff == 0) {

+ return false;

+ }

+ if (header_.e_shnum == SHN_UNDEF) {

+ // The number of sections in the program header is only a 16-bit value. In

+ // the event of overflow (greater than SHN_LORESERVE sections), e_shnum

+ // will read SHN_UNDEF and the true number of section header table entries

+ // is found in the sh_size field of the first section header.

+ // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html

+ if (pread(fd, &first_section_header_, sizeof(first_section_header_),

+ header_.e_shoff) != sizeof(first_section_header_)) {

+ return false;

+ }

+ // Dynamically allocate enough space to store the section headers

+ // and read them out of the file.

+ const int section_headers_size =

+ GetNumSections() * sizeof(*section_headers_);

+ section_headers_ = new typename ElfArch::Shdr[section_headers_size];

+ if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=

+ section_headers_size) {

+ return false;

+ }

+ // Dynamically allocate enough space to store the program headers

+ // and read them out of the file.

+ //const int program_headers_size =

+ // GetNumProgramHeaders() * sizeof(*program_headers_);

+ program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()];

+ // Presize the sections array for efficiency.

+ sections_.resize(GetNumSections(), NULL);

+ return true;

+ }

+ // Given the "value" of a function descriptor return the address of the

+ // function (i.e. the dereferenced value). Otherwise return "value".

+ uint64 AdjustPPC64FunctionDescriptorSymbolValue(uint64 value) {

+ if (opd_section_ != NULL &&

+ opd_info_.addr <= value &&

+ value < opd_info_.addr + opd_info_.size) {

+ uint64 offset = value - opd_info_.addr;

+ return (*reinterpret_cast<const uint64*>(opd_section_ + offset));

+ }

+ return value;

+ }

+ void AdjustSymbolValue(typename ElfArch::Sym* sym) {

+ switch (header_.e_machine) {

+ case EM_ARM:

+ // For ARM architecture, if the LSB of the function symbol offset is set,

+ // it indicates a Thumb function. This bit should not be taken literally.

+ // Clear it.

+ if (ElfArch::Type(sym) == STT_FUNC)

+ sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);

+ break;

+ case EM_386:

+ // No adjustment needed for Intel x86 architecture. However, explicitly

+ // define this case as we use it quite often.

+ break;

+ case EM_PPC64:

+ // PowerPC64 currently has function descriptors as part of the ABI.

+ // Function symbols need to be adjusted accordingly.

+ if (ElfArch::Type(sym) == STT_FUNC)

+ sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);

+ break;

+ default:

+ break;

+ }

+ friend class SymbolIterator<ElfArch>;

+ // The file we're reading.

+ const string path_;

+ // Open file descriptor for path_. Not owned by this object.

+ const int fd_;

+ // The global header of the ELF file.

+ typename ElfArch::Ehdr header_;

+ // The header of the first section. This may be used to supplement the ELF

+ // file header.

+ typename ElfArch::Shdr first_section_header_;

+ // Array of GetNumSections() section headers, allocated when we read

+ // in the global header.

+ typename ElfArch::Shdr *section_headers_;

+ // Array of GetNumProgramHeaders() program headers, allocated when we read

+ // in the global header.

+ typename ElfArch::Phdr *program_headers_;

+ // An array of pointers to ElfSectionReaders. Sections are

+ // mmaped as they're needed and not released until this object is

+ // destroyed.

+ vector<ElfSectionReader<ElfArch>*> sections_;

+ // For PowerPC64 we need to keep track of function descriptors when looking up

+ // values for function symbols. Function descriptors are kept in the

+ // .opd section and are dereferenced to find the function address.

+ ElfReader::SectionInfo opd_info_;

+ const char *opd_section_; // Must be checked for NULL before use.

+ int64 base_for_text_;

+ // Read PLT-related sections for the current architecture.

+ bool plts_supported_;

+ // Code size of each PLT function for the current architecture.

+ size_t plt_code_size_;

+ // Size of the special first entry in the .plt section that calls the runtime

+ // loader resolution routine, and that all other entries jump to when doing

+ // lazy symbol binding.

+ size_t plt0_size_;

+ // Maps a dynamic symbol index to a PLT offset.

+ // The vector entry index is the dynamic symbol index.

+ std::vector<uint64> symbols_plt_offsets_;

+ // Container for PLT function name strings. These strings are passed by

+ // reference to SymbolSink::AddSymbol() so they need to be stored somewhere.

+ std::vector<string> plt_function_names_;

+ bool visited_relocation_entries_;

+ // True if this is a .dwp file.

+ bool is_dwp_;

+};

+ElfReader::ElfReader(const string &path)

+ : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) {

+ // linux 2.6.XX kernel can show deleted files like this:

+ // /var/run/nscd/dbYLJYaE (deleted)

+ // and the kernel-supplied vdso and vsyscall mappings like this:

+ // [vdso]

+ // [vsyscall]

+ if (MyHasSuffixString(path, " (deleted)"))

+ return;

+ if (path == "[vdso]")

+ return;

+ if (path == "[vsyscall]")

+ return;

+ fd_ = open(path.c_str(), O_RDONLY);

+ElfReader::~ElfReader() {

+ if (fd_ != -1)

+ close(fd_);

+ if (impl32_ != NULL)

+ delete impl32_;

+ if (impl64_ != NULL)

+ delete impl64_;

+// The only word-size specific part of this file is IsNativeElfFile().

+#if __WORDSIZE == 32

+#define NATIVE_ELF_ARCH Elf32

+#elif __WORDSIZE == 64

+#define NATIVE_ELF_ARCH Elf64

+#else

+#error "Invalid word size"

+#endif

+template <typename ElfArch>

+static bool IsElfFile(const int fd, const string &path) {

+ if (fd < 0)

+ return false;

+ if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) {

+ // No error message here. IsElfFile gets called many times.

+ return false;

+ }

+ return true;

+bool ElfReader::IsNativeElfFile() const {

+ return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_);

+bool ElfReader::IsElf32File() const {

+ return IsElfFile<Elf32>(fd_, path_);

+bool ElfReader::IsElf64File() const {

+ return IsElfFile<Elf64>(fd_, path_);

+/*

+void ElfReader::AddSymbols(SymbolMap *symbols,

+ uint64 mem_offset, uint64 file_offset,

+ uint64 length) {

+ if (fd_ < 0)

+ return;

+ // TODO(chatham): Actually use the information about file offset and

+ // the length of the mapped section. On some machines the data

+ // section gets mapped as executable, and we'll end up reading the

+ // file twice and getting some of the offsets wrong.

+ if (IsElf32File()) {

+ GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB,

+ mem_offset, file_offset);

+ GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM,

+ mem_offset, file_offset);

+ } else if (IsElf64File()) {

+ GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB,

+ mem_offset, file_offset);

+ GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM,

+ mem_offset, file_offset);

+ }

+*/

+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink) {

+ VisitSymbols(sink, -1, -1);

+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink,

+ int symbol_binding,

+ int symbol_type) {

+ VisitSymbols(sink, symbol_binding, symbol_type, false);

+void ElfReader::VisitSymbols(ElfReader::SymbolSink *sink,

+ int symbol_binding,

+ int symbol_type,

+ bool get_raw_symbol_values) {

+ if (IsElf32File()) {

+ GetImpl32()->VisitRelocationEntries();

+ GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,

+ get_raw_symbol_values);

+ GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,

+ get_raw_symbol_values);

+ } else if (IsElf64File()) {

+ GetImpl64()->VisitRelocationEntries();

+ GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,

+ get_raw_symbol_values);

+ GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,

+ get_raw_symbol_values);

+ }

+uint64 ElfReader::VaddrOfFirstLoadSegment() {

+ if (IsElf32File()) {

+ return GetImpl32()->VaddrOfFirstLoadSegment();

+ } else if (IsElf64File()) {

+ return GetImpl64()->VaddrOfFirstLoadSegment();

+ } else {

+ return 0;

+ }

+const char *ElfReader::GetSectionName(int shndx) {

+ if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL;

+ if (IsElf32File()) {

+ return GetImpl32()->GetSectionNameByIndex(shndx);

+ } else if (IsElf64File()) {

+ return GetImpl64()->GetSectionNameByIndex(shndx);

+ } else {

+ return NULL;

+ }

+uint64 ElfReader::GetNumSections() {

+ if (IsElf32File()) {

+ return GetImpl32()->GetNumSections();

+ } else if (IsElf64File()) {

+ return GetImpl64()->GetNumSections();

+ } else {

+ return 0;

+ }

+const char *ElfReader::GetSectionByIndex(int shndx, size_t *size) {

+ if (IsElf32File()) {

+ return GetImpl32()->GetSectionContentsByIndex(shndx, size);

+ } else if (IsElf64File()) {

+ return GetImpl64()->GetSectionContentsByIndex(shndx, size);

+ } else {

+ return NULL;

+ }

+const char *ElfReader::GetSectionByName(const string &section_name,

+ size_t *size) {

+ if (IsElf32File()) {

+ return GetImpl32()->GetSectionContentsByName(section_name, size);

+ } else if (IsElf64File()) {

+ return GetImpl64()->GetSectionContentsByName(section_name, size);

+ } else {

+ return NULL;

+ }

+const char *ElfReader::GetSectionInfoByName(const string &section_name,

+ SectionInfo *info) {

+ if (IsElf32File()) {

+ return GetImpl32()->GetSectionInfoByName(section_name, info);

+ } else if (IsElf64File()) {

+ return GetImpl64()->GetSectionInfoByName(section_name, info);

+ } else {

+ return NULL;

+ }

+bool ElfReader::SectionNamesMatch(const string &name, const string &sh_name) {

+ if ((name.find(".debug_", 0) == 0) && (sh_name.find(".zdebug_", 0) == 0)) {

+ const string name_suffix(name, strlen(".debug_"));

+ const string sh_name_suffix(sh_name, strlen(".zdebug_"));

+ return name_suffix == sh_name_suffix;

+ }

+ return name == sh_name;

+bool ElfReader::IsDynamicSharedObject() {

+ if (IsElf32File()) {

+ return GetImpl32()->IsDynamicSharedObject();

+ } else if (IsElf64File()) {

+ return GetImpl64()->IsDynamicSharedObject();

+ } else {

+ return false;

+ }

+ElfReaderImpl<Elf32> *ElfReader::GetImpl32() {

+ if (impl32_ == NULL) {

+ impl32_ = new ElfReaderImpl<Elf32>(path_, fd_);

+ }

+ return impl32_;

+ElfReaderImpl<Elf64> *ElfReader::GetImpl64() {

+ if (impl64_ == NULL) {

+ impl64_ = new ElfReaderImpl<Elf64>(path_, fd_);

+ }

+ return impl64_;

+// Return true if file is an ELF binary of ElfArch, with unstripped

+// debug info (debug_only=true) or symbol table (debug_only=false).

+// Otherwise, return false.

+template <typename ElfArch>

+static bool IsNonStrippedELFBinaryImpl(const string &path, const int fd,

+ bool debug_only) {

+ if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false;

+ ElfReaderImpl<ElfArch> elf_reader(path, fd);

+ return debug_only ?

+ elf_reader.HasDebugSections()

+ : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL);

+// Helper for the IsNon[Debug]StrippedELFBinary functions.

+static bool IsNonStrippedELFBinaryHelper(const string &path,

+ bool debug_only) {

+ const int fd = open(path.c_str(), O_RDONLY);

+ if (fd == -1) {

+ return false;

+ }

+ if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) ||

+ IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) {

+ close(fd);

+ return true;

+ }

+ close(fd);

+ return false;

+bool ElfReader::IsNonStrippedELFBinary(const string &path) {

+ return IsNonStrippedELFBinaryHelper(path, false);

+bool ElfReader::IsNonDebugStrippedELFBinary(const string &path) {

+ return IsNonStrippedELFBinaryHelper(path, true);

+} // namespace dwarf2reader

« src/common/dwarf/elf_reader.h ('K') | « src/common/dwarf/elf_reader.h ('k') | src/common/linux/dump_symbols.cc » ('j') | no next file with comments »