Index: courgette/disassembler_elf_32_x86.cc |
diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5f3ba959b4276da5093f57df5cb764b83deb01b2 |
--- /dev/null |
+++ b/courgette/disassembler_elf_32_x86.cc |
@@ -0,0 +1,602 @@ |
+// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "courgette/disassembler_elf_32_x86.h" |
+ |
+#include <algorithm> |
+#include <string> |
+#include <vector> |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+ |
+#include "courgette/assembly_program.h" |
+#include "courgette/courgette.h" |
+#include "courgette/encoded_program.h" |
+ |
+namespace courgette { |
+ |
+// Constructs a disassembler over the |length| bytes beginning at |start| by |
+// forwarding both arguments to the Disassembler base class. |
+DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length) |
+    : Disassembler(start, length) {} |
+ |
+// Validates the ELF header at the start of the input and caches pointers to |
+// the section header table, the program header table and the default string |
+// section. Returns Bad(<reason>) as soon as a check fails. On success the |
+// working length is reduced to DiscoverLength() and Good() is returned. |
+bool DisassemblerElf32X86::ParseHeader() { |
+  if (length() < sizeof(Elf32_Ehdr)) |
+    return Bad("Too small"); |
+ |
+  header_ = (Elf32_Ehdr *)start(); |
+ |
+  // Have magic for elf header? |
+  if (header_->e_ident[0] != 0x7f || |
+      header_->e_ident[1] != 'E' || |
+      header_->e_ident[2] != 'L' || |
+      header_->e_ident[3] != 'F') |
+    return Bad("No Magic Number"); |
+ |
+  if (header_->e_type != ET_EXEC && |
+      header_->e_type != ET_DYN) |
+    return Bad("Not an executable file or shared library"); |
+ |
+  if (header_->e_machine != EM_386) |
+    return Bad("Not a supported architecture"); |
+ |
+  if (header_->e_version != 1) |
+    return Bad("Unknown file version"); |
+ |
+  if (header_->e_shentsize != sizeof(Elf32_Shdr)) |
+    return Bad("Unexpected section header size"); |
+ |
+  if (header_->e_shoff >= length()) |
+    return Bad("Out of bounds section header table offset"); |
+ |
+  // The table holds e_shnum entries of sizeof(Elf32_Shdr) bytes each, so the |
+  // entry size has to take part in the bounds check. The sum is formed in 64 |
+  // bits so that a large e_shoff cannot wrap around, and a table that ends |
+  // exactly at the end of the input is accepted. |
+  const uint64 section_table_end = |
+      static_cast<uint64>(header_->e_shoff) + |
+      static_cast<uint64>(header_->e_shnum) * sizeof(Elf32_Shdr); |
+  if (section_table_end > length()) |
+    return Bad("Out of bounds section header table"); |
+ |
+  section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff); |
+  section_header_table_size_ = header_->e_shnum; |
+ |
+  if (header_->e_phoff >= length()) |
+    return Bad("Out of bounds program header table offset"); |
+ |
+  // Same reasoning as for the section header table above. |
+  const uint64 program_table_end = |
+      static_cast<uint64>(header_->e_phoff) + |
+      static_cast<uint64>(header_->e_phnum) * sizeof(Elf32_Phdr); |
+  if (program_table_end > length()) |
+    return Bad("Out of bounds program header table"); |
+ |
+  program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff); |
+  program_header_table_size_ = header_->e_phnum; |
+ |
+  // e_shstrndx is read from the file and is about to be used as an index |
+  // into the section header table, so it must name an existing entry. |
+  if (header_->e_shstrndx >= header_->e_shnum) |
+    return Bad("Out of bounds string section index"); |
+ |
+  default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx); |
+ |
+  ReduceLength(DiscoverLength()); |
+ |
+  return Good(); |
+} |
+ |
+// Disassembles the input into |target|. Returns false without touching |
+// |target| when ok() is false. Otherwise sets the image base to 0, collects |
+// the abs32 and rel32 locations, emits the file contents, and finally asks |
+// |target| to assign default indexes. Returns false as soon as one of the |
+// parsing steps fails; later steps are then not run. |
+bool DisassemblerElf32X86::Disassemble(AssemblyProgram* target) { |
+  if (!ok()) |
+    return false; |
+ |
+  // The Image Base is always 0 for ELF Executables |
+  target->set_image_base(0); |
+ |
+  // Short-circuit evaluation keeps the steps in order and stops at the first |
+  // failure. |
+  const bool parsed = ParseAbs32Relocs() && |
+                      ParseRel32RelocsFromSections() && |
+                      ParseFile(target); |
+  if (!parsed) |
+    return false; |
+ |
+  target->DefaultAssignIndexes(); |
+  return true; |
+} |
+ |
+// Returns the largest file offset reached by any of: the end of a section |
+// (SHT_NOBITS sections are ignored), the end of the file-backed part of a |
+// program segment, the end of the section header table, and the end of the |
+// program header table. |
+uint32 DisassemblerElf32X86::DiscoverLength() { |
+  uint32 furthest = 0; |
+ |
+  // Sections that occupy bytes in the file. |
+  for (int index = 0; index < SectionHeaderCount(); ++index) { |
+    const Elf32_Shdr* section = SectionHeader(index); |
+    if (section->sh_type != SHT_NOBITS) { |
+      const uint32 end_of_section = section->sh_offset + section->sh_size; |
+      furthest = std::max(furthest, end_of_section); |
+    } |
+  } |
+ |
+  // File-backed extent of every program segment. |
+  for (int index = 0; index < ProgramSegmentHeaderCount(); ++index) { |
+    const Elf32_Phdr* segment = ProgramSegmentHeader(index); |
+    const uint32 end_of_segment = segment->p_offset + segment->p_filesz; |
+    furthest = std::max(furthest, end_of_segment); |
+  } |
+ |
+  // The two header tables themselves. |
+  const uint32 end_of_section_table = |
+      header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr)); |
+  furthest = std::max(furthest, end_of_section_table); |
+ |
+  const uint32 end_of_segment_table = |
+      header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr)); |
+  furthest = std::max(furthest, end_of_segment_table); |
+ |
+  return furthest; |
+} |
+ |
+// Returns true when |rva| lies inside the in-memory range |
+// [p_vaddr, p_vaddr + p_memsz) of at least one PT_LOAD program segment. |
+CheckBool DisassemblerElf32X86::IsValidRVA(RVA rva) const { |
+  for (int index = 0; index < ProgramSegmentHeaderCount(); ++index) { |
+    const Elf32_Phdr* segment = ProgramSegmentHeader(index); |
+ |
+    // Only PT_LOAD segments are considered. |
+    if (segment->p_type == PT_LOAD) { |
+      const Elf32_Addr first = segment->p_vaddr; |
+      const Elf32_Addr past_last = segment->p_vaddr + segment->p_memsz; |
+ |
+      if (first <= rva && rva < past_last) |
+        return true; |
+    } |
+  } |
+ |
+  return false; |
+} |
+ |
+// Converts an ELF relocation structure into an RVA. Only an R_386_RELATIVE |
+// entry whose symbol index is 0 is converted: its r_offset is stored in |
+// |*result| and true is returned. Every other entry returns false and leaves |
+// |*result| untouched. |
+CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { |
+  // The lowest byte of r_info is the relocation type... |
+  const elf32_rel_386_type_values reloc_type = |
+      static_cast<elf32_rel_386_type_values>( |
+          static_cast<unsigned char>(rel.r_info)); |
+ |
+  // ...and the remaining upper bits are the symbol index. |
+  const uint32 symbol_index = rel.r_info >> 8; |
+ |
+  if (reloc_type != R_386_RELATIVE || symbol_index != 0) |
+    return false; |
+ |
+  // This is a basic ABS32 relocation address |
+  *result = rel.r_offset; |
+  return true; |
+} |
+ |
+// Translates the in-memory address |addr| into an offset in the file. The |
+// program segments are searched in order; the first one whose memory range |
+// contains |addr| at a position that is still backed by file bytes wins, its |
+// file offset is stored in |*result| and true is returned. Returns false, |
+// leaving |*result| untouched, when no segment qualifies. |
+CheckBool DisassemblerElf32X86::RVAToFileOffset(Elf32_Addr addr, |
+                                                size_t* result) const { |
+  for (int segment = 0; segment < ProgramSegmentHeaderCount(); ++segment) { |
+    const Elf32_Addr memory_begin = ProgramSegmentMemoryBegin(segment); |
+    const Elf32_Addr memory_end = |
+        memory_begin + ProgramSegmentMemorySize(segment); |
+ |
+    if (addr < memory_begin || addr >= memory_end) |
+      continue; |
+ |
+    // Inside the segment in memory, but possibly beyond the part of it that |
+    // is stored in the file. |
+    const Elf32_Addr delta = addr - memory_begin; |
+    if (delta >= ProgramSegmentFileSize(segment)) |
+      continue; |
+ |
+    *result = ProgramSegmentFileOffset(segment) + delta; |
+    return true; |
+  } |
+ |
+  return false; |
+} |
+ |
+// Translates the file offset |offset| into an RVA using the first section |
+// (SHT_NOBITS sections excluded) whose file range contains it. Returns 0 |
+// when no section contains the offset. |
+RVA DisassemblerElf32X86::FileOffsetToRVA(size_t offset) const { |
+  // File offsets can be 64 bit values, but we are dealing with 32 |
+  // bit executables and so only need to support 32bit file sizes. |
+  const uint32 target = static_cast<uint32>(offset); |
+ |
+  for (int index = 0; index < SectionHeaderCount(); ++index) { |
+    const Elf32_Shdr* section = SectionHeader(index); |
+ |
+    // These can appear to have a size in the file, but don't. |
+    if (section->sh_type == SHT_NOBITS) |
+      continue; |
+ |
+    const Elf32_Off first = section->sh_offset; |
+    const Elf32_Off past_last = first + section->sh_size; |
+    const bool inside = (first <= target) && (target < past_last); |
+ |
+    if (inside) |
+      return section->sh_addr + (target - first); |
+  } |
+ |
+  return 0; |
+} |
+ |
+// Replaces the contents of |offsets| with the file offset of every RVA in |
+// |rvas|, in the same order. Stops and returns false at the first RVA that |
+// RVAToFileOffset() cannot translate; |offsets| then holds only the entries |
+// translated before the failure. |
+CheckBool DisassemblerElf32X86::RVAsToOffsets(std::vector<RVA>* rvas, |
+                                              std::vector<size_t>* offsets) { |
+  offsets->clear(); |
+ |
+  for (size_t index = 0; index < rvas->size(); ++index) { |
+    size_t file_offset; |
+ |
+    if (!RVAToFileOffset((*rvas)[index], &file_offset)) |
+      return false; |
+ |
+    offsets->push_back(file_offset); |
+  } |
+ |
+  return true; |
+} |
+ |
+// Emits the whole input into |program|. The abs32 and rel32 locations are |
+// first converted from RVAs to file offsets. Then each section is visited in |
+// section header order: the bytes between the current position and the |
+// start of the section are emitted as plain bytes, SHT_REL sections go |
+// through ParseRelocationSection(), SHT_PROGBITS sections go through |
+// ParseProgbitsSection(), and any other section type is left to be emitted |
+// as plain bytes by a later step. Returns false if any step fails or if not |
+// every abs32 location was consumed. |
+CheckBool DisassemblerElf32X86::ParseFile(AssemblyProgram* program) { |
+  bool ok = true; |
+ |
+  // Walk all the bytes in the file, whether or not in a section. |
+  uint32 file_offset = 0; |
+ |
+  std::vector<size_t> abs_offsets; |
+  std::vector<size_t> rel_offsets; |
+ |
+  if (ok) |
+    ok = RVAsToOffsets(&abs32_locations_, &abs_offsets); |
+ |
+  if (ok) |
+    ok = RVAsToOffsets(&rel32_locations_, &rel_offsets); |
+ |
+  // Cursors into the two offset lists. They are shared by all PROGBITS |
+  // sections, and ParseProgbitsSection() advances them through the pointers |
+  // it is given. |
+  std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin(); |
+  std::vector<size_t>::iterator current_rel_offset = rel_offsets.begin(); |
+ |
+  std::vector<size_t>::iterator end_abs_offset = abs_offsets.end(); |
+  std::vector<size_t>::iterator end_rel_offset = rel_offsets.end(); |
+ |
+  for (int section_id = 0; |
+       ok && (section_id < SectionHeaderCount()); |
+       section_id++) { |
+ |
+    const Elf32_Shdr *section_header = SectionHeader(section_id); |
+ |
+    // Bytes between the previous position and the start of this section. |
+    if (ok) { |
+      ok = ParseSimpleRegion(file_offset, |
+                             section_header->sh_offset, |
+                             program); |
+      file_offset = section_header->sh_offset; |
+    } |
+ |
+    switch (section_header->sh_type) { |
+      case SHT_REL: |
+        if (ok) { |
+          ok = ParseRelocationSection(section_header, program); |
+          file_offset = section_header->sh_offset + section_header->sh_size; |
+        } |
+        break; |
+      case SHT_PROGBITS: |
+        if (ok) { |
+          ok = ParseProgbitsSection(section_header, |
+                                    &current_abs_offset, end_abs_offset, |
+                                    &current_rel_offset, end_rel_offset, |
+                                    program); |
+          file_offset = section_header->sh_offset + section_header->sh_size; |
+        } |
+ |
+        break; |
+      default: |
+        // Not handled specially: file_offset stays at the start of the |
+        // section, so its bytes are picked up by the next simple region. |
+        break; |
+    } |
+  } |
+ |
+  // Rest of the file past the last section |
+  if (ok) { |
+    ok = ParseSimpleRegion(file_offset, |
+                           length(), |
+                           program); |
+  } |
+ |
+  // Make certain we consume all of the relocations as expected |
+  ok = ok && (current_abs_offset == end_abs_offset); |
+ |
+  return ok; |
+} |
+ |
+// Emits one SHT_REL section into |program|. When the section starts with |
+// exactly the entries recorded in abs32_locations_ (see the conditions |
+// below), that prefix is replaced by a single ELF relocation instruction and |
+// only the remaining bytes are emitted as plain bytes; otherwise the whole |
+// section is emitted as plain bytes. Returns false if emitting fails. |
+CheckBool DisassemblerElf32X86::ParseRelocationSection( |
+    const Elf32_Shdr *section_header, |
+    AssemblyProgram* program) { |
+  // We can reproduce the R_386_RELATIVE entries in one of the relocation |
+  // table based on other information in the patch, given these |
+  // conditions.... |
+  // |
+  // All R_386_RELATIVE entries are: |
+  //   1) In the same relocation table |
+  //   2) Are consecutive |
+  //   3) Are sorted in memory address order |
+  // |
+  // Happily, this is normally the case, but it's not required by spec |
+  // so we check, and just don't do it if we don't match up. |
+ |
+  // The expectation is that one relocation section will contain |
+  // all of our R_386_RELATIVE entries in the expected order followed |
+  // by assorted other entries we can't use special handling for. |
+ |
+  bool ok = true; |
+ |
+  // Walk all the bytes in the section, matching relocation table or not |
+  size_t file_offset = section_header->sh_offset; |
+  size_t section_end = section_header->sh_offset + section_header->sh_size; |
+ |
+  // The comparison below steps through the section in sizeof(Elf32_Rel) |
+  // strides, so it is only meaningful when the section declares that same |
+  // entry size. Testing this first also keeps a zero sh_entsize, which the |
+  // file is free to contain, away from the division. |
+  bool match = (section_header->sh_entsize == sizeof(Elf32_Rel)); |
+ |
+  if (match) { |
+    uint32 section_relocs_count = section_header->sh_size / |
+                                  section_header->sh_entsize; |
+ |
+    // Not enough entries in this section to hold every abs32 location. |
+    if (abs32_locations_.size() > section_relocs_count) |
+      match = false; |
+  } |
+ |
+  Elf32_Rel *section_relocs_iter = |
+      (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); |
+ |
+  std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin(); |
+ |
+  // Comparing the whole r_info against R_386_RELATIVE also requires the |
+  // symbol bits of r_info to be zero. |
+  while (match && (reloc_iter != abs32_locations_.end())) { |
+    if (section_relocs_iter->r_info != R_386_RELATIVE || |
+        section_relocs_iter->r_offset != *reloc_iter) |
+      match = false; |
+    section_relocs_iter++; |
+    reloc_iter++; |
+  } |
+ |
+  if (match) { |
+    // Skip over relocation tables |
+    ok = program->EmitElfRelocationInstruction(); |
+    file_offset += sizeof(Elf32_Rel) * abs32_locations_.size(); |
+  } |
+ |
+  if (ok) { |
+    ok = ParseSimpleRegion(file_offset, section_end, program); |
+  } |
+ |
+  return ok; |
+} |
+ |
+// Emits one SHT_PROGBITS section into |program|: an origin instruction for |
+// the section's address, then the section's bytes, with each abs32 location |
+// emitted as an abs32 label reference and each usable rel32 location |
+// emitted as a rel32 label reference. |*current_abs_offset| and |
+// |*current_rel_offset| are cursors owned by the caller and are advanced as |
+// locations are consumed or skipped. Returns false if an emit call fails or |
+// if an abs32 location lies behind the current position. |
+CheckBool DisassemblerElf32X86::ParseProgbitsSection( |
+    const Elf32_Shdr *section_header, |
+    std::vector<size_t>::iterator* current_abs_offset, |
+    std::vector<size_t>::iterator end_abs_offset, |
+    std::vector<size_t>::iterator* current_rel_offset, |
+    std::vector<size_t>::iterator end_rel_offset, |
+    AssemblyProgram* program) { |
+  // Aliases for the caller-owned cursors; advancing them is visible to the |
+  // caller. |
+  std::vector<size_t>::iterator& abs_cursor = *current_abs_offset; |
+  std::vector<size_t>::iterator& rel_cursor = *current_rel_offset; |
+ |
+  const size_t section_begin = section_header->sh_offset; |
+  const size_t section_end = |
+      section_header->sh_offset + section_header->sh_size; |
+  const Elf32_Addr section_rva = section_header->sh_addr; |
+ |
+  if (!program->EmitOriginInstruction(section_rva)) |
+    return false; |
+ |
+  // Walk all the bytes of the section. |
+  size_t position = section_begin; |
+  while (position < section_end) { |
+    // Drop rel32 candidates the walk has already moved past. |
+    while (rel_cursor != end_rel_offset && *rel_cursor < position) |
+      ++rel_cursor; |
+ |
+    // An abs32 location that is already behind us is treated as a failure. |
+    if (abs_cursor != end_abs_offset && *abs_cursor < position) |
+      return false; |
+ |
+    // Find where the current run of plain bytes has to stop. |
+    size_t next_stop = section_end; |
+ |
+    if (abs_cursor != end_abs_offset && *abs_cursor < next_stop) |
+      next_stop = *abs_cursor; |
+ |
+    // Rel offsets are heuristically derived, and might (incorrectly) overlap |
+    // an Abs value, or the end of the section, so +3 to make sure there is |
+    // room for the full 4 byte value. |
+    if (rel_cursor != end_rel_offset && (*rel_cursor + 3) < next_stop) |
+      next_stop = *rel_cursor; |
+ |
+    if (position < next_stop) { |
+      if (!ParseSimpleRegion(position, next_stop, program)) |
+        return false; |
+ |
+      position = next_stop; |
+      continue; |
+    } |
+ |
+    if (abs_cursor != end_abs_offset && position == *abs_cursor) { |
+      const RVA target_rva = Read32LittleEndian(OffsetToPointer(position)); |
+ |
+      const bool emitted = |
+          program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); |
+      // The position and the cursor move on even when the emit failed. |
+      position += sizeof(RVA); |
+      ++abs_cursor; |
+      if (!emitted) |
+        return false; |
+      continue; |
+    } |
+ |
+    if (rel_cursor != end_rel_offset && position == *rel_cursor) { |
+      const uint32 displacement = |
+          Read32LittleEndian(OffsetToPointer(position)); |
+      // The target is relative to the address just past the 4 byte operand. |
+      // This cast is for 64 bit systems, and is only safe because we |
+      // are working on 32 bit executables. |
+      const RVA target_rva = static_cast<RVA>( |
+          section_rva + (position - section_begin) + 4 + displacement); |
+ |
+      const bool emitted = |
+          program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); |
+      // The position and the cursor move on even when the emit failed. |
+      position += sizeof(RVA); |
+      ++rel_cursor; |
+      if (!emitted) |
+        return false; |
+      continue; |
+    } |
+  } |
+ |
+  // Rest of the section (if any) |
+  return ParseSimpleRegion(position, section_end, program); |
+} |
+ |
+// Emits every byte in the file range [start_file_offset, end_file_offset) |
+// into |program| as a byte instruction. Stops and returns false at the |
+// first byte that fails to emit. An empty or reversed range emits nothing |
+// and returns true. |
+CheckBool DisassemblerElf32X86::ParseSimpleRegion( |
+    size_t start_file_offset, |
+    size_t end_file_offset, |
+    AssemblyProgram* program) { |
+  const uint8* const first = OffsetToPointer(start_file_offset); |
+  const uint8* const past_last = OffsetToPointer(end_file_offset); |
+ |
+  for (const uint8* cursor = first; cursor < past_last; ++cursor) { |
+    if (!program->EmitByteInstruction(*cursor)) |
+      return false; |
+  } |
+ |
+  return true; |
+} |
+ |
+// Rebuilds abs32_locations_ from the relocation tables: every entry of every |
+// SHT_REL section is passed to RelToRVA(), and the RVAs it accepts are |
+// collected and then sorted in ascending order. Returns false when an |
+// SHT_REL section declares an entry size other than sizeof(Elf32_Rel). |
+CheckBool DisassemblerElf32X86::ParseAbs32Relocs() { |
+  abs32_locations_.clear(); |
+ |
+  // Loop through sections for relocation sections |
+  for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { |
+    const Elf32_Shdr *section_header = SectionHeader(section_id); |
+ |
+    if (section_header->sh_type != SHT_REL) |
+      continue; |
+ |
+    // The table is indexed below as an array of Elf32_Rel, so the declared |
+    // entry size has to agree. sh_entsize comes straight from the file; a |
+    // zero there would otherwise be divided by. |
+    if (section_header->sh_entsize != sizeof(Elf32_Rel)) |
+      return false; |
+ |
+    Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id); |
+ |
+    const uint32 relocs_table_count = section_header->sh_size / |
+                                      section_header->sh_entsize; |
+ |
+    // Loop through relocation objects in the relocation section |
+    for (uint32 rel_id = 0; rel_id < relocs_table_count; rel_id++) { |
+      RVA rva; |
+ |
+      // Quite a few of these conversions fail, and we simply skip |
+      // them, that's okay. |
+      if (RelToRVA(relocs_table[rel_id], &rva)) |
+        abs32_locations_.push_back(rva); |
+    } |
+  } |
+ |
+  std::sort(abs32_locations_.begin(), abs32_locations_.end()); |
+  return true; |
+} |
+ |
+// Rebuilds rel32_locations_ by running ParseRel32RelocsFromSection() over |
+// every SHT_PROGBITS section and then sorting the collected RVAs in |
+// ascending order. Returns false, without sorting, as soon as one section |
+// fails. |
+CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSections() { |
+  rel32_locations_.clear(); |
+ |
+  // Only SHT_PROGBITS sections are scanned. |
+  for (int index = 0; index < SectionHeaderCount(); ++index) { |
+    const Elf32_Shdr* section = SectionHeader(index); |
+    const bool is_progbits = (section->sh_type == SHT_PROGBITS); |
+ |
+    if (is_progbits && !ParseRel32RelocsFromSection(section)) |
+      return false; |
+  } |
+ |
+  std::sort(rel32_locations_.begin(), rel32_locations_.end()); |
+  return true; |
+} |
+ |
+// Heuristically scans the bytes of the section described by |
+// |section_header| for x86 instructions that take a 32-bit relative |
+// operand (E8/E9 and the long form of Jcc) and appends the RVA of each |
+// accepted operand to rel32_locations_. A candidate is accepted when |
+// IsValidRVA() accepts the address it points at. Always returns true. |
+CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( |
+    const Elf32_Shdr* section_header) { |
+ |
+  uint32 start_file_offset = section_header->sh_offset; |
+  uint32 end_file_offset = start_file_offset + section_header->sh_size; |
+ |
+  const uint8* start_pointer = OffsetToPointer(start_file_offset); |
+  const uint8* end_pointer = OffsetToPointer(end_file_offset); |
+ |
+  // Find the rel32 relocations. |
+  const uint8* p = start_pointer; |
+  while (p < end_pointer) { |
+    // Heuristic discovery of rel32 locations in instruction stream: are the |
+    // next few bytes the start of an instruction containing a rel32 |
+    // addressing mode? |
+    const uint8* rel32 = NULL; |
+ |
+    if (p + 5 < end_pointer) { |
+      if (*p == 0xE8 || *p == 0xE9) {  // jmp rel32 and call rel32 |
+        rel32 = p + 1; |
+      } |
+    } |
+    if (p + 6 < end_pointer) { |
+      if (*p == 0x0F  &&  (*(p+1) & 0xF0) == 0x80) {  // Jcc long form |
+        if (p[1] != 0x8A && p[1] != 0x8B)  // JPE/JPO unlikely |
+          rel32 = p + 2; |
+      } |
+    } |
+    if (rel32) { |
+      // RVA of the operand: the section's address plus the operand's |
+      // distance from the start of the section. |
+      RVA rel32_rva = static_cast<RVA>(section_header->sh_addr + |
+                                       (rel32 - start_pointer)); |
+ |
+      // The operand is relative to the address just past its own 4 bytes. |
+      RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); |
+      // Keep the candidate only when its target is a valid RVA. |
+      if (IsValidRVA(target_rva)) { |
+        rel32_locations_.push_back(rel32_rva); |
+#if COURGETTE_HISTOGRAM_TARGETS |
+        ++rel32_target_rvas_[target_rva]; |
+#endif |
+        // Resume right after the operand. Advancing |p| by 4 from the opcode |
+        // would land inside the operand just recorded and let its bytes be |
+        // mistaken for opcodes. |
+        p = rel32 + 4; |
+        continue; |
+      } |
+    } |
+    p += 1; |
+  } |
+ |
+  return true; |
+} |
+ |
+} // namespace courgette |