OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "courgette/disassembler_elf_32_x86.h" |
| 6 |
| 7 #include <algorithm> |
| 8 #include <string> |
| 9 #include <vector> |
| 10 |
| 11 #include "base/basictypes.h" |
| 12 #include "base/logging.h" |
| 13 |
| 14 #include "courgette/assembly_program.h" |
| 15 #include "courgette/courgette.h" |
| 16 #include "courgette/encoded_program.h" |
| 17 |
| 18 namespace courgette { |
| 19 |
| 20 DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length) |
| 21 : Disassembler(start, length) { |
| 22 } |
| 23 |
| 24 bool DisassemblerElf32X86::ParseHeader() { |
| 25 if (length() < sizeof(Elf32_Ehdr)) |
| 26 return Bad("Too small"); |
| 27 |
| 28 header_ = (Elf32_Ehdr *)start(); |
| 29 |
| 30 // Have magic for elf header? |
| 31 if (header_->e_ident[0] != 0x7f || |
| 32 header_->e_ident[1] != 'E' || |
| 33 header_->e_ident[2] != 'L' || |
| 34 header_->e_ident[3] != 'F') |
| 35 return Bad("No Magic Number"); |
| 36 |
| 37 if (header_->e_type != ET_EXEC && |
| 38 header_->e_type != ET_DYN) |
| 39 return Bad("Not an executable file or shared library"); |
| 40 |
| 41 if (header_->e_machine != EM_386) |
| 42 return Bad("Not a supported architecture"); |
| 43 |
| 44 if (header_->e_version != 1) |
| 45 return Bad("Unknown file version"); |
| 46 |
| 47 if (header_->e_shentsize != sizeof(Elf32_Shdr)) |
| 48 return Bad("Unexpected section header size"); |
| 49 |
| 50 if (header_->e_shoff >= length()) |
| 51 return Bad("Out of bounds section header table offset"); |
| 52 |
| 53 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff); |
| 54 section_header_table_size_ = header_->e_shnum; |
| 55 |
| 56 if ((header_->e_shoff + header_->e_shnum ) >= length()) |
| 57 return Bad("Out of bounds section header table"); |
| 58 |
| 59 if (header_->e_phoff >= length()) |
| 60 return Bad("Out of bounds program header table offset"); |
| 61 |
| 62 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff); |
| 63 program_header_table_size_ = header_->e_phnum; |
| 64 |
| 65 if ((header_->e_phoff + header_->e_phnum) >= length()) |
| 66 return Bad("Out of bounds program header table"); |
| 67 |
| 68 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx); |
| 69 |
| 70 ReduceLength(DiscoverLength()); |
| 71 |
| 72 return Good(); |
| 73 } |
| 74 |
| 75 bool DisassemblerElf32X86::Disassemble(AssemblyProgram* target) { |
| 76 if (!ok()) |
| 77 return false; |
| 78 |
| 79 // The Image Base is always 0 for ELF Executables |
| 80 target->set_image_base(0); |
| 81 |
| 82 if (!ParseAbs32Relocs()) |
| 83 return false; |
| 84 |
| 85 if (!ParseRel32RelocsFromSections()) |
| 86 return false; |
| 87 |
| 88 if (!ParseFile(target)) |
| 89 return false; |
| 90 |
| 91 target->DefaultAssignIndexes(); |
| 92 |
| 93 return true; |
| 94 } |
| 95 |
| 96 uint32 DisassemblerElf32X86::DiscoverLength() { |
| 97 uint32 result = 0; |
| 98 |
| 99 // Find the end of the last section |
| 100 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { |
| 101 const Elf32_Shdr *section_header = SectionHeader(section_id); |
| 102 |
| 103 if (section_header->sh_type == SHT_NOBITS) |
| 104 continue; |
| 105 |
| 106 uint32 section_end = section_header->sh_offset + section_header->sh_size; |
| 107 |
| 108 if (section_end > result) |
| 109 result = section_end; |
| 110 } |
| 111 |
| 112 // Find the end of the last segment |
| 113 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { |
| 114 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); |
| 115 |
| 116 uint32 segment_end = segment_header->p_offset + segment_header->p_filesz; |
| 117 |
| 118 if (segment_end > result) |
| 119 result = segment_end; |
| 120 } |
| 121 |
| 122 uint32 section_table_end = header_->e_shoff + |
| 123 (header_->e_shnum * sizeof(Elf32_Shdr)); |
| 124 if (section_table_end > result) |
| 125 result = section_table_end; |
| 126 |
| 127 uint32 segment_table_end = header_->e_phoff + |
| 128 (header_->e_phnum * sizeof(Elf32_Phdr)); |
| 129 if (segment_table_end > result) |
| 130 result = segment_table_end; |
| 131 |
| 132 return result; |
| 133 } |
| 134 |
| 135 CheckBool DisassemblerElf32X86::IsValidRVA(RVA rva) const { |
| 136 |
| 137 // It's valid if it's contained in any program segment |
| 138 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { |
| 139 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); |
| 140 |
| 141 if (segment_header->p_type != PT_LOAD) |
| 142 continue; |
| 143 |
| 144 Elf32_Addr begin = segment_header->p_vaddr; |
| 145 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz; |
| 146 |
| 147 if (rva >= begin && rva < end) |
| 148 return true; |
| 149 } |
| 150 |
| 151 return false; |
| 152 } |
| 153 |
| 154 // Convert an ELF relocation struction into an RVA |
| 155 CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { |
| 156 |
| 157 // The rightmost byte of r_info is the type... |
| 158 elf32_rel_386_type_values type = |
| 159 (elf32_rel_386_type_values)(unsigned char)rel.r_info; |
| 160 |
| 161 // The other 3 bytes of r_info are the symbol |
| 162 uint32 symbol = rel.r_info >> 8; |
| 163 |
| 164 switch(type) |
| 165 { |
| 166 case R_386_NONE: |
| 167 case R_386_32: |
| 168 case R_386_PC32: |
| 169 case R_386_GOT32: |
| 170 case R_386_PLT32: |
| 171 case R_386_COPY: |
| 172 case R_386_GLOB_DAT: |
| 173 case R_386_JMP_SLOT: |
| 174 return false; |
| 175 |
| 176 case R_386_RELATIVE: |
| 177 if (symbol != 0) |
| 178 return false; |
| 179 |
| 180 // This is a basic ABS32 relocation address |
| 181 *result = rel.r_offset; |
| 182 return true; |
| 183 |
| 184 case R_386_GOTOFF: |
| 185 case R_386_GOTPC: |
| 186 case R_386_TLS_TPOFF: |
| 187 return false; |
| 188 } |
| 189 |
| 190 return false; |
| 191 } |
| 192 |
| 193 // Returns RVA for an in memory address, or NULL. |
| 194 CheckBool DisassemblerElf32X86::RVAToFileOffset(Elf32_Addr addr, |
| 195 size_t* result) const { |
| 196 |
| 197 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { |
| 198 Elf32_Addr begin = ProgramSegmentMemoryBegin(i); |
| 199 Elf32_Addr end = begin + ProgramSegmentMemorySize(i); |
| 200 |
| 201 if (addr >= begin && addr < end) { |
| 202 Elf32_Addr offset = addr - begin; |
| 203 |
| 204 if (offset < ProgramSegmentFileSize(i)) { |
| 205 *result = ProgramSegmentFileOffset(i) + offset; |
| 206 return true; |
| 207 } |
| 208 } |
| 209 } |
| 210 |
| 211 return false; |
| 212 } |
| 213 |
| 214 RVA DisassemblerElf32X86::FileOffsetToRVA(size_t offset) const { |
| 215 // File offsets can be 64 bit values, but we are dealing with 32 |
| 216 // bit executables and so only need to support 32bit file sizes. |
| 217 uint32 offset32 = (uint32)offset; |
| 218 |
| 219 for (int i = 0; i < SectionHeaderCount(); i++) { |
| 220 |
| 221 const Elf32_Shdr *section_header = SectionHeader(i); |
| 222 |
| 223 // These can appear to have a size in the file, but don't. |
| 224 if (section_header->sh_type == SHT_NOBITS) |
| 225 continue; |
| 226 |
| 227 Elf32_Off section_begin = section_header->sh_offset; |
| 228 Elf32_Off section_end = section_begin + section_header->sh_size; |
| 229 |
| 230 if (offset32 >= section_begin && offset32 < section_end) { |
| 231 return section_header->sh_addr + (offset32 - section_begin); |
| 232 } |
| 233 } |
| 234 |
| 235 return 0; |
| 236 } |
| 237 |
| 238 CheckBool DisassemblerElf32X86::RVAsToOffsets(std::vector<RVA>* rvas, |
| 239 std::vector<size_t>* offsets) { |
| 240 offsets->clear(); |
| 241 |
| 242 for (std::vector<RVA>::iterator rva = rvas->begin(); |
| 243 rva != rvas->end(); |
| 244 rva++) { |
| 245 |
| 246 size_t offset; |
| 247 |
| 248 if (!RVAToFileOffset(*rva, &offset)) |
| 249 return false; |
| 250 |
| 251 offsets->push_back(offset); |
| 252 } |
| 253 |
| 254 return true; |
| 255 } |
| 256 |
| 257 CheckBool DisassemblerElf32X86::ParseFile(AssemblyProgram* program) { |
| 258 bool ok = true; |
| 259 |
| 260 // Walk all the bytes in the file, whether or not in a section. |
| 261 uint32 file_offset = 0; |
| 262 |
| 263 std::vector<size_t> abs_offsets; |
| 264 std::vector<size_t> rel_offsets; |
| 265 |
| 266 if (ok) |
| 267 ok = RVAsToOffsets(&abs32_locations_, &abs_offsets); |
| 268 |
| 269 if (ok) |
| 270 ok = RVAsToOffsets(&rel32_locations_, &rel_offsets); |
| 271 |
| 272 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin(); |
| 273 std::vector<size_t>::iterator current_rel_offset = rel_offsets.begin(); |
| 274 |
| 275 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end(); |
| 276 std::vector<size_t>::iterator end_rel_offset = rel_offsets.end(); |
| 277 |
| 278 for (int section_id = 0; |
| 279 ok && (section_id < SectionHeaderCount()); |
| 280 section_id++) { |
| 281 |
| 282 const Elf32_Shdr *section_header = SectionHeader(section_id); |
| 283 |
| 284 if (ok) { |
| 285 ok = ParseSimpleRegion(file_offset, |
| 286 section_header->sh_offset, |
| 287 program); |
| 288 file_offset = section_header->sh_offset; |
| 289 } |
| 290 |
| 291 switch (section_header->sh_type) { |
| 292 case SHT_REL: |
| 293 if (ok) { |
| 294 ok = ParseRelocationSection(section_header, program); |
| 295 file_offset = section_header->sh_offset + section_header->sh_size; |
| 296 } |
| 297 break; |
| 298 case SHT_PROGBITS: |
| 299 if (ok) { |
| 300 ok = ParseProgbitsSection(section_header, |
| 301 ¤t_abs_offset, end_abs_offset, |
| 302 ¤t_rel_offset, end_rel_offset, |
| 303 program); |
| 304 file_offset = section_header->sh_offset + section_header->sh_size; |
| 305 } |
| 306 |
| 307 break; |
| 308 default: |
| 309 break; |
| 310 } |
| 311 } |
| 312 |
| 313 // Rest of the file past the last section |
| 314 if (ok) { |
| 315 ok = ParseSimpleRegion(file_offset, |
| 316 length(), |
| 317 program); |
| 318 } |
| 319 |
| 320 // Make certain we consume all of the relocations as expected |
| 321 ok = ok && (current_abs_offset == end_abs_offset); |
| 322 |
| 323 return ok; |
| 324 } |
| 325 |
| 326 CheckBool DisassemblerElf32X86::ParseRelocationSection( |
| 327 const Elf32_Shdr *section_header, |
| 328 AssemblyProgram* program) { |
| 329 // We can reproduce the R_386_RELATIVE entries in one of the relocation |
| 330 // table based on other information in the patch, given these |
| 331 // conditions.... |
| 332 // |
| 333 // All R_386_RELATIVE entries are: |
| 334 // 1) In the same relocation table |
| 335 // 2) Are consecutive |
| 336 // 3) Are sorted in memory address order |
| 337 // |
| 338 // Happily, this is normally the case, but it's not required by spec |
| 339 // so we check, and just don't do it if we don't match up. |
| 340 |
| 341 // The expectation is that one relocation section will contain |
| 342 // all of our R_386_RELATIVE entries in the expected order followed |
| 343 // by assorted other entries we can't use special handling for. |
| 344 |
| 345 bool ok = true; |
| 346 bool match = true; |
| 347 |
| 348 // Walk all the bytes in the section, matching relocation table or not |
| 349 size_t file_offset = section_header->sh_offset; |
| 350 size_t section_end = section_header->sh_offset + section_header->sh_size; |
| 351 |
| 352 Elf32_Rel *section_relocs_iter = |
| 353 (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); |
| 354 |
| 355 uint32 section_relocs_count = section_header->sh_size / |
| 356 section_header->sh_entsize; |
| 357 |
| 358 if (abs32_locations_.size() > section_relocs_count) |
| 359 match = false; |
| 360 |
| 361 std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin(); |
| 362 |
| 363 while (match && (reloc_iter != abs32_locations_.end())) { |
| 364 if (section_relocs_iter->r_info != R_386_RELATIVE || |
| 365 section_relocs_iter->r_offset != *reloc_iter) |
| 366 match = false; |
| 367 section_relocs_iter++; |
| 368 reloc_iter++; |
| 369 } |
| 370 |
| 371 if (match) { |
| 372 // Skip over relocation tables |
| 373 ok = program->EmitElfRelocationInstruction(); |
| 374 file_offset += sizeof(Elf32_Rel) * abs32_locations_.size(); |
| 375 } |
| 376 |
| 377 if (ok) { |
| 378 ok = ParseSimpleRegion(file_offset, section_end, program); |
| 379 } |
| 380 |
| 381 return ok; |
| 382 } |
| 383 |
| 384 CheckBool DisassemblerElf32X86::ParseProgbitsSection( |
| 385 const Elf32_Shdr *section_header, |
| 386 std::vector<size_t>::iterator* current_abs_offset, |
| 387 std::vector<size_t>::iterator end_abs_offset, |
| 388 std::vector<size_t>::iterator* current_rel_offset, |
| 389 std::vector<size_t>::iterator end_rel_offset, |
| 390 AssemblyProgram* program) { |
| 391 |
| 392 bool ok = true; |
| 393 |
| 394 // Walk all the bytes in the file, whether or not in a section. |
| 395 size_t file_offset = section_header->sh_offset; |
| 396 size_t section_end = section_header->sh_offset + section_header->sh_size; |
| 397 |
| 398 Elf32_Addr origin = section_header->sh_addr; |
| 399 size_t origin_offset = section_header->sh_offset; |
| 400 ok = program->EmitOriginInstruction(origin); |
| 401 |
| 402 while (ok && file_offset < section_end) { |
| 403 |
| 404 if (*current_abs_offset != end_abs_offset && |
| 405 file_offset > **current_abs_offset) { |
| 406 ok = false; |
| 407 } |
| 408 |
| 409 while (*current_rel_offset != end_rel_offset && |
| 410 file_offset > **current_rel_offset) { |
| 411 (*current_rel_offset)++; |
| 412 } |
| 413 |
| 414 size_t next_relocation = section_end; |
| 415 |
| 416 if (*current_abs_offset != end_abs_offset && |
| 417 next_relocation > **current_abs_offset) |
| 418 next_relocation = **current_abs_offset; |
| 419 |
| 420 // Rel offsets are heuristically derived, and might (incorrectly) overlap |
| 421 // an Abs value, or the end of the section, so +3 to make sure there is |
| 422 // room for the full 4 byte value. |
| 423 if (*current_rel_offset != end_rel_offset && |
| 424 next_relocation > (**current_rel_offset + 3)) |
| 425 next_relocation = **current_rel_offset; |
| 426 |
| 427 if (ok && (next_relocation > file_offset)) { |
| 428 ok = ParseSimpleRegion(file_offset, next_relocation, program); |
| 429 |
| 430 file_offset = next_relocation; |
| 431 continue; |
| 432 } |
| 433 |
| 434 if (ok && |
| 435 *current_abs_offset != end_abs_offset && |
| 436 file_offset == **current_abs_offset) { |
| 437 |
| 438 const uint8* p = OffsetToPointer(file_offset); |
| 439 RVA target_rva = Read32LittleEndian(p); |
| 440 |
| 441 ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); |
| 442 file_offset += sizeof(RVA); |
| 443 (*current_abs_offset)++; |
| 444 continue; |
| 445 } |
| 446 |
| 447 if (ok && |
| 448 *current_rel_offset != end_rel_offset && |
| 449 file_offset == **current_rel_offset) { |
| 450 |
| 451 const uint8* p = OffsetToPointer(file_offset); |
| 452 uint32 relative_target = Read32LittleEndian(p); |
| 453 // This cast is for 64 bit systems, and is only safe because we |
| 454 // are working on 32 bit executables. |
| 455 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) + |
| 456 4 + relative_target); |
| 457 |
| 458 ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); |
| 459 file_offset += sizeof(RVA); |
| 460 (*current_rel_offset)++; |
| 461 continue; |
| 462 } |
| 463 } |
| 464 |
| 465 // Rest of the section (if any) |
| 466 if (ok) { |
| 467 ok = ParseSimpleRegion(file_offset, section_end, program); |
| 468 } |
| 469 |
| 470 return ok; |
| 471 } |
| 472 |
| 473 CheckBool DisassemblerElf32X86::ParseSimpleRegion( |
| 474 size_t start_file_offset, |
| 475 size_t end_file_offset, |
| 476 AssemblyProgram* program) { |
| 477 |
| 478 const uint8* start = OffsetToPointer(start_file_offset); |
| 479 const uint8* end = OffsetToPointer(end_file_offset); |
| 480 |
| 481 const uint8* p = start; |
| 482 |
| 483 bool ok = true; |
| 484 while (p < end && ok) { |
| 485 ok = program->EmitByteInstruction(*p); |
| 486 ++p; |
| 487 } |
| 488 |
| 489 return ok; |
| 490 } |
| 491 |
| 492 CheckBool DisassemblerElf32X86::ParseAbs32Relocs() { |
| 493 abs32_locations_.clear(); |
| 494 |
| 495 // Loop through sections for relocation sections |
| 496 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { |
| 497 const Elf32_Shdr *section_header = SectionHeader(section_id); |
| 498 |
| 499 if (section_header->sh_type == SHT_REL) { |
| 500 |
| 501 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id); |
| 502 |
| 503 int relocs_table_count = section_header->sh_size / |
| 504 section_header->sh_entsize; |
| 505 |
| 506 // Elf32_Word relocation_section_id = section_header->sh_info; |
| 507 |
| 508 // Loop through relocation objects in the relocation section |
| 509 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) { |
| 510 RVA rva; |
| 511 |
| 512 // Quite a few of these conversions fail, and we simply skip |
| 513 // them, that's okay. |
| 514 if (RelToRVA(relocs_table[rel_id], &rva)) |
| 515 abs32_locations_.push_back(rva); |
| 516 } |
| 517 } |
| 518 } |
| 519 |
| 520 std::sort(abs32_locations_.begin(), abs32_locations_.end()); |
| 521 return true; |
| 522 } |
| 523 |
| 524 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSections() { |
| 525 |
| 526 rel32_locations_.clear(); |
| 527 |
| 528 // Loop through sections for relocation sections |
| 529 for (int section_id = 0; |
| 530 section_id < SectionHeaderCount(); |
| 531 section_id++) { |
| 532 |
| 533 const Elf32_Shdr *section_header = SectionHeader(section_id); |
| 534 |
| 535 if (section_header->sh_type != SHT_PROGBITS) |
| 536 continue; |
| 537 |
| 538 if (!ParseRel32RelocsFromSection(section_header)) |
| 539 return false; |
| 540 } |
| 541 |
| 542 std::sort(rel32_locations_.begin(), rel32_locations_.end()); |
| 543 return true; |
| 544 } |
| 545 |
| 546 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( |
| 547 const Elf32_Shdr* section_header) { |
| 548 |
| 549 uint32 start_file_offset = section_header->sh_offset; |
| 550 uint32 end_file_offset = start_file_offset + section_header->sh_size; |
| 551 |
| 552 const uint8* start_pointer = OffsetToPointer(start_file_offset); |
| 553 const uint8* end_pointer = OffsetToPointer(end_file_offset); |
| 554 |
| 555 // Quick way to convert from Pointer to RVA within a single Section is to |
| 556 // subtract 'pointer_to_rva'. |
| 557 const uint8* const adjust_pointer_to_rva = start_pointer - |
| 558 section_header->sh_addr; |
| 559 |
| 560 // Find the rel32 relocations. |
| 561 const uint8* p = start_pointer; |
| 562 while (p < end_pointer) { |
| 563 //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); |
| 564 |
| 565 // Heuristic discovery of rel32 locations in instruction stream: are the |
| 566 // next few bytes the start of an instruction containing a rel32 |
| 567 // addressing mode? |
| 568 const uint8* rel32 = NULL; |
| 569 |
| 570 if (p + 5 < end_pointer) { |
| 571 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 |
| 572 rel32 = p + 1; |
| 573 } |
| 574 } |
| 575 if (p + 6 < end_pointer) { |
| 576 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form |
| 577 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely |
| 578 rel32 = p + 2; |
| 579 } |
| 580 } |
| 581 if (rel32) { |
| 582 RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); |
| 583 |
| 584 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); |
| 585 // To be valid, rel32 target must be within image, and within this |
| 586 // section. |
| 587 if (IsValidRVA(target_rva)) { |
| 588 rel32_locations_.push_back(rel32_rva); |
| 589 #if COURGETTE_HISTOGRAM_TARGETS |
| 590 ++rel32_target_rvas_[target_rva]; |
| 591 #endif |
| 592 p += 4; |
| 593 continue; |
| 594 } |
| 595 } |
| 596 p += 1; |
| 597 } |
| 598 |
| 599 return true; |
| 600 } |
| 601 |
| 602 } // namespace courgette |
OLD | NEW |