Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1438)

Side by Side Diff: courgette/disassembler_elf_32_x86.cc

Issue 8477045: Add Elf 32 Support to Courgette. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Remove debug printf present by mistake. Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « courgette/disassembler_elf_32_x86.h ('k') | courgette/disassembler_elf_32_x86_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "courgette/disassembler_elf_32_x86.h"
6
7 #include <algorithm>
8 #include <string>
9 #include <vector>
10
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
17
18 namespace courgette {
19
20 DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length)
21 : Disassembler(start, length) {
22 }
23
24 bool DisassemblerElf32X86::ParseHeader() {
25 if (length() < sizeof(Elf32_Ehdr))
26 return Bad("Too small");
27
28 header_ = (Elf32_Ehdr *)start();
29
30 // Have magic for elf header?
31 if (header_->e_ident[0] != 0x7f ||
32 header_->e_ident[1] != 'E' ||
33 header_->e_ident[2] != 'L' ||
34 header_->e_ident[3] != 'F')
35 return Bad("No Magic Number");
36
37 if (header_->e_type != ET_EXEC &&
38 header_->e_type != ET_DYN)
39 return Bad("Not an executable file or shared library");
40
41 if (header_->e_machine != EM_386)
42 return Bad("Not a supported architecture");
43
44 if (header_->e_version != 1)
45 return Bad("Unknown file version");
46
47 if (header_->e_shentsize != sizeof(Elf32_Shdr))
48 return Bad("Unexpected section header size");
49
50 if (header_->e_shoff >= length())
51 return Bad("Out of bounds section header table offset");
52
53 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
54 section_header_table_size_ = header_->e_shnum;
55
56 if ((header_->e_shoff + header_->e_shnum ) >= length())
57 return Bad("Out of bounds section header table");
58
59 if (header_->e_phoff >= length())
60 return Bad("Out of bounds program header table offset");
61
62 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
63 program_header_table_size_ = header_->e_phnum;
64
65 if ((header_->e_phoff + header_->e_phnum) >= length())
66 return Bad("Out of bounds program header table");
67
68 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
69
70 ReduceLength(DiscoverLength());
71
72 return Good();
73 }
74
75 bool DisassemblerElf32X86::Disassemble(AssemblyProgram* target) {
76 if (!ok())
77 return false;
78
79 // The Image Base is always 0 for ELF Executables
80 target->set_image_base(0);
81
82 if (!ParseAbs32Relocs())
83 return false;
84
85 if (!ParseRel32RelocsFromSections())
86 return false;
87
88 if (!ParseFile(target))
89 return false;
90
91 target->DefaultAssignIndexes();
92
93 return true;
94 }
95
96 uint32 DisassemblerElf32X86::DiscoverLength() {
97 uint32 result = 0;
98
99 // Find the end of the last section
100 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
101 const Elf32_Shdr *section_header = SectionHeader(section_id);
102
103 if (section_header->sh_type == SHT_NOBITS)
104 continue;
105
106 uint32 section_end = section_header->sh_offset + section_header->sh_size;
107
108 if (section_end > result)
109 result = section_end;
110 }
111
112 // Find the end of the last segment
113 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
114 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
115
116 uint32 segment_end = segment_header->p_offset + segment_header->p_filesz;
117
118 if (segment_end > result)
119 result = segment_end;
120 }
121
122 uint32 section_table_end = header_->e_shoff +
123 (header_->e_shnum * sizeof(Elf32_Shdr));
124 if (section_table_end > result)
125 result = section_table_end;
126
127 uint32 segment_table_end = header_->e_phoff +
128 (header_->e_phnum * sizeof(Elf32_Phdr));
129 if (segment_table_end > result)
130 result = segment_table_end;
131
132 return result;
133 }
134
135 CheckBool DisassemblerElf32X86::IsValidRVA(RVA rva) const {
136
137 // It's valid if it's contained in any program segment
138 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
139 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
140
141 if (segment_header->p_type != PT_LOAD)
142 continue;
143
144 Elf32_Addr begin = segment_header->p_vaddr;
145 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
146
147 if (rva >= begin && rva < end)
148 return true;
149 }
150
151 return false;
152 }
153
154 // Convert an ELF relocation struction into an RVA
155 CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
156
157 // The rightmost byte of r_info is the type...
158 elf32_rel_386_type_values type =
159 (elf32_rel_386_type_values)(unsigned char)rel.r_info;
160
161 // The other 3 bytes of r_info are the symbol
162 uint32 symbol = rel.r_info >> 8;
163
164 switch(type)
165 {
166 case R_386_NONE:
167 case R_386_32:
168 case R_386_PC32:
169 case R_386_GOT32:
170 case R_386_PLT32:
171 case R_386_COPY:
172 case R_386_GLOB_DAT:
173 case R_386_JMP_SLOT:
174 return false;
175
176 case R_386_RELATIVE:
177 if (symbol != 0)
178 return false;
179
180 // This is a basic ABS32 relocation address
181 *result = rel.r_offset;
182 return true;
183
184 case R_386_GOTOFF:
185 case R_386_GOTPC:
186 case R_386_TLS_TPOFF:
187 return false;
188 }
189
190 return false;
191 }
192
193 // Returns RVA for an in memory address, or NULL.
194 CheckBool DisassemblerElf32X86::RVAToFileOffset(Elf32_Addr addr,
195 size_t* result) const {
196
197 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
198 Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
199 Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
200
201 if (addr >= begin && addr < end) {
202 Elf32_Addr offset = addr - begin;
203
204 if (offset < ProgramSegmentFileSize(i)) {
205 *result = ProgramSegmentFileOffset(i) + offset;
206 return true;
207 }
208 }
209 }
210
211 return false;
212 }
213
214 RVA DisassemblerElf32X86::FileOffsetToRVA(size_t offset) const {
215 // File offsets can be 64 bit values, but we are dealing with 32
216 // bit executables and so only need to support 32bit file sizes.
217 uint32 offset32 = (uint32)offset;
218
219 for (int i = 0; i < SectionHeaderCount(); i++) {
220
221 const Elf32_Shdr *section_header = SectionHeader(i);
222
223 // These can appear to have a size in the file, but don't.
224 if (section_header->sh_type == SHT_NOBITS)
225 continue;
226
227 Elf32_Off section_begin = section_header->sh_offset;
228 Elf32_Off section_end = section_begin + section_header->sh_size;
229
230 if (offset32 >= section_begin && offset32 < section_end) {
231 return section_header->sh_addr + (offset32 - section_begin);
232 }
233 }
234
235 return 0;
236 }
237
238 CheckBool DisassemblerElf32X86::RVAsToOffsets(std::vector<RVA>* rvas,
239 std::vector<size_t>* offsets) {
240 offsets->clear();
241
242 for (std::vector<RVA>::iterator rva = rvas->begin();
243 rva != rvas->end();
244 rva++) {
245
246 size_t offset;
247
248 if (!RVAToFileOffset(*rva, &offset))
249 return false;
250
251 offsets->push_back(offset);
252 }
253
254 return true;
255 }
256
257 CheckBool DisassemblerElf32X86::ParseFile(AssemblyProgram* program) {
258 bool ok = true;
259
260 // Walk all the bytes in the file, whether or not in a section.
261 uint32 file_offset = 0;
262
263 std::vector<size_t> abs_offsets;
264 std::vector<size_t> rel_offsets;
265
266 if (ok)
267 ok = RVAsToOffsets(&abs32_locations_, &abs_offsets);
268
269 if (ok)
270 ok = RVAsToOffsets(&rel32_locations_, &rel_offsets);
271
272 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
273 std::vector<size_t>::iterator current_rel_offset = rel_offsets.begin();
274
275 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
276 std::vector<size_t>::iterator end_rel_offset = rel_offsets.end();
277
278 for (int section_id = 0;
279 ok && (section_id < SectionHeaderCount());
280 section_id++) {
281
282 const Elf32_Shdr *section_header = SectionHeader(section_id);
283
284 if (ok) {
285 ok = ParseSimpleRegion(file_offset,
286 section_header->sh_offset,
287 program);
288 file_offset = section_header->sh_offset;
289 }
290
291 switch (section_header->sh_type) {
292 case SHT_REL:
293 if (ok) {
294 ok = ParseRelocationSection(section_header, program);
295 file_offset = section_header->sh_offset + section_header->sh_size;
296 }
297 break;
298 case SHT_PROGBITS:
299 if (ok) {
300 ok = ParseProgbitsSection(section_header,
301 &current_abs_offset, end_abs_offset,
302 &current_rel_offset, end_rel_offset,
303 program);
304 file_offset = section_header->sh_offset + section_header->sh_size;
305 }
306
307 break;
308 default:
309 break;
310 }
311 }
312
313 // Rest of the file past the last section
314 if (ok) {
315 ok = ParseSimpleRegion(file_offset,
316 length(),
317 program);
318 }
319
320 // Make certain we consume all of the relocations as expected
321 ok = ok && (current_abs_offset == end_abs_offset);
322
323 return ok;
324 }
325
326 CheckBool DisassemblerElf32X86::ParseRelocationSection(
327 const Elf32_Shdr *section_header,
328 AssemblyProgram* program) {
329 // We can reproduce the R_386_RELATIVE entries in one of the relocation
330 // table based on other information in the patch, given these
331 // conditions....
332 //
333 // All R_386_RELATIVE entries are:
334 // 1) In the same relocation table
335 // 2) Are consecutive
336 // 3) Are sorted in memory address order
337 //
338 // Happily, this is normally the case, but it's not required by spec
339 // so we check, and just don't do it if we don't match up.
340
341 // The expectation is that one relocation section will contain
342 // all of our R_386_RELATIVE entries in the expected order followed
343 // by assorted other entries we can't use special handling for.
344
345 bool ok = true;
346 bool match = true;
347
348 // Walk all the bytes in the section, matching relocation table or not
349 size_t file_offset = section_header->sh_offset;
350 size_t section_end = section_header->sh_offset + section_header->sh_size;
351
352 Elf32_Rel *section_relocs_iter =
353 (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
354
355 uint32 section_relocs_count = section_header->sh_size /
356 section_header->sh_entsize;
357
358 if (abs32_locations_.size() > section_relocs_count)
359 match = false;
360
361 std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
362
363 while (match && (reloc_iter != abs32_locations_.end())) {
364 if (section_relocs_iter->r_info != R_386_RELATIVE ||
365 section_relocs_iter->r_offset != *reloc_iter)
366 match = false;
367 section_relocs_iter++;
368 reloc_iter++;
369 }
370
371 if (match) {
372 // Skip over relocation tables
373 ok = program->EmitElfRelocationInstruction();
374 file_offset += sizeof(Elf32_Rel) * abs32_locations_.size();
375 }
376
377 if (ok) {
378 ok = ParseSimpleRegion(file_offset, section_end, program);
379 }
380
381 return ok;
382 }
383
384 CheckBool DisassemblerElf32X86::ParseProgbitsSection(
385 const Elf32_Shdr *section_header,
386 std::vector<size_t>::iterator* current_abs_offset,
387 std::vector<size_t>::iterator end_abs_offset,
388 std::vector<size_t>::iterator* current_rel_offset,
389 std::vector<size_t>::iterator end_rel_offset,
390 AssemblyProgram* program) {
391
392 bool ok = true;
393
394 // Walk all the bytes in the file, whether or not in a section.
395 size_t file_offset = section_header->sh_offset;
396 size_t section_end = section_header->sh_offset + section_header->sh_size;
397
398 Elf32_Addr origin = section_header->sh_addr;
399 size_t origin_offset = section_header->sh_offset;
400 ok = program->EmitOriginInstruction(origin);
401
402 while (ok && file_offset < section_end) {
403
404 if (*current_abs_offset != end_abs_offset &&
405 file_offset > **current_abs_offset) {
406 ok = false;
407 }
408
409 while (*current_rel_offset != end_rel_offset &&
410 file_offset > **current_rel_offset) {
411 (*current_rel_offset)++;
412 }
413
414 size_t next_relocation = section_end;
415
416 if (*current_abs_offset != end_abs_offset &&
417 next_relocation > **current_abs_offset)
418 next_relocation = **current_abs_offset;
419
420 // Rel offsets are heuristically derived, and might (incorrectly) overlap
421 // an Abs value, or the end of the section, so +3 to make sure there is
422 // room for the full 4 byte value.
423 if (*current_rel_offset != end_rel_offset &&
424 next_relocation > (**current_rel_offset + 3))
425 next_relocation = **current_rel_offset;
426
427 if (ok && (next_relocation > file_offset)) {
428 ok = ParseSimpleRegion(file_offset, next_relocation, program);
429
430 file_offset = next_relocation;
431 continue;
432 }
433
434 if (ok &&
435 *current_abs_offset != end_abs_offset &&
436 file_offset == **current_abs_offset) {
437
438 const uint8* p = OffsetToPointer(file_offset);
439 RVA target_rva = Read32LittleEndian(p);
440
441 ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva));
442 file_offset += sizeof(RVA);
443 (*current_abs_offset)++;
444 continue;
445 }
446
447 if (ok &&
448 *current_rel_offset != end_rel_offset &&
449 file_offset == **current_rel_offset) {
450
451 const uint8* p = OffsetToPointer(file_offset);
452 uint32 relative_target = Read32LittleEndian(p);
453 // This cast is for 64 bit systems, and is only safe because we
454 // are working on 32 bit executables.
455 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
456 4 + relative_target);
457
458 ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
459 file_offset += sizeof(RVA);
460 (*current_rel_offset)++;
461 continue;
462 }
463 }
464
465 // Rest of the section (if any)
466 if (ok) {
467 ok = ParseSimpleRegion(file_offset, section_end, program);
468 }
469
470 return ok;
471 }
472
473 CheckBool DisassemblerElf32X86::ParseSimpleRegion(
474 size_t start_file_offset,
475 size_t end_file_offset,
476 AssemblyProgram* program) {
477
478 const uint8* start = OffsetToPointer(start_file_offset);
479 const uint8* end = OffsetToPointer(end_file_offset);
480
481 const uint8* p = start;
482
483 bool ok = true;
484 while (p < end && ok) {
485 ok = program->EmitByteInstruction(*p);
486 ++p;
487 }
488
489 return ok;
490 }
491
492 CheckBool DisassemblerElf32X86::ParseAbs32Relocs() {
493 abs32_locations_.clear();
494
495 // Loop through sections for relocation sections
496 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
497 const Elf32_Shdr *section_header = SectionHeader(section_id);
498
499 if (section_header->sh_type == SHT_REL) {
500
501 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
502
503 int relocs_table_count = section_header->sh_size /
504 section_header->sh_entsize;
505
506 // Elf32_Word relocation_section_id = section_header->sh_info;
507
508 // Loop through relocation objects in the relocation section
509 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
510 RVA rva;
511
512 // Quite a few of these conversions fail, and we simply skip
513 // them, that's okay.
514 if (RelToRVA(relocs_table[rel_id], &rva))
515 abs32_locations_.push_back(rva);
516 }
517 }
518 }
519
520 std::sort(abs32_locations_.begin(), abs32_locations_.end());
521 return true;
522 }
523
524 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSections() {
525
526 rel32_locations_.clear();
527
528 // Loop through sections for relocation sections
529 for (int section_id = 0;
530 section_id < SectionHeaderCount();
531 section_id++) {
532
533 const Elf32_Shdr *section_header = SectionHeader(section_id);
534
535 if (section_header->sh_type != SHT_PROGBITS)
536 continue;
537
538 if (!ParseRel32RelocsFromSection(section_header))
539 return false;
540 }
541
542 std::sort(rel32_locations_.begin(), rel32_locations_.end());
543 return true;
544 }
545
546 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
547 const Elf32_Shdr* section_header) {
548
549 uint32 start_file_offset = section_header->sh_offset;
550 uint32 end_file_offset = start_file_offset + section_header->sh_size;
551
552 const uint8* start_pointer = OffsetToPointer(start_file_offset);
553 const uint8* end_pointer = OffsetToPointer(end_file_offset);
554
555 // Quick way to convert from Pointer to RVA within a single Section is to
556 // subtract 'pointer_to_rva'.
557 const uint8* const adjust_pointer_to_rva = start_pointer -
558 section_header->sh_addr;
559
560 // Find the rel32 relocations.
561 const uint8* p = start_pointer;
562 while (p < end_pointer) {
563 //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
564
565 // Heuristic discovery of rel32 locations in instruction stream: are the
566 // next few bytes the start of an instruction containing a rel32
567 // addressing mode?
568 const uint8* rel32 = NULL;
569
570 if (p + 5 < end_pointer) {
571 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
572 rel32 = p + 1;
573 }
574 }
575 if (p + 6 < end_pointer) {
576 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
577 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
578 rel32 = p + 2;
579 }
580 }
581 if (rel32) {
582 RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
583
584 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
585 // To be valid, rel32 target must be within image, and within this
586 // section.
587 if (IsValidRVA(target_rva)) {
588 rel32_locations_.push_back(rel32_rva);
589 #if COURGETTE_HISTOGRAM_TARGETS
590 ++rel32_target_rvas_[target_rva];
591 #endif
592 p += 4;
593 continue;
594 }
595 }
596 p += 1;
597 }
598
599 return true;
600 }
601
602 } // namespace courgette
OLDNEW
« no previous file with comments | « courgette/disassembler_elf_32_x86.h ('k') | courgette/disassembler_elf_32_x86_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698