Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: courgette/disassembler.cc

Issue 7920004: Start refactoring to reduce executable type knowledge. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Fix windows compile warning. Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « courgette/disassembler.h ('k') | courgette/disassembler_win32_x86.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "courgette/disassembler.h" 5 #include "courgette/disassembler.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/basictypes.h" 11 #include "base/basictypes.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 13
14 #include "courgette/assembly_program.h" 14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h" 15 #include "courgette/courgette.h"
16 #include "courgette/disassembler_win32_x86.h"
16 #include "courgette/encoded_program.h" 17 #include "courgette/encoded_program.h"
17 #include "courgette/image_info.h" 18 #include "courgette/image_info.h"
18 19
19 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently 20 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
20 // different target addresses are referenced. Purely for debugging. 21 // different target addresses are referenced. Purely for debugging.
21 #define COURGETTE_HISTOGRAM_TARGETS 0 22 #define COURGETTE_HISTOGRAM_TARGETS 0
22 23
23 namespace courgette { 24 namespace courgette {
24 25
25 class DisassemblerWin32X86 : public Disassembler { 26 ////////////////////////////////////////////////////////////////////////////////
26 public:
27 explicit DisassemblerWin32X86(PEInfo* pe_info)
28 : pe_info_(pe_info),
29 incomplete_disassembly_(false) {
30 }
31 27
32 virtual bool Disassemble(AssemblyProgram* target); 28 ExecutableType DetectExecutableType(const void* buffer, size_t length) {
33 29
34 virtual void Destroy() { delete this; } 30 bool parsed = false;
35 31
36 protected: 32 PEInfo* pe_info = new PEInfo();
37 PEInfo& pe_info() { return *pe_info_; } 33 pe_info->Init(buffer, length);
34 parsed = pe_info->ParseHeader();
35 delete pe_info;
38 36
39 CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; 37 if (parsed)
40 bool ParseAbs32Relocs(); 38 return WIN32_X86;
41 void ParseRel32RelocsFromSections();
42 void ParseRel32RelocsFromSection(const Section* section);
43 39
44 CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, 40 return UNKNOWN;
45 uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT;
46 CheckBool ParseFileRegion(const Section* section,
47 uint32 start_file_offset, uint32 end_file_offset,
48 AssemblyProgram* program) WARN_UNUSED_RESULT;
49
50 #if COURGETTE_HISTOGRAM_TARGETS
51 void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
52 #endif
53
54 PEInfo* pe_info_;
55 bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
56
57 std::vector<RVA> abs32_locations_;
58 std::vector<RVA> rel32_locations_;
59
60 #if COURGETTE_HISTOGRAM_TARGETS
61 std::map<RVA, int> abs32_target_rvas_;
62 std::map<RVA, int> rel32_target_rvas_;
63 #endif
64 };
65
66 bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
67 if (!pe_info().ok())
68 return false;
69
70 target->set_image_base(pe_info().image_base());
71
72 if (!ParseAbs32Relocs())
73 return false;
74
75 ParseRel32RelocsFromSections();
76
77 if (!ParseFile(target))
78 return false;
79
80 target->DefaultAssignIndexes();
81
82 return true;
83 } 41 }
84 42
85 static uint32 Read32LittleEndian(const void* address) { 43 Status ParseDetectedExecutable(const void* buffer, size_t length,
86 return *reinterpret_cast<const uint32*>(address); 44 AssemblyProgram** output) {
87 }
88
89 bool DisassemblerWin32X86::ParseAbs32Relocs() {
90 abs32_locations_.clear();
91 if (!pe_info().ParseRelocs(&abs32_locations_))
92 return false;
93
94 std::sort(abs32_locations_.begin(), abs32_locations_.end());
95
96 #if COURGETTE_HISTOGRAM_TARGETS
97 for (size_t i = 0; i < abs32_locations_.size(); ++i) {
98 RVA rva = abs32_locations_[i];
99 // The 4 bytes at the relocation are a reference to some address.
100 uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva));
101 ++abs32_target_rvas_[target_address - pe_info().image_base()];
102 }
103 #endif
104 return true;
105 }
106
107 void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
108 uint32 file_offset = 0;
109 while (file_offset < pe_info().length()) {
110 const Section* section = pe_info().FindNextSection(file_offset);
111 if (section == NULL)
112 break;
113 if (file_offset < section->file_offset_of_raw_data)
114 file_offset = section->file_offset_of_raw_data;
115 ParseRel32RelocsFromSection(section);
116 file_offset += section->size_of_raw_data;
117 }
118 std::sort(rel32_locations_.begin(), rel32_locations_.end());
119
120 #if COURGETTE_HISTOGRAM_TARGETS
121 VLOG(1) << "abs32_locations_ " << abs32_locations_.size()
122 << "\nrel32_locations_ " << rel32_locations_.size()
123 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size()
124 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size();
125
126 int common = 0;
127 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin();
128 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
129 while (abs32_iter != abs32_target_rvas_.end() &&
130 rel32_iter != rel32_target_rvas_.end()) {
131 if (abs32_iter->first < rel32_iter->first)
132 ++abs32_iter;
133 else if (rel32_iter->first < abs32_iter->first)
134 ++rel32_iter;
135 else {
136 ++common;
137 ++abs32_iter;
138 ++rel32_iter;
139 }
140 }
141 VLOG(1) << "common " << common;
142 #endif
143 }
144
145 void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
146 // TODO(sra): use characteristic.
147 bool isCode = strcmp(section->name, ".text") == 0;
148 if (!isCode)
149 return;
150
151 uint32 start_file_offset = section->file_offset_of_raw_data;
152 uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
153 RVA relocs_start_rva = pe_info().base_relocation_table().address_;
154
155 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
156 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
157
158 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
159 RVA end_rva = start_rva + section->virtual_size;
160
161 // Quick way to convert from Pointer to RVA within a single Section is to
162 // subtract 'pointer_to_rva'.
163 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
164
165 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
166
167 // Find the rel32 relocations.
168 const uint8* p = start_pointer;
169 while (p < end_pointer) {
170 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
171 if (current_rva == relocs_start_rva) {
172 uint32 relocs_size = pe_info().base_relocation_table().size_;
173 if (relocs_size) {
174 p += relocs_size;
175 continue;
176 }
177 }
178
179 //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
180 // ++abs32_pos;
181
182 // Heuristic discovery of rel32 locations in instruction stream: are the
183 // next few bytes the start of an instruction containing a rel32
184 // addressing mode?
185 const uint8* rel32 = NULL;
186
187 if (p + 5 < end_pointer) {
188 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
189 rel32 = p + 1;
190 }
191 }
192 if (p + 6 < end_pointer) {
193 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
194 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
195 rel32 = p + 2;
196 }
197 }
198 if (rel32) {
199 RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
200
201 // Is there an abs32 reloc overlapping the candidate?
202 while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3)
203 ++abs32_pos;
204 // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
205 // region that could overlap rel32_rva.
206 if (abs32_pos != abs32_locations_.end()) {
207 if (*abs32_pos < rel32_rva + 4) {
208 // Beginning of abs32 reloc is before end of rel32 reloc so they
209 // overlap. Skip four bytes past the abs32 reloc.
210 p += (*abs32_pos + 4) - current_rva;
211 continue;
212 }
213 }
214
215 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
216 // To be valid, rel32 target must be within image, and within this
217 // section.
218 if (pe_info().IsValidRVA(target_rva) &&
219 start_rva <= target_rva && target_rva < end_rva) {
220 rel32_locations_.push_back(rel32_rva);
221 #if COURGETTE_HISTOGRAM_TARGETS
222 ++rel32_target_rvas_[target_rva];
223 #endif
224 p += 4;
225 continue;
226 }
227 }
228 p += 1;
229 }
230 }
231
232 CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
233 bool ok = true;
234 // Walk all the bytes in the file, whether or not in a section.
235 uint32 file_offset = 0;
236 while (ok && file_offset < pe_info().length()) {
237 const Section* section = pe_info().FindNextSection(file_offset);
238 if (section == NULL) {
239 // No more sections. There should not be extra stuff following last
240 // section.
241 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
242 break;
243 }
244 if (file_offset < section->file_offset_of_raw_data) {
245 uint32 section_start_offset = section->file_offset_of_raw_data;
246 ok = ParseNonSectionFileRegion(file_offset, section_start_offset,
247 program);
248 file_offset = section_start_offset;
249 }
250 if (ok) {
251 uint32 end = file_offset + section->size_of_raw_data;
252 ok = ParseFileRegion(section, file_offset, end, program);
253 file_offset = end;
254 }
255 }
256
257 #if COURGETTE_HISTOGRAM_TARGETS
258 HistogramTargets("abs32 relocs", abs32_target_rvas_);
259 HistogramTargets("rel32 relocs", rel32_target_rvas_);
260 #endif
261
262 return ok;
263 }
264
265 CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
266 uint32 start_file_offset,
267 uint32 end_file_offset,
268 AssemblyProgram* program) {
269 if (incomplete_disassembly_)
270 return true;
271
272 const uint8* start = pe_info().FileOffsetToPointer(start_file_offset);
273 const uint8* end = pe_info().FileOffsetToPointer(end_file_offset);
274
275 const uint8* p = start;
276
277 bool ok = true;
278 while (p < end && ok) {
279 ok = program->EmitByteInstruction(*p);
280 ++p;
281 }
282
283 return ok;
284 }
285
286 CheckBool DisassemblerWin32X86::ParseFileRegion(
287 const Section* section,
288 uint32 start_file_offset, uint32 end_file_offset,
289 AssemblyProgram* program) {
290 RVA relocs_start_rva = pe_info().base_relocation_table().address_;
291
292 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
293 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
294
295 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
296 RVA end_rva = start_rva + section->virtual_size;
297
298 // Quick way to convert from Pointer to RVA within a single Section is to
299 // subtract 'pointer_to_rva'.
300 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
301
302 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin();
303 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
304
305 bool ok = program->EmitOriginInstruction(start_rva);
306
307 const uint8* p = start_pointer;
308
309 while (ok && p < end_pointer) {
310 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
311
312 // The base relocation table is usually in the .relocs section, but it could
313 // actually be anywhere. Make sure we skip it because we will regenerate it
314 // during assembly.
315 if (current_rva == relocs_start_rva) {
316 ok = program->EmitMakeRelocsInstruction();
317 if (!ok)
318 break;
319 uint32 relocs_size = pe_info().base_relocation_table().size_;
320 if (relocs_size) {
321 p += relocs_size;
322 continue;
323 }
324 }
325
326 while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
327 ++abs32_pos;
328
329 if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
330 uint32 target_address = Read32LittleEndian(p);
331 RVA target_rva = target_address - pe_info().image_base();
332 // TODO(sra): target could be Label+offset. It is not clear how to guess
333 // which it might be. We assume offset==0.
334 ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva));
335 if (!ok)
336 break;
337 p += 4;
338 continue;
339 }
340
341 while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva)
342 ++rel32_pos;
343
344 if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) {
345 RVA target_rva = current_rva + 4 + Read32LittleEndian(p);
346 ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
347 p += 4;
348 continue;
349 }
350
351 if (incomplete_disassembly_) {
352 if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) &&
353 (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) &&
354 (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) {
355 // No more relocs in this section, don't bother encoding bytes.
356 break;
357 }
358 }
359
360 ok = program->EmitByteInstruction(*p);
361 p += 1;
362 }
363
364 return ok;
365 }
366
367 #if COURGETTE_HISTOGRAM_TARGETS
368 // Histogram is printed to std::cout. It is purely for debugging the algorithm
369 // and is only enabled manually in 'exploration' builds. I don't want to add
370 // command-line configuration for this feature because this code has to be
371 // small, which means compiled-out.
372 void DisassemblerWin32X86::HistogramTargets(const char* kind,
373 const std::map<RVA, int>& map) {
374 int total = 0;
375 std::map<int, std::vector<RVA> > h;
376 for (std::map<RVA, int>::const_iterator p = map.begin();
377 p != map.end();
378 ++p) {
379 h[p->second].push_back(p->first);
380 total += p->second;
381 }
382
383 std::cout << total << " " << kind << " to "
384 << map.size() << " unique targets" << std::endl;
385
386 std::cout << "indegree: #targets-with-indegree (example)" << std::endl;
387 const int kFirstN = 15;
388 bool someSkipped = false;
389 int index = 0;
390 for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin();
391 p != h.rend();
392 ++p) {
393 ++index;
394 if (index <= kFirstN || p->first <= 3) {
395 if (someSkipped) {
396 std::cout << "..." << std::endl;
397 }
398 size_t count = p->second.size();
399 std::cout << std::dec << p->first << ": " << count;
400 if (count <= 2) {
401 for (size_t i = 0; i < count; ++i)
402 std::cout << " " << pe_info().DescribeRVA(p->second[i]);
403 }
404 std::cout << std::endl;
405 someSkipped = false;
406 } else {
407 someSkipped = true;
408 }
409 }
410 }
411 #endif // COURGETTE_HISTOGRAM_TARGETS
412
413 Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) {
414 return new DisassemblerWin32X86(pe_info);
415 }
416
417 ////////////////////////////////////////////////////////////////////////////////
418
419 Status ParseWin32X86PE(const void* buffer, size_t length,
420 AssemblyProgram** output) {
421 *output = NULL; 45 *output = NULL;
422 46
423 PEInfo* pe_info = new PEInfo(); 47 PEInfo* pe_info = new PEInfo();
424 pe_info->Init(buffer, length); 48 pe_info->Init(buffer, length);
425 49
426 if (!pe_info->ParseHeader()) { 50 if (!pe_info->ParseHeader()) {
427 delete pe_info; 51 delete pe_info;
428 return C_INPUT_NOT_RECOGNIZED; 52 return C_INPUT_NOT_RECOGNIZED;
429 } 53 }
430 54
431 Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); 55 Disassembler* disassembler = new DisassemblerWin32X86(pe_info);
432 AssemblyProgram* program = new AssemblyProgram(); 56 AssemblyProgram* program = new AssemblyProgram();
433 57
434 if (!disassembler->Disassemble(program)) { 58 if (!disassembler->Disassemble(program)) {
435 delete program; 59 delete program;
436 disassembler->Destroy(); 60 delete disassembler;
437 delete pe_info; 61 delete pe_info;
438 return C_DISASSEMBLY_FAILED; 62 return C_DISASSEMBLY_FAILED;
439 } 63 }
440 64
441 disassembler->Destroy(); 65 delete disassembler;
442 delete pe_info; 66 delete pe_info;
443 *output = program; 67 *output = program;
444 return C_OK; 68 return C_OK;
445 } 69 }
446 70
447 void DeleteAssemblyProgram(AssemblyProgram* program) { 71 void DeleteAssemblyProgram(AssemblyProgram* program) {
448 delete program; 72 delete program;
449 } 73 }
450 74
451 } // namespace courgette 75 } // namespace courgette
OLD | NEW
« no previous file with comments | « courgette/disassembler.h ('k') | courgette/disassembler_win32_x86.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698