OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "courgette/disassembler.h" | 5 #include "courgette/disassembler.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <string> | 8 #include <string> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 | 13 |
14 #include "courgette/assembly_program.h" | 14 #include "courgette/assembly_program.h" |
15 #include "courgette/courgette.h" | 15 #include "courgette/courgette.h" |
| 16 #include "courgette/disassembler_win32_x86.h" |
16 #include "courgette/encoded_program.h" | 17 #include "courgette/encoded_program.h" |
17 #include "courgette/image_info.h" | 18 #include "courgette/image_info.h" |
18 | 19 |
19 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently | 20 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently |
20 // different target addresses are referenced. Purely for debugging. | 21 // different target addresses are referenced. Purely for debugging. |
21 #define COURGETTE_HISTOGRAM_TARGETS 0 | 22 #define COURGETTE_HISTOGRAM_TARGETS 0 |
22 | 23 |
23 namespace courgette { | 24 namespace courgette { |
24 | 25 |
25 class DisassemblerWin32X86 : public Disassembler { | 26 //////////////////////////////////////////////////////////////////////////////// |
26 public: | |
27 explicit DisassemblerWin32X86(PEInfo* pe_info) | |
28 : pe_info_(pe_info), | |
29 incomplete_disassembly_(false) { | |
30 } | |
31 | 27 |
32 virtual bool Disassemble(AssemblyProgram* target); | 28 ExecutableType DetectExecutableType(const void* buffer, size_t length) { |
33 | 29 |
34 virtual void Destroy() { delete this; } | 30 bool parsed = false; |
35 | 31 |
36 protected: | 32 PEInfo* pe_info = new PEInfo(); |
37 PEInfo& pe_info() { return *pe_info_; } | 33 pe_info->Init(buffer, length); |
| 34 parsed = pe_info->ParseHeader(); |
| 35 delete pe_info; |
38 | 36 |
39 CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; | 37 if (parsed) |
40 bool ParseAbs32Relocs(); | 38 return WIN32_X86; |
41 void ParseRel32RelocsFromSections(); | |
42 void ParseRel32RelocsFromSection(const Section* section); | |
43 | 39 |
44 CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, | 40 return UNKNOWN; |
45 uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; | |
46 CheckBool ParseFileRegion(const Section* section, | |
47 uint32 start_file_offset, uint32 end_file_offset, | |
48 AssemblyProgram* program) WARN_UNUSED_RESULT; | |
49 | |
50 #if COURGETTE_HISTOGRAM_TARGETS | |
51 void HistogramTargets(const char* kind, const std::map<RVA, int>& map); | |
52 #endif | |
53 | |
54 PEInfo* pe_info_; | |
55 bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits | |
56 | |
57 std::vector<RVA> abs32_locations_; | |
58 std::vector<RVA> rel32_locations_; | |
59 | |
60 #if COURGETTE_HISTOGRAM_TARGETS | |
61 std::map<RVA, int> abs32_target_rvas_; | |
62 std::map<RVA, int> rel32_target_rvas_; | |
63 #endif | |
64 }; | |
65 | |
66 bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { | |
67 if (!pe_info().ok()) | |
68 return false; | |
69 | |
70 target->set_image_base(pe_info().image_base()); | |
71 | |
72 if (!ParseAbs32Relocs()) | |
73 return false; | |
74 | |
75 ParseRel32RelocsFromSections(); | |
76 | |
77 if (!ParseFile(target)) | |
78 return false; | |
79 | |
80 target->DefaultAssignIndexes(); | |
81 | |
82 return true; | |
83 } | 41 } |
84 | 42 |
// Returns the 32-bit little-endian value stored in the four bytes starting at
// |address|.  The value is assembled one byte at a time, so the result does
// not depend on the host byte order and |address| does not have to be 4-byte
// aligned (the operands read through this helper sit at arbitrary offsets
// inside the instruction stream).
static uint32 Read32LittleEndian(const void* address) {
  const unsigned char* bytes = static_cast<const unsigned char*>(address);
  return static_cast<uint32>(bytes[0]) |
         (static_cast<uint32>(bytes[1]) << 8) |
         (static_cast<uint32>(bytes[2]) << 16) |
         (static_cast<uint32>(bytes[3]) << 24);
}
88 | |
89 bool DisassemblerWin32X86::ParseAbs32Relocs() { | |
90 abs32_locations_.clear(); | |
91 if (!pe_info().ParseRelocs(&abs32_locations_)) | |
92 return false; | |
93 | |
94 std::sort(abs32_locations_.begin(), abs32_locations_.end()); | |
95 | |
96 #if COURGETTE_HISTOGRAM_TARGETS | |
97 for (size_t i = 0; i < abs32_locations_.size(); ++i) { | |
98 RVA rva = abs32_locations_[i]; | |
99 // The 4 bytes at the relocation are a reference to some address. | |
100 uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); | |
101 ++abs32_target_rvas_[target_address - pe_info().image_base()]; | |
102 } | |
103 #endif | |
104 return true; | |
105 } | |
106 | |
107 void DisassemblerWin32X86::ParseRel32RelocsFromSections() { | |
108 uint32 file_offset = 0; | |
109 while (file_offset < pe_info().length()) { | |
110 const Section* section = pe_info().FindNextSection(file_offset); | |
111 if (section == NULL) | |
112 break; | |
113 if (file_offset < section->file_offset_of_raw_data) | |
114 file_offset = section->file_offset_of_raw_data; | |
115 ParseRel32RelocsFromSection(section); | |
116 file_offset += section->size_of_raw_data; | |
117 } | |
118 std::sort(rel32_locations_.begin(), rel32_locations_.end()); | |
119 | |
120 #if COURGETTE_HISTOGRAM_TARGETS | |
121 VLOG(1) << "abs32_locations_ " << abs32_locations_.size() | |
122 << "\nrel32_locations_ " << rel32_locations_.size() | |
123 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() | |
124 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); | |
125 | |
126 int common = 0; | |
127 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); | |
128 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); | |
129 while (abs32_iter != abs32_target_rvas_.end() && | |
130 rel32_iter != rel32_target_rvas_.end()) { | |
131 if (abs32_iter->first < rel32_iter->first) | |
132 ++abs32_iter; | |
133 else if (rel32_iter->first < abs32_iter->first) | |
134 ++rel32_iter; | |
135 else { | |
136 ++common; | |
137 ++abs32_iter; | |
138 ++rel32_iter; | |
139 } | |
140 } | |
141 VLOG(1) << "common " << common; | |
142 #endif | |
143 } | |
144 | |
145 void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { | |
146 // TODO(sra): use characteristic. | |
147 bool isCode = strcmp(section->name, ".text") == 0; | |
148 if (!isCode) | |
149 return; | |
150 | |
151 uint32 start_file_offset = section->file_offset_of_raw_data; | |
152 uint32 end_file_offset = start_file_offset + section->size_of_raw_data; | |
153 RVA relocs_start_rva = pe_info().base_relocation_table().address_; | |
154 | |
155 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); | |
156 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); | |
157 | |
158 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); | |
159 RVA end_rva = start_rva + section->virtual_size; | |
160 | |
161 // Quick way to convert from Pointer to RVA within a single Section is to | |
162 // subtract 'pointer_to_rva'. | |
163 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; | |
164 | |
165 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | |
166 | |
167 // Find the rel32 relocations. | |
168 const uint8* p = start_pointer; | |
169 while (p < end_pointer) { | |
170 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); | |
171 if (current_rva == relocs_start_rva) { | |
172 uint32 relocs_size = pe_info().base_relocation_table().size_; | |
173 if (relocs_size) { | |
174 p += relocs_size; | |
175 continue; | |
176 } | |
177 } | |
178 | |
179 //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) | |
180 // ++abs32_pos; | |
181 | |
182 // Heuristic discovery of rel32 locations in instruction stream: are the | |
183 // next few bytes the start of an instruction containing a rel32 | |
184 // addressing mode? | |
185 const uint8* rel32 = NULL; | |
186 | |
187 if (p + 5 < end_pointer) { | |
188 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 | |
189 rel32 = p + 1; | |
190 } | |
191 } | |
192 if (p + 6 < end_pointer) { | |
193 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form | |
194 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely | |
195 rel32 = p + 2; | |
196 } | |
197 } | |
198 if (rel32) { | |
199 RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); | |
200 | |
201 // Is there an abs32 reloc overlapping the candidate? | |
202 while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) | |
203 ++abs32_pos; | |
204 // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte | |
205 // region that could overlap rel32_rva. | |
206 if (abs32_pos != abs32_locations_.end()) { | |
207 if (*abs32_pos < rel32_rva + 4) { | |
208 // Beginning of abs32 reloc is before end of rel32 reloc so they | |
209 // overlap. Skip four bytes past the abs32 reloc. | |
210 p += (*abs32_pos + 4) - current_rva; | |
211 continue; | |
212 } | |
213 } | |
214 | |
215 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); | |
216 // To be valid, rel32 target must be within image, and within this | |
217 // section. | |
218 if (pe_info().IsValidRVA(target_rva) && | |
219 start_rva <= target_rva && target_rva < end_rva) { | |
220 rel32_locations_.push_back(rel32_rva); | |
221 #if COURGETTE_HISTOGRAM_TARGETS | |
222 ++rel32_target_rvas_[target_rva]; | |
223 #endif | |
224 p += 4; | |
225 continue; | |
226 } | |
227 } | |
228 p += 1; | |
229 } | |
230 } | |
231 | |
232 CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { | |
233 bool ok = true; | |
234 // Walk all the bytes in the file, whether or not in a section. | |
235 uint32 file_offset = 0; | |
236 while (ok && file_offset < pe_info().length()) { | |
237 const Section* section = pe_info().FindNextSection(file_offset); | |
238 if (section == NULL) { | |
239 // No more sections. There should not be extra stuff following last | |
240 // section. | |
241 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); | |
242 break; | |
243 } | |
244 if (file_offset < section->file_offset_of_raw_data) { | |
245 uint32 section_start_offset = section->file_offset_of_raw_data; | |
246 ok = ParseNonSectionFileRegion(file_offset, section_start_offset, | |
247 program); | |
248 file_offset = section_start_offset; | |
249 } | |
250 if (ok) { | |
251 uint32 end = file_offset + section->size_of_raw_data; | |
252 ok = ParseFileRegion(section, file_offset, end, program); | |
253 file_offset = end; | |
254 } | |
255 } | |
256 | |
257 #if COURGETTE_HISTOGRAM_TARGETS | |
258 HistogramTargets("abs32 relocs", abs32_target_rvas_); | |
259 HistogramTargets("rel32 relocs", rel32_target_rvas_); | |
260 #endif | |
261 | |
262 return ok; | |
263 } | |
264 | |
265 CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( | |
266 uint32 start_file_offset, | |
267 uint32 end_file_offset, | |
268 AssemblyProgram* program) { | |
269 if (incomplete_disassembly_) | |
270 return true; | |
271 | |
272 const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); | |
273 const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); | |
274 | |
275 const uint8* p = start; | |
276 | |
277 bool ok = true; | |
278 while (p < end && ok) { | |
279 ok = program->EmitByteInstruction(*p); | |
280 ++p; | |
281 } | |
282 | |
283 return ok; | |
284 } | |
285 | |
286 CheckBool DisassemblerWin32X86::ParseFileRegion( | |
287 const Section* section, | |
288 uint32 start_file_offset, uint32 end_file_offset, | |
289 AssemblyProgram* program) { | |
290 RVA relocs_start_rva = pe_info().base_relocation_table().address_; | |
291 | |
292 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); | |
293 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); | |
294 | |
295 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); | |
296 RVA end_rva = start_rva + section->virtual_size; | |
297 | |
298 // Quick way to convert from Pointer to RVA within a single Section is to | |
299 // subtract 'pointer_to_rva'. | |
300 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; | |
301 | |
302 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); | |
303 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | |
304 | |
305 bool ok = program->EmitOriginInstruction(start_rva); | |
306 | |
307 const uint8* p = start_pointer; | |
308 | |
309 while (ok && p < end_pointer) { | |
310 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); | |
311 | |
312 // The base relocation table is usually in the .relocs section, but it could | |
313 // actually be anywhere. Make sure we skip it because we will regenerate it | |
314 // during assembly. | |
315 if (current_rva == relocs_start_rva) { | |
316 ok = program->EmitMakeRelocsInstruction(); | |
317 if (!ok) | |
318 break; | |
319 uint32 relocs_size = pe_info().base_relocation_table().size_; | |
320 if (relocs_size) { | |
321 p += relocs_size; | |
322 continue; | |
323 } | |
324 } | |
325 | |
326 while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) | |
327 ++abs32_pos; | |
328 | |
329 if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { | |
330 uint32 target_address = Read32LittleEndian(p); | |
331 RVA target_rva = target_address - pe_info().image_base(); | |
332 // TODO(sra): target could be Label+offset. It is not clear how to guess | |
333 // which it might be. We assume offset==0. | |
334 ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); | |
335 if (!ok) | |
336 break; | |
337 p += 4; | |
338 continue; | |
339 } | |
340 | |
341 while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) | |
342 ++rel32_pos; | |
343 | |
344 if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { | |
345 RVA target_rva = current_rva + 4 + Read32LittleEndian(p); | |
346 ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); | |
347 p += 4; | |
348 continue; | |
349 } | |
350 | |
351 if (incomplete_disassembly_) { | |
352 if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && | |
353 (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && | |
354 (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { | |
355 // No more relocs in this section, don't bother encoding bytes. | |
356 break; | |
357 } | |
358 } | |
359 | |
360 ok = program->EmitByteInstruction(*p); | |
361 p += 1; | |
362 } | |
363 | |
364 return ok; | |
365 } | |
366 | |
#if COURGETTE_HISTOGRAM_TARGETS
// Prints to std::cout a histogram of how often each target in |map| is
// referenced, grouped by reference count in descending order.  This is a
// debugging aid that is switched on by hand in 'exploration' builds.  It has
// no command-line switch because this code has to stay small, which means
// compiled-out.
void DisassemblerWin32X86::HistogramTargets(const char* kind,
                                            const std::map<RVA, int>& map) {
  typedef std::map<int, std::vector<RVA> > TargetsByIndegree;

  // Invert the map: reference count -> the targets that have that count.
  int total = 0;
  TargetsByIndegree by_indegree;
  for (std::map<RVA, int>::const_iterator entry = map.begin();
       entry != map.end();
       ++entry) {
    by_indegree[entry->second].push_back(entry->first);
    total += entry->second;
  }

  std::cout << total << " " << kind << " to "
            << map.size() << " unique targets" << std::endl;

  std::cout << "indegree: #targets-with-indegree (example)" << std::endl;

  // Rows are shown for the first kFirstN counts and for every count <= 3;
  // a run of skipped rows in between is summarised by a single "...".
  const int kFirstN = 15;
  bool ellipsis_pending = false;
  int row = 0;
  for (TargetsByIndegree::reverse_iterator bucket = by_indegree.rbegin();
       bucket != by_indegree.rend();
       ++bucket) {
    ++row;
    if (row > kFirstN && bucket->first > 3) {
      ellipsis_pending = true;
      continue;
    }

    if (ellipsis_pending) {
      std::cout << "..." << std::endl;
    }

    const size_t count = bucket->second.size();
    std::cout << std::dec << bucket->first << ": " << count;
    // Name the targets themselves only when there are at most two of them.
    if (count <= 2) {
      for (size_t i = 0; i < count; ++i)
        std::cout << " " << pe_info().DescribeRVA(bucket->second[i]);
    }
    std::cout << std::endl;
    ellipsis_pending = false;
  }
}
#endif  // COURGETTE_HISTOGRAM_TARGETS
412 | |
413 Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) { | |
414 return new DisassemblerWin32X86(pe_info); | |
415 } | |
416 | |
417 //////////////////////////////////////////////////////////////////////////////// | |
418 | |
419 Status ParseWin32X86PE(const void* buffer, size_t length, | |
420 AssemblyProgram** output) { | |
421 *output = NULL; | 45 *output = NULL; |
422 | 46 |
423 PEInfo* pe_info = new PEInfo(); | 47 PEInfo* pe_info = new PEInfo(); |
424 pe_info->Init(buffer, length); | 48 pe_info->Init(buffer, length); |
425 | 49 |
426 if (!pe_info->ParseHeader()) { | 50 if (!pe_info->ParseHeader()) { |
427 delete pe_info; | 51 delete pe_info; |
428 return C_INPUT_NOT_RECOGNIZED; | 52 return C_INPUT_NOT_RECOGNIZED; |
429 } | 53 } |
430 | 54 |
431 Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); | 55 Disassembler* disassembler = new DisassemblerWin32X86(pe_info); |
432 AssemblyProgram* program = new AssemblyProgram(); | 56 AssemblyProgram* program = new AssemblyProgram(); |
433 | 57 |
434 if (!disassembler->Disassemble(program)) { | 58 if (!disassembler->Disassemble(program)) { |
435 delete program; | 59 delete program; |
436 disassembler->Destroy(); | 60 delete disassembler; |
437 delete pe_info; | 61 delete pe_info; |
438 return C_DISASSEMBLY_FAILED; | 62 return C_DISASSEMBLY_FAILED; |
439 } | 63 } |
440 | 64 |
441 disassembler->Destroy(); | 65 delete disassembler; |
442 delete pe_info; | 66 delete pe_info; |
443 *output = program; | 67 *output = program; |
444 return C_OK; | 68 return C_OK; |
445 } | 69 } |
446 | 70 |
447 void DeleteAssemblyProgram(AssemblyProgram* program) { | 71 void DeleteAssemblyProgram(AssemblyProgram* program) { |
448 delete program; | 72 delete program; |
449 } | 73 } |
450 | 74 |
451 } // namespace courgette | 75 } // namespace courgette |
OLD | NEW |