OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/courgette/disassembler.h" | |
6 | |
7 #include <algorithm> | |
8 #include <iostream> | |
9 #include <string> | |
10 #include <vector> | |
11 | |
12 #include "base/basictypes.h" | |
13 #include "base/logging.h" | |
14 | |
15 #include "third_party/courgette/assembly_program.h" | |
16 #include "third_party/courgette/courgette.h" | |
17 #include "third_party/courgette/encoded_program.h" | |
18 #include "third_party/courgette/image_info.h" | |
19 | |
20 // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently | |
21 // different target addresses are referenced. Purely for debugging. | |
22 #define COURGETTE_HISTOGRAM_TARGETS 0 | |
23 | |
24 namespace courgette { | |
25 | |
26 class DisassemblerWin32X86 : public Disassembler { | |
27 public: | |
28 explicit DisassemblerWin32X86(PEInfo* pe_info) | |
29 : pe_info_(pe_info), | |
30 incomplete_disassembly_(false) { | |
31 } | |
32 | |
33 virtual bool Disassemble(AssemblyProgram* target); | |
34 | |
35 virtual void Destroy() { delete this; } | |
36 | |
37 protected: | |
38 PEInfo& pe_info() { return *pe_info_; } | |
39 | |
40 void ParseFile(AssemblyProgram* target); | |
41 bool ParseAbs32Relocs(); | |
42 void ParseRel32RelocsFromSections(); | |
43 void ParseRel32RelocsFromSection(const Section* section); | |
44 | |
45 void ParseNonSectionFileRegion(uint32 start_file_offset, | |
46 uint32 end_file_offset, | |
47 AssemblyProgram* program); | |
48 void ParseFileRegion(const Section* section, | |
49 uint32 start_file_offset, uint32 end_file_offset, | |
50 AssemblyProgram* program); | |
51 | |
52 #if COURGETTE_HISTOGRAM_TARGETS | |
53 void HistogramTargets(const char* kind, const std::map<RVA, int>& map); | |
54 #endif | |
55 | |
56 PEInfo* pe_info_; | |
57 bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits | |
58 | |
59 std::vector<RVA> abs32_locations_; | |
60 std::vector<RVA> rel32_locations_; | |
61 | |
62 #if COURGETTE_HISTOGRAM_TARGETS | |
63 std::map<RVA, int> abs32_target_rvas_; | |
64 std::map<RVA, int> rel32_target_rvas_; | |
65 #endif | |
66 }; | |
67 | |
68 bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { | |
69 if (!pe_info().ok()) | |
70 return false; | |
71 | |
72 target->set_image_base(pe_info().image_base()); | |
73 | |
74 if (!ParseAbs32Relocs()) | |
75 return false; | |
76 | |
77 ParseRel32RelocsFromSections(); | |
78 | |
79 ParseFile(target); | |
80 | |
81 target->DefaultAssignIndexes(); | |
82 return true; | |
83 } | |
84 | |
85 static uint32 Read32LittleEndian(const void* address) { | |
86 return *reinterpret_cast<const uint32*>(address); | |
87 } | |
88 | |
89 bool DisassemblerWin32X86::ParseAbs32Relocs() { | |
90 abs32_locations_.clear(); | |
91 if (!pe_info().ParseRelocs(&abs32_locations_)) | |
92 return false; | |
93 | |
94 std::sort(abs32_locations_.begin(), abs32_locations_.end()); | |
95 | |
96 #if COURGETTE_HISTOGRAM_TARGETS | |
97 for (size_t i = 0; i < abs32_locations_.size(); ++i) { | |
98 RVA rva = abs32_locations_[i]; | |
99 // The 4 bytes at the relocation are a reference to some address. | |
100 uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); | |
101 ++abs32_target_rvas_[target_address - pe_info().image_base()]; | |
102 } | |
103 #endif | |
104 return true; | |
105 } | |
106 | |
107 void DisassemblerWin32X86::ParseRel32RelocsFromSections() { | |
108 uint32 file_offset = 0; | |
109 while (file_offset < pe_info().length()) { | |
110 const Section* section = pe_info().FindNextSection(file_offset); | |
111 if (section == NULL) | |
112 break; | |
113 if (file_offset < section->file_offset_of_raw_data) | |
114 file_offset = section->file_offset_of_raw_data; | |
115 ParseRel32RelocsFromSection(section); | |
116 file_offset += section->size_of_raw_data; | |
117 } | |
118 std::sort(rel32_locations_.begin(), rel32_locations_.end()); | |
119 | |
120 #if COURGETTE_HISTOGRAM_TARGETS | |
121 LOG(INFO) << "abs32_locations_ " << abs32_locations_.size(); | |
122 LOG(INFO) << "rel32_locations_ " << rel32_locations_.size(); | |
123 LOG(INFO) << "abs32_target_rvas_ " << abs32_target_rvas_.size(); | |
124 LOG(INFO) << "rel32_target_rvas_ " << rel32_target_rvas_.size(); | |
125 | |
126 int common = 0; | |
127 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); | |
128 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); | |
129 while (abs32_iter != abs32_target_rvas_.end() && | |
130 rel32_iter != rel32_target_rvas_.end()) { | |
131 if (abs32_iter->first < rel32_iter->first) | |
132 ++abs32_iter; | |
133 else if (rel32_iter->first < abs32_iter->first) | |
134 ++rel32_iter; | |
135 else { | |
136 ++common; | |
137 ++abs32_iter; | |
138 ++rel32_iter; | |
139 } | |
140 } | |
141 LOG(INFO) << "common " << common; | |
142 #endif | |
143 } | |
144 | |
145 void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { | |
146 // TODO(sra): use characteristic. | |
147 bool isCode = strcmp(section->name, ".text") == 0; | |
148 if (!isCode) | |
149 return; | |
150 | |
151 uint32 start_file_offset = section->file_offset_of_raw_data; | |
152 uint32 end_file_offset = start_file_offset + section->size_of_raw_data; | |
153 RVA relocs_start_rva = pe_info().base_relocation_table().address_; | |
154 | |
155 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); | |
156 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); | |
157 | |
158 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); | |
159 RVA end_rva = start_rva + section->virtual_size; | |
160 | |
161 // Quick way to convert from Pointer to RVA within a single Section is to | |
162 // subtract 'pointer_to_rva'. | |
163 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; | |
164 | |
165 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | |
166 | |
167 // Find the rel32 relocations. | |
168 const uint8* p = start_pointer; | |
169 while (p < end_pointer) { | |
170 RVA current_rva = p - adjust_pointer_to_rva; | |
171 if (current_rva == relocs_start_rva) { | |
172 uint32 relocs_size = pe_info().base_relocation_table().size_; | |
173 if (relocs_size) { | |
174 p += relocs_size; | |
175 continue; | |
176 } | |
177 } | |
178 | |
179 //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) | |
180 // ++abs32_pos; | |
181 | |
182 // Heuristic discovery of rel32 locations in instruction stream: are the | |
183 // next few bytes the start of an instruction containing a rel32 | |
184 // addressing mode? | |
185 const uint8* rel32 = NULL; | |
186 | |
187 if (p + 5 < end_pointer) { | |
188 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 | |
189 rel32 = p + 1; | |
190 } | |
191 } | |
192 if (p + 6 < end_pointer) { | |
193 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form | |
194 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely | |
195 rel32 = p + 2; | |
196 } | |
197 } | |
198 if (rel32) { | |
199 RVA rel32_rva = rel32 - adjust_pointer_to_rva; | |
200 | |
201 // Is there an abs32 reloc overlapping the candidate? | |
202 while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) | |
203 ++abs32_pos; | |
204 // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte | |
205 // region that could overlap rel32_rva. | |
206 if (abs32_pos != abs32_locations_.end()) { | |
207 if (*abs32_pos < rel32_rva + 4) { | |
208 // Beginning of abs32 reloc is before end of rel32 reloc so they | |
209 // overlap. Skip four bytes past the abs32 reloc. | |
210 p += (*abs32_pos + 4) - current_rva; | |
211 continue; | |
212 } | |
213 } | |
214 | |
215 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); | |
216 // To be valid, rel32 target must be within image, and within this | |
217 // section. | |
218 if (pe_info().IsValidRVA(target_rva) && | |
219 start_rva <= target_rva && target_rva < end_rva) { | |
220 rel32_locations_.push_back(rel32_rva); | |
221 #if COURGETTE_HISTOGRAM_TARGETS | |
222 ++rel32_target_rvas_[target_rva]; | |
223 #endif | |
224 p += 4; | |
225 continue; | |
226 } | |
227 } | |
228 p += 1; | |
229 } | |
230 } | |
231 | |
232 void DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { | |
233 // Walk all the bytes in the file, whether or not in a section. | |
234 uint32 file_offset = 0; | |
235 while (file_offset < pe_info().length()) { | |
236 const Section* section = pe_info().FindNextSection(file_offset); | |
237 if (section == NULL) { | |
238 // No more sections. There should not be extra stuff following last | |
239 // section. | |
240 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); | |
241 break; | |
242 } | |
243 if (file_offset < section->file_offset_of_raw_data) { | |
244 uint32 section_start_offset = section->file_offset_of_raw_data; | |
245 ParseNonSectionFileRegion(file_offset, section_start_offset, program); | |
246 file_offset = section_start_offset; | |
247 } | |
248 uint32 end = file_offset + section->size_of_raw_data; | |
249 ParseFileRegion(section, file_offset, end, program); | |
250 file_offset = end; | |
251 } | |
252 | |
253 #if COURGETTE_HISTOGRAM_TARGETS | |
254 HistogramTargets("abs32 relocs", abs32_target_rvas_); | |
255 HistogramTargets("rel32 relocs", rel32_target_rvas_); | |
256 #endif | |
257 } | |
258 | |
259 void DisassemblerWin32X86::ParseNonSectionFileRegion( | |
260 uint32 start_file_offset, | |
261 uint32 end_file_offset, | |
262 AssemblyProgram* program) { | |
263 if (incomplete_disassembly_) | |
264 return; | |
265 | |
266 const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); | |
267 const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); | |
268 | |
269 const uint8* p = start; | |
270 | |
271 while (p < end) { | |
272 program->EmitByteInstruction(*p); | |
273 ++p; | |
274 } | |
275 } | |
276 | |
277 void DisassemblerWin32X86::ParseFileRegion( | |
278 const Section* section, | |
279 uint32 start_file_offset, uint32 end_file_offset, | |
280 AssemblyProgram* program) { | |
281 RVA relocs_start_rva = pe_info().base_relocation_table().address_; | |
282 | |
283 const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); | |
284 const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); | |
285 | |
286 RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); | |
287 RVA end_rva = start_rva + section->virtual_size; | |
288 | |
289 // Quick way to convert from Pointer to RVA within a single Section is to | |
290 // subtract 'pointer_to_rva'. | |
291 const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; | |
292 | |
293 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); | |
294 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | |
295 | |
296 program->EmitOriginInstruction(start_rva); | |
297 | |
298 const uint8* p = start_pointer; | |
299 | |
300 while (p < end_pointer) { | |
301 RVA current_rva = p - adjust_pointer_to_rva; | |
302 | |
303 // The base relocation table is usually in the .relocs section, but it could | |
304 // actually be anywhere. Make sure we skip it because we will regenerate it | |
305 // during assembly. | |
306 if (current_rva == relocs_start_rva) { | |
307 program->EmitMakeRelocsInstruction(); | |
308 uint32 relocs_size = pe_info().base_relocation_table().size_; | |
309 if (relocs_size) { | |
310 p += relocs_size; | |
311 continue; | |
312 } | |
313 } | |
314 | |
315 while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) | |
316 ++abs32_pos; | |
317 | |
318 if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { | |
319 uint32 target_address = Read32LittleEndian(p); | |
320 RVA target_rva = target_address - pe_info().image_base(); | |
321 // TODO(sra): target could be Label+offset. It is not clear how to guess | |
322 // which it might be. We assume offset==0. | |
323 program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); | |
324 p += 4; | |
325 continue; | |
326 } | |
327 | |
328 while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) | |
329 ++rel32_pos; | |
330 | |
331 if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { | |
332 RVA target_rva = current_rva + 4 + Read32LittleEndian(p); | |
333 program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); | |
334 p += 4; | |
335 continue; | |
336 } | |
337 | |
338 if (incomplete_disassembly_) { | |
339 if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && | |
340 (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && | |
341 (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { | |
342 // No more relocs in this section, don't bother encoding bytes. | |
343 break; | |
344 } | |
345 } | |
346 | |
347 program->EmitByteInstruction(*p); | |
348 p += 1; | |
349 } | |
350 } | |
351 | |
#if COURGETTE_HISTOGRAM_TARGETS
// Prints to std::cout a histogram of |map|, which maps a target RVA to the
// number of references to it.  Targets are grouped by reference count
// (indegree) and groups are listed from the highest count down.  Only the
// first 15 groups and the groups with a count of 3 or less are printed; a
// "..." line marks omitted groups.  Groups of at most two targets also show a
// description of each target.
//
// This is purely for debugging the algorithm and is only enabled manually in
// 'exploration' builds; there is deliberately no command-line switch for it so
// that it stays compiled out of normal builds.
void DisassemblerWin32X86::HistogramTargets(const char* kind,
                                            const std::map<RVA, int>& map) {
  typedef std::map<int, std::vector<RVA> > TargetsByIndegree;

  TargetsByIndegree by_indegree;
  int total = 0;
  for (std::map<RVA, int>::const_iterator entry = map.begin();
       entry != map.end();
       ++entry) {
    total += entry->second;
    by_indegree[entry->second].push_back(entry->first);
  }

  std::cout << total << " " << kind << " to "
            << map.size() << " unique targets" << std::endl;

  std::cout << "indegree: #targets-with-indegree (example)" << std::endl;
  const int kFirstN = 15;
  int rank = 0;
  bool ellipsis_pending = false;
  for (TargetsByIndegree::reverse_iterator group = by_indegree.rbegin();
       group != by_indegree.rend();
       ++group) {
    ++rank;
    const bool shown = rank <= kFirstN || group->first <= 3;
    if (!shown) {
      ellipsis_pending = true;
      continue;
    }

    if (ellipsis_pending) {
      std::cout << "..." << std::endl;
      ellipsis_pending = false;
    }

    const std::vector<RVA>& targets = group->second;
    const size_t count = targets.size();
    std::cout << std::dec << group->first << ": " << count;
    if (count <= 2) {
      for (size_t i = 0; i < count; ++i)
        std::cout << " " << pe_info().DescribeRVA(targets[i]);
    }
    std::cout << std::endl;
  }
}
#endif  // COURGETTE_HISTOGRAM_TARGETS
397 | |
398 Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) { | |
399 return new DisassemblerWin32X86(pe_info); | |
400 } | |
401 | |
402 //////////////////////////////////////////////////////////////////////////////// | |
403 | |
404 Status ParseWin32X86PE(const void* buffer, size_t length, | |
405 AssemblyProgram** output) { | |
406 *output = NULL; | |
407 | |
408 PEInfo* pe_info = new PEInfo(); | |
409 pe_info->Init(buffer, length); | |
410 | |
411 if (!pe_info->ParseHeader()) { | |
412 delete pe_info; | |
413 return C_INPUT_NOT_RECOGNIZED; | |
414 } | |
415 | |
416 Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); | |
417 AssemblyProgram* program = new AssemblyProgram(); | |
418 | |
419 if (!disassembler->Disassemble(program)) { | |
420 delete program; | |
421 disassembler->Destroy(); | |
422 delete pe_info; | |
423 return C_DISASSEMBLY_FAILED; | |
424 } | |
425 | |
426 disassembler->Destroy(); | |
427 delete pe_info; | |
428 *output = program; | |
429 return C_OK; | |
430 } | |
431 | |
432 void DeleteAssemblyProgram(AssemblyProgram* program) { | |
433 delete program; | |
434 } | |
435 | |
436 } // namespace courgette | |
OLD | NEW |