OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "third_party/courgette/image_info.h" | |
6 | |
7 #include <memory.h> | |
8 #include <algorithm> | |
9 #include <map> | |
10 #include <set> | |
11 #include <sstream> | |
12 #include <vector> | |
13 | |
14 #include "base/logging.h" | |
15 | |
16 namespace courgette { | |
17 | |
18 std::string SectionName(const Section* section) { | |
19 if (section == NULL) | |
20 return "<none>"; | |
21 char name[9]; | |
22 memcpy(name, section->name, 8); | |
23 name[8] = '\0'; // Ensure termination. | |
24 return name; | |
25 } | |
26 | |
27 PEInfo::PEInfo() | |
28 : failure_reason_("uninitialized"), | |
29 start_(0), end_(0), length_(0), | |
30 is_PE32_plus_(0), file_length_(0), has_text_section_(false) { | |
31 } | |
32 | |
33 void PEInfo::Init(const void* start, size_t length) { | |
34 start_ = reinterpret_cast<const uint8*>(start); | |
35 length_ = length; | |
36 end_ = start_ + length_; | |
37 failure_reason_ = "unparsed"; | |
38 } | |
39 | |
40 // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except | |
41 // that during development I'm finding I need to call it when compiled in | |
42 // Release mode. Hence: | |
43 // TODO(sra): make this compile only for debug mode. | |
44 std::string PEInfo::DescribeRVA(RVA rva) const { | |
45 const Section* section = RVAToSection(rva); | |
46 std::ostringstream s; | |
47 s << std::hex << rva; | |
48 if (section) { | |
49 s << " ("; | |
50 s << SectionName(section) << "+" | |
51 << std::hex << (rva - section->virtual_address) | |
52 << ")"; | |
53 } | |
54 return s.str(); | |
55 } | |
56 | |
57 const Section* PEInfo::FindNextSection(uint32 fileOffset) const { | |
58 const Section* best = 0; | |
59 for (int i = 0; i < number_of_sections_; i++) { | |
60 const Section* section = §ions_[i]; | |
61 if (fileOffset <= section->file_offset_of_raw_data) { | |
62 if (best == 0 || | |
63 section->file_offset_of_raw_data < best->file_offset_of_raw_data) { | |
64 best = section; | |
65 } | |
66 } | |
67 } | |
68 return best; | |
69 } | |
70 | |
71 const Section* PEInfo::RVAToSection(RVA rva) const { | |
72 for (int i = 0; i < number_of_sections_; i++) { | |
73 const Section* section = §ions_[i]; | |
74 uint32 offset = rva - section->virtual_address; | |
75 if (offset < section->virtual_size) { | |
76 return section; | |
77 } | |
78 } | |
79 return NULL; | |
80 } | |
81 | |
82 int PEInfo::RVAToFileOffset(RVA rva) const { | |
83 const Section* section = RVAToSection(rva); | |
84 if (section) { | |
85 uint32 offset = rva - section->virtual_address; | |
86 if (offset < section->size_of_raw_data) { | |
87 return section->file_offset_of_raw_data + offset; | |
88 } else { | |
89 return kNoOffset; // In section but not in file (e.g. uninit data). | |
90 } | |
91 } | |
92 | |
93 // Small RVA values point into the file header in the loaded image. | |
94 // RVA 0 is the module load address which Windows uses as the module handle. | |
95 // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the | |
96 // DOS header. | |
97 if (rva == 0 || rva == 2) | |
98 return rva; | |
99 | |
100 NOTREACHED(); | |
101 return kNoOffset; | |
102 } | |
103 | |
104 const uint8* PEInfo::RVAToPointer(RVA rva) const { | |
105 int file_offset = RVAToFileOffset(rva); | |
106 if (file_offset == kNoOffset) | |
107 return NULL; | |
108 else | |
109 return start_ + file_offset; | |
110 } | |
111 | |
112 RVA PEInfo::FileOffsetToRVA(uint32 file_offset) const { | |
113 for (int i = 0; i < number_of_sections_; i++) { | |
114 const Section* section = §ions_[i]; | |
115 uint32 offset = file_offset - section->file_offset_of_raw_data; | |
116 if (offset < section->size_of_raw_data) { | |
117 return section->virtual_address + offset; | |
118 } | |
119 } | |
120 return 0; | |
121 } | |
122 | |
123 //////////////////////////////////////////////////////////////////////////////// | |
124 | |
125 namespace { | |
126 | |
127 // Constants and offsets gleaned from WINNT.H and various articles on the | |
128 // format of Windows PE executables. | |
129 | |
130 // This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew): | |
131 const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c; | |
132 | |
133 const uint16 kImageNtOptionalHdr32Magic = 0x10b; | |
134 const uint16 kImageNtOptionalHdr64Magic = 0x20b; | |
135 | |
136 const size_t kSizeOfCoffHeader = 20; | |
137 const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96; | |
138 const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112; | |
139 | |
140 // These helper functions avoid the need for casts in the main code. | |
141 inline uint16 ReadU16(const uint8* address, size_t offset) { | |
142 return *reinterpret_cast<const uint16*>(address + offset); | |
143 } | |
144 | |
145 inline uint32 ReadU32(const uint8* address, size_t offset) { | |
146 return *reinterpret_cast<const uint32*>(address + offset); | |
147 } | |
148 | |
149 inline uint64 ReadU64(const uint8* address, size_t offset) { | |
150 return *reinterpret_cast<const uint64*>(address + offset); | |
151 } | |
152 | |
153 } // namespace | |
154 | |
155 // ParseHeader attempts to match up the buffer with the Windows data | |
156 // structures that exist within a Windows 'Portable Executable' format file. | |
157 // Returns 'true' if the buffer matches, and 'false' if the data looks | |
158 // suspicious. Rather than try to 'map' the buffer to the numerous windows | |
159 // structures, we extract the information we need into the courgette::PEInfo | |
160 // structure. | |
161 // | |
162 bool PEInfo::ParseHeader() { | |
163 if (length_ < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) | |
164 return Bad("Too small"); | |
165 | |
166 // Have 'MZ' magic for a DOS header? | |
167 if (start_[0] != 'M' || start_[1] != 'Z') | |
168 return Bad("Not MZ"); | |
169 | |
170 // offset from DOS header to PE header is stored in DOS header. | |
171 uint32 offset = ReadU32(start_, kOffsetOfFileAddressOfNewExeHeader); | |
172 | |
173 const uint8* const pe_header = start_ + offset; | |
174 const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; | |
175 if (pe_header <= start_ || pe_header >= end_ - kMinPEHeaderSize) | |
176 return Bad("Bad offset to PE header"); | |
177 | |
178 if (offset % 8 != 0) | |
179 return Bad("Misaligned PE header"); | |
180 | |
181 // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. | |
182 // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx | |
183 // | |
184 // The first field of the IMAGE_NT_HEADERS is the signature. | |
185 if (!(pe_header[0] == 'P' && | |
186 pe_header[1] == 'E' && | |
187 pe_header[2] == 0 && | |
188 pe_header[3] == 0)) | |
189 return Bad("no PE signature"); | |
190 | |
191 // The second field of the IMAGE_NT_HEADERS is the COFF header. | |
192 // The COFF header is also called an IMAGE_FILE_HEADER | |
193 // http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx | |
194 const uint8* const coff_header = pe_header + 4; | |
195 machine_type_ = ReadU16(coff_header, 0); | |
196 number_of_sections_ = ReadU16(coff_header, 2); | |
197 size_of_optional_header_ = ReadU16(coff_header, 16); | |
198 | |
199 // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64) | |
200 const uint8* const optional_header = coff_header + kSizeOfCoffHeader; | |
201 optional_header_ = optional_header; | |
202 | |
203 if (optional_header + size_of_optional_header_ >= end_) | |
204 return Bad("optional header past end of file"); | |
205 | |
206 // Check we can read the magic. | |
207 if (size_of_optional_header_ < 2) | |
208 return Bad("optional header no magic"); | |
209 | |
210 uint16 magic = ReadU16(optional_header, 0); | |
211 | |
212 if (magic == kImageNtOptionalHdr32Magic) { | |
213 is_PE32_plus_ = false; | |
214 offset_of_data_directories_ = | |
215 kOffsetOfDataDirectoryFromImageOptionalHeader32; | |
216 } else if (magic == kImageNtOptionalHdr64Magic) { | |
217 is_PE32_plus_ = true; | |
218 offset_of_data_directories_ = | |
219 kOffsetOfDataDirectoryFromImageOptionalHeader64; | |
220 } else { | |
221 return Bad("unrecognized magic"); | |
222 } | |
223 | |
224 // Check that we can read the rest of the the fixed fields. Data directories | |
225 // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER. | |
226 if (size_of_optional_header_ < offset_of_data_directories_) | |
227 return Bad("optional header too short"); | |
228 | |
229 // The optional header is either an IMAGE_OPTIONAL_HEADER32 or | |
230 // IMAGE_OPTIONAL_HEADER64 | |
231 // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx | |
232 // | |
233 // Copy the fields we care about. | |
234 size_of_code_ = ReadU32(optional_header, 4); | |
235 size_of_initialized_data_ = ReadU32(optional_header, 8); | |
236 size_of_uninitialized_data_ = ReadU32(optional_header, 12); | |
237 base_of_code_ = ReadU32(optional_header, 20); | |
238 if (is_PE32_plus_) { | |
239 base_of_data_ = 0; | |
240 image_base_ = ReadU64(optional_header, 24); | |
241 } else { | |
242 base_of_data_ = ReadU32(optional_header, 24); | |
243 image_base_ = ReadU32(optional_header, 28); | |
244 } | |
245 size_of_image_ = ReadU32(optional_header, 56); | |
246 number_of_data_directories_ = | |
247 ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92)); | |
248 | |
249 if (size_of_code_ >= length_ || | |
250 size_of_initialized_data_ >= length_ || | |
251 size_of_code_ + size_of_initialized_data_ >= length_) { | |
252 // This validation fires on some perfectly fine executables. | |
253 // return Bad("code or initialized data too big"); | |
254 } | |
255 | |
256 // TODO(sra): we can probably get rid of most of the data directories. | |
257 bool b = true; | |
258 // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary | |
259 // for correctness and it compiles smaller this way. | |
260 b &= ReadDataDirectory(0, &export_table_); | |
261 b &= ReadDataDirectory(1, &import_table_); | |
262 b &= ReadDataDirectory(2, &resource_table_); | |
263 b &= ReadDataDirectory(3, &exception_table_); | |
264 b &= ReadDataDirectory(5, &base_relocation_table_); | |
265 b &= ReadDataDirectory(11, &bound_import_table_); | |
266 b &= ReadDataDirectory(12, &import_address_table_); | |
267 b &= ReadDataDirectory(13, &delay_import_descriptor_); | |
268 b &= ReadDataDirectory(14, &clr_runtime_header_); | |
269 if (!b) { | |
270 return Bad("malformed data directory"); | |
271 } | |
272 | |
273 // Sections follow the optional header. | |
274 sections_ = | |
275 reinterpret_cast<const Section*>(optional_header + | |
276 size_of_optional_header_); | |
277 file_length_ = 0; | |
278 | |
279 for (int i = 0; i < number_of_sections_; ++i) { | |
280 const Section* section = §ions_[i]; | |
281 | |
282 // TODO(sra): consider using the 'characteristics' field of the section | |
283 // header to see if the section contains instructions. | |
284 if (memcmp(section->name, ".text", 6) == 0) | |
285 has_text_section_ = true; | |
286 | |
287 uint32 section_end = | |
288 section->file_offset_of_raw_data + section->size_of_raw_data; | |
289 if (section_end > file_length_) | |
290 file_length_ = section_end; | |
291 } | |
292 | |
293 failure_reason_ = NULL; | |
294 return true; | |
295 } | |
296 | |
297 bool PEInfo::ReadDataDirectory(int index, ImageDataDirectory* directory) { | |
298 if (index < number_of_data_directories_) { | |
299 size_t offset = index * 8 + offset_of_data_directories_; | |
300 if (offset >= size_of_optional_header_) | |
301 return Bad("number of data directories inconsistent"); | |
302 const uint8* data_directory = optional_header_ + offset; | |
303 if (data_directory < start_ || data_directory + 8 >= end_) | |
304 return Bad("data directory outside image"); | |
305 RVA rva = ReadU32(data_directory, 0); | |
306 size_t size = ReadU32(data_directory, 4); | |
307 if (size > size_of_image_) | |
308 return Bad("data directory size too big"); | |
309 | |
310 // TODO(sra): validate RVA. | |
311 directory->address_ = rva; | |
312 directory->size_ = size; | |
313 return true; | |
314 } else { | |
315 directory->address_ = 0; | |
316 directory->size_ = 0; | |
317 return true; | |
318 } | |
319 } | |
320 | |
321 bool PEInfo::Bad(const char* reason) { | |
322 failure_reason_ = reason; | |
323 return false; | |
324 } | |
325 | |
326 //////////////////////////////////////////////////////////////////////////////// | |
327 | |
328 bool PEInfo::ParseRelocs(std::vector<RVA> *relocs) { | |
329 relocs->clear(); | |
330 | |
331 size_t relocs_size = base_relocation_table_.size_; | |
332 if (relocs_size == 0) | |
333 return true; | |
334 | |
335 // The format of the base relocation table is a sequence of variable sized | |
336 // IMAGE_BASE_RELOCATION blocks. Search for | |
337 // "The format of the base relocation data is somewhat quirky" | |
338 // at http://msdn.microsoft.com/en-us/library/ms809762.aspx | |
339 | |
340 const uint8* start = RVAToPointer(base_relocation_table_.address_); | |
341 const uint8* end = start + relocs_size; | |
342 | |
343 // Make sure entire base relocation table is within the buffer. | |
344 if (start < start_ || | |
345 start >= end_ || | |
346 end <= start_ || | |
347 end > end_) { | |
348 return Bad(".relocs outside image"); | |
349 } | |
350 | |
351 const uint8* block = start; | |
352 | |
353 // Walk the variable sized blocks. | |
354 while (block + 8 < end) { | |
355 RVA page_rva = ReadU32(block, 0); | |
356 uint32 size = ReadU32(block, 4); | |
357 if (size < 8 || // Size includes header ... | |
358 size % 4 != 0) // ... and is word aligned. | |
359 return Bad("unreasonable relocs block"); | |
360 | |
361 const uint8* end_entries = block + size; | |
362 | |
363 if (end_entries <= block || end_entries <= start_ || end_entries > end_) | |
364 return Bad(".relocs block outside image"); | |
365 | |
366 // Walk through the two-byte entries. | |
367 for (const uint8* p = block + 8; p < end_entries; p += 2) { | |
368 uint16 entry = ReadU16(p, 0); | |
369 int type = entry >> 12; | |
370 int offset = entry & 0xFFF; | |
371 | |
372 RVA rva = page_rva + offset; | |
373 if (type == 3) { // IMAGE_REL_BASED_HIGHLOW | |
374 relocs->push_back(rva); | |
375 } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE | |
376 // Ignore, used as padding. | |
377 } else { | |
378 // Does not occur in Windows x86 executables. | |
379 return Bad("unknown type of reloc"); | |
380 } | |
381 } | |
382 | |
383 block += size; | |
384 } | |
385 | |
386 std::sort(relocs->begin(), relocs->end()); | |
387 | |
388 return true; | |
389 } | |
390 | |
391 } // namespace courgette | |
392 | |
OLD | NEW |