| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "courgette/disassembler_win32_x64.h" | 5 #include "courgette/disassembler_win32_x64.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 #include <iostream> | 11 #include <string> |
| 12 #include <vector> |
| 12 | 13 |
| 13 #include "base/logging.h" | 14 #include "base/logging.h" |
| 14 #include "base/numerics/safe_conversions.h" | 15 #include "base/numerics/safe_conversions.h" |
| 16 |
| 15 #include "courgette/assembly_program.h" | 17 #include "courgette/assembly_program.h" |
| 16 #include "courgette/courgette.h" | 18 #include "courgette/courgette.h" |
| 19 #include "courgette/encoded_program.h" |
| 17 | 20 |
| 18 namespace courgette { | 21 namespace courgette { |
| 19 | 22 |
| 20 DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) | 23 DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) |
| 21 : Disassembler(start, length), | 24 : Disassembler(start, length), |
| 22 incomplete_disassembly_(false), | 25 incomplete_disassembly_(false), |
| 23 is_PE32_plus_(false), | 26 is_PE32_plus_(false), |
| 24 optional_header_(nullptr), | 27 optional_header_(NULL), |
| 25 size_of_optional_header_(0), | 28 size_of_optional_header_(0), |
| 26 offset_of_data_directories_(0), | 29 offset_of_data_directories_(0), |
| 27 machine_type_(0), | 30 machine_type_(0), |
| 28 number_of_sections_(0), | 31 number_of_sections_(0), |
| 29 sections_(nullptr), | 32 sections_(NULL), |
| 30 has_text_section_(false), | 33 has_text_section_(false), |
| 31 size_of_code_(0), | 34 size_of_code_(0), |
| 32 size_of_initialized_data_(0), | 35 size_of_initialized_data_(0), |
| 33 size_of_uninitialized_data_(0), | 36 size_of_uninitialized_data_(0), |
| 34 base_of_code_(0), | 37 base_of_code_(0), |
| 35 base_of_data_(0), | 38 base_of_data_(0), |
| 36 image_base_(0), | 39 image_base_(0), |
| 37 size_of_image_(0), | 40 size_of_image_(0), |
| 38 number_of_data_directories_(0) { | 41 number_of_data_directories_(0) { |
| 39 } | |
| 40 | |
| 41 FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { | |
| 42 const Section* section = RVAToSection(rva); | |
| 43 if (section != nullptr) { | |
| 44 FileOffset offset_in_section = rva - section->virtual_address; | |
| 45 // Need this extra check, since an |rva| may be valid for a section, but is | |
| 46 // non-existent in an image (e.g. uninit data). | |
| 47 if (offset_in_section >= section->size_of_raw_data) | |
| 48 return kNoFileOffset; | |
| 49 | |
| 50 return static_cast<FileOffset>(section->file_offset_of_raw_data + | |
| 51 offset_in_section); | |
| 52 } | |
| 53 | |
| 54 // Small RVA values point into the file header in the loaded image. | |
| 55 // RVA 0 is the module load address which Windows uses as the module handle. | |
| 56 // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the | |
| 57 // DOS header. | |
| 58 if (rva == 0 || rva == 2) | |
| 59 return static_cast<FileOffset>(rva); | |
| 60 | |
| 61 NOTREACHED(); | |
| 62 return kNoFileOffset; | |
| 63 } | |
| 64 | |
| 65 RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { | |
| 66 for (int i = 0; i < number_of_sections_; ++i) { | |
| 67 const Section* section = &sections_[i]; | |
| 68 if (file_offset >= section->file_offset_of_raw_data) { | |
| 69 FileOffset offset_in_section = | |
| 70 file_offset - section->file_offset_of_raw_data; | |
| 71 if (offset_in_section < section->size_of_raw_data) | |
| 72 return static_cast<RVA>(section->virtual_address + offset_in_section); | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 NOTREACHED(); | |
| 77 return kNoRVA; | |
| 78 } | 42 } |
| 79 | 43 |
| 80 // ParseHeader attempts to match up the buffer with the Windows data | 44 // ParseHeader attempts to match up the buffer with the Windows data |
| 81 // structures that exist within a Windows 'Portable Executable' format file. | 45 // structures that exist within a Windows 'Portable Executable' format file. |
| 82 // Returns 'true' if the buffer matches, and 'false' if the data looks | 46 // Returns 'true' if the buffer matches, and 'false' if the data looks |
| 83 // suspicious. Rather than try to 'map' the buffer to the numerous windows | 47 // suspicious. Rather than try to 'map' the buffer to the numerous windows |
| 84 // structures, we extract the information we need into the courgette::PEInfo | 48 // structures, we extract the information we need into the courgette::PEInfo |
| 85 // structure. | 49 // structure. |
| 86 // | 50 // |
| 87 bool DisassemblerWin32X64::ParseHeader() { | 51 bool DisassemblerWin32X64::ParseHeader() { |
| 88 if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) | 52 if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) |
| 89 return Bad("Too small"); | 53 return Bad("Too small"); |
| 90 | 54 |
| 91 // Have 'MZ' magic for a DOS header? | 55 // Have 'MZ' magic for a DOS header? |
| 92 if (start()[0] != 'M' || start()[1] != 'Z') | 56 if (start()[0] != 'M' || start()[1] != 'Z') |
| 93 return Bad("Not MZ"); | 57 return Bad("Not MZ"); |
| 94 | 58 |
| 95 // offset from DOS header to PE header is stored in DOS header. | 59 // offset from DOS header to PE header is stored in DOS header. |
| 96 FileOffset file_offset = static_cast<FileOffset>( | 60 uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); |
| 97 ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); | |
| 98 | 61 |
| 99 if (file_offset >= length()) | 62 if (offset >= length()) |
| 100 return Bad("Bad offset to PE header"); | 63 return Bad("Bad offset to PE header"); |
| 101 | 64 |
| 102 const uint8_t* const pe_header = FileOffsetToPointer(file_offset); | 65 const uint8_t* const pe_header = OffsetToPointer(offset); |
| 103 const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; | 66 const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; |
| 104 if (pe_header <= start() || | 67 if (pe_header <= start() || |
| 105 pe_header >= end() - kMinPEHeaderSize) | 68 pe_header >= end() - kMinPEHeaderSize) |
| 106 return Bad("Bad file offset to PE header"); | 69 return Bad("Bad offset to PE header"); |
| 107 | 70 |
| 108 if (file_offset % 8 != 0) | 71 if (offset % 8 != 0) |
| 109 return Bad("Misaligned PE header"); | 72 return Bad("Misaligned PE header"); |
| 110 | 73 |
| 111 // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. | 74 // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. |
| 112 // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx | 75 // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx |
| 113 // | 76 // |
| 114 // The first field of the IMAGE_NT_HEADERS is the signature. | 77 // The first field of the IMAGE_NT_HEADERS is the signature. |
| 115 if (!(pe_header[0] == 'P' && | 78 if (!(pe_header[0] == 'P' && |
| 116 pe_header[1] == 'E' && | 79 pe_header[1] == 'E' && |
| 117 pe_header[2] == 0 && | 80 pe_header[2] == 0 && |
| 118 pe_header[3] == 0)) | 81 pe_header[3] == 0)) |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 199 if (!b) { | 162 if (!b) { |
| 200 return Bad("malformed data directory"); | 163 return Bad("malformed data directory"); |
| 201 } | 164 } |
| 202 | 165 |
| 203 // Sections follow the optional header. | 166 // Sections follow the optional header. |
| 204 sections_ = | 167 sections_ = |
| 205 reinterpret_cast<const Section*>(optional_header + | 168 reinterpret_cast<const Section*>(optional_header + |
| 206 size_of_optional_header_); | 169 size_of_optional_header_); |
| 207 size_t detected_length = 0; | 170 size_t detected_length = 0; |
| 208 | 171 |
| 209 for (int i = 0; i < number_of_sections_; ++i) { | 172 for (int i = 0; i < number_of_sections_; ++i) { |
| 210 const Section* section = &sections_[i]; | 173 const Section* section = &sections_[i]; |
| 211 | 174 |
| 212 // TODO(sra): consider using the 'characteristics' field of the section | 175 // TODO(sra): consider using the 'characteristics' field of the section |
| 213 // header to see if the section contains instructions. | 176 // header to see if the section contains instructions. |
| 214 if (memcmp(section->name, ".text", 6) == 0) | 177 if (memcmp(section->name, ".text", 6) == 0) |
| 215 has_text_section_ = true; | 178 has_text_section_ = true; |
| 216 | 179 |
| 217 uint32_t section_end = | 180 uint32_t section_end = |
| 218 section->file_offset_of_raw_data + section->size_of_raw_data; | 181 section->file_offset_of_raw_data + section->size_of_raw_data; |
| 219 if (section_end > detected_length) | 182 if (section_end > detected_length) |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 297 | 260 |
| 298 // Walk through the two-byte entries. | 261 // Walk through the two-byte entries. |
| 299 for (const uint8_t* p = block + 8; p < end_entries; p += 2) { | 262 for (const uint8_t* p = block + 8; p < end_entries; p += 2) { |
| 300 uint16_t entry = ReadU16(p, 0); | 263 uint16_t entry = ReadU16(p, 0); |
| 301 int type = entry >> 12; | 264 int type = entry >> 12; |
| 302 int offset = entry & 0xFFF; | 265 int offset = entry & 0xFFF; |
| 303 | 266 |
| 304 RVA rva = page_rva + offset; | 267 RVA rva = page_rva + offset; |
| 305 // TODO(sebmarchand): Skip the relocs that live outside of the image. See | 268 // TODO(sebmarchand): Skip the relocs that live outside of the image. See |
| 306 // the version of this function in disassembler_win32_x86.cc. | 269 // the version of this function in disassembler_win32_x86.cc. |
| 307 if (type == 10) { // IMAGE_REL_BASED_DIR64 | 270 if (type == 10) { // IMAGE_REL_BASED_DIR64 |
| 308 relocs->push_back(rva); | 271 relocs->push_back(rva); |
| 309 } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE | 272 } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE |
| 310 // Ignore, used as padding. | 273 // Ignore, used as padding. |
| 311 } else { | 274 } else { |
| 312 // Does not occur in Windows x64 executables. | 275 // Does not occur in Windows x64 executables. |
| 313 return Bad("unknown type of reloc"); | 276 return Bad("unknown type of reloc"); |
| 314 } | 277 } |
| 315 } | 278 } |
| 316 | 279 |
| 317 block += size; | 280 block += size; |
| 318 } | 281 } |
| 319 | 282 |
| 320 std::sort(relocs->begin(), relocs->end()); | 283 std::sort(relocs->begin(), relocs->end()); |
| 321 DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); | 284 DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); |
| 322 | 285 |
| 323 return true; | 286 return true; |
| 324 } | 287 } |
| 325 | 288 |
| 326 const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { | 289 const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { |
| 327 for (int i = 0; i < number_of_sections_; ++i) { | 290 for (int i = 0; i < number_of_sections_; i++) { |
| 328 const Section* section = &sections_[i]; | 291 const Section* section = &sections_[i]; |
| 329 if (rva >= section->virtual_address) { | 292 uint32_t offset = rva - section->virtual_address; |
| 330 FileOffset offset_in_section = rva - section->virtual_address; | 293 if (offset < section->virtual_size) { |
| 331 if (offset_in_section < section->virtual_size) | 294 return section; |
| 332 return section; | |
| 333 } | 295 } |
| 334 } | 296 } |
| 335 return nullptr; | 297 return NULL; |
| 298 } |
| 299 |
| 300 int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { |
| 301 const Section* section = RVAToSection(rva); |
| 302 if (section) { |
| 303 uint32_t offset = rva - section->virtual_address; |
| 304 if (offset < section->size_of_raw_data) { |
| 305 return section->file_offset_of_raw_data + offset; |
| 306 } else { |
| 307 return kNoOffset; // In section but not in file (e.g. uninit data). |
| 308 } |
| 309 } |
| 310 |
| 311 // Small RVA values point into the file header in the loaded image. |
| 312 // RVA 0 is the module load address which Windows uses as the module handle. |
| 313 // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the |
| 314 // DOS header. |
| 315 if (rva == 0 || rva == 2) |
| 316 return rva; |
| 317 |
| 318 NOTREACHED(); |
| 319 return kNoOffset; |
| 320 } |
| 321 |
| 322 const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const { |
| 323 int file_offset = RVAToFileOffset(rva); |
| 324 if (file_offset == kNoOffset) |
| 325 return NULL; |
| 326 else |
| 327 return OffsetToPointer(file_offset); |
| 336 } | 328 } |
| 337 | 329 |
| 338 std::string DisassemblerWin32X64::SectionName(const Section* section) { | 330 std::string DisassemblerWin32X64::SectionName(const Section* section) { |
| 339 if (section == nullptr) | 331 if (section == NULL) |
| 340 return "<none>"; | 332 return "<none>"; |
| 341 char name[9]; | 333 char name[9]; |
| 342 memcpy(name, section->name, 8); | 334 memcpy(name, section->name, 8); |
| 343 name[8] = '\0'; // Ensure termination. | 335 name[8] = '\0'; // Ensure termination. |
| 344 return name; | 336 return name; |
| 345 } | 337 } |
| 346 | 338 |
| 347 CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { | 339 CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { |
| 348 // Walk all the bytes in the file, whether or not in a section. | 340 // Walk all the bytes in the file, whether or not in a section. |
| 349 FileOffset file_offset = 0; | 341 uint32_t file_offset = 0; |
| 350 while (file_offset < length()) { | 342 while (file_offset < length()) { |
| 351 const Section* section = FindNextSection(file_offset); | 343 const Section* section = FindNextSection(file_offset); |
| 352 if (section == nullptr) { | 344 if (section == NULL) { |
| 353 // No more sections. There should not be extra stuff following last | 345 // No more sections. There should not be extra stuff following last |
| 354 // section. | 346 // section. |
| 355 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); | 347 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); |
| 356 break; | 348 break; |
| 357 } | 349 } |
| 358 if (file_offset < section->file_offset_of_raw_data) { | 350 if (file_offset < section->file_offset_of_raw_data) { |
| 359 FileOffset section_start_offset = section->file_offset_of_raw_data; | 351 uint32_t section_start_offset = section->file_offset_of_raw_data; |
| 360 if (!ParseNonSectionFileRegion(file_offset, section_start_offset, | 352 if(!ParseNonSectionFileRegion(file_offset, section_start_offset, |
| 361 program)) { | 353 program)) |
| 362 return false; | 354 return false; |
| 363 } | |
| 364 | 355 |
| 365 file_offset = section_start_offset; | 356 file_offset = section_start_offset; |
| 366 } | 357 } |
| 367 FileOffset end = file_offset + section->size_of_raw_data; | 358 uint32_t end = file_offset + section->size_of_raw_data; |
| 368 if (!ParseFileRegion(section, file_offset, end, program)) | 359 if (!ParseFileRegion(section, file_offset, end, program)) |
| 369 return false; | 360 return false; |
| 370 file_offset = end; | 361 file_offset = end; |
| 371 } | 362 } |
| 372 | 363 |
| 373 #if COURGETTE_HISTOGRAM_TARGETS | 364 #if COURGETTE_HISTOGRAM_TARGETS |
| 374 HistogramTargets("abs32 relocs", abs32_target_rvas_); | 365 HistogramTargets("abs32 relocs", abs32_target_rvas_); |
| 375 HistogramTargets("rel32 relocs", rel32_target_rvas_); | 366 HistogramTargets("rel32 relocs", rel32_target_rvas_); |
| 376 #endif | 367 #endif |
| 377 | 368 |
| 378 return true; | 369 return true; |
| 379 } | 370 } |
| 380 | 371 |
| 381 bool DisassemblerWin32X64::ParseAbs32Relocs() { | 372 bool DisassemblerWin32X64::ParseAbs32Relocs() { |
| 382 abs32_locations_.clear(); | 373 abs32_locations_.clear(); |
| 383 if (!ParseRelocs(&abs32_locations_)) | 374 if (!ParseRelocs(&abs32_locations_)) |
| 384 return false; | 375 return false; |
| 385 | 376 |
| 386 #if COURGETTE_HISTOGRAM_TARGETS | 377 #if COURGETTE_HISTOGRAM_TARGETS |
| 387 for (size_t i = 0; i < abs32_locations_.size(); ++i) { | 378 for (size_t i = 0; i < abs32_locations_.size(); ++i) { |
| 388 RVA rva = abs32_locations_[i]; | 379 RVA rva = abs32_locations_[i]; |
| 389 // The 4 bytes at the relocation are a reference to some address. | 380 // The 4 bytes at the relocation are a reference to some address. |
| 390 uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); | 381 uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); |
| 391 ++abs32_target_rvas_[target_address - image_base()]; | 382 ++abs32_target_rvas_[target_address - image_base()]; |
| 392 } | 383 } |
| 393 #endif | 384 #endif |
| 394 return true; | 385 return true; |
| 395 } | 386 } |
| 396 | 387 |
| 397 void DisassemblerWin32X64::ParseRel32RelocsFromSections() { | 388 void DisassemblerWin32X64::ParseRel32RelocsFromSections() { |
| 398 FileOffset file_offset = 0; | 389 uint32_t file_offset = 0; |
| 399 while (file_offset < length()) { | 390 while (file_offset < length()) { |
| 400 const Section* section = FindNextSection(file_offset); | 391 const Section* section = FindNextSection(file_offset); |
| 401 if (section == nullptr) | 392 if (section == NULL) |
| 402 break; | 393 break; |
| 403 if (file_offset < section->file_offset_of_raw_data) | 394 if (file_offset < section->file_offset_of_raw_data) |
| 404 file_offset = section->file_offset_of_raw_data; | 395 file_offset = section->file_offset_of_raw_data; |
| 405 ParseRel32RelocsFromSection(section); | 396 ParseRel32RelocsFromSection(section); |
| 406 file_offset += section->size_of_raw_data; | 397 file_offset += section->size_of_raw_data; |
| 407 } | 398 } |
| 408 std::sort(rel32_locations_.begin(), rel32_locations_.end()); | 399 std::sort(rel32_locations_.begin(), rel32_locations_.end()); |
| 409 DCHECK(rel32_locations_.empty() || | 400 DCHECK(rel32_locations_.empty() || |
| 410 rel32_locations_.back() != kUnassignedRVA); | 401 rel32_locations_.back() != kUnassignedRVA); |
| 411 | 402 |
| 412 #if COURGETTE_HISTOGRAM_TARGETS | 403 #if COURGETTE_HISTOGRAM_TARGETS |
| 413 VLOG(1) << "abs32_locations_ " << abs32_locations_.size() | 404 VLOG(1) << "abs32_locations_ " << abs32_locations_.size() |
| 414 << "\nrel32_locations_ " << rel32_locations_.size() | 405 << "\nrel32_locations_ " << rel32_locations_.size() |
| 415 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() | 406 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() |
| 416 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); | 407 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); |
| 417 | 408 |
| 418 int common = 0; | 409 int common = 0; |
| 419 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); | 410 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); |
| 420 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); | 411 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); |
| 421 while (abs32_iter != abs32_target_rvas_.end() && | 412 while (abs32_iter != abs32_target_rvas_.end() && |
| 422 rel32_iter != rel32_target_rvas_.end()) { | 413 rel32_iter != rel32_target_rvas_.end()) { |
| 423 if (abs32_iter->first < rel32_iter->first) { | 414 if (abs32_iter->first < rel32_iter->first) |
| 424 ++abs32_iter; | 415 ++abs32_iter; |
| 425 } else if (rel32_iter->first < abs32_iter->first) { | 416 else if (rel32_iter->first < abs32_iter->first) |
| 426 ++rel32_iter; | 417 ++rel32_iter; |
| 427 } else { | 418 else { |
| 428 ++common; | 419 ++common; |
| 429 ++abs32_iter; | 420 ++abs32_iter; |
| 430 ++rel32_iter; | 421 ++rel32_iter; |
| 431 } | 422 } |
| 432 } | 423 } |
| 433 VLOG(1) << "common " << common; | 424 VLOG(1) << "common " << common; |
| 434 #endif | 425 #endif |
| 435 } | 426 } |
| 436 | 427 |
| 437 void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { | 428 void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| 438 // TODO(sra): use characteristic. | 429 // TODO(sra): use characteristic. |
| 439 bool isCode = strcmp(section->name, ".text") == 0; | 430 bool isCode = strcmp(section->name, ".text") == 0; |
| 440 if (!isCode) | 431 if (!isCode) |
| 441 return; | 432 return; |
| 442 | 433 |
| 443 FileOffset start_file_offset = section->file_offset_of_raw_data; | 434 uint32_t start_file_offset = section->file_offset_of_raw_data; |
| 444 FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; | 435 uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; |
| 445 RVA relocs_start_rva = base_relocation_table().address_; | 436 RVA relocs_start_rva = base_relocation_table().address_; |
| 446 | 437 |
| 447 const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); | 438 const uint8_t* start_pointer = OffsetToPointer(start_file_offset); |
| 448 const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); | 439 const uint8_t* end_pointer = OffsetToPointer(end_file_offset); |
| 449 | 440 |
| 450 RVA start_rva = FileOffsetToRVA(start_file_offset); | 441 RVA start_rva = FileOffsetToRVA(start_file_offset); |
| 451 RVA end_rva = start_rva + section->virtual_size; | 442 RVA end_rva = start_rva + section->virtual_size; |
| 452 | 443 |
| 453 // Quick way to convert from Pointer to RVA within a single Section is to | 444 // Quick way to convert from Pointer to RVA within a single Section is to |
| 454 // subtract |pointer_to_rva|. | 445 // subtract 'pointer_to_rva'. |
| 455 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; | 446 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; |
| 456 | 447 |
| 457 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | 448 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); |
| 458 | 449 |
| 459 // Find the rel32 relocations. | 450 // Find the rel32 relocations. |
| 460 const uint8_t* p = start_pointer; | 451 const uint8_t* p = start_pointer; |
| 461 while (p < end_pointer) { | 452 while (p < end_pointer) { |
| 462 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); | 453 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); |
| 463 if (current_rva == relocs_start_rva) { | 454 if (current_rva == relocs_start_rva) { |
| 464 uint32_t relocs_size = base_relocation_table().size_; | 455 uint32_t relocs_size = base_relocation_table().size_; |
| 465 if (relocs_size) { | 456 if (relocs_size) { |
| 466 p += relocs_size; | 457 p += relocs_size; |
| 467 continue; | 458 continue; |
| 468 } | 459 } |
| 469 } | 460 } |
| 470 | 461 |
| 462 //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) |
| 463 // ++abs32_pos; |
| 464 |
| 471 // Heuristic discovery of rel32 locations in instruction stream: are the | 465 // Heuristic discovery of rel32 locations in instruction stream: are the |
| 472 // next few bytes the start of an instruction containing a rel32 | 466 // next few bytes the start of an instruction containing a rel32 |
| 473 // addressing mode? | 467 // addressing mode? |
| 474 const uint8_t* rel32 = nullptr; | 468 const uint8_t* rel32 = NULL; |
| 475 bool is_rip_relative = false; | 469 bool is_rip_relative = false; |
| 476 | 470 |
| 477 if (p + 5 <= end_pointer) { | 471 if (p + 5 <= end_pointer) { |
| 478 if (*p == 0xE8 || *p == 0xE9) // jmp rel32 and call rel32 | 472 if (*p == 0xE8 || *p == 0xE9) // jmp rel32 and call rel32 |
| 479 rel32 = p + 1; | 473 rel32 = p + 1; |
| 480 } | 474 } |
| 481 if (p + 6 <= end_pointer) { | 475 if (p + 6 <= end_pointer) { |
| 482 if (*p == 0x0F && (*(p + 1) & 0xF0) == 0x80) { // Jcc long form | 476 if (*p == 0x0F && (*(p + 1) & 0xF0) == 0x80) { // Jcc long form |
| 483 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely | 477 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely |
| 484 rel32 = p + 2; | 478 rel32 = p + 2; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 515 // Beginning of abs32 reloc is before end of rel32 reloc so they | 509 // Beginning of abs32 reloc is before end of rel32 reloc so they |
| 516 // overlap. Skip four bytes past the abs32 reloc. | 510 // overlap. Skip four bytes past the abs32 reloc. |
| 517 p += (*abs32_pos + 4) - current_rva; | 511 p += (*abs32_pos + 4) - current_rva; |
| 518 continue; | 512 continue; |
| 519 } | 513 } |
| 520 } | 514 } |
| 521 | 515 |
| 522 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); | 516 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); |
| 523 // To be valid, rel32 target must be within image, and within this | 517 // To be valid, rel32 target must be within image, and within this |
| 524 // section. | 518 // section. |
| 525 if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA. | 519 if (IsValidRVA(target_rva) && |
| 526 (is_rip_relative || | 520 (is_rip_relative || |
| 527 (start_rva <= target_rva && target_rva < end_rva))) { | 521 (start_rva <= target_rva && target_rva < end_rva))) { |
| 528 rel32_locations_.push_back(rel32_rva); | 522 rel32_locations_.push_back(rel32_rva); |
| 529 #if COURGETTE_HISTOGRAM_TARGETS | 523 #if COURGETTE_HISTOGRAM_TARGETS |
| 530 ++rel32_target_rvas_[target_rva]; | 524 ++rel32_target_rvas_[target_rva]; |
| 531 #endif | 525 #endif |
| 532 p = rel32 + 4; | 526 p = rel32 + 4; |
| 533 continue; | 527 continue; |
| 534 } | 528 } |
| 535 } | 529 } |
| 536 p += 1; | 530 p += 1; |
| 537 } | 531 } |
| 538 } | 532 } |
| 539 | 533 |
| 540 CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( | 534 CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( |
| 541 FileOffset start_file_offset, | 535 uint32_t start_file_offset, |
| 542 FileOffset end_file_offset, | 536 uint32_t end_file_offset, |
| 543 AssemblyProgram* program) { | 537 AssemblyProgram* program) { |
| 544 if (incomplete_disassembly_) | 538 if (incomplete_disassembly_) |
| 545 return true; | 539 return true; |
| 546 | 540 |
| 547 if (end_file_offset > start_file_offset) { | 541 if (end_file_offset > start_file_offset) { |
| 548 if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), | 542 if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), |
| 549 end_file_offset - start_file_offset)) { | 543 end_file_offset - start_file_offset)) { |
| 550 return false; | 544 return false; |
| 551 } | 545 } |
| 552 } | 546 } |
| 553 | 547 |
| 554 return true; | 548 return true; |
| 555 } | 549 } |
| 556 | 550 |
| 557 CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, | 551 CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, |
| 558 FileOffset start_file_offset, | 552 uint32_t start_file_offset, |
| 559 FileOffset end_file_offset, | 553 uint32_t end_file_offset, |
| 560 AssemblyProgram* program) { | 554 AssemblyProgram* program) { |
| 561 RVA relocs_start_rva = base_relocation_table().address_; | 555 RVA relocs_start_rva = base_relocation_table().address_; |
| 562 | 556 |
| 563 const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); | 557 const uint8_t* start_pointer = OffsetToPointer(start_file_offset); |
| 564 const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); | 558 const uint8_t* end_pointer = OffsetToPointer(end_file_offset); |
| 565 | 559 |
| 566 RVA start_rva = FileOffsetToRVA(start_file_offset); | 560 RVA start_rva = FileOffsetToRVA(start_file_offset); |
| 567 RVA end_rva = start_rva + section->virtual_size; | 561 RVA end_rva = start_rva + section->virtual_size; |
| 568 | 562 |
| 569 // Quick way to convert from Pointer to RVA within a single Section is to | 563 // Quick way to convert from Pointer to RVA within a single Section is to |
| 570 // subtract 'pointer_to_rva'. | 564 // subtract 'pointer_to_rva'. |
| 571 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; | 565 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; |
| 572 | 566 |
| 573 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); | 567 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); |
| 574 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | 568 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 663 p != h.rend(); | 657 p != h.rend(); |
| 664 ++p) { | 658 ++p) { |
| 665 ++index; | 659 ++index; |
| 666 if (index <= kFirstN || p->first <= 3) { | 660 if (index <= kFirstN || p->first <= 3) { |
| 667 if (someSkipped) { | 661 if (someSkipped) { |
| 668 std::cout << "..." << std::endl; | 662 std::cout << "..." << std::endl; |
| 669 } | 663 } |
| 670 size_t count = p->second.size(); | 664 size_t count = p->second.size(); |
| 671 std::cout << std::dec << p->first << ": " << count; | 665 std::cout << std::dec << p->first << ": " << count; |
| 672 if (count <= 2) { | 666 if (count <= 2) { |
| 673 for (size_t i = 0; i < count; ++i) | 667 for (size_t i = 0; i < count; ++i) |
| 674 std::cout << " " << DescribeRVA(p->second[i]); | 668 std::cout << " " << DescribeRVA(p->second[i]); |
| 675 } | 669 } |
| 676 std::cout << std::endl; | 670 std::cout << std::endl; |
| 677 someSkipped = false; | 671 someSkipped = false; |
| 678 } else { | 672 } else { |
| 679 someSkipped = true; | 673 someSkipped = true; |
| 680 } | 674 } |
| 681 } | 675 } |
| 682 } | 676 } |
| 683 #endif // COURGETTE_HISTOGRAM_TARGETS | 677 #endif // COURGETTE_HISTOGRAM_TARGETS |
| 684 | 678 |
| 679 |
| 685 // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except | 680 // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except |
| 686 // that during development I'm finding I need to call it when compiled in | 681 // that during development I'm finding I need to call it when compiled in |
| 687 // Release mode. Hence: | 682 // Release mode. Hence: |
| 688 // TODO(sra): make this compile only for debug mode. | 683 // TODO(sra): make this compile only for debug mode. |
| 689 std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { | 684 std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { |
| 690 const Section* section = RVAToSection(rva); | 685 const Section* section = RVAToSection(rva); |
| 691 std::ostringstream s; | 686 std::ostringstream s; |
| 692 s << std::hex << rva; | 687 s << std::hex << rva; |
| 693 if (section) { | 688 if (section) { |
| 694 s << " ("; | 689 s << " ("; |
| 695 s << SectionName(section) << "+" | 690 s << SectionName(section) << "+" |
| 696 << std::hex << (rva - section->virtual_address) | 691 << std::hex << (rva - section->virtual_address) |
| 697 << ")"; | 692 << ")"; |
| 698 } | 693 } |
| 699 return s.str(); | 694 return s.str(); |
| 700 } | 695 } |
| 701 | 696 |
| 702 const Section* DisassemblerWin32X64::FindNextSection( | 697 const Section* DisassemblerWin32X64::FindNextSection( |
| 703 FileOffset file_offset) const { | 698 uint32_t fileOffset) const { |
| 704 const Section* best = 0; | 699 const Section* best = 0; |
| 705 for (int i = 0; i < number_of_sections_; ++i) { | 700 for (int i = 0; i < number_of_sections_; i++) { |
| 706 const Section* section = &sections_[i]; | 701 const Section* section = &sections_[i]; |
| 707 if (section->size_of_raw_data > 0) { // i.e. has data in file. | 702 if (section->size_of_raw_data > 0) { // i.e. has data in file. |
| 708 if (file_offset <= section->file_offset_of_raw_data) { | 703 if (fileOffset <= section->file_offset_of_raw_data) { |
| 709 if (best == 0 || | 704 if (best == 0 || |
| 710 section->file_offset_of_raw_data < best->file_offset_of_raw_data) { | 705 section->file_offset_of_raw_data < best->file_offset_of_raw_data) { |
| 711 best = section; | 706 best = section; |
| 712 } | 707 } |
| 713 } | 708 } |
| 714 } | 709 } |
| 715 } | 710 } |
| 716 return best; | 711 return best; |
| 717 } | 712 } |
| 718 | 713 |
| 714 RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const { |
| 715 for (int i = 0; i < number_of_sections_; i++) { |
| 716 const Section* section = &sections_[i]; |
| 717 uint32_t offset = file_offset - section->file_offset_of_raw_data; |
| 718 if (offset < section->size_of_raw_data) { |
| 719 return section->virtual_address + offset; |
| 720 } |
| 721 } |
| 722 return 0; |
| 723 } |
| 724 |
| 719 bool DisassemblerWin32X64::ReadDataDirectory( | 725 bool DisassemblerWin32X64::ReadDataDirectory( |
| 720 int index, | 726 int index, |
| 721 ImageDataDirectory* directory) { | 727 ImageDataDirectory* directory) { |
| 722 | 728 |
| 723 if (index < number_of_data_directories_) { | 729 if (index < number_of_data_directories_) { |
| 724 FileOffset file_offset = index * 8 + offset_of_data_directories_; | 730 size_t offset = index * 8 + offset_of_data_directories_; |
| 725 if (file_offset >= size_of_optional_header_) | 731 if (offset >= size_of_optional_header_) |
| 726 return Bad("number of data directories inconsistent"); | 732 return Bad("number of data directories inconsistent"); |
| 727 const uint8_t* data_directory = optional_header_ + file_offset; | 733 const uint8_t* data_directory = optional_header_ + offset; |
| 728 if (data_directory < start() || | 734 if (data_directory < start() || |
| 729 data_directory + 8 >= end()) | 735 data_directory + 8 >= end()) |
| 730 return Bad("data directory outside image"); | 736 return Bad("data directory outside image"); |
| 731 RVA rva = ReadU32(data_directory, 0); | 737 RVA rva = ReadU32(data_directory, 0); |
| 732 size_t size = ReadU32(data_directory, 4); | 738 size_t size = ReadU32(data_directory, 4); |
| 733 if (size > size_of_image_) | 739 if (size > size_of_image_) |
| 734 return Bad("data directory size too big"); | 740 return Bad("data directory size too big"); |
| 735 | 741 |
| 736 // TODO(sra): validate RVA. | 742 // TODO(sra): validate RVA. |
| 737 directory->address_ = rva; | 743 directory->address_ = rva; |
| 738 directory->size_ = static_cast<uint32_t>(size); | 744 directory->size_ = static_cast<uint32_t>(size); |
| 739 return true; | 745 return true; |
| 740 } else { | 746 } else { |
| 741 directory->address_ = 0; | 747 directory->address_ = 0; |
| 742 directory->size_ = 0; | 748 directory->size_ = 0; |
| 743 return true; | 749 return true; |
| 744 } | 750 } |
| 745 } | 751 } |
| 746 | 752 |
| 747 } // namespace courgette | 753 } // namespace courgette |
| OLD | NEW |