tools/symsrc/pefile.py - Issue 155136: Add the symbol and source server scripts.

Unified Diff: tools/symsrc/pefile.py

Issue 155136: Add the symbol and source server scripts. (Closed)

Patch Set: review feedback Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/symsrc/pefile.py

diff --git a/tools/symsrc/pefile.py b/tools/symsrc/pefile.py

new file mode 100644

index 0000000000000000000000000000000000000000..e22fd1ae2457eed8d79f9e2ce2219be7c838a2ec

--- /dev/null

+++ b/tools/symsrc/pefile.py

@@ -0,0 +1,3729 @@

+# -*- coding: Latin-1 -*-

+"""pefile, Portable Executable reader module

+All the PE file basic structures are available with their default names

+as attributes of the instance returned.

+Processed elements such as the import table are made available with lowercase

+names, to differentiate them from the upper case basic structure names.

+pefile has been tested against the limits of valid PE headers, that is, malware.

+Lots of packed malware attempt to abuse the format way beyond its standard use.

+To the best of my knowledge most of the abuses are handled gracefully.

+For detailed copyright information see the file COPYING in

+the root of the distribution archive.

+"""

+__author__ = 'Ero Carrera'

+__version__ = '1.2.9.1'

+__contact__ = 'ero@dkbza.org'

+import os

+import struct

+import time

+import math

+import re

+import exceptions

+import string

+import array

+sha1, sha256, sha512, md5 = None, None, None, None

+try:

+ import hashlib

+ sha1 = hashlib.sha1

+ sha256 = hashlib.sha256

+ sha512 = hashlib.sha512

+ md5 = hashlib.md5

+except ImportError:

+ try:

+ import sha

+ sha1 = sha.new

+ except ImportError:

+ pass

+ try:

+ import md5

+ md5 = md5.new

+ except ImportError:

+ pass

+fast_load = False

+IMAGE_DOS_SIGNATURE = 0x5A4D

+IMAGE_OS2_SIGNATURE = 0x454E

+IMAGE_OS2_SIGNATURE_LE = 0x454C

+IMAGE_VXD_SIGNATURE = 0x454C

+IMAGE_NT_SIGNATURE = 0x00004550

+IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16

+IMAGE_ORDINAL_FLAG = 0x80000000L

+IMAGE_ORDINAL_FLAG64 = 0x8000000000000000L

+OPTIONAL_HEADER_MAGIC_PE = 0x10b

+OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20b

+directory_entry_types = [

+ ('IMAGE_DIRECTORY_ENTRY_EXPORT', 0),

+ ('IMAGE_DIRECTORY_ENTRY_IMPORT', 1),

+ ('IMAGE_DIRECTORY_ENTRY_RESOURCE', 2),

+ ('IMAGE_DIRECTORY_ENTRY_EXCEPTION', 3),

+ ('IMAGE_DIRECTORY_ENTRY_SECURITY', 4),

+ ('IMAGE_DIRECTORY_ENTRY_BASERELOC', 5),

+ ('IMAGE_DIRECTORY_ENTRY_DEBUG', 6),

+ ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT', 7),

+ ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR', 8),

+ ('IMAGE_DIRECTORY_ENTRY_TLS', 9),

+ ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG', 10),

+ ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', 11),

+ ('IMAGE_DIRECTORY_ENTRY_IAT', 12),

+ ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', 13),

+ ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),

+ ('IMAGE_DIRECTORY_ENTRY_RESERVED', 15) ]

+DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)

+image_characteristics = [

+ ('IMAGE_FILE_RELOCS_STRIPPED', 0x0001),

+ ('IMAGE_FILE_EXECUTABLE_IMAGE', 0x0002),

+ ('IMAGE_FILE_LINE_NUMS_STRIPPED', 0x0004),

+ ('IMAGE_FILE_LOCAL_SYMS_STRIPPED', 0x0008),

+ ('IMAGE_FILE_AGGRESIVE_WS_TRIM', 0x0010),

+ ('IMAGE_FILE_LARGE_ADDRESS_AWARE', 0x0020),

+ ('IMAGE_FILE_16BIT_MACHINE', 0x0040),

+ ('IMAGE_FILE_BYTES_REVERSED_LO', 0x0080),

+ ('IMAGE_FILE_32BIT_MACHINE', 0x0100),

+ ('IMAGE_FILE_DEBUG_STRIPPED', 0x0200),

+ ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', 0x0400),

+ ('IMAGE_FILE_NET_RUN_FROM_SWAP', 0x0800),

+ ('IMAGE_FILE_SYSTEM', 0x1000),

+ ('IMAGE_FILE_DLL', 0x2000),

+ ('IMAGE_FILE_UP_SYSTEM_ONLY', 0x4000),

+ ('IMAGE_FILE_BYTES_REVERSED_HI', 0x8000) ]

+IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in

+ image_characteristics]+image_characteristics)

+section_characteristics = [

+ ('IMAGE_SCN_CNT_CODE', 0x00000020),

+ ('IMAGE_SCN_CNT_INITIALIZED_DATA', 0x00000040),

+ ('IMAGE_SCN_CNT_UNINITIALIZED_DATA', 0x00000080),

+ ('IMAGE_SCN_LNK_OTHER', 0x00000100),

+ ('IMAGE_SCN_LNK_INFO', 0x00000200),

+ ('IMAGE_SCN_LNK_REMOVE', 0x00000800),

+ ('IMAGE_SCN_LNK_COMDAT', 0x00001000),

+ ('IMAGE_SCN_MEM_FARDATA', 0x00008000),

+ ('IMAGE_SCN_MEM_PURGEABLE', 0x00020000),

+ ('IMAGE_SCN_MEM_16BIT', 0x00020000),

+ ('IMAGE_SCN_MEM_LOCKED', 0x00040000),

+ ('IMAGE_SCN_MEM_PRELOAD', 0x00080000),

+ ('IMAGE_SCN_ALIGN_1BYTES', 0x00100000),

+ ('IMAGE_SCN_ALIGN_2BYTES', 0x00200000),

+ ('IMAGE_SCN_ALIGN_4BYTES', 0x00300000),

+ ('IMAGE_SCN_ALIGN_8BYTES', 0x00400000),

+ ('IMAGE_SCN_ALIGN_16BYTES', 0x00500000),

+ ('IMAGE_SCN_ALIGN_32BYTES', 0x00600000),

+ ('IMAGE_SCN_ALIGN_64BYTES', 0x00700000),

+ ('IMAGE_SCN_ALIGN_128BYTES', 0x00800000),

+ ('IMAGE_SCN_ALIGN_256BYTES', 0x00900000),

+ ('IMAGE_SCN_ALIGN_512BYTES', 0x00A00000),

+ ('IMAGE_SCN_ALIGN_1024BYTES', 0x00B00000),

+ ('IMAGE_SCN_ALIGN_2048BYTES', 0x00C00000),

+ ('IMAGE_SCN_ALIGN_4096BYTES', 0x00D00000),

+ ('IMAGE_SCN_ALIGN_8192BYTES', 0x00E00000),

+ ('IMAGE_SCN_ALIGN_MASK', 0x00F00000),

+ ('IMAGE_SCN_LNK_NRELOC_OVFL', 0x01000000),

+ ('IMAGE_SCN_MEM_DISCARDABLE', 0x02000000),

+ ('IMAGE_SCN_MEM_NOT_CACHED', 0x04000000),

+ ('IMAGE_SCN_MEM_NOT_PAGED', 0x08000000),

+ ('IMAGE_SCN_MEM_SHARED', 0x10000000),

+ ('IMAGE_SCN_MEM_EXECUTE', 0x20000000),

+ ('IMAGE_SCN_MEM_READ', 0x40000000),

+ ('IMAGE_SCN_MEM_WRITE', 0x80000000L) ]

+SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in

+ section_characteristics]+section_characteristics)

+debug_types = [

+ ('IMAGE_DEBUG_TYPE_UNKNOWN', 0),

+ ('IMAGE_DEBUG_TYPE_COFF', 1),

+ ('IMAGE_DEBUG_TYPE_CODEVIEW', 2),

+ ('IMAGE_DEBUG_TYPE_FPO', 3),

+ ('IMAGE_DEBUG_TYPE_MISC', 4),

+ ('IMAGE_DEBUG_TYPE_EXCEPTION', 5),

+ ('IMAGE_DEBUG_TYPE_FIXUP', 6),

+ ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC', 7),

+ ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC', 8),

+ ('IMAGE_DEBUG_TYPE_BORLAND', 9),

+ ('IMAGE_DEBUG_TYPE_RESERVED10', 10) ]

+DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)

+subsystem_types = [

+ ('IMAGE_SUBSYSTEM_UNKNOWN', 0),

+ ('IMAGE_SUBSYSTEM_NATIVE', 1),

+ ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),

+ ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),

+ ('IMAGE_SUBSYSTEM_OS2_CUI', 5),

+ ('IMAGE_SUBSYSTEM_POSIX_CUI', 7),

+ ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI', 9),

+ ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),

+ ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),

+ ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER', 12),

+ ('IMAGE_SUBSYSTEM_EFI_ROM', 13),

+ ('IMAGE_SUBSYSTEM_XBOX', 14)]

+SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)

+machine_types = [

+ ('IMAGE_FILE_MACHINE_UNKNOWN', 0),

+ ('IMAGE_FILE_MACHINE_AM33', 0x1d3),

+ ('IMAGE_FILE_MACHINE_AMD64', 0x8664),

+ ('IMAGE_FILE_MACHINE_ARM', 0x1c0),

+ ('IMAGE_FILE_MACHINE_EBC', 0xebc),

+ ('IMAGE_FILE_MACHINE_I386', 0x14c),

+ ('IMAGE_FILE_MACHINE_IA64', 0x200),

+ ('IMAGE_FILE_MACHINE_MR32', 0x9041),

+ ('IMAGE_FILE_MACHINE_MIPS16', 0x266),

+ ('IMAGE_FILE_MACHINE_MIPSFPU', 0x366),

+ ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),

+ ('IMAGE_FILE_MACHINE_POWERPC', 0x1f0),

+ ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),

+ ('IMAGE_FILE_MACHINE_R4000', 0x166),

+ ('IMAGE_FILE_MACHINE_SH3', 0x1a2),

+ ('IMAGE_FILE_MACHINE_SH3DSP', 0x1a3),

+ ('IMAGE_FILE_MACHINE_SH4', 0x1a6),

+ ('IMAGE_FILE_MACHINE_SH5', 0x1a8),

+ ('IMAGE_FILE_MACHINE_THUMB', 0x1c2),

+ ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),

+ ]

+MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)

+relocation_types = [

+ ('IMAGE_REL_BASED_ABSOLUTE', 0),

+ ('IMAGE_REL_BASED_HIGH', 1),

+ ('IMAGE_REL_BASED_LOW', 2),

+ ('IMAGE_REL_BASED_HIGHLOW', 3),

+ ('IMAGE_REL_BASED_HIGHADJ', 4),

+ ('IMAGE_REL_BASED_MIPS_JMPADDR', 5),

+ ('IMAGE_REL_BASED_SECTION', 6),

+ ('IMAGE_REL_BASED_REL', 7),

+ ('IMAGE_REL_BASED_MIPS_JMPADDR16', 9),

+ ('IMAGE_REL_BASED_IA64_IMM64', 9),

+ ('IMAGE_REL_BASED_DIR64', 10),

+ ('IMAGE_REL_BASED_HIGH3ADJ', 11) ]

+RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)

+dll_characteristics = [

+ ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),

+ ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),

+ ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),

+ ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),

+ ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE', 0x0040),

+ ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY', 0x0080),

+ ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT', 0x0100),

+ ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION', 0x0200),

+ ('IMAGE_DLL_CHARACTERISTICS_NO_SEH', 0x0400),

+ ('IMAGE_DLL_CHARACTERISTICS_NO_BIND', 0x0800),

+ ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),

+ ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER', 0x2000),

+ ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]

+DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)

+# Resource types

+resource_type = [

+ ('RT_CURSOR', 1),

+ ('RT_BITMAP', 2),

+ ('RT_ICON', 3),

+ ('RT_MENU', 4),

+ ('RT_DIALOG', 5),

+ ('RT_STRING', 6),

+ ('RT_FONTDIR', 7),

+ ('RT_FONT', 8),

+ ('RT_ACCELERATOR', 9),

+ ('RT_RCDATA', 10),

+ ('RT_MESSAGETABLE', 11),

+ ('RT_GROUP_CURSOR', 12),

+ ('RT_GROUP_ICON', 14),

+ ('RT_VERSION', 16),

+ ('RT_DLGINCLUDE', 17),

+ ('RT_PLUGPLAY', 19),

+ ('RT_VXD', 20),

+ ('RT_ANICURSOR', 21),

+ ('RT_ANIICON', 22),

+ ('RT_HTML', 23),

+ ('RT_MANIFEST', 24) ]

+RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)

+# Language definitions

+lang = [

+ ('LANG_NEUTRAL', 0x00),

+ ('LANG_INVARIANT', 0x7f),

+ ('LANG_AFRIKAANS', 0x36),

+ ('LANG_ALBANIAN', 0x1c),

+ ('LANG_ARABIC', 0x01),

+ ('LANG_ARMENIAN', 0x2b),

+ ('LANG_ASSAMESE', 0x4d),

+ ('LANG_AZERI', 0x2c),

+ ('LANG_BASQUE', 0x2d),

+ ('LANG_BELARUSIAN', 0x23),

+ ('LANG_BENGALI', 0x45),

+ ('LANG_BULGARIAN', 0x02),

+ ('LANG_CATALAN', 0x03),

+ ('LANG_CHINESE', 0x04),

+ ('LANG_CROATIAN', 0x1a),

+ ('LANG_CZECH', 0x05),

+ ('LANG_DANISH', 0x06),

+ ('LANG_DIVEHI', 0x65),

+ ('LANG_DUTCH', 0x13),

+ ('LANG_ENGLISH', 0x09),

+ ('LANG_ESTONIAN', 0x25),

+ ('LANG_FAEROESE', 0x38),

+ ('LANG_FARSI', 0x29),

+ ('LANG_FINNISH', 0x0b),

+ ('LANG_FRENCH', 0x0c),

+ ('LANG_GALICIAN', 0x56),

+ ('LANG_GEORGIAN', 0x37),

+ ('LANG_GERMAN', 0x07),

+ ('LANG_GREEK', 0x08),

+ ('LANG_GUJARATI', 0x47),

+ ('LANG_HEBREW', 0x0d),

+ ('LANG_HINDI', 0x39),

+ ('LANG_HUNGARIAN', 0x0e),

+ ('LANG_ICELANDIC', 0x0f),

+ ('LANG_INDONESIAN', 0x21),

+ ('LANG_ITALIAN', 0x10),

+ ('LANG_JAPANESE', 0x11),

+ ('LANG_KANNADA', 0x4b),

+ ('LANG_KASHMIRI', 0x60),

+ ('LANG_KAZAK', 0x3f),

+ ('LANG_KONKANI', 0x57),

+ ('LANG_KOREAN', 0x12),

+ ('LANG_KYRGYZ', 0x40),

+ ('LANG_LATVIAN', 0x26),

+ ('LANG_LITHUANIAN', 0x27),

+ ('LANG_MACEDONIAN', 0x2f),

+ ('LANG_MALAY', 0x3e),

+ ('LANG_MALAYALAM', 0x4c),

+ ('LANG_MANIPURI', 0x58),

+ ('LANG_MARATHI', 0x4e),

+ ('LANG_MONGOLIAN', 0x50),

+ ('LANG_NEPALI', 0x61),

+ ('LANG_NORWEGIAN', 0x14),

+ ('LANG_ORIYA', 0x48),

+ ('LANG_POLISH', 0x15),

+ ('LANG_PORTUGUESE', 0x16),

+ ('LANG_PUNJABI', 0x46),

+ ('LANG_ROMANIAN', 0x18),

+ ('LANG_RUSSIAN', 0x19),

+ ('LANG_SANSKRIT', 0x4f),

+ ('LANG_SERBIAN', 0x1a),

+ ('LANG_SINDHI', 0x59),

+ ('LANG_SLOVAK', 0x1b),

+ ('LANG_SLOVENIAN', 0x24),

+ ('LANG_SPANISH', 0x0a),

+ ('LANG_SWAHILI', 0x41),

+ ('LANG_SWEDISH', 0x1d),

+ ('LANG_SYRIAC', 0x5a),

+ ('LANG_TAMIL', 0x49),

+ ('LANG_TATAR', 0x44),

+ ('LANG_TELUGU', 0x4a),

+ ('LANG_THAI', 0x1e),

+ ('LANG_TURKISH', 0x1f),

+ ('LANG_UKRAINIAN', 0x22),

+ ('LANG_URDU', 0x20),

+ ('LANG_UZBEK', 0x43),

+ ('LANG_VIETNAMESE', 0x2a),

+ ('LANG_GAELIC', 0x3c),

+ ('LANG_MALTESE', 0x3a),

+ ('LANG_MAORI', 0x28),

+ ('LANG_RHAETO_ROMANCE',0x17),

+ ('LANG_SAAMI', 0x3b),

+ ('LANG_SORBIAN', 0x2e),

+ ('LANG_SUTU', 0x30),

+ ('LANG_TSONGA', 0x31),

+ ('LANG_TSWANA', 0x32),

+ ('LANG_VENDA', 0x33),

+ ('LANG_XHOSA', 0x34),

+ ('LANG_ZULU', 0x35),

+ ('LANG_ESPERANTO', 0x8f),

+ ('LANG_WALON', 0x90),

+ ('LANG_CORNISH', 0x91),

+ ('LANG_WELSH', 0x92),

+ ('LANG_BRETON', 0x93) ]

+LANG = dict(lang+[(e[1], e[0]) for e in lang])

+# Sublanguage definitions

+sublang = [

+ ('SUBLANG_NEUTRAL', 0x00),

+ ('SUBLANG_DEFAULT', 0x01),

+ ('SUBLANG_SYS_DEFAULT', 0x02),

+ ('SUBLANG_ARABIC_SAUDI_ARABIA', 0x01),

+ ('SUBLANG_ARABIC_IRAQ', 0x02),

+ ('SUBLANG_ARABIC_EGYPT', 0x03),

+ ('SUBLANG_ARABIC_LIBYA', 0x04),

+ ('SUBLANG_ARABIC_ALGERIA', 0x05),

+ ('SUBLANG_ARABIC_MOROCCO', 0x06),

+ ('SUBLANG_ARABIC_TUNISIA', 0x07),

+ ('SUBLANG_ARABIC_OMAN', 0x08),

+ ('SUBLANG_ARABIC_YEMEN', 0x09),

+ ('SUBLANG_ARABIC_SYRIA', 0x0a),

+ ('SUBLANG_ARABIC_JORDAN', 0x0b),

+ ('SUBLANG_ARABIC_LEBANON', 0x0c),

+ ('SUBLANG_ARABIC_KUWAIT', 0x0d),

+ ('SUBLANG_ARABIC_UAE', 0x0e),

+ ('SUBLANG_ARABIC_BAHRAIN', 0x0f),

+ ('SUBLANG_ARABIC_QATAR', 0x10),

+ ('SUBLANG_AZERI_LATIN', 0x01),

+ ('SUBLANG_AZERI_CYRILLIC', 0x02),

+ ('SUBLANG_CHINESE_TRADITIONAL', 0x01),

+ ('SUBLANG_CHINESE_SIMPLIFIED', 0x02),

+ ('SUBLANG_CHINESE_HONGKONG', 0x03),

+ ('SUBLANG_CHINESE_SINGAPORE', 0x04),

+ ('SUBLANG_CHINESE_MACAU', 0x05),

+ ('SUBLANG_DUTCH', 0x01),

+ ('SUBLANG_DUTCH_BELGIAN', 0x02),

+ ('SUBLANG_ENGLISH_US', 0x01),

+ ('SUBLANG_ENGLISH_UK', 0x02),

+ ('SUBLANG_ENGLISH_AUS', 0x03),

+ ('SUBLANG_ENGLISH_CAN', 0x04),

+ ('SUBLANG_ENGLISH_NZ', 0x05),

+ ('SUBLANG_ENGLISH_EIRE', 0x06),

+ ('SUBLANG_ENGLISH_SOUTH_AFRICA', 0x07),

+ ('SUBLANG_ENGLISH_JAMAICA', 0x08),

+ ('SUBLANG_ENGLISH_CARIBBEAN', 0x09),

+ ('SUBLANG_ENGLISH_BELIZE', 0x0a),

+ ('SUBLANG_ENGLISH_TRINIDAD', 0x0b),

+ ('SUBLANG_ENGLISH_ZIMBABWE', 0x0c),

+ ('SUBLANG_ENGLISH_PHILIPPINES', 0x0d),

+ ('SUBLANG_FRENCH', 0x01),

+ ('SUBLANG_FRENCH_BELGIAN', 0x02),

+ ('SUBLANG_FRENCH_CANADIAN', 0x03),

+ ('SUBLANG_FRENCH_SWISS', 0x04),

+ ('SUBLANG_FRENCH_LUXEMBOURG', 0x05),

+ ('SUBLANG_FRENCH_MONACO', 0x06),

+ ('SUBLANG_GERMAN', 0x01),

+ ('SUBLANG_GERMAN_SWISS', 0x02),

+ ('SUBLANG_GERMAN_AUSTRIAN', 0x03),

+ ('SUBLANG_GERMAN_LUXEMBOURG', 0x04),

+ ('SUBLANG_GERMAN_LIECHTENSTEIN', 0x05),

+ ('SUBLANG_ITALIAN', 0x01),

+ ('SUBLANG_ITALIAN_SWISS', 0x02),

+ ('SUBLANG_KASHMIRI_SASIA', 0x02),

+ ('SUBLANG_KASHMIRI_INDIA', 0x02),

+ ('SUBLANG_KOREAN', 0x01),

+ ('SUBLANG_LITHUANIAN', 0x01),

+ ('SUBLANG_MALAY_MALAYSIA', 0x01),

+ ('SUBLANG_MALAY_BRUNEI_DARUSSALAM', 0x02),

+ ('SUBLANG_NEPALI_INDIA', 0x02),

+ ('SUBLANG_NORWEGIAN_BOKMAL', 0x01),

+ ('SUBLANG_NORWEGIAN_NYNORSK', 0x02),

+ ('SUBLANG_PORTUGUESE', 0x02),

+ ('SUBLANG_PORTUGUESE_BRAZILIAN', 0x01),

+ ('SUBLANG_SERBIAN_LATIN', 0x02),

+ ('SUBLANG_SERBIAN_CYRILLIC', 0x03),

+ ('SUBLANG_SPANISH', 0x01),

+ ('SUBLANG_SPANISH_MEXICAN', 0x02),

+ ('SUBLANG_SPANISH_MODERN', 0x03),

+ ('SUBLANG_SPANISH_GUATEMALA', 0x04),

+ ('SUBLANG_SPANISH_COSTA_RICA', 0x05),

+ ('SUBLANG_SPANISH_PANAMA', 0x06),

+ ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC', 0x07),

+ ('SUBLANG_SPANISH_VENEZUELA', 0x08),

+ ('SUBLANG_SPANISH_COLOMBIA', 0x09),

+ ('SUBLANG_SPANISH_PERU', 0x0a),

+ ('SUBLANG_SPANISH_ARGENTINA', 0x0b),

+ ('SUBLANG_SPANISH_ECUADOR', 0x0c),

+ ('SUBLANG_SPANISH_CHILE', 0x0d),

+ ('SUBLANG_SPANISH_URUGUAY', 0x0e),

+ ('SUBLANG_SPANISH_PARAGUAY', 0x0f),

+ ('SUBLANG_SPANISH_BOLIVIA', 0x10),

+ ('SUBLANG_SPANISH_EL_SALVADOR', 0x11),

+ ('SUBLANG_SPANISH_HONDURAS', 0x12),

+ ('SUBLANG_SPANISH_NICARAGUA', 0x13),

+ ('SUBLANG_SPANISH_PUERTO_RICO', 0x14),

+ ('SUBLANG_SWEDISH', 0x01),

+ ('SUBLANG_SWEDISH_FINLAND', 0x02),

+ ('SUBLANG_URDU_PAKISTAN', 0x01),

+ ('SUBLANG_URDU_INDIA', 0x02),

+ ('SUBLANG_UZBEK_LATIN', 0x01),

+ ('SUBLANG_UZBEK_CYRILLIC', 0x02),

+ ('SUBLANG_DUTCH_SURINAM', 0x03),

+ ('SUBLANG_ROMANIAN', 0x01),

+ ('SUBLANG_ROMANIAN_MOLDAVIA', 0x02),

+ ('SUBLANG_RUSSIAN', 0x01),

+ ('SUBLANG_RUSSIAN_MOLDAVIA', 0x02),

+ ('SUBLANG_CROATIAN', 0x01),

+ ('SUBLANG_LITHUANIAN_CLASSIC', 0x02),

+ ('SUBLANG_GAELIC', 0x01),

+ ('SUBLANG_GAELIC_SCOTTISH', 0x02),

+ ('SUBLANG_GAELIC_MANX', 0x03) ]

+SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])

+class UnicodeStringWrapperPostProcessor:

+ """This class attemps to help the process of identifying strings

+ that might be plain Unicode or Pascal. A list of strings will be

+ wrapped on it with the hope the overlappings will help make the

+ decission about their type."""

+ def __init__(self, pe, rva_ptr):

+ self.pe = pe

+ self.rva_ptr = rva_ptr

+ self.string = None

+ def get_rva(self):

+ """Get the RVA of the string."""

+ return self.rva_ptr

+ def __str__(self):

+ """Return the escaped ASCII representation of the string."""

+ def convert_char(char):

+ if char in string.printable:

+ return char

+ else:

+ return r'\x%02x' % ord(char)

+ if self.string:

+ return ''.join([convert_char(c) for c in self.string])

+ return ''

+ def invalidate(self):

+ """Make this instance None, to express it's no known string type."""

+ self = None

+ def render_pascal_16(self):

+ self.string = self.pe.get_string_u_at_rva(

+ self.rva_ptr+2,

+ max_length=self.__get_pascal_16_length())

+ def ask_pascal_16(self, next_rva_ptr):

+ """The next RVA is taken to be the one immediately following this one.

+ Such RVA could indicate the natural end of the string and will be checked

+ with the possible length contained in the first word.

+ """

+ length = self.__get_pascal_16_length()

+ if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:

+ self.length = length

+ return True

+ return False

+ def __get_pascal_16_length(self):

+ return self.__get_word_value_at_rva(self.rva_ptr)

+ def __get_word_value_at_rva(self, rva):

+ try:

+ data = self.pe.get_data(self.rva_ptr, 2)

+ except PEFormatError, e:

+ return False

+ if len(data)<2:

+ return False

+ return struct.unpack('<H', data)[0]

+ #def render_pascal_8(self):

+ # """"""

+ def ask_unicode_16(self, next_rva_ptr):

+ """The next RVA is taken to be the one immediately following this one.

+ Such RVA could indicate the natural end of the string and will be checked

+ to see if there's a Unicode NULL character there.

+ """

+ if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:

+ self.length = next_rva_ptr - self.rva_ptr

+ return True

+ return False

+ def render_unicode_16(self):

+ """"""

+ self.string = self.pe.get_string_u_at_rva(self.rva_ptr)

+class PEFormatError(Exception):

+ """Generic PE format error exception."""

+ def __init__(self, value):

+ self.value = value

+ def __str__(self):

+ return repr(self.value)

+class Dump:

+ """Convenience class for dumping the PE information."""

+ def __init__(self):

+ self.text = ''

+ def add_lines(self, txt, indent=0):

+ """Adds a list of lines.

+ The list can be indented with the optional argument 'indent'.

+ """

+ for line in txt:

+ self.add_line(line, indent)

+ def add_line(self, txt, indent=0):

+ """Adds a line.

+ The line can be indented with the optional argument 'indent'.

+ """

+ self.add(txt+'\n', indent)

+ def add(self, txt, indent=0):

+ """Adds some text, no newline will be appended.

+ The text can be indented with the optional argument 'indent'.

+ """

+ if isinstance(txt, unicode):

+ s = []

+ for c in txt:

+ try:

+ s.append(str(c))

+ except UnicodeEncodeError, e:

+ s.append(repr(c))

+ txt = ''.join(s)

+ self.text += ' '*indent+txt

+ def add_header(self, txt):

+ """Adds a header element."""

+ self.add_line('-'*10+txt+'-'*10+'\n')

+ def add_newline(self):

+ """Adds a newline."""

+ self.text += '\n'

+ def get_text(self):

+ """Get the text in its current state."""

+ return self.text

+class Structure:

+ """Prepare structure object to extract members from data.

+ Format is a list containing definitions for the elements

+ of the structure.

+ """

+ def __init__(self, format, name=None, file_offset=None):

+ # Format is forced little endian, for big endian non Intel platforms

+ self.__format__ = '<'

+ self.__keys__ = []

+# self.values = {}

+ self.__format_length__ = 0

+ self.__set_format__(format[1])

+ self._all_zeroes = False

+ self.__unpacked_data_elms__ = None

+ self.__file_offset__ = file_offset

+ if name:

+ self.name = name

+ else:

+ self.name = format[0]

+ def __get_format__(self):

+ return self.__format__

+ def get_file_offset(self):

+ return self.__file_offset__

+ def set_file_offset(self, offset):

+ self.__file_offset__ = offset

+ def all_zeroes(self):

+ """Returns true is the unpacked data is all zeroes."""

+ return self._all_zeroes

+ def __set_format__(self, format):

+ for elm in format:

+ if ',' in elm:

+ elm_type, elm_name = elm.split(',', 1)

+ self.__format__ += elm_type

+ elm_names = elm_name.split(',')

+ names = []

+ for elm_name in elm_names:

+ if elm_name in self.__keys__:

+ search_list = [x[:len(elm_name)] for x in self.__keys__]

+ occ_count = search_list.count(elm_name)

+ elm_name = elm_name+'_'+str(occ_count)

+ names.append(elm_name)

+ # Some PE header structures have unions on them, so a certain

+ # value might have different names, so each key has a list of

+ # all the possible members referring to the data.

+ self.__keys__.append(names)

+ self.__format_length__ = struct.calcsize(self.__format__)

+ def sizeof(self):

+ """Return size of the structure."""

+ return self.__format_length__

+ def __unpack__(self, data):

+ if len(data)>self.__format_length__:

+ data = data[:self.__format_length__]

+ # OC Patch:

+ # Some malware have incorrect header lengths.

+ # Fail gracefully if this occurs

+ # Buggy malware: a29b0118af8b7408444df81701ad5a7f

+ #

+ elif len(data)<self.__format_length__:

+ raise PEFormatError('Data length less than expected header length.')

+ if data.count(chr(0)) == len(data):

+ self._all_zeroes = True

+ self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)

+ for i in xrange(len(self.__unpacked_data_elms__)):

+ for key in self.__keys__[i]:

+# self.values[key] = self.__unpacked_data_elms__[i]

+ setattr(self, key, self.__unpacked_data_elms__[i])

+ def __pack__(self):

+ new_values = []

+ for i in xrange(len(self.__unpacked_data_elms__)):

+ for key in self.__keys__[i]:

+ new_val = getattr(self, key)

+ old_val = self.__unpacked_data_elms__[i]

+ # In the case of Unions, when the first changed value

+ # is picked the loop is exited

+ if new_val != old_val:

+ break

+ new_values.append(new_val)

+ return struct.pack(self.__format__, *new_values)

+ def __str__(self):

+ return '\n'.join( self.dump() )

+ def __repr__(self):

+ return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))

+ def dump(self, indentation=0):

+ """Returns a string representation of the structure."""

+ dump = []

+ dump.append('[%s]' % self.name)

+ # Refer to the __set_format__ method for an explanation

+ # of the following construct.

+ for keys in self.__keys__:

+ for key in keys:

+ val = getattr(self, key)

+ if isinstance(val, int) or isinstance(val, long):

+ val_str = '0x%-8X' % (val)

+ if key == 'TimeDateStamp' or key == 'dwTimeStamp':

+ try:

+ val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))

+ except exceptions.ValueError, e:

+ val_str += ' [INVALID TIME]'

+ else:

+ val_str = ''.join(filter(lambda c:c != '\0', str(val)))

+ dump.append('%-30s %s' % (key+':', val_str))

+ return dump

+class SectionStructure(Structure):

+ """Convenience section handling class."""

+ def get_data(self, start, length=None):

+ """Get data chunk from a section.

+ Allows to query data from the section by passing the

+ addresses where the PE file would be loaded by default.

+ It is then possible to retrieve code and data by its real

+ addresses as it would be if loaded.

+ """

+ offset = start - self.VirtualAddress

+ if length:

+ end = offset+length

+ else:

+ end = len(self.data)

+ return self.data[offset:end]

+ def get_rva_from_offset(self, offset):

+ return offset - self.PointerToRawData + self.VirtualAddress

+ def get_offset_from_rva(self, rva):

+ return (rva - self.VirtualAddress) + self.PointerToRawData

+ def contains_offset(self, offset):

+ """Check whether the section contains the file offset provided."""

+ if not self.PointerToRawData:

+ # bss and other sections containing only uninitialized data must have 0

+ # and do not take space in the file

+ return False

+ return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData

+ def contains_rva(self, rva):

+ """Check whether the section contains the address provided."""

+ # PECOFF documentation v8 says:

+ # The total size of the section when loaded into memory.

+ # If this value is greater than SizeOfRawData, the section is zero-padded.

+ # This field is valid only for executable images and should be set to zero

+ # for object files.

+ if len(self.data) < self.SizeOfRawData:

+ size = self.Misc_VirtualSize

+ else:

+ size = max(self.SizeOfRawData, self.Misc_VirtualSize)

+ return self.VirtualAddress <= rva < self.VirtualAddress + size

+ def contains(self, rva):

+ #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"

+ return self.contains_rva(rva)

+ def set_data(self, data):

+ """Set the data belonging to the section."""

+ self.data = data

+ def get_entropy(self):

+ """Calculate and return the entropy for the section."""

+ return self.entropy_H( self.data )

+ def get_hash_sha1(self):

+ """Get the SHA-1 hex-digest of the section's data."""

+ if sha1 is not None:

+ return sha1( self.data ).hexdigest()

+ def get_hash_sha256(self):

+ """Get the SHA-256 hex-digest of the section's data."""

+ if sha256 is not None:

+ return sha256( self.data ).hexdigest()

+ def get_hash_sha512(self):

+ """Get the SHA-512 hex-digest of the section's data."""

+ if sha512 is not None:

+ return sha512( self.data ).hexdigest()

+ def get_hash_md5(self):

+ """Get the MD5 hex-digest of the section's data."""

+ if md5 is not None:

+ return md5( self.data ).hexdigest()

+ def entropy_H(self, data):

+ """Calculate the entropy of a chunk of data."""

+ if len(data) == 0:

+ return 0.0

+ occurences = array.array('L', [0]*256)

+ for x in data:

+ occurences[ord(x)] += 1

+ entropy = 0

+ for x in occurences:

+ if x:

+ p_x = float(x) / len(data)

+ entropy -= p_x*math.log(p_x, 2)

+ return entropy

+class DataContainer:

+ """Generic data container."""

+ def __init__(self, **args):

+ for key, value in args.items():

+ setattr(self, key, value)

+class ImportDescData(DataContainer):

+ """Holds import descriptor information.

+ dll: name of the imported DLL

+ imports: list of imported symbols (ImportData instances)

+ struct: IMAGE_IMPORT_DESCRIPTOR sctruture

+ """

+class ImportData(DataContainer):

+ """Holds imported symbol's information.

+ ordinal: Ordinal of the symbol

+ name: Name of the symbol

+ bound: If the symbol is bound, this contains

+ the address.

+ """

+class ExportDirData(DataContainer):

+ """Holds export directory information.

+ struct: IMAGE_EXPORT_DIRECTORY structure

+ symbols: list of exported symbols (ExportData instances)

+"""

+class ExportData(DataContainer):

+ """Holds exported symbols' information.

+ ordinal: ordinal of the symbol

+ address: address of the symbol

+ name: name of the symbol (None if the symbol is

+ exported by ordinal only)

+ forwarder: if the symbol is forwarded it will

+ contain the name of the target symbol,

+ None otherwise.

+ """

+class ResourceDirData(DataContainer):

+ """Holds resource directory information.

+ struct: IMAGE_RESOURCE_DIRECTORY structure

+ entries: list of entries (ResourceDirEntryData instances)

+ """

+class ResourceDirEntryData(DataContainer):

+ """Holds resource directory entry data.

+ struct: IMAGE_RESOURCE_DIRECTORY_ENTRY structure

+ name: If the resource is identified by name this

+ attribute will contain the name string. None

+ otherwise. If identified by id, the id is

+ availabe at 'struct.Id'

+ id: the id, also in struct.Id

+ directory: If this entry has a lower level directory

+ this attribute will point to the

+ ResourceDirData instance representing it.

+ data: If this entry has no futher lower directories

+ and points to the actual resource data, this

+ attribute will reference the corresponding

+ ResourceDataEntryData instance.

+ (Either of the 'directory' or 'data' attribute will exist,

+ but not both.)

+ """

+class ResourceDataEntryData(DataContainer):

+ """Holds resource data entry information.

+ struct: IMAGE_RESOURCE_DATA_ENTRY structure

+ lang: Primary language ID

+ sublang: Sublanguage ID

+ """

+class DebugData(DataContainer):

+ """Holds debug information.

+ struct: IMAGE_DEBUG_DIRECTORY structure

+ """

+class BaseRelocationData(DataContainer):

+ """Holds base relocation information.

+ struct: IMAGE_BASE_RELOCATION structure

+ entries: list of relocation data (RelocationData instances)

+ """

+class RelocationData(DataContainer):

+ """Holds relocation information.

+ type: Type of relocation

+ The type string is can be obtained by

+ RELOCATION_TYPE[type]

+ rva: RVA of the relocation

+ """

+class TlsData(DataContainer):

+ """Holds TLS information.

+ struct: IMAGE_TLS_DIRECTORY structure

+ """

+class BoundImportDescData(DataContainer):

+ """Holds bound import descriptor data.

+ This directory entry will provide with information on the

+ DLLs this PE files has been bound to (if bound at all).

+ The structure will contain the name and timestamp of the

+ DLL at the time of binding so that the loader can know

+ whether it differs from the one currently present in the

+ system and must, therefore, re-bind the PE's imports.

+ struct: IMAGE_BOUND_IMPORT_DESCRIPTOR structure

+ name: DLL name

+ entries: list of entries (BoundImportRefData instances)

+ the entries will exist if this DLL has forwarded

+ symbols. If so, the destination DLL will have an

+ entry in this list.

+ """

+class BoundImportRefData(DataContainer):

+ """Holds bound import forwader reference data.

+ Contains the same information as the bound descriptor but

+ for forwarded DLLs, if any.

+ struct: IMAGE_BOUND_FORWARDER_REF structure

+ name: dll name

+ """

+class PE:

+ """A Portable Executable representation.

+ This class provides access to most of the information in a PE file.

+ It expects to be supplied the name of the file to load or PE data

+ to process and an optional argument 'fast_load' (False by default)

+ which controls whether to load all the directories information,

+ which can be quite time consuming.

+ pe = pefile.PE('module.dll')

+ pe = pefile.PE(name='module.dll')

+ would load 'module.dll' and process it. If the data would be already

+ available in a buffer the same could be achieved with:

+ pe = pefile.PE(data=module_dll_data)

+ The "fast_load" can be set to a default by setting its value in the

+ module itself by means,for instance, of a "pefile.fast_load = True".

+ That will make all the subsequent instances not to load the

+ whole PE structure. The "full_load" method can be used to parse

+ the missing data at a later stage.

+ Basic headers information will be available in the attributes:

+ DOS_HEADER

+ NT_HEADERS

+ FILE_HEADER

+ OPTIONAL_HEADER

+ All of them will contain among their attrbitues the members of the

+ corresponding structures as defined in WINNT.H

+ The raw data corresponding to the header (from the beginning of the

+ file up to the start of the first section) will be avaiable in the

+ instance's attribute 'header' as a string.

+ The sections will be available as a list in the 'sections' attribute.

+ Each entry will contain as attributes all the structure's members.

+ Directory entries will be available as attributes (if they exist):

+ (no other entries are processed at this point)

+ DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)

+ DIRECTORY_ENTRY_EXPORT (ExportDirData instance)

+ DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)

+ DIRECTORY_ENTRY_DEBUG (list of DebugData instances)

+ DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)

+ DIRECTORY_ENTRY_TLS

+ DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)

+ The following dictionary attributes provide ways of mapping different

+ constants. They will accept the numeric value and return the string

+ representation and the opposite, feed in the string and get the

+ numeric constant:

+ DIRECTORY_ENTRY

+ IMAGE_CHARACTERISTICS

+ SECTION_CHARACTERISTICS

+ DEBUG_TYPE

+ SUBSYSTEM_TYPE

+ MACHINE_TYPE

+ RELOCATION_TYPE

+ RESOURCE_TYPE

+ LANG

+ SUBLANG

+ """

+ #

+ # Format specifications for PE structures.

+ #

+ __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',

+ ('H,e_magic', 'H,e_cblp', 'H,e_cp',

+ 'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',

+ 'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',

+ 'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',

+ 'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',

+ 'L,e_lfanew'))

+ __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',

+ ('H,Machine', 'H,NumberOfSections',

+ 'L,TimeDateStamp', 'L,PointerToSymbolTable',

+ 'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',

+ 'H,Characteristics'))

+ __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',

+ ('L,VirtualAddress', 'L,Size'))

+ __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',

+ ('H,Magic', 'B,MajorLinkerVersion',

+ 'B,MinorLinkerVersion', 'L,SizeOfCode',

+ 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',

+ 'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',

+ 'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',

+ 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',

+ 'H,MajorImageVersion', 'H,MinorImageVersion',

+ 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',

+ 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',

+ 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',

+ 'L,SizeOfStackReserve', 'L,SizeOfStackCommit',

+ 'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',

+ 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

+ __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',

+ ('H,Magic', 'B,MajorLinkerVersion',

+ 'B,MinorLinkerVersion', 'L,SizeOfCode',

+ 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',

+ 'L,AddressOfEntryPoint', 'L,BaseOfCode',

+ 'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',

+ 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',

+ 'H,MajorImageVersion', 'H,MinorImageVersion',

+ 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',

+ 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',

+ 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',

+ 'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',

+ 'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',

+ 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

+ __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))

+ __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',

+ ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',

+ 'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',

+ 'L,PointerToRelocations', 'L,PointerToLinenumbers',

+ 'H,NumberOfRelocations', 'H,NumberOfLinenumbers',

+ 'L,Characteristics'))

+ __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',

+ ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',

+ 'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

+ __IMAGE_IMPORT_DESCRIPTOR_format__ = ('IMAGE_IMPORT_DESCRIPTOR',

+ ('L,OriginalFirstThunk,Characteristics',

+ 'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

+ __IMAGE_EXPORT_DIRECTORY_format__ = ('IMAGE_EXPORT_DIRECTORY',

+ ('L,Characteristics',

+ 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',

+ 'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',

+ 'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

+ __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',

+ ('L,Characteristics',

+ 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',

+ 'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

+ __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',

+ ('L,Name',

+ 'L,OffsetToData'))

+ __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',

+ ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))

+ __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',

+ ('H,Length', 'H,ValueLength', 'H,Type' ))

+ __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',

+ ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',

+ 'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',

+ 'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))

+ __StringFileInfo_format__ = ( 'StringFileInfo',

+ ('H,Length', 'H,ValueLength', 'H,Type' ))

+ __StringTable_format__ = ( 'StringTable',

+ ('H,Length', 'H,ValueLength', 'H,Type' ))

+ __String_format__ = ( 'String',

+ ('H,Length', 'H,ValueLength', 'H,Type' ))

+ __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))

+ __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',

+ ('L,ForwarderString,Function,Ordinal,AddressOfData',))

+ __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',

+ ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

+ __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',

+ ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',

+ 'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',

+ 'L,PointerToRawData'))

+ __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',

+ ('L,VirtualAddress', 'L,SizeOfBlock') )

+ __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',

+ ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',

+ 'L,AddressOfIndex', 'L,AddressOfCallBacks',

+ 'L,SizeOfZeroFill', 'L,Characteristics' ) )

+ __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',

+ ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',

+ 'Q,AddressOfIndex', 'Q,AddressOfCallBacks',

+ 'L,SizeOfZeroFill', 'L,Characteristics' ) )

+ __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',

+ ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))

+ __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',

+ ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )

+ def __init__(self, name=None, data=None, fast_load=None):

+ self.sections = []

+ self.__warnings = []

+ self.PE_TYPE = None

+ if not name and not data:

+ return

+ # This list will keep track of all the structures created.

+ # That will allow for an easy iteration through the list

+ # in order to save the modifications made

+ self.__structures__ = []

+ if not fast_load:

+ fast_load = globals()['fast_load']

+ self.__parse__(name, data, fast_load)

+ def __unpack_data__(self, format, data, file_offset):

+ """Apply structure format to raw data.

+ Returns and unpacked structure object if successful, None otherwise.

+ """

+ structure = Structure(format, file_offset=file_offset)

+ #if len(data) < structure.sizeof():

+ # return None

+ try:

+ structure.__unpack__(data)

+ except PEFormatError, err:

+ self.__warnings.append(

+ 'Corrupt header "%s" at file offset %d. Exception: %s' % (

+ format[0], file_offset, str(err)) )

+ return None

+ self.__structures__.append(structure)

+ return structure

+ def __parse__(self, fname, data, fast_load):

+ """Parse a Portable Executable file.

+ Loads a PE file, parsing all its structures and making them available

+ through the instance's attributes.

+ """

+ if fname:

+ fd = file(fname, 'rb')

+ self.__data__ = fd.read()

+ fd.close()

+ elif data:

+ self.__data__ = data

+ self.DOS_HEADER = self.__unpack_data__(

+ self.__IMAGE_DOS_HEADER_format__,

+ self.__data__, file_offset=0)

+ if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:

+ raise PEFormatError('DOS Header magic not found.')

+ # OC Patch:

+ # Check for sane value in e_lfanew

+ #

+ if self.DOS_HEADER.e_lfanew > len(self.__data__):

+ raise PEFormatError('Invalid e_lfanew value, probably not a PE file')

+ nt_headers_offset = self.DOS_HEADER.e_lfanew

+ self.NT_HEADERS = self.__unpack_data__(

+ self.__IMAGE_NT_HEADERS_format__,

+ self.__data__[nt_headers_offset:],

+ file_offset = nt_headers_offset)

+ # We better check the signature right here, before the file screws

+ # around with sections:

+ # OC Patch:

+ # Some malware will cause the Signature value to not exist at all

+ if not self.NT_HEADERS or not self.NT_HEADERS.Signature:

+ raise PEFormatError('NT Headers not found.')

+ if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:

+ raise PEFormatError('Invalid NT Headers signature.')

+ self.FILE_HEADER = self.__unpack_data__(

+ self.__IMAGE_FILE_HEADER_format__,

+ self.__data__[nt_headers_offset+4:],

+ file_offset = nt_headers_offset+4)

+ image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')

+ if not self.FILE_HEADER:

+ raise PEFormatError('File Header missing')

+ # Set the image's flags according the the Characteristics member

+ self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)

+ optional_header_offset = \

+ nt_headers_offset+4+self.FILE_HEADER.sizeof()

+ # Note: location of sections can be controlled from PE header:

+ sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader

+ self.OPTIONAL_HEADER = self.__unpack_data__(

+ self.__IMAGE_OPTIONAL_HEADER_format__,

+ self.__data__[optional_header_offset:],

+ file_offset = optional_header_offset)

+ # According to solardesigner's findings for his

+ # Tiny PE project, the optional header does not

+ # need fields beyond "Subsystem" in order to be

+ # loadable by the Windows loader (given that zeroes

+ # are acceptable values and the header is loaded

+ # in a zeroed memory page)

+ # If trying to parse a full Optional Header fails

+ # we try to parse it again with some 0 padding

+ #

+ MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69

+ if ( self.OPTIONAL_HEADER is None and

+ len(self.__data__[optional_header_offset:])

+ >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):

+ # Add enough zeroes to make up for the unused fields

+ #

+ padding_length = 128

+ # Create padding

+ #

+ padded_data = self.__data__[optional_header_offset:] + (

+ '\0' * padding_length)

+ self.OPTIONAL_HEADER = self.__unpack_data__(

+ self.__IMAGE_OPTIONAL_HEADER_format__,

+ padded_data,

+ file_offset = optional_header_offset)

+ # Check the Magic in the OPTIONAL_HEADER and set the PE file

+ # type accordingly

+ #

+ if self.OPTIONAL_HEADER is not None:

+ if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:

+ self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE

+ elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:

+ self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS

+ self.OPTIONAL_HEADER = self.__unpack_data__(

+ self.__IMAGE_OPTIONAL_HEADER64_format__,

+ self.__data__[optional_header_offset:],

+ file_offset = optional_header_offset)

+ # Again, as explained above, we try to parse

+ # a reduced form of the Optional Header which

+ # is still valid despite not including all

+ # structure members

+ #

+ MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4

+ if ( self.OPTIONAL_HEADER is None and

+ len(self.__data__[optional_header_offset:])

+ >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):

+ padding_length = 128

+ padded_data = self.__data__[optional_header_offset:] + (

+ '\0' * padding_length)

+ self.OPTIONAL_HEADER = self.__unpack_data__(

+ self.__IMAGE_OPTIONAL_HEADER64_format__,

+ padded_data,

+ file_offset = optional_header_offset)

+ if not self.FILE_HEADER:

+ raise PEFormatError('File Header missing')

+ # OC Patch:

+ # Die gracefully if there is no OPTIONAL_HEADER field

+ # 975440f5ad5e2e4a92c4d9a5f22f75c1

+ if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:

+ raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")

+ dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')

+ # Set the Dll Characteristics flags according the the DllCharacteristics member

+ self.set_flags(

+ self.OPTIONAL_HEADER,

+ self.OPTIONAL_HEADER.DllCharacteristics,

+ dll_characteristics_flags)

+ self.OPTIONAL_HEADER.DATA_DIRECTORY = []

+ #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)

+ offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())

+ self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER

+ self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER

+ # The NumberOfRvaAndSizes is sanitized to stay within

+ # reasonable limits so can be casted to an int

+ #

+ if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:

+ self.__warnings.append(

+ 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +

+ 'Normal values are never larger than 0x10, the value is: 0x%x' %

+ self.OPTIONAL_HEADER.NumberOfRvaAndSizes )

+ for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):

+ if len(self.__data__[offset:]) == 0:

+ break

+ if len(self.__data__[offset:]) < 8:

+ data = self.__data__[offset:]+'\0'*8

+ else:

+ data = self.__data__[offset:]

+ dir_entry = self.__unpack_data__(

+ self.__IMAGE_DATA_DIRECTORY_format__,

+ data,

+ file_offset = offset)

+ if dir_entry is None:

+ break

+ # Would fail if missing an entry

+ # 1d4937b2fa4d84ad1bce0309857e70ca offending sample

+ try:

+ dir_entry.name = DIRECTORY_ENTRY[i]

+ except (KeyError, AttributeError):

+ break

+ offset += dir_entry.sizeof()

+ self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)

+ # If the offset goes outside the optional header,

+ # the loop is broken, regardless of how many directories

+ # NumberOfRvaAndSizes says there are

+ #

+ # We assume a normally sized optional header, hence that we do

+ # a sizeof() instead of reading SizeOfOptionalHeader.

+ # Then we add a default number of drectories times their size,

+ # if we go beyond that, we assume the number of directories

+ # is wrong and stop processing

+ if offset >= (optional_header_offset +

+ self.OPTIONAL_HEADER.sizeof() + 8*16) :

+ break

+ offset = self.parse_sections(sections_offset)

+ # OC Patch:

+ # There could be a problem if there are no raw data sections

+ # greater than 0

+ # fc91013eb72529da005110a3403541b6 example

+ # Should this throw an exception in the minimum header offset

+ # can't be found?

+ #

+ rawDataPointers = [

+ s.PointerToRawData for s in self.sections if s.PointerToRawData>0]

+ if len(rawDataPointers) > 0:

+ lowest_section_offset = min(rawDataPointers)

+ else:

+ lowest_section_offset = None

+ if not lowest_section_offset or lowest_section_offset<offset:

+ self.header = self.__data__[:offset]

+ else:

+ self.header = self.__data__[:lowest_section_offset]

+ # Check whether the entry point lies within a section

+ #

+ if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:

+ # Check whether the entry point lies within the file

+ #

+ ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)

+ if ep_offset > len(self.__data__):

+ self.__warnings.append(

+ 'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +

+ 'AddressOfEntryPoint: 0x%x' %

+ self.OPTIONAL_HEADER.AddressOfEntryPoint )

+ else:

+ self.__warnings.append(

+ 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +

+ 'AddressOfEntryPoint: 0x%x' %

+ self.OPTIONAL_HEADER.AddressOfEntryPoint )

+ if not fast_load:

+ self.parse_data_directories()

+ def get_warnings(self):

+ """Return the list of warnings.

+ Non-critical problems found when parsing the PE file are

+ appended to a list of warnings. This method returns the

+ full list.

+ """

+ return self.__warnings

+ def show_warnings(self):

+ """Print the list of warnings.

+ Non-critical problems found when parsing the PE file are

+ appended to a list of warnings. This method prints the

+ full list to standard output.

+ """

+ for warning in self.__warnings:

+ print '>', warning

+ def full_load(self):

+ """Process the data directories.

+ This mathod will load the data directories which might not have

+ been loaded if the "fast_load" option was used.

+ """

+ self.parse_data_directories()

+ def write(self, filename=None):

+ """Write the PE file.

+ This function will process all headers and components

+ of the PE file and include all changes made (by just

+ assigning to attributes in the PE objects) and write

+ the changes back to a file whose name is provided as

+ an argument. The filename is optional.

+ The data to be written to the file will be returned

+ as a 'str' object.

+ """

+ file_data = list(self.__data__)

+ for struct in self.__structures__:

+ struct_data = list(struct.__pack__())

+ offset = struct.get_file_offset()

+ file_data[offset:offset+len(struct_data)] = struct_data

+ if hasattr(self, 'VS_VERSIONINFO'):

+ if hasattr(self, 'FileInfo'):

+ for entry in self.FileInfo:

+ if hasattr(entry, 'StringTable'):

+ for st_entry in entry.StringTable:

+ for key, entry in st_entry.entries.items():

+ offsets = st_entry.entries_offsets[key]

+ lengths = st_entry.entries_lengths[key]

+ if len( entry ) > lengths[1]:

+ uc = zip(

+ list(entry[:lengths[1]]), ['\0'] * lengths[1] )

+ l = list()

+ map(l.extend, uc)

+ file_data[

+ offsets[1] : offsets[1] + lengths[1]*2 ] = l

+ else:

+ uc = zip(

+ list(entry), ['\0'] * len(entry) )

+ l = list()

+ map(l.extend, uc)

+ file_data[

+ offsets[1] : offsets[1] + len(entry)*2 ] = l

+ remainder = lengths[1] - len(entry)

+ file_data[

+ offsets[1] + len(entry)*2 :

+ offsets[1] + lengths[1]*2 ] = [

+ u'\0' ] * remainder*2

+ new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )

+ if filename:

+ f = file(filename, 'wb+')

+ f.write(new_file_data)

+ f.close()

+ return new_file_data

+ def parse_sections(self, offset):

+ """Fetch the PE file sections.

+ The sections will be readily available in the "sections" attribute.

+ Its attributes will contain all the section information plus "data"

+ a buffer containing the section's data.

+ The "Characteristics" member will be processed and attributes

+ representing the section characteristics (with the 'IMAGE_SCN_'

+ string trimmed from the constant's names) will be added to the

+ section instance.

+ Refer to the SectionStructure class for additional info.

+ """

+ self.sections = []

+ for i in xrange(self.FILE_HEADER.NumberOfSections):

+ section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)

+ if not section:

+ break

+ section_offset = offset + section.sizeof() * i

+ section.set_file_offset(section_offset)

+ section.__unpack__(self.__data__[section_offset:])

+ self.__structures__.append(section)

+ if section.SizeOfRawData > len(self.__data__):

+ self.__warnings.append(

+ ('Error parsing section %d. ' % i) +

+ 'SizeOfRawData is larger than file.')

+ if section.PointerToRawData > len(self.__data__):

+ self.__warnings.append(

+ ('Error parsing section %d. ' % i) +

+ 'PointerToRawData points beyond the end of the file.')

+ if section.Misc_VirtualSize > 0x10000000:

+ self.__warnings.append(

+ ('Suspicious value found parsing section %d. ' % i) +

+ 'VirtualSize is extremely large > 256MiB.')

+ if section.VirtualAddress > 0x10000000:

+ self.__warnings.append(

+ ('Suspicious value found parsing section %d. ' % i) +

+ 'VirtualAddress is beyond 0x10000000.')

+ #

+ # Some packer used a non-aligned PointerToRawData in the sections,

+ # which causes several common tools not to load the section data

+ # properly as they blindly read from the indicated offset.

+ # It seems that Windows will round the offset down to the largest

+ # offset multiple of FileAlignment which is smaller than

+ # PointerToRawData. The following code will do the same.

+ #

+ #alignment = self.OPTIONAL_HEADER.FileAlignment

+ section_data_start = section.PointerToRawData

+ if ( self.OPTIONAL_HEADER.FileAlignment != 0 and

+ (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):

+ self.__warnings.append(

+ ('Error parsing section %d. ' % i) +

+ 'Suspicious value for FileAlignment in the Optional Header. ' +

+ 'Normally the PointerToRawData entry of the sections\' structures ' +

+ 'is a multiple of FileAlignment, this might imply the file ' +

+ 'is trying to confuse tools which parse this incorrectly')

+ section_data_end = section_data_start+section.SizeOfRawData

+ section.set_data(self.__data__[section_data_start:section_data_end])

+ section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')

+ # Set the section's flags according the the Characteristics member

+ self.set_flags(section, section.Characteristics, section_flags)

+ if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and

+ section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):

+ self.__warnings.append(

+ ('Suspicious flags set for section %d. ' % i) +

+ 'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +

+ 'This might indicate a packed executable.')

+ self.sections.append(section)

+ if self.FILE_HEADER.NumberOfSections > 0 and self.sections:

+ return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections

+ else:

+ return offset

+ def retrieve_flags(self, flag_dict, flag_filter):

+ """Read the flags from a dictionary and return them in a usable form.

+ Will return a list of (flag, value) for all flags in "flag_dict"

+ matching the filter "flag_filter".

+ """

+ return [(f[0], f[1]) for f in flag_dict.items() if

+ isinstance(f[0], str) and f[0].startswith(flag_filter)]

+ def set_flags(self, obj, flag_field, flags):

+ """Will process the flags and set attributes in the object accordingly.

+ The object "obj" will gain attritutes named after the flags provided in

+ "flags" and valued True/False, matching the results of applyin each

+ flag value from "flags" to flag_field.

+ """

+ for flag in flags:

+ if flag[1] & flag_field:

+ setattr(obj, flag[0], True)

+ else:

+ setattr(obj, flag[0], False)

+ def parse_data_directories(self):

+ """Parse and process the PE file's data directories."""

+ directory_parsing = (

+ ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),

+ ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),

+ ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),

+ ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),

+ ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),

+ ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),

+ ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),

+ ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )

+ for entry in directory_parsing:

+ # OC Patch:

+ #

+ try:

+ dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[

+ DIRECTORY_ENTRY[entry[0]]]

+ except IndexError:

+ break

+ if dir_entry.VirtualAddress:

+ value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)

+ if value:

+ setattr(self, entry[0][6:], value)

+ def parse_directory_bound_imports(self, rva, size):

+ """"""

+ bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)

+ bnd_descr_size = bnd_descr.sizeof()

+ start = rva

+ bound_imports = []

+ while True:

+ bnd_descr = self.__unpack_data__(

+ self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,

+ self.__data__[rva:rva+bnd_descr_size],

+ file_offset = rva)

+ if bnd_descr is None:

+ # If can't parse directory then silently return.

+ # This directory does not necesarily have to be valid to

+ # still have a valid PE file

+ self.__warnings.append(

+ 'The Bound Imports directory exists but can\'t be parsed.')

+ return

+ if bnd_descr.all_zeroes():

+ break

+ rva += bnd_descr.sizeof()

+ forwarder_refs = []

+ for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):

+ # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and

+ # IMAGE_BOUND_FORWARDER_REF have the same size.

+ bnd_frwd_ref = self.__unpack_data__(

+ self.__IMAGE_BOUND_FORWARDER_REF_format__,

+ self.__data__[rva:rva+bnd_descr_size],

+ file_offset = rva)

+ # OC Patch:

+ if not bnd_frwd_ref:

+ raise PEFormatError(

+ "IMAGE_BOUND_FORWARDER_REF cannot be read")

+ rva += bnd_frwd_ref.sizeof()

+ name_str = self.get_string_from_data(

+ start+bnd_frwd_ref.OffsetModuleName, self.__data__)

+ if not name_str:

+ break

+ forwarder_refs.append(BoundImportRefData(

+ struct = bnd_frwd_ref,

+ name = name_str))

+ name_str = self.get_string_from_data(

+ start+bnd_descr.OffsetModuleName, self.__data__)

+ if not name_str:

+ break

+ bound_imports.append(

+ BoundImportDescData(

+ struct = bnd_descr,

+ name = name_str,

+ entries = forwarder_refs))

+ return bound_imports

+ def parse_directory_tls(self, rva, size):

+ """"""

+ if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:

+ format = self.__IMAGE_TLS_DIRECTORY_format__

+ elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:

+ format = self.__IMAGE_TLS_DIRECTORY64_format__

+ tls_struct = self.__unpack_data__(

+ format,

+ self.get_data(rva),

+ file_offset = self.get_offset_from_rva(rva))

+ if not tls_struct:

+ return None

+ return TlsData( struct = tls_struct )

+ def parse_relocations_directory(self, rva, size):

+ """"""

+ rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)

+ rlc_size = rlc.sizeof()

+ end = rva+size

+ relocations = []

+ while rva<end:

+ # OC Patch:

+ # Malware that has bad rva entries will cause an error.

+ # Just continue on after an exception

+ #

+ try:

+ rlc = self.__unpack_data__(

+ self.__IMAGE_BASE_RELOCATION_format__,

+ self.get_data(rva, rlc_size),

+ file_offset = self.get_offset_from_rva(rva) )

+ except PEFormatError:

+ self.__warnings.append(

+ 'Invalid relocation information. Can\'t read ' +

+ 'data at RVA: 0x%x' % rva)

+ rlc = None

+ if not rlc:

+ break

+ reloc_entries = self.parse_relocations(

+ rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)

+ relocations.append(

+ BaseRelocationData(

+ struct = rlc,

+ entries = reloc_entries))

+ if not rlc.SizeOfBlock:

+ break

+ rva += rlc.SizeOfBlock

+ return relocations

+ def parse_relocations(self, data_rva, rva, size):

+ """"""

+ data = self.get_data(data_rva, size)

+ entries = []

+ for idx in xrange(len(data)/2):

+ word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]

+ reloc_type = (word>>12)

+ reloc_offset = (word&0x0fff)

+ entries.append(

+ RelocationData(

+ type = reloc_type,

+ rva = reloc_offset+rva))

+ return entries

+ def parse_debug_directory(self, rva, size):

+ """"""

+ dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)

+ dbg_size = dbg.sizeof()

+ debug = []

+ for idx in xrange(size/dbg_size):

+ try:

+ data = self.get_data(rva+dbg_size*idx, dbg_size)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Invalid debug information. Can\'t read ' +

+ 'data at RVA: 0x%x' % rva)

+ return None

+ dbg = self.__unpack_data__(

+ self.__IMAGE_DEBUG_DIRECTORY_format__,

+ data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))

+ if not dbg:

+ return None

+ debug.append(

+ DebugData(

+ struct = dbg))

+ return debug

+ def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):

+ """Parse the resources directory.

+ Given the rva of the resources directory, it will process all

+ its entries.

+ The root will have the corresponding member of its structure,

+ IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the

+ entries in the directory.

+ Those entries will have, correspondingly, all the structure's

+ members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,

+ "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure

+ representing upper layers of the tree. This one will also have

+ an 'entries' attribute, pointing to the 3rd, and last, level.

+ Another directory with more entries. Those last entries will

+ have a new atribute (both 'leaf' or 'data_entry' can be used to

+ access it). This structure finally points to the resource data.

+ All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,

+ are available as its attributes.

+ """

+ # OC Patch:

+ original_rva = rva

+ if base_rva is None:

+ base_rva = rva

+ resources_section = self.get_section_by_rva(rva)

+ try:

+ # If the RVA is invalid all would blow up. Some EXEs seem to be

+ # specially nasty and have an invalid RVA.

+ data = self.get_data(rva)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Invalid resources directory. Can\'t read ' +

+ 'directory data at RVA: 0x%x' % rva)

+ return None

+ # Get the resource directory structure, that is, the header

+ # of the table preceding the actual entries

+ #

+ resource_dir = self.__unpack_data__(

+ self.__IMAGE_RESOURCE_DIRECTORY_format__, data,

+ file_offset = self.get_offset_from_rva(rva) )

+ if resource_dir is None:

+ # If can't parse resources directory then silently return.

+ # This directory does not necesarily have to be valid to

+ # still have a valid PE file

+ self.__warnings.append(

+ 'Invalid resources directory. Can\'t parse ' +

+ 'directory data at RVA: 0x%x' % rva)

+ return None

+ dir_entries = []

+ # Advance the rva to the positon immediately following the directory

+ # table header and pointing to the first entry in the table

+ #

+ rva += resource_dir.sizeof()

+ number_of_entries = (

+ resource_dir.NumberOfNamedEntries +

+ resource_dir.NumberOfIdEntries )

+ strings_to_postprocess = list()

+ for idx in xrange(number_of_entries):

+ res = self.parse_resource_entry(rva)

+ if res is None:

+ self.__warnings.append(

+ 'Error parsing the resources directory, ' +

+ 'Entry %d is invalid, RVA = 0x%x. ' %

+ (idx, rva) )

+ break

+ entry_name = None

+ entry_id = None

+ # If all named entries have been processed, only Id ones

+ # remain

+ if idx >= resource_dir.NumberOfNamedEntries:

+ entry_id = res.Name

+ else:

+ ustr_offset = base_rva+res.NameOffset

+ try:

+ #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)

+ entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)

+ strings_to_postprocess.append(entry_name)

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the resources directory, ' +

+ 'attempting to read entry name. ' +

+ 'Can\'t read unicode string at offset 0x%x' %

+ (ustr_offset) )

+ if res.DataIsDirectory:

+ # OC Patch:

+ #

+ # One trick malware can do is to recursively reference

+ # the next directory. This causes hilarity to ensue when

+ # trying to parse everything correctly.

+ # If the original RVA given to this function is equal to

+ # the next one to parse, we assume that it's a trick.

+ # Instead of raising a PEFormatError this would skip some

+ # reasonable data so we just break.

+ #

+ # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample

+ if original_rva == (base_rva + res.OffsetToDirectory):

+ break

+ else:

+ entry_directory = self.parse_resources_directory(

+ base_rva+res.OffsetToDirectory,

+ base_rva=base_rva, level = level+1)

+ if not entry_directory:

+ break

+ dir_entries.append(

+ ResourceDirEntryData(

+ struct = res,

+ name = entry_name,

+ id = entry_id,

+ directory = entry_directory))

+ else:

+ struct = self.parse_resource_data_entry(

+ base_rva + res.OffsetToDirectory)

+ if struct:

+ entry_data = ResourceDataEntryData(

+ struct = struct,

+ lang = res.Name & 0xff,

+ sublang = (res.Name>>8) & 0xff)

+ dir_entries.append(

+ ResourceDirEntryData(

+ struct = res,

+ name = entry_name,

+ id = entry_id,

+ data = entry_data))

+ else:

+ break

+ # Check if this entry contains version information

+ #

+ if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:

+ if len(dir_entries)>0:

+ last_entry = dir_entries[-1]

+ rt_version_struct = None

+ try:

+ rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct

+ except:

+ # Maybe a malformed directory structure...?

+ # Lets ignore it

+ pass

+ if rt_version_struct is not None:

+ self.parse_version_information(rt_version_struct)

+ rva += res.sizeof()

+ string_rvas = [s.get_rva() for s in strings_to_postprocess]

+ string_rvas.sort()

+ for idx, s in enumerate(strings_to_postprocess):

+ s.render_pascal_16()

+ resource_directory_data = ResourceDirData(

+ struct = resource_dir,

+ entries = dir_entries)

+ return resource_directory_data

+ def parse_resource_data_entry(self, rva):

+ """Parse a data entry from the resources directory."""

+ try:

+ # If the RVA is invalid all would blow up. Some EXEs seem to be

+ # specially nasty and have an invalid RVA.

+ data = self.get_data(rva)

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing a resource directory data entry, ' +

+ 'the RVA is invalid: 0x%x' % ( rva ) )

+ return None

+ data_entry = self.__unpack_data__(

+ self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,

+ file_offset = self.get_offset_from_rva(rva) )

+ return data_entry

+ def parse_resource_entry(self, rva):

+ """Parse a directory entry from the resources directory."""

+ resource = self.__unpack_data__(

+ self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),

+ file_offset = self.get_offset_from_rva(rva) )

+ if resource is None:

+ return None

+ #resource.NameIsString = (resource.Name & 0x80000000L) >> 31

+ resource.NameOffset = resource.Name & 0x7FFFFFFFL

+ resource.__pad = resource.Name & 0xFFFF0000L

+ resource.Id = resource.Name & 0x0000FFFFL

+ resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31

+ resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL

+ return resource

+ def parse_version_information(self, version_struct):

+ """Parse version information structure.

+ The date will be made available in three attributes of the PE object.

+ VS_VERSIONINFO will contain the first three fields of the main structure:

+ 'Length', 'ValueLength', and 'Type'

+ VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes:

+ 'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',

+ 'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',

+ 'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'

+ FileInfo is a list of all StringFileInfo and VarFileInfo structures.

+ StringFileInfo structures will have a list as an attribute named 'StringTable'

+ containing all the StringTable structures. Each of those structures contains a

+ dictionary 'entries' with all the key/value version information string pairs.

+ VarFileInfo structures will have a list as an attribute named 'Var' containing

+ all Var structures. Each Var structure will have a dictionary as an attribute

+ named 'entry' which will contain the name and value of the Var.

+ """

+ # Retrieve the data for the version info resource

+ #

+ start_offset = self.get_offset_from_rva( version_struct.OffsetToData )

+ raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]

+ # Map the main structure and the subsequent string

+ #

+ versioninfo_struct = self.__unpack_data__(

+ self.__VS_VERSIONINFO_format__, raw_data,

+ file_offset = start_offset )

+ if versioninfo_struct is None:

+ return

+ ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()

+ try:

+ versioninfo_string = self.get_string_u_at_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read VS_VERSION_INFO string. Can\'t ' +

+ 'read unicode string at offset 0x%x' % (

+ ustr_offset ) )

+ versioninfo_string = None

+ # If the structure does not contain the expected name, it's assumed to be invalid

+ #

+ if versioninfo_string != u'VS_VERSION_INFO':

+ self.__warnings.append('Invalid VS_VERSION_INFO block')

+ return

+ # Set the PE object's VS_VERSIONINFO to this one

+ #

+ self.VS_VERSIONINFO = versioninfo_struct

+ # The the Key attribute to point to the unicode string identifying the structure

+ #

+ self.VS_VERSIONINFO.Key = versioninfo_string

+ # Process the fixed version information, get the offset and structure

+ #

+ fixedfileinfo_offset = self.dword_align(

+ versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),

+ version_struct.OffsetToData)

+ fixedfileinfo_struct = self.__unpack_data__(

+ self.__VS_FIXEDFILEINFO_format__,

+ raw_data[fixedfileinfo_offset:],

+ file_offset = start_offset+fixedfileinfo_offset )

+ if not fixedfileinfo_struct:

+ return

+ # Set the PE object's VS_FIXEDFILEINFO to this one

+ #

+ self.VS_FIXEDFILEINFO = fixedfileinfo_struct

+ # Start parsing all the StringFileInfo and VarFileInfo structures

+ #

+ # Get the first one

+ #

+ stringfileinfo_offset = self.dword_align(

+ fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),

+ version_struct.OffsetToData)

+ original_stringfileinfo_offset = stringfileinfo_offset

+ # Set the PE object's attribute that will contain them all.

+ #

+ self.FileInfo = list()

+ while True:

+ # Process the StringFileInfo/VarFileInfo struct

+ #

+ stringfileinfo_struct = self.__unpack_data__(

+ self.__StringFileInfo_format__,

+ raw_data[stringfileinfo_offset:],

+ file_offset = start_offset+stringfileinfo_offset )

+ if stringfileinfo_struct is None:

+ self.__warnings.append(

+ 'Error parsing StringFileInfo/VarFileInfo struct' )

+ return None

+ # Get the subsequent string defining the structure.

+ #

+ ustr_offset = ( version_struct.OffsetToData +

+ stringfileinfo_offset + versioninfo_struct.sizeof() )

+ try:

+ stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read StringFileInfo string. Can\'t ' +

+ 'read unicode string at offset 0x%x' % ( ustr_offset ) )

+ break

+ # Set such string as the Key attribute

+ #

+ stringfileinfo_struct.Key = stringfileinfo_string

+ # Append the structure to the PE object's list

+ #

+ self.FileInfo.append(stringfileinfo_struct)

+ # Parse a StringFileInfo entry

+ #

+ if stringfileinfo_string == u'StringFileInfo':

+ if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:

+ stringtable_offset = self.dword_align(

+ stringfileinfo_offset + stringfileinfo_struct.sizeof() +

+ 2*(len(stringfileinfo_string)+1),

+ version_struct.OffsetToData)

+ stringfileinfo_struct.StringTable = list()

+ # Process the String Table entries

+ #

+ while True:

+ stringtable_struct = self.__unpack_data__(

+ self.__StringTable_format__,

+ raw_data[stringtable_offset:],

+ file_offset = start_offset+stringtable_offset )

+ if not stringtable_struct:

+ break

+ ustr_offset = ( version_struct.OffsetToData + stringtable_offset +

+ stringtable_struct.sizeof() )

+ try:

+ stringtable_string = self.get_string_u_at_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read StringTable string. Can\'t ' +

+ 'read unicode string at offset 0x%x' % ( ustr_offset ) )

+ break

+ stringtable_struct.LangID = stringtable_string

+ stringtable_struct.entries = dict()

+ stringtable_struct.entries_offsets = dict()

+ stringtable_struct.entries_lengths = dict()

+ stringfileinfo_struct.StringTable.append(stringtable_struct)

+ entry_offset = self.dword_align(

+ stringtable_offset + stringtable_struct.sizeof() +

+ 2*(len(stringtable_string)+1),

+ version_struct.OffsetToData)

+ # Process all entries in the string table

+ #

+ while entry_offset < stringtable_offset + stringtable_struct.Length:

+ string_struct = self.__unpack_data__(

+ self.__String_format__, raw_data[entry_offset:],

+ file_offset = start_offset+entry_offset )

+ if not string_struct:

+ break

+ ustr_offset = ( version_struct.OffsetToData + entry_offset +

+ string_struct.sizeof() )

+ try:

+ key = self.get_string_u_at_rva( ustr_offset )

+ key_offset = self.get_offset_from_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read StringTable Key string. Can\'t ' +

+ 'read unicode string at offset 0x%x' % ( ustr_offset ) )

+ break

+ value_offset = self.dword_align(

+ 2*(len(key)+1) + entry_offset + string_struct.sizeof(),

+ version_struct.OffsetToData)

+ ustr_offset = version_struct.OffsetToData + value_offset

+ try:

+ value = self.get_string_u_at_rva( ustr_offset,

+ max_length = string_struct.ValueLength )

+ value_offset = self.get_offset_from_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read StringTable Value string. ' +

+ 'Can\'t read unicode string at offset 0x%x' % (

+ ustr_offset ) )

+ break

+ if string_struct.Length == 0:

+ entry_offset = stringtable_offset + stringtable_struct.Length

+ else:

+ entry_offset = self.dword_align(

+ string_struct.Length+entry_offset, version_struct.OffsetToData)

+ key_as_char = []

+ for c in key:

+ if ord(c)>128:

+ key_as_char.append('\\x%02x' %ord(c))

+ else:

+ key_as_char.append(c)

+ key_as_char = ''.join(key_as_char)

+ setattr(stringtable_struct, key_as_char, value)

+ stringtable_struct.entries[key] = value

+ stringtable_struct.entries_offsets[key] = (key_offset, value_offset)

+ stringtable_struct.entries_lengths[key] = (len(key), len(value))

+ stringtable_offset = self.dword_align(

+ stringtable_struct.Length + stringtable_offset,

+ version_struct.OffsetToData)

+ if stringtable_offset >= stringfileinfo_struct.Length:

+ break

+ # Parse a VarFileInfo entry

+ #

+ elif stringfileinfo_string == u'VarFileInfo':

+ varfileinfo_struct = stringfileinfo_struct

+ varfileinfo_struct.name = 'VarFileInfo'

+ if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:

+ var_offset = self.dword_align(

+ stringfileinfo_offset + varfileinfo_struct.sizeof() +

+ 2*(len(stringfileinfo_string)+1),

+ version_struct.OffsetToData)

+ varfileinfo_struct.Var = list()

+ # Process all entries

+ #

+ while True:

+ var_struct = self.__unpack_data__(

+ self.__Var_format__,

+ raw_data[var_offset:],

+ file_offset = start_offset+var_offset )

+ if not var_struct:

+ break

+ ustr_offset = ( version_struct.OffsetToData + var_offset +

+ var_struct.sizeof() )

+ try:

+ var_string = self.get_string_u_at_rva( ustr_offset )

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the version information, ' +

+ 'attempting to read VarFileInfo Var string. ' +

+ 'Can\'t read unicode string at offset 0x%x' % (ustr_offset))

+ break

+ varfileinfo_struct.Var.append(var_struct)

+ varword_offset = self.dword_align(

+ 2*(len(var_string)+1) + var_offset + var_struct.sizeof(),

+ version_struct.OffsetToData)

+ orig_varword_offset = varword_offset

+ while varword_offset < orig_varword_offset + var_struct.ValueLength:

+ word1 = self.get_word_from_data(

+ raw_data[varword_offset:varword_offset+2], 0)

+ word2 = self.get_word_from_data(

+ raw_data[varword_offset+2:varword_offset+4], 0)

+ varword_offset += 4

+ var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}

+ var_offset = self.dword_align(

+ var_offset+var_struct.Length, version_struct.OffsetToData)

+ if var_offset <= var_offset+var_struct.Length:

+ break

+ # Increment and align the offset

+ #

+ stringfileinfo_offset = self.dword_align(

+ stringfileinfo_struct.Length+stringfileinfo_offset,

+ version_struct.OffsetToData)

+ # Check if all the StringFileInfo and VarFileInfo items have been processed

+ #

+ if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:

+ break

+ def parse_export_directory(self, rva, size):

+ """Parse the export directory.

+ Given the rva of the export directory, it will process all

+ its entries.

+ The exports will be made available through a list "exports"

+ containing a tuple with the following elements:

+ (ordinal, symbol_address, symbol_name)

+ And also through a dicionary "exports_by_ordinal" whose keys

+ will be the ordinals and the values tuples of the from:

+ (symbol_address, symbol_name)

+ The symbol addresses are relative, not absolute.

+ """

+ try:

+ export_dir = self.__unpack_data__(

+ self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),

+ file_offset = self.get_offset_from_rva(rva) )

+ except PEFormatError:

+ self.__warnings.append(

+ 'Error parsing export directory at RVA: 0x%x' % ( rva ) )

+ return

+ if not export_dir:

+ return

+ try:

+ address_of_names = self.get_data(

+ export_dir.AddressOfNames, export_dir.NumberOfNames*4)

+ address_of_name_ordinals = self.get_data(

+ export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)

+ address_of_functions = self.get_data(

+ export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)

+ except PEFormatError:

+ self.__warnings.append(

+ 'Error parsing export directory at RVA: 0x%x' % ( rva ) )

+ return

+ exports = []

+ for i in xrange(export_dir.NumberOfNames):

+ symbol_name = self.get_string_at_rva(

+ self.get_dword_from_data(address_of_names, i))

+ symbol_ordinal = self.get_word_from_data(

+ address_of_name_ordinals, i)

+ if symbol_ordinal*4<len(address_of_functions):

+ symbol_address = self.get_dword_from_data(

+ address_of_functions, symbol_ordinal)

+ else:

+ # Corrupt? a bad pointer... we assume it's all

+ # useless, no exports

+ return None

+ # If the funcion's rva points within the export directory

+ # it will point to a string with the forwarded symbol's string

+ # instead of pointing the the function start address.

+ if symbol_address>=rva and symbol_address<rva+size:

+ forwarder_str = self.get_string_at_rva(symbol_address)

+ else:

+ forwarder_str = None

+ exports.append(

+ ExportData(

+ ordinal = export_dir.Base+symbol_ordinal,

+ address = symbol_address,

+ name = symbol_name,

+ forwarder = forwarder_str))

+ ordinals = [exp.ordinal for exp in exports]

+ for idx in xrange(export_dir.NumberOfFunctions):

+ if not idx+export_dir.Base in ordinals:

+ symbol_address = self.get_dword_from_data(

+ address_of_functions,

+ idx)

+ #

+ # Checking for forwarder again.

+ #

+ if symbol_address>=rva and symbol_address<rva+size:

+ forwarder_str = self.get_string_at_rva(symbol_address)

+ else:

+ forwarder_str = None

+ exports.append(

+ ExportData(

+ ordinal = export_dir.Base+idx,

+ address = symbol_address,

+ name = None,

+ forwarder = forwarder_str))

+ return ExportDirData(

+ struct = export_dir,

+ symbols = exports)

+ def dword_align(self, offset, base):

+ offset += base

+ return (offset+3) - ((offset+3)%4) - base

+ def parse_delay_import_directory(self, rva, size):

+ """Walk and parse the delay import directory."""

+ import_descs = []

+ while True:

+ try:

+ # If the RVA is invalid all would blow up. Some PEs seem to be

+ # specially nasty and have an invalid RVA.

+ data = self.get_data(rva)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )

+ break

+ import_desc = self.__unpack_data__(

+ self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,

+ data, file_offset = self.get_offset_from_rva(rva) )

+ # If the structure is all zeores, we reached the end of the list

+ if not import_desc or import_desc.all_zeroes():

+ break

+ rva += import_desc.sizeof()

+ try:

+ import_data = self.parse_imports(

+ import_desc.pINT,

+ import_desc.pIAT,

+ None)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Error parsing the Delay import directory. ' +

+ 'Invalid import data at RVA: 0x%x' % ( rva ) )

+ break

+ if not import_data:

+ continue

+ dll = self.get_string_at_rva(import_desc.szName)

+ if dll:

+ import_descs.append(

+ ImportDescData(

+ struct = import_desc,

+ imports = import_data,

+ dll = dll))

+ return import_descs

+ def parse_import_directory(self, rva, size):

+ """Walk and parse the import directory."""

+ import_descs = []

+ while True:

+ try:

+ # If the RVA is invalid all would blow up. Some EXEs seem to be

+ # specially nasty and have an invalid RVA.

+ data = self.get_data(rva)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )

+ break

+ import_desc = self.__unpack_data__(

+ self.__IMAGE_IMPORT_DESCRIPTOR_format__,

+ data, file_offset = self.get_offset_from_rva(rva) )

+ # If the structure is all zeores, we reached the end of the list

+ if not import_desc or import_desc.all_zeroes():

+ break

+ rva += import_desc.sizeof()

+ try:

+ import_data = self.parse_imports(

+ import_desc.OriginalFirstThunk,

+ import_desc.FirstThunk,

+ import_desc.ForwarderChain)

+ except PEFormatError, excp:

+ self.__warnings.append(

+ 'Error parsing the Import directory. ' +

+ 'Invalid Import data at RVA: 0x%x' % ( rva ) )

+ break

+ #raise excp

+ if not import_data:

+ continue

+ dll = self.get_string_at_rva(import_desc.Name)

+ if dll:

+ import_descs.append(

+ ImportDescData(

+ struct = import_desc,

+ imports = import_data,

+ dll = dll))

+ return import_descs

+ def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):

+ """Parse the imported symbols.

+ It will fill a list, which will be avalable as the dictionary

+ attribute "imports". Its keys will be the DLL names and the values

+ all the symbols imported from that object.

+ """

+ imported_symbols = []

+ imports_section = self.get_section_by_rva(first_thunk)

+ if not imports_section:

+ raise PEFormatError, 'Invalid/corrupt imports.'

+ # Import Lookup Table. Contains ordinals or pointers to strings.

+ ilt = self.get_import_table(original_first_thunk)

+ # Import Address Table. May have identical content to ILT if

+ # PE file is not bounded, Will contain the address of the

+ # imported symbols once the binary is loaded or if it is already

+ # bound.

+ iat = self.get_import_table(first_thunk)

+ # OC Patch:

+ # Would crash if iat or ilt had None type

+ if not iat and not ilt:

+ raise PEFormatError(

+ 'Invalid Import Table information. ' +

+ 'Both ILT and IAT appear to be broken.')

+ if not iat and ilt:

+ table = ilt

+ elif iat and not ilt:

+ table = iat

+ elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):

+ table = ilt

+ elif (ilt and len(ilt))==0 and (iat and len(iat)):

+ table = iat

+ else:

+ return None

+ for idx in xrange(len(table)):

+ imp_ord = None

+ imp_hint = None

+ imp_name = None

+ hint_name_table_rva = None

+ if table[idx].AddressOfData:

+ if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:

+ ordinal_flag = IMAGE_ORDINAL_FLAG

+ elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:

+ ordinal_flag = IMAGE_ORDINAL_FLAG64

+ # If imported by ordinal, we will append the ordinal number

+ #

+ if table[idx].AddressOfData & ordinal_flag:

+ import_by_ordinal = True

+ imp_ord = table[idx].AddressOfData & 0xffff

+ imp_name = None

+ else:

+ import_by_ordinal = False

+ try:

+ hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff

+ data = self.get_data(hint_name_table_rva, 2)

+ # Get the Hint

+ imp_hint = self.get_word_from_data(data, 0)

+ imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)

+ except PEFormatError, e:

+ pass

+ imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4

+ if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:

+ imp_bound = iat[idx].AddressOfData

+ else:

+ imp_bound = None

+ if imp_name != '' and (imp_ord or imp_name):

+ imported_symbols.append(

+ ImportData(

+ import_by_ordinal = import_by_ordinal,

+ ordinal = imp_ord,

+ hint = imp_hint,

+ name = imp_name,

+ bound = imp_bound,

+ address = imp_address,

+ hint_name_table_rva = hint_name_table_rva))

+ return imported_symbols

+ def get_import_table(self, rva):

+ table = []

+ while True and rva:

+ try:

+ data = self.get_data(rva)

+ except PEFormatError, e:

+ self.__warnings.append(

+ 'Error parsing the import table. ' +

+ 'Invalid data at RVA: 0x%x' % ( rva ) )

+ return None

+ if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:

+ format = self.__IMAGE_THUNK_DATA_format__

+ elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:

+ format = self.__IMAGE_THUNK_DATA64_format__

+ thunk_data = self.__unpack_data__(

+ format, data, file_offset=self.get_offset_from_rva(rva) )

+ if not thunk_data or thunk_data.all_zeroes():

+ break

+ rva += thunk_data.sizeof()

+ table.append(thunk_data)

+ return table

+ def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):

+ """Returns the data corresponding to the memory layout of the PE file.

+ The data includes the PE header and the sections loaded at offsets

+ corresponding to their relative virtual addresses. (the VirtualAddress

+ section header member).

+ Any offset in this data corresponds to the absolute memory address

+ ImageBase+offset.

+ The optional argument 'max_virtual_address' provides with means of limiting

+ which section are processed.

+ Any section with their VirtualAddress beyond this value will be skipped.

+ Normally, sections with values beyond this range are just there to confuse

+ tools. It's a common trick to see in packed executables.

+ If the 'ImageBase' optional argument is supplied, the file's relocations

+ will be applied to the image by calling the 'relocate_image()' method.

+ """

+ # Collect all sections in one code block

+ data = self.header

+ for section in self.sections:

+ # Miscellanous integrity tests.

+ # Some packer will set these to bogus values to

+ # make tools go nuts.

+ #

+ if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:

+ continue

+ if section.SizeOfRawData > len(self.__data__):

+ continue

+ if section.PointerToRawData > len(self.__data__):

+ continue

+ if section.VirtualAddress >= max_virtual_address:

+ continue

+ padding_length = section.VirtualAddress - len(data)

+ if padding_length>0:

+ data += '\0'*padding_length

+ elif padding_length<0:

+ data = data[:padding_length]

+ data += section.data

+ return data

+ def get_data(self, rva, length=None):

+ """Get data regardless of the section where it lies on.

+ Given a rva and the size of the chunk to retrieve, this method

+ will find the section where the data lies and return the data.

+ """

+ s = self.get_section_by_rva(rva)

+ if not s:

+ if rva<len(self.header):

+ if length:

+ end = rva+length

+ else:

+ end = None

+ return self.header[rva:end]

+ raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'

+ return s.get_data(rva, length)

+ def get_rva_from_offset(self, offset):

+ """Get the rva corresponding to this file offset. """

+ s = self.get_section_by_offset(offset)

+ if not s:

+ raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)

+ return s.get_rva_from_offset(offset)

+ def get_offset_from_rva(self, rva):

+ """Get the file offset corresponding to this rva.

+ Given a rva , this method will find the section where the

+ data lies and return the offset within the file.

+ """

+ s = self.get_section_by_rva(rva)

+ if not s:

+ raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'

+ return s.get_offset_from_rva(rva)

+ def get_string_at_rva(self, rva):

+ """Get an ASCII string located at the given address."""

+ s = self.get_section_by_rva(rva)

+ if not s:

+ if rva<len(self.header):

+ return self.get_string_from_data(rva, self.header)

+ return None

+ return self.get_string_from_data(rva-s.VirtualAddress, s.data)

+ def get_string_from_data(self, offset, data):

+ """Get an ASCII string from within the data."""

+ # OC Patch

+ b = None

+ try:

+ b = data[offset]

+ except IndexError:

+ return ''

+ s = ''

+ while ord(b):

+ s += b

+ offset += 1

+ try:

+ b = data[offset]

+ except IndexError:

+ break

+ return s

+ def get_string_u_at_rva(self, rva, max_length = 2**16):

+ """Get an Unicode string located at the given address."""

+ try:

+ # If the RVA is invalid all would blow up. Some EXEs seem to be

+ # specially nasty and have an invalid RVA.

+ data = self.get_data(rva, 2)

+ except PEFormatError, e:

+ return None

+ #length = struct.unpack('<H', data)[0]

+ s = u''

+ for idx in xrange(max_length):

+ try:

+ uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]

+ except struct.error:

+ break

+ if unichr(uchr) == u'\0':

+ break

+ s += unichr(uchr)

+ return s

+ def get_section_by_offset(self, offset):

+ """Get the section containing the given file offset."""

+ sections = [s for s in self.sections if s.contains_offset(offset)]

+ if sections:

+ return sections[0]

+ return None

+ def get_section_by_rva(self, rva):

+ """Get the section containing the given address."""

+ sections = [s for s in self.sections if s.contains_rva(rva)]

+ if sections:

+ return sections[0]

+ return None

+ def __str__(self):

+ return self.dump_info()

+ def print_info(self):

+ """Print all the PE header information in a human readable from."""

+ print self.dump_info()

+ def dump_info(self, dump=None):

+ """Dump all the PE header information into human readable string."""

+ if dump is None:

+ dump = Dump()

+ warnings = self.get_warnings()

+ if warnings:

+ dump.add_header('Parsing Warnings')

+ for warning in warnings:

+ dump.add_line(warning)

+ dump.add_newline()

+ dump.add_header('DOS_HEADER')

+ dump.add_lines(self.DOS_HEADER.dump())

+ dump.add_newline()

+ dump.add_header('NT_HEADERS')

+ dump.add_lines(self.NT_HEADERS.dump())

+ dump.add_newline()

+ dump.add_header('FILE_HEADER')

+ dump.add_lines(self.FILE_HEADER.dump())

+ image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')

+ dump.add('Flags: ')

+ flags = []

+ for flag in image_flags:

+ if getattr(self.FILE_HEADER, flag[0]):

+ flags.append(flag[0])

+ dump.add_line(', '.join(flags))

+ dump.add_newline()

+ if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:

+ dump.add_header('OPTIONAL_HEADER')

+ dump.add_lines(self.OPTIONAL_HEADER.dump())

+ dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')

+ dump.add('DllCharacteristics: ')

+ flags = []

+ for flag in dll_characteristics_flags:

+ if getattr(self.OPTIONAL_HEADER, flag[0]):

+ flags.append(flag[0])

+ dump.add_line(', '.join(flags))

+ dump.add_newline()

+ dump.add_header('PE Sections')

+ section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')

+ for section in self.sections:

+ dump.add_lines(section.dump())

+ dump.add('Flags: ')

+ flags = []

+ for flag in section_flags:

+ if getattr(section, flag[0]):

+ flags.append(flag[0])

+ dump.add_line(', '.join(flags))

+ dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )

+ if md5 is not None:

+ dump.add_line('MD5 hash: %s' % section.get_hash_md5() )

+ if sha1 is not None:

+ dump.add_line('SHA-1 hash: %s' % section.get_hash_sha1() )

+ if sha256 is not None:

+ dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )

+ if sha512 is not None:

+ dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )

+ dump.add_newline()

+ if (hasattr(self, 'OPTIONAL_HEADER') and

+ hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):

+ dump.add_header('Directories')

+ for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):

+ directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]

+ dump.add_lines(directory.dump())

+ dump.add_newline()

+ if hasattr(self, 'VS_VERSIONINFO'):

+ dump.add_header('Version Information')

+ dump.add_lines(self.VS_VERSIONINFO.dump())

+ dump.add_newline()

+ if hasattr(self, 'VS_FIXEDFILEINFO'):

+ dump.add_lines(self.VS_FIXEDFILEINFO.dump())

+ dump.add_newline()

+ if hasattr(self, 'FileInfo'):

+ for entry in self.FileInfo:

+ dump.add_lines(entry.dump())

+ dump.add_newline()

+ if hasattr(entry, 'StringTable'):

+ for st_entry in entry.StringTable:

+ [dump.add_line(' '+line) for line in st_entry.dump()]

+ dump.add_line(' LangID: '+st_entry.LangID)

+ dump.add_newline()

+ for str_entry in st_entry.entries.items():

+ dump.add_line(' '+str_entry[0]+': '+str_entry[1])

+ dump.add_newline()

+ elif hasattr(entry, 'Var'):

+ for var_entry in entry.Var:

+ if hasattr(var_entry, 'entry'):

+ [dump.add_line(' '+line) for line in var_entry.dump()]

+ dump.add_line(

+ ' ' + var_entry.entry.keys()[0] +

+ ': ' + var_entry.entry.values()[0])

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):

+ dump.add_header('Exported symbols')

+ dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())

+ dump.add_newline()

+ dump.add_line('%-10s %-10s %s' % ('Ordinal', 'RVA', 'Name'))

+ for export in self.DIRECTORY_ENTRY_EXPORT.symbols:

+ dump.add('%-10d 0x%08Xh %s' % (

+ export.ordinal, export.address, export.name))

+ if export.forwarder:

+ dump.add_line(' forwarder: %s' % export.forwarder)

+ else:

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):

+ dump.add_header('Imported symbols')

+ for module in self.DIRECTORY_ENTRY_IMPORT:

+ dump.add_lines(module.struct.dump())

+ dump.add_newline()

+ for symbol in module.imports:

+ if symbol.import_by_ordinal is True:

+ dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (

+ module.dll, str(symbol.ordinal)))

+ else:

+ dump.add('%s.%s Hint[%s]' % (

+ module.dll, symbol.name, str(symbol.hint)))

+ if symbol.bound:

+ dump.add_line(' Bound: 0x%08X' % (symbol.bound))

+ else:

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):

+ dump.add_header('Bound imports')

+ for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:

+ dump.add_lines(bound_imp_desc.struct.dump())

+ dump.add_line('DLL: %s' % bound_imp_desc.name)

+ dump.add_newline()

+ for bound_imp_ref in bound_imp_desc.entries:

+ dump.add_lines(bound_imp_ref.struct.dump(), 4)

+ dump.add_line('DLL: %s' % bound_imp_ref.name, 4)

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):

+ dump.add_header('Delay Imported symbols')

+ for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:

+ dump.add_lines(module.struct.dump())

+ dump.add_newline()

+ for symbol in module.imports:

+ if symbol.import_by_ordinal is True:

+ dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (

+ module.dll, str(symbol.ordinal)))

+ else:

+ dump.add('%s.%s Hint[%s]' % (

+ module.dll, symbol.name, str(symbol.hint)))

+ if symbol.bound:

+ dump.add_line(' Bound: 0x%08X' % (symbol.bound))

+ else:

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):

+ dump.add_header('Resource directory')

+ dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())

+ for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:

+ if resource_type.name is not None:

+ dump.add_line('Name: [%s]' % resource_type.name, 2)

+ else:

+ dump.add_line('Id: [0x%X] (%s)' % (

+ resource_type.struct.Id, RESOURCE_TYPE.get(

+ resource_type.struct.Id, '-')),

+ 2)

+ dump.add_lines(resource_type.struct.dump(), 2)

+ if hasattr(resource_type, 'directory'):

+ dump.add_lines(resource_type.directory.struct.dump(), 4)

+ for resource_id in resource_type.directory.entries:

+ if resource_id.name is not None:

+ dump.add_line('Name: [%s]' % resource_id.name, 6)

+ else:

+ dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)

+ dump.add_lines(resource_id.struct.dump(), 6)

+ if hasattr(resource_id, 'directory'):

+ dump.add_lines(resource_id.directory.struct.dump(), 8)

+ for resource_lang in resource_id.directory.entries:

+ # dump.add_line('\\--- LANG [%d,%d][%s]' % (

+ # resource_lang.data.lang,

+ # resource_lang.data.sublang,

+ # LANG[resource_lang.data.lang]), 8)

+ dump.add_lines(resource_lang.struct.dump(), 10)

+ dump.add_lines(resource_lang.data.struct.dump(), 12)

+ dump.add_newline()

+ if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and

+ self.DIRECTORY_ENTRY_TLS and

+ self.DIRECTORY_ENTRY_TLS.struct ):

+ dump.add_header('TLS')

+ dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):

+ dump.add_header('Debug information')

+ for dbg in self.DIRECTORY_ENTRY_DEBUG:

+ dump.add_lines(dbg.struct.dump())

+ try:

+ dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])

+ except KeyError:

+ dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)

+ dump.add_newline()

+ if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):

+ dump.add_header('Base relocations')

+ for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:

+ dump.add_lines(base_reloc.struct.dump())

+ for reloc in base_reloc.entries:

+ try:

+ dump.add_line('%08Xh %s' % (

+ reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)

+ except KeyError:

+ dump.add_line('0x%08X 0x%x(Unknown)' % (

+ reloc.rva, reloc.type), 4)

+ dump.add_newline()

+ return dump.get_text()

+ # OC Patch

+ def get_physical_by_rva(self, rva):

+ """Gets the physical address in the PE file from an RVA value."""

+ try:

+ return self.get_offset_from_rva(rva)

+ except Exception:

+ return None

+ ##

+ # Double-Word get/set

+ ##

+ def get_data_from_dword(self, dword):

+ """Return a four byte string representing the double word value. (little endian)."""

+ return struct.pack('<L', dword)

+ def get_dword_from_data(self, data, offset):

+ """Convert four bytes of data to a double word (little endian)

+ 'offset' is assumed to index into a dword array. So setting it to

+ N will return a dword out of the data sarting at offset N*4.

+ Returns None if the data can't be turned into a double word.

+ """

+ if (offset+1)*4 > len(data):

+ return None

+ return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]

+ def get_dword_at_rva(self, rva):

+ """Return the double word value at the given RVA.

+ Returns None if the value can't be read, i.e. the RVA can't be mapped

+ to a file offset.

+ """

+ try:

+ return self.get_dword_from_data(self.get_data(rva)[:4], 0)

+ except PEFormatError:

+ return None

+ def get_dword_from_offset(self, offset):

+ """Return the double word value at the given file offset. (little endian)"""

+ if offset+4 > len(self.__data__):

+ return None

+ return self.get_dword_from_data(self.__data__[offset:offset+4], 0)

+ def set_dword_at_rva(self, rva, dword):

+ """Set the double word value at the file offset corresponding to the given RVA."""

+ return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))

+ def set_dword_at_offset(self, offset, dword):

+ """Set the double word value at the given file offset."""

+ return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))

+ ##

+ # Word get/set

+ ##

+ def get_data_from_word(self, word):

+ """Return a two byte string representing the word value. (little endian)."""

+ return struct.pack('<H', word)

+ def get_word_from_data(self, data, offset):

+ """Convert two bytes of data to a word (little endian)

+ 'offset' is assumed to index into a word array. So setting it to

+ N will return a dword out of the data sarting at offset N*2.

+ Returns None if the data can't be turned into a word.

+ """

+ if (offset+1)*2 > len(data):

+ return None

+ return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]

+ def get_word_at_rva(self, rva):

+ """Return the word value at the given RVA.

+ Returns None if the value can't be read, i.e. the RVA can't be mapped

+ to a file offset.

+ """

+ try:

+ return self.get_word_from_data(self.get_data(rva)[:2], 0)

+ except PEFormatError:

+ return None

+ def get_word_from_offset(self, offset):

+ """Return the word value at the given file offset. (little endian)"""

+ if offset+2 > len(self.__data__):

+ return None

+ return self.get_word_from_data(self.__data__[offset:offset+2], 0)

+ def set_word_at_rva(self, rva, word):

+ """Set the word value at the file offset corresponding to the given RVA."""

+ return self.set_bytes_at_rva(rva, self.get_data_from_word(word))

+ def set_word_at_offset(self, offset, word):

+ """Set the word value at the given file offset."""

+ return self.set_bytes_at_offset(offset, self.get_data_from_word(word))

+ ##

+ # Quad-Word get/set

+ ##

+ def get_data_from_qword(self, word):

+ """Return a eight byte string representing the quad-word value. (little endian)."""

+ return struct.pack('<Q', word)

+ def get_qword_from_data(self, data, offset):

+ """Convert eight bytes of data to a word (little endian)

+ 'offset' is assumed to index into a word array. So setting it to

+ N will return a dword out of the data sarting at offset N*8.

+ Returns None if the data can't be turned into a quad word.

+ """

+ if (offset+1)*8 > len(data):

+ return None

+ return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]

+ def get_qword_at_rva(self, rva):

+ """Return the quad-word value at the given RVA.

+ Returns None if the value can't be read, i.e. the RVA can't be mapped

+ to a file offset.

+ """

+ try:

+ return self.get_qword_from_data(self.get_data(rva)[:8], 0)

+ except PEFormatError:

+ return None

+ def get_qword_from_offset(self, offset):

+ """Return the quad-word value at the given file offset. (little endian)"""

+ if offset+8 > len(self.__data__):

+ return None

+ return self.get_qword_from_data(self.__data__[offset:offset+8], 0)

+ def set_qword_at_rva(self, rva, qword):

+ """Set the quad-word value at the file offset corresponding to the given RVA."""

+ return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))

+ def set_qword_at_offset(self, offset, qword):

+ """Set the quad-word value at the given file offset."""

+ return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))

+ ##

+ # Set bytes

+ ##

+ def set_bytes_at_rva(self, rva, data):

+ """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.

+ Return True if successful, False otherwise. It can fail if the

+ offset is outside the file's boundaries.

+ """

+ offset = self.get_physical_by_rva(rva)

+ if not offset:

+ raise False

+ return self.set_bytes_at_offset(offset, data)

+ def set_bytes_at_offset(self, offset, data):

+ """Overwrite the bytes at the given file offset with the given string.

+ Return True if successful, False otherwise. It can fail if the

+ offset is outside the file's boundaries.

+ """

+ if not isinstance(data, str):

+ raise TypeError('data should be of type: str')

+ if offset >= 0 and offset < len(self.__data__):

+ self.__data__ = ( self.__data__[:offset] +

+ data +

+ self.__data__[offset+len(data):] )

+ else:

+ return False

+ # Refresh the section's data with the modified information

+ #

+ for section in self.sections:

+ section_data_start = section.PointerToRawData

+ section_data_end = section_data_start+section.SizeOfRawData

+ section.data = self.__data__[section_data_start:section_data_end]

+ return True

+ def relocate_image(self, new_ImageBase):

+ """Apply the relocation information to the image using the provided new image base.

+ This method will apply the relocation information to the image. Given the new base,

+ all the relocations will be processed and both the raw data and the section's data

+ will be fixed accordingly.

+ The resulting image can be retrieved as well through the method:

+ get_memory_mapped_image()

+ In order to get something that would more closely match what could be found in memory

+ once the Windows loader finished its work.

+ """

+ relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase

+ for reloc in self.DIRECTORY_ENTRY_BASERELOC:

+ virtual_address = reloc.struct.VirtualAddress

+ size_of_block = reloc.struct.SizeOfBlock

+ # We iterate with an index because if the relocation is of type

+ # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry

+ # at once and skip it for the next interation

+ #

+ entry_idx = 0

+ while entry_idx<len(reloc.entries):

+ entry = reloc.entries[entry_idx]

+ entry_idx += 1

+ if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:

+ # Nothing to do for this type of relocation

+ pass

+ elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:

+ # Fix the high 16bits of a relocation

+ #

+ # Add high 16bits of relocation_difference to the

+ # 16bit value at RVA=entry.rva

+ self.set_word_at_rva(

+ entry.rva,

+ ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )

+ elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:

+ # Fix the low 16bits of a relocation

+ #

+ # Add low 16 bits of relocation_difference to the 16bit value

+ # at RVA=entry.rva

+ self.set_word_at_rva(

+ entry.rva,

+ ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)

+ elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:

+ # Handle all high and low parts of a 32bit relocation

+ #

+ # Add relocation_difference to the value at RVA=entry.rva

+ self.set_dword_at_rva(

+ entry.rva,

+ self.get_dword_at_rva(entry.rva)+relocation_difference)

+ elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:

+ # Fix the high 16bits of a relocation and adjust

+ #

+ # Add high 16bits of relocation_difference to the 32bit value

+ # composed from the (16bit value at RVA=entry.rva)<<16 plus

+ # the 16bit value at the next relocation entry.

+ #

+ # If the next entry is beyond the array's limits,

+ # abort... the table is corrupt

+ #

+ if entry_idx == len(reloc.entries):

+ break

+ next_entry = reloc.entries[entry_idx]

+ entry_idx += 1

+ self.set_word_at_rva( entry.rva,

+ ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +

+ relocation_difference & 0xffff0000) >> 16 )

+ elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:

+ # Apply the difference to the 64bit value at the offset

+ # RVA=entry.rva

+ self.set_qword_at_rva(

+ entry.rva,

+ self.get_qword_at_rva(entry.rva) + relocation_difference)

+ def verify_checksum(self):

+ return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()

+ def generate_checksum(self):

+ # Get the offset to the CheckSum field in the OptionalHeader

+ #

+ checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64

+ checksum = 0

+ for i in range( len(self.__data__) / 4 ):

+ # Skip the checksum field

+ #

+ if i == checksum_offset / 4:

+ continue

+ dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]

+ checksum = (checksum & 0xffffffff) + dword + (checksum>>32)

+ if checksum > 2**32:

+ checksum = (checksum & 0xffffffff) + (checksum >> 32)

+ checksum = (checksum & 0xffff) + (checksum >> 16)

+ checksum = (checksum) + (checksum >> 16)

+ checksum = checksum & 0xffff

+ return checksum + len(self.__data__)

« no previous file with comments | « tools/symsrc/pdb_fingerprint_from_img.py ('k') | tools/symsrc/source_index.py » ('j') | no next file with comments »