| Index: tools/symsrc/pefile.py
|
| diff --git a/tools/symsrc/pefile.py b/tools/symsrc/pefile.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..e22fd1ae2457eed8d79f9e2ce2219be7c838a2ec
|
| --- /dev/null
|
| +++ b/tools/symsrc/pefile.py
|
| @@ -0,0 +1,3729 @@
|
| +# -*- coding: Latin-1 -*-
|
| +"""pefile, Portable Executable reader module
|
| +
|
| +
|
| +All the PE file basic structures are available with their default names
|
| +as attributes of the instance returned.
|
| +
|
| +Processed elements such as the import table are made available with lowercase
|
| +names, to differentiate them from the upper case basic structure names.
|
| +
|
| +pefile has been tested against the limits of valid PE headers, that is, malware.
|
| +Lots of packed malware attempt to abuse the format way beyond its standard use.
|
| +To the best of my knowledge most of the abuses are handled gracefully.
|
| +
|
| +Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>
|
| +
|
| +All rights reserved.
|
| +
|
| +For detailed copyright information see the file COPYING in
|
| +the root of the distribution archive.
|
| +"""
|
| +
|
| +__author__ = 'Ero Carrera'
|
| +__version__ = '1.2.9.1'
|
| +__contact__ = 'ero@dkbza.org'
|
| +
|
| +
|
| +import os
|
| +import struct
|
| +import time
|
| +import math
|
| +import re
|
| +import exceptions
|
| +import string
|
| +import array
|
| +
|
| +sha1, sha256, sha512, md5 = None, None, None, None
|
| +
|
| +try:
|
| + import hashlib
|
| + sha1 = hashlib.sha1
|
| + sha256 = hashlib.sha256
|
| + sha512 = hashlib.sha512
|
| + md5 = hashlib.md5
|
| +except ImportError:
|
| + try:
|
| + import sha
|
| + sha1 = sha.new
|
| + except ImportError:
|
| + pass
|
| + try:
|
| + import md5
|
| + md5 = md5.new
|
| + except ImportError:
|
| + pass
|
| +
|
| +
|
| +fast_load = False
|
| +
|
| +IMAGE_DOS_SIGNATURE = 0x5A4D
|
| +IMAGE_OS2_SIGNATURE = 0x454E
|
| +IMAGE_OS2_SIGNATURE_LE = 0x454C
|
| +IMAGE_VXD_SIGNATURE = 0x454C
|
| +IMAGE_NT_SIGNATURE = 0x00004550
|
| +IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
|
| +IMAGE_ORDINAL_FLAG = 0x80000000L
|
| +IMAGE_ORDINAL_FLAG64 = 0x8000000000000000L
|
| +OPTIONAL_HEADER_MAGIC_PE = 0x10b
|
| +OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20b
|
| +
|
| +
|
| +directory_entry_types = [
|
| + ('IMAGE_DIRECTORY_ENTRY_EXPORT', 0),
|
| + ('IMAGE_DIRECTORY_ENTRY_IMPORT', 1),
|
| + ('IMAGE_DIRECTORY_ENTRY_RESOURCE', 2),
|
| + ('IMAGE_DIRECTORY_ENTRY_EXCEPTION', 3),
|
| + ('IMAGE_DIRECTORY_ENTRY_SECURITY', 4),
|
| + ('IMAGE_DIRECTORY_ENTRY_BASERELOC', 5),
|
| + ('IMAGE_DIRECTORY_ENTRY_DEBUG', 6),
|
| + ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT', 7),
|
| + ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR', 8),
|
| + ('IMAGE_DIRECTORY_ENTRY_TLS', 9),
|
| + ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG', 10),
|
| + ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', 11),
|
| + ('IMAGE_DIRECTORY_ENTRY_IAT', 12),
|
| + ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', 13),
|
| + ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
|
| + ('IMAGE_DIRECTORY_ENTRY_RESERVED', 15) ]
|
| +
|
| +DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)
|
| +
|
| +
|
| +image_characteristics = [
|
| + ('IMAGE_FILE_RELOCS_STRIPPED', 0x0001),
|
| + ('IMAGE_FILE_EXECUTABLE_IMAGE', 0x0002),
|
| + ('IMAGE_FILE_LINE_NUMS_STRIPPED', 0x0004),
|
| + ('IMAGE_FILE_LOCAL_SYMS_STRIPPED', 0x0008),
|
| + ('IMAGE_FILE_AGGRESIVE_WS_TRIM', 0x0010),
|
| + ('IMAGE_FILE_LARGE_ADDRESS_AWARE', 0x0020),
|
| + ('IMAGE_FILE_16BIT_MACHINE', 0x0040),
|
| + ('IMAGE_FILE_BYTES_REVERSED_LO', 0x0080),
|
| + ('IMAGE_FILE_32BIT_MACHINE', 0x0100),
|
| + ('IMAGE_FILE_DEBUG_STRIPPED', 0x0200),
|
| + ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', 0x0400),
|
| + ('IMAGE_FILE_NET_RUN_FROM_SWAP', 0x0800),
|
| + ('IMAGE_FILE_SYSTEM', 0x1000),
|
| + ('IMAGE_FILE_DLL', 0x2000),
|
| + ('IMAGE_FILE_UP_SYSTEM_ONLY', 0x4000),
|
| + ('IMAGE_FILE_BYTES_REVERSED_HI', 0x8000) ]
|
| +
|
| +IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
|
| + image_characteristics]+image_characteristics)
|
| +
|
| +
|
| +section_characteristics = [
|
| + ('IMAGE_SCN_CNT_CODE', 0x00000020),
|
| + ('IMAGE_SCN_CNT_INITIALIZED_DATA', 0x00000040),
|
| + ('IMAGE_SCN_CNT_UNINITIALIZED_DATA', 0x00000080),
|
| + ('IMAGE_SCN_LNK_OTHER', 0x00000100),
|
| + ('IMAGE_SCN_LNK_INFO', 0x00000200),
|
| + ('IMAGE_SCN_LNK_REMOVE', 0x00000800),
|
| + ('IMAGE_SCN_LNK_COMDAT', 0x00001000),
|
| + ('IMAGE_SCN_MEM_FARDATA', 0x00008000),
|
| + ('IMAGE_SCN_MEM_PURGEABLE', 0x00020000),
|
| + ('IMAGE_SCN_MEM_16BIT', 0x00020000),
|
| + ('IMAGE_SCN_MEM_LOCKED', 0x00040000),
|
| + ('IMAGE_SCN_MEM_PRELOAD', 0x00080000),
|
| + ('IMAGE_SCN_ALIGN_1BYTES', 0x00100000),
|
| + ('IMAGE_SCN_ALIGN_2BYTES', 0x00200000),
|
| + ('IMAGE_SCN_ALIGN_4BYTES', 0x00300000),
|
| + ('IMAGE_SCN_ALIGN_8BYTES', 0x00400000),
|
| + ('IMAGE_SCN_ALIGN_16BYTES', 0x00500000),
|
| + ('IMAGE_SCN_ALIGN_32BYTES', 0x00600000),
|
| + ('IMAGE_SCN_ALIGN_64BYTES', 0x00700000),
|
| + ('IMAGE_SCN_ALIGN_128BYTES', 0x00800000),
|
| + ('IMAGE_SCN_ALIGN_256BYTES', 0x00900000),
|
| + ('IMAGE_SCN_ALIGN_512BYTES', 0x00A00000),
|
| + ('IMAGE_SCN_ALIGN_1024BYTES', 0x00B00000),
|
| + ('IMAGE_SCN_ALIGN_2048BYTES', 0x00C00000),
|
| + ('IMAGE_SCN_ALIGN_4096BYTES', 0x00D00000),
|
| + ('IMAGE_SCN_ALIGN_8192BYTES', 0x00E00000),
|
| + ('IMAGE_SCN_ALIGN_MASK', 0x00F00000),
|
| + ('IMAGE_SCN_LNK_NRELOC_OVFL', 0x01000000),
|
| + ('IMAGE_SCN_MEM_DISCARDABLE', 0x02000000),
|
| + ('IMAGE_SCN_MEM_NOT_CACHED', 0x04000000),
|
| + ('IMAGE_SCN_MEM_NOT_PAGED', 0x08000000),
|
| + ('IMAGE_SCN_MEM_SHARED', 0x10000000),
|
| + ('IMAGE_SCN_MEM_EXECUTE', 0x20000000),
|
| + ('IMAGE_SCN_MEM_READ', 0x40000000),
|
| + ('IMAGE_SCN_MEM_WRITE', 0x80000000L) ]
|
| +
|
| +SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
|
| + section_characteristics]+section_characteristics)
|
| +
|
| +
|
| +debug_types = [
|
| + ('IMAGE_DEBUG_TYPE_UNKNOWN', 0),
|
| + ('IMAGE_DEBUG_TYPE_COFF', 1),
|
| + ('IMAGE_DEBUG_TYPE_CODEVIEW', 2),
|
| + ('IMAGE_DEBUG_TYPE_FPO', 3),
|
| + ('IMAGE_DEBUG_TYPE_MISC', 4),
|
| + ('IMAGE_DEBUG_TYPE_EXCEPTION', 5),
|
| + ('IMAGE_DEBUG_TYPE_FIXUP', 6),
|
| + ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC', 7),
|
| + ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC', 8),
|
| + ('IMAGE_DEBUG_TYPE_BORLAND', 9),
|
| + ('IMAGE_DEBUG_TYPE_RESERVED10', 10) ]
|
| +
|
| +DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)
|
| +
|
| +
|
| +subsystem_types = [
|
| + ('IMAGE_SUBSYSTEM_UNKNOWN', 0),
|
| + ('IMAGE_SUBSYSTEM_NATIVE', 1),
|
| + ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
|
| + ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
|
| + ('IMAGE_SUBSYSTEM_OS2_CUI', 5),
|
| + ('IMAGE_SUBSYSTEM_POSIX_CUI', 7),
|
| + ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI', 9),
|
| + ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
|
| + ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
|
| + ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER', 12),
|
| + ('IMAGE_SUBSYSTEM_EFI_ROM', 13),
|
| + ('IMAGE_SUBSYSTEM_XBOX', 14)]
|
| +
|
| +SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)
|
| +
|
| +
|
| +machine_types = [
|
| + ('IMAGE_FILE_MACHINE_UNKNOWN', 0),
|
| + ('IMAGE_FILE_MACHINE_AM33', 0x1d3),
|
| + ('IMAGE_FILE_MACHINE_AMD64', 0x8664),
|
| + ('IMAGE_FILE_MACHINE_ARM', 0x1c0),
|
| + ('IMAGE_FILE_MACHINE_EBC', 0xebc),
|
| + ('IMAGE_FILE_MACHINE_I386', 0x14c),
|
| + ('IMAGE_FILE_MACHINE_IA64', 0x200),
|
| + ('IMAGE_FILE_MACHINE_MR32', 0x9041),
|
| + ('IMAGE_FILE_MACHINE_MIPS16', 0x266),
|
| + ('IMAGE_FILE_MACHINE_MIPSFPU', 0x366),
|
| + ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
|
| + ('IMAGE_FILE_MACHINE_POWERPC', 0x1f0),
|
| + ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
|
| + ('IMAGE_FILE_MACHINE_R4000', 0x166),
|
| + ('IMAGE_FILE_MACHINE_SH3', 0x1a2),
|
| + ('IMAGE_FILE_MACHINE_SH3DSP', 0x1a3),
|
| + ('IMAGE_FILE_MACHINE_SH4', 0x1a6),
|
| + ('IMAGE_FILE_MACHINE_SH5', 0x1a8),
|
| + ('IMAGE_FILE_MACHINE_THUMB', 0x1c2),
|
| + ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
|
| + ]
|
| +
|
| +MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)
|
| +
|
| +
|
| +relocation_types = [
|
| + ('IMAGE_REL_BASED_ABSOLUTE', 0),
|
| + ('IMAGE_REL_BASED_HIGH', 1),
|
| + ('IMAGE_REL_BASED_LOW', 2),
|
| + ('IMAGE_REL_BASED_HIGHLOW', 3),
|
| + ('IMAGE_REL_BASED_HIGHADJ', 4),
|
| + ('IMAGE_REL_BASED_MIPS_JMPADDR', 5),
|
| + ('IMAGE_REL_BASED_SECTION', 6),
|
| + ('IMAGE_REL_BASED_REL', 7),
|
| + ('IMAGE_REL_BASED_MIPS_JMPADDR16', 9),
|
| + ('IMAGE_REL_BASED_IA64_IMM64', 9),
|
| + ('IMAGE_REL_BASED_DIR64', 10),
|
| + ('IMAGE_REL_BASED_HIGH3ADJ', 11) ]
|
| +
|
| +RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)
|
| +
|
| +
|
| +dll_characteristics = [
|
| + ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
|
| + ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
|
| + ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
|
| + ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
|
| + ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE', 0x0040),
|
| + ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY', 0x0080),
|
| + ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT', 0x0100),
|
| + ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION', 0x0200),
|
| + ('IMAGE_DLL_CHARACTERISTICS_NO_SEH', 0x0400),
|
| + ('IMAGE_DLL_CHARACTERISTICS_NO_BIND', 0x0800),
|
| + ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
|
| + ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER', 0x2000),
|
| + ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]
|
| +
|
| +DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)
|
| +
|
| +
|
| +# Resource types
|
| +resource_type = [
|
| + ('RT_CURSOR', 1),
|
| + ('RT_BITMAP', 2),
|
| + ('RT_ICON', 3),
|
| + ('RT_MENU', 4),
|
| + ('RT_DIALOG', 5),
|
| + ('RT_STRING', 6),
|
| + ('RT_FONTDIR', 7),
|
| + ('RT_FONT', 8),
|
| + ('RT_ACCELERATOR', 9),
|
| + ('RT_RCDATA', 10),
|
| + ('RT_MESSAGETABLE', 11),
|
| + ('RT_GROUP_CURSOR', 12),
|
| + ('RT_GROUP_ICON', 14),
|
| + ('RT_VERSION', 16),
|
| + ('RT_DLGINCLUDE', 17),
|
| + ('RT_PLUGPLAY', 19),
|
| + ('RT_VXD', 20),
|
| + ('RT_ANICURSOR', 21),
|
| + ('RT_ANIICON', 22),
|
| + ('RT_HTML', 23),
|
| + ('RT_MANIFEST', 24) ]
|
| +
|
| +RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)
|
| +
|
| +
|
| +# Language definitions
|
| +lang = [
|
| + ('LANG_NEUTRAL', 0x00),
|
| + ('LANG_INVARIANT', 0x7f),
|
| + ('LANG_AFRIKAANS', 0x36),
|
| + ('LANG_ALBANIAN', 0x1c),
|
| + ('LANG_ARABIC', 0x01),
|
| + ('LANG_ARMENIAN', 0x2b),
|
| + ('LANG_ASSAMESE', 0x4d),
|
| + ('LANG_AZERI', 0x2c),
|
| + ('LANG_BASQUE', 0x2d),
|
| + ('LANG_BELARUSIAN', 0x23),
|
| + ('LANG_BENGALI', 0x45),
|
| + ('LANG_BULGARIAN', 0x02),
|
| + ('LANG_CATALAN', 0x03),
|
| + ('LANG_CHINESE', 0x04),
|
| + ('LANG_CROATIAN', 0x1a),
|
| + ('LANG_CZECH', 0x05),
|
| + ('LANG_DANISH', 0x06),
|
| + ('LANG_DIVEHI', 0x65),
|
| + ('LANG_DUTCH', 0x13),
|
| + ('LANG_ENGLISH', 0x09),
|
| + ('LANG_ESTONIAN', 0x25),
|
| + ('LANG_FAEROESE', 0x38),
|
| + ('LANG_FARSI', 0x29),
|
| + ('LANG_FINNISH', 0x0b),
|
| + ('LANG_FRENCH', 0x0c),
|
| + ('LANG_GALICIAN', 0x56),
|
| + ('LANG_GEORGIAN', 0x37),
|
| + ('LANG_GERMAN', 0x07),
|
| + ('LANG_GREEK', 0x08),
|
| + ('LANG_GUJARATI', 0x47),
|
| + ('LANG_HEBREW', 0x0d),
|
| + ('LANG_HINDI', 0x39),
|
| + ('LANG_HUNGARIAN', 0x0e),
|
| + ('LANG_ICELANDIC', 0x0f),
|
| + ('LANG_INDONESIAN', 0x21),
|
| + ('LANG_ITALIAN', 0x10),
|
| + ('LANG_JAPANESE', 0x11),
|
| + ('LANG_KANNADA', 0x4b),
|
| + ('LANG_KASHMIRI', 0x60),
|
| + ('LANG_KAZAK', 0x3f),
|
| + ('LANG_KONKANI', 0x57),
|
| + ('LANG_KOREAN', 0x12),
|
| + ('LANG_KYRGYZ', 0x40),
|
| + ('LANG_LATVIAN', 0x26),
|
| + ('LANG_LITHUANIAN', 0x27),
|
| + ('LANG_MACEDONIAN', 0x2f),
|
| + ('LANG_MALAY', 0x3e),
|
| + ('LANG_MALAYALAM', 0x4c),
|
| + ('LANG_MANIPURI', 0x58),
|
| + ('LANG_MARATHI', 0x4e),
|
| + ('LANG_MONGOLIAN', 0x50),
|
| + ('LANG_NEPALI', 0x61),
|
| + ('LANG_NORWEGIAN', 0x14),
|
| + ('LANG_ORIYA', 0x48),
|
| + ('LANG_POLISH', 0x15),
|
| + ('LANG_PORTUGUESE', 0x16),
|
| + ('LANG_PUNJABI', 0x46),
|
| + ('LANG_ROMANIAN', 0x18),
|
| + ('LANG_RUSSIAN', 0x19),
|
| + ('LANG_SANSKRIT', 0x4f),
|
| + ('LANG_SERBIAN', 0x1a),
|
| + ('LANG_SINDHI', 0x59),
|
| + ('LANG_SLOVAK', 0x1b),
|
| + ('LANG_SLOVENIAN', 0x24),
|
| + ('LANG_SPANISH', 0x0a),
|
| + ('LANG_SWAHILI', 0x41),
|
| + ('LANG_SWEDISH', 0x1d),
|
| + ('LANG_SYRIAC', 0x5a),
|
| + ('LANG_TAMIL', 0x49),
|
| + ('LANG_TATAR', 0x44),
|
| + ('LANG_TELUGU', 0x4a),
|
| + ('LANG_THAI', 0x1e),
|
| + ('LANG_TURKISH', 0x1f),
|
| + ('LANG_UKRAINIAN', 0x22),
|
| + ('LANG_URDU', 0x20),
|
| + ('LANG_UZBEK', 0x43),
|
| + ('LANG_VIETNAMESE', 0x2a),
|
| + ('LANG_GAELIC', 0x3c),
|
| + ('LANG_MALTESE', 0x3a),
|
| + ('LANG_MAORI', 0x28),
|
| + ('LANG_RHAETO_ROMANCE',0x17),
|
| + ('LANG_SAAMI', 0x3b),
|
| + ('LANG_SORBIAN', 0x2e),
|
| + ('LANG_SUTU', 0x30),
|
| + ('LANG_TSONGA', 0x31),
|
| + ('LANG_TSWANA', 0x32),
|
| + ('LANG_VENDA', 0x33),
|
| + ('LANG_XHOSA', 0x34),
|
| + ('LANG_ZULU', 0x35),
|
| + ('LANG_ESPERANTO', 0x8f),
|
| + ('LANG_WALON', 0x90),
|
| + ('LANG_CORNISH', 0x91),
|
| + ('LANG_WELSH', 0x92),
|
| + ('LANG_BRETON', 0x93) ]
|
| +
|
| +LANG = dict(lang+[(e[1], e[0]) for e in lang])
|
| +
|
| +
|
| +# Sublanguage definitions
|
| +sublang = [
|
| + ('SUBLANG_NEUTRAL', 0x00),
|
| + ('SUBLANG_DEFAULT', 0x01),
|
| + ('SUBLANG_SYS_DEFAULT', 0x02),
|
| + ('SUBLANG_ARABIC_SAUDI_ARABIA', 0x01),
|
| + ('SUBLANG_ARABIC_IRAQ', 0x02),
|
| + ('SUBLANG_ARABIC_EGYPT', 0x03),
|
| + ('SUBLANG_ARABIC_LIBYA', 0x04),
|
| + ('SUBLANG_ARABIC_ALGERIA', 0x05),
|
| + ('SUBLANG_ARABIC_MOROCCO', 0x06),
|
| + ('SUBLANG_ARABIC_TUNISIA', 0x07),
|
| + ('SUBLANG_ARABIC_OMAN', 0x08),
|
| + ('SUBLANG_ARABIC_YEMEN', 0x09),
|
| + ('SUBLANG_ARABIC_SYRIA', 0x0a),
|
| + ('SUBLANG_ARABIC_JORDAN', 0x0b),
|
| + ('SUBLANG_ARABIC_LEBANON', 0x0c),
|
| + ('SUBLANG_ARABIC_KUWAIT', 0x0d),
|
| + ('SUBLANG_ARABIC_UAE', 0x0e),
|
| + ('SUBLANG_ARABIC_BAHRAIN', 0x0f),
|
| + ('SUBLANG_ARABIC_QATAR', 0x10),
|
| + ('SUBLANG_AZERI_LATIN', 0x01),
|
| + ('SUBLANG_AZERI_CYRILLIC', 0x02),
|
| + ('SUBLANG_CHINESE_TRADITIONAL', 0x01),
|
| + ('SUBLANG_CHINESE_SIMPLIFIED', 0x02),
|
| + ('SUBLANG_CHINESE_HONGKONG', 0x03),
|
| + ('SUBLANG_CHINESE_SINGAPORE', 0x04),
|
| + ('SUBLANG_CHINESE_MACAU', 0x05),
|
| + ('SUBLANG_DUTCH', 0x01),
|
| + ('SUBLANG_DUTCH_BELGIAN', 0x02),
|
| + ('SUBLANG_ENGLISH_US', 0x01),
|
| + ('SUBLANG_ENGLISH_UK', 0x02),
|
| + ('SUBLANG_ENGLISH_AUS', 0x03),
|
| + ('SUBLANG_ENGLISH_CAN', 0x04),
|
| + ('SUBLANG_ENGLISH_NZ', 0x05),
|
| + ('SUBLANG_ENGLISH_EIRE', 0x06),
|
| + ('SUBLANG_ENGLISH_SOUTH_AFRICA', 0x07),
|
| + ('SUBLANG_ENGLISH_JAMAICA', 0x08),
|
| + ('SUBLANG_ENGLISH_CARIBBEAN', 0x09),
|
| + ('SUBLANG_ENGLISH_BELIZE', 0x0a),
|
| + ('SUBLANG_ENGLISH_TRINIDAD', 0x0b),
|
| + ('SUBLANG_ENGLISH_ZIMBABWE', 0x0c),
|
| + ('SUBLANG_ENGLISH_PHILIPPINES', 0x0d),
|
| + ('SUBLANG_FRENCH', 0x01),
|
| + ('SUBLANG_FRENCH_BELGIAN', 0x02),
|
| + ('SUBLANG_FRENCH_CANADIAN', 0x03),
|
| + ('SUBLANG_FRENCH_SWISS', 0x04),
|
| + ('SUBLANG_FRENCH_LUXEMBOURG', 0x05),
|
| + ('SUBLANG_FRENCH_MONACO', 0x06),
|
| + ('SUBLANG_GERMAN', 0x01),
|
| + ('SUBLANG_GERMAN_SWISS', 0x02),
|
| + ('SUBLANG_GERMAN_AUSTRIAN', 0x03),
|
| + ('SUBLANG_GERMAN_LUXEMBOURG', 0x04),
|
| + ('SUBLANG_GERMAN_LIECHTENSTEIN', 0x05),
|
| + ('SUBLANG_ITALIAN', 0x01),
|
| + ('SUBLANG_ITALIAN_SWISS', 0x02),
|
| + ('SUBLANG_KASHMIRI_SASIA', 0x02),
|
| + ('SUBLANG_KASHMIRI_INDIA', 0x02),
|
| + ('SUBLANG_KOREAN', 0x01),
|
| + ('SUBLANG_LITHUANIAN', 0x01),
|
| + ('SUBLANG_MALAY_MALAYSIA', 0x01),
|
| + ('SUBLANG_MALAY_BRUNEI_DARUSSALAM', 0x02),
|
| + ('SUBLANG_NEPALI_INDIA', 0x02),
|
| + ('SUBLANG_NORWEGIAN_BOKMAL', 0x01),
|
| + ('SUBLANG_NORWEGIAN_NYNORSK', 0x02),
|
| + ('SUBLANG_PORTUGUESE', 0x02),
|
| + ('SUBLANG_PORTUGUESE_BRAZILIAN', 0x01),
|
| + ('SUBLANG_SERBIAN_LATIN', 0x02),
|
| + ('SUBLANG_SERBIAN_CYRILLIC', 0x03),
|
| + ('SUBLANG_SPANISH', 0x01),
|
| + ('SUBLANG_SPANISH_MEXICAN', 0x02),
|
| + ('SUBLANG_SPANISH_MODERN', 0x03),
|
| + ('SUBLANG_SPANISH_GUATEMALA', 0x04),
|
| + ('SUBLANG_SPANISH_COSTA_RICA', 0x05),
|
| + ('SUBLANG_SPANISH_PANAMA', 0x06),
|
| + ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC', 0x07),
|
| + ('SUBLANG_SPANISH_VENEZUELA', 0x08),
|
| + ('SUBLANG_SPANISH_COLOMBIA', 0x09),
|
| + ('SUBLANG_SPANISH_PERU', 0x0a),
|
| + ('SUBLANG_SPANISH_ARGENTINA', 0x0b),
|
| + ('SUBLANG_SPANISH_ECUADOR', 0x0c),
|
| + ('SUBLANG_SPANISH_CHILE', 0x0d),
|
| + ('SUBLANG_SPANISH_URUGUAY', 0x0e),
|
| + ('SUBLANG_SPANISH_PARAGUAY', 0x0f),
|
| + ('SUBLANG_SPANISH_BOLIVIA', 0x10),
|
| + ('SUBLANG_SPANISH_EL_SALVADOR', 0x11),
|
| + ('SUBLANG_SPANISH_HONDURAS', 0x12),
|
| + ('SUBLANG_SPANISH_NICARAGUA', 0x13),
|
| + ('SUBLANG_SPANISH_PUERTO_RICO', 0x14),
|
| + ('SUBLANG_SWEDISH', 0x01),
|
| + ('SUBLANG_SWEDISH_FINLAND', 0x02),
|
| + ('SUBLANG_URDU_PAKISTAN', 0x01),
|
| + ('SUBLANG_URDU_INDIA', 0x02),
|
| + ('SUBLANG_UZBEK_LATIN', 0x01),
|
| + ('SUBLANG_UZBEK_CYRILLIC', 0x02),
|
| + ('SUBLANG_DUTCH_SURINAM', 0x03),
|
| + ('SUBLANG_ROMANIAN', 0x01),
|
| + ('SUBLANG_ROMANIAN_MOLDAVIA', 0x02),
|
| + ('SUBLANG_RUSSIAN', 0x01),
|
| + ('SUBLANG_RUSSIAN_MOLDAVIA', 0x02),
|
| + ('SUBLANG_CROATIAN', 0x01),
|
| + ('SUBLANG_LITHUANIAN_CLASSIC', 0x02),
|
| + ('SUBLANG_GAELIC', 0x01),
|
| + ('SUBLANG_GAELIC_SCOTTISH', 0x02),
|
| + ('SUBLANG_GAELIC_MANX', 0x03) ]
|
| +
|
| +SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])
|
| +
|
| +
|
| +class UnicodeStringWrapperPostProcessor:
|
| + """This class attemps to help the process of identifying strings
|
| + that might be plain Unicode or Pascal. A list of strings will be
|
| + wrapped on it with the hope the overlappings will help make the
|
| + decission about their type."""
|
| +
|
| + def __init__(self, pe, rva_ptr):
|
| + self.pe = pe
|
| + self.rva_ptr = rva_ptr
|
| + self.string = None
|
| +
|
| +
|
| + def get_rva(self):
|
| + """Get the RVA of the string."""
|
| +
|
| + return self.rva_ptr
|
| +
|
| +
|
| + def __str__(self):
|
| + """Return the escaped ASCII representation of the string."""
|
| +
|
| + def convert_char(char):
|
| + if char in string.printable:
|
| + return char
|
| + else:
|
| + return r'\x%02x' % ord(char)
|
| +
|
| + if self.string:
|
| + return ''.join([convert_char(c) for c in self.string])
|
| +
|
| + return ''
|
| +
|
| +
|
| + def invalidate(self):
|
| + """Make this instance None, to express it's no known string type."""
|
| +
|
| + self = None
|
| +
|
| +
|
| + def render_pascal_16(self):
|
| +
|
| + self.string = self.pe.get_string_u_at_rva(
|
| + self.rva_ptr+2,
|
| + max_length=self.__get_pascal_16_length())
|
| +
|
| +
|
| + def ask_pascal_16(self, next_rva_ptr):
|
| + """The next RVA is taken to be the one immediately following this one.
|
| +
|
| + Such RVA could indicate the natural end of the string and will be checked
|
| + with the possible length contained in the first word.
|
| + """
|
| +
|
| + length = self.__get_pascal_16_length()
|
| +
|
| + if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
|
| + self.length = length
|
| + return True
|
| +
|
| + return False
|
| +
|
| +
|
| + def __get_pascal_16_length(self):
|
| +
|
| + return self.__get_word_value_at_rva(self.rva_ptr)
|
| +
|
| +
|
| + def __get_word_value_at_rva(self, rva):
|
| +
|
| + try:
|
| + data = self.pe.get_data(self.rva_ptr, 2)
|
| + except PEFormatError, e:
|
| + return False
|
| +
|
| + if len(data)<2:
|
| + return False
|
| +
|
| + return struct.unpack('<H', data)[0]
|
| +
|
| +
|
| + #def render_pascal_8(self):
|
| + # """"""
|
| +
|
| +
|
| + def ask_unicode_16(self, next_rva_ptr):
|
| + """The next RVA is taken to be the one immediately following this one.
|
| +
|
| + Such RVA could indicate the natural end of the string and will be checked
|
| + to see if there's a Unicode NULL character there.
|
| + """
|
| +
|
| + if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
|
| + self.length = next_rva_ptr - self.rva_ptr
|
| + return True
|
| +
|
| + return False
|
| +
|
| +
|
| + def render_unicode_16(self):
|
| + """"""
|
| +
|
| + self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
|
| +
|
| +
|
| +class PEFormatError(Exception):
|
| + """Generic PE format error exception."""
|
| +
|
| + def __init__(self, value):
|
| + self.value = value
|
| +
|
| + def __str__(self):
|
| + return repr(self.value)
|
| +
|
| +
|
| +class Dump:
|
| + """Convenience class for dumping the PE information."""
|
| +
|
| + def __init__(self):
|
| + self.text = ''
|
| +
|
| +
|
| + def add_lines(self, txt, indent=0):
|
| + """Adds a list of lines.
|
| +
|
| + The list can be indented with the optional argument 'indent'.
|
| + """
|
| + for line in txt:
|
| + self.add_line(line, indent)
|
| +
|
| +
|
| + def add_line(self, txt, indent=0):
|
| + """Adds a line.
|
| +
|
| + The line can be indented with the optional argument 'indent'.
|
| + """
|
| +
|
| + self.add(txt+'\n', indent)
|
| +
|
| +
|
| + def add(self, txt, indent=0):
|
| + """Adds some text, no newline will be appended.
|
| +
|
| + The text can be indented with the optional argument 'indent'.
|
| + """
|
| +
|
| + if isinstance(txt, unicode):
|
| + s = []
|
| + for c in txt:
|
| + try:
|
| + s.append(str(c))
|
| + except UnicodeEncodeError, e:
|
| + s.append(repr(c))
|
| +
|
| + txt = ''.join(s)
|
| +
|
| + self.text += ' '*indent+txt
|
| +
|
| +
|
| + def add_header(self, txt):
|
| + """Adds a header element."""
|
| +
|
| + self.add_line('-'*10+txt+'-'*10+'\n')
|
| +
|
| +
|
| + def add_newline(self):
|
| + """Adds a newline."""
|
| +
|
| + self.text += '\n'
|
| +
|
| +
|
| + def get_text(self):
|
| + """Get the text in its current state."""
|
| +
|
| + return self.text
|
| +
|
| +
|
| +
|
| +class Structure:
|
| + """Prepare structure object to extract members from data.
|
| +
|
| + Format is a list containing definitions for the elements
|
| + of the structure.
|
| + """
|
| +
|
| +
|
| + def __init__(self, format, name=None, file_offset=None):
|
| + # Format is forced little endian, for big endian non Intel platforms
|
| + self.__format__ = '<'
|
| + self.__keys__ = []
|
| +# self.values = {}
|
| + self.__format_length__ = 0
|
| + self.__set_format__(format[1])
|
| + self._all_zeroes = False
|
| + self.__unpacked_data_elms__ = None
|
| + self.__file_offset__ = file_offset
|
| + if name:
|
| + self.name = name
|
| + else:
|
| + self.name = format[0]
|
| +
|
| +
|
| + def __get_format__(self):
|
| + return self.__format__
|
| +
|
| +
|
| + def get_file_offset(self):
|
| + return self.__file_offset__
|
| +
|
| + def set_file_offset(self, offset):
|
| + self.__file_offset__ = offset
|
| +
|
| + def all_zeroes(self):
|
| + """Returns true is the unpacked data is all zeroes."""
|
| +
|
| + return self._all_zeroes
|
| +
|
| +
|
| + def __set_format__(self, format):
|
| +
|
| + for elm in format:
|
| + if ',' in elm:
|
| + elm_type, elm_name = elm.split(',', 1)
|
| + self.__format__ += elm_type
|
| +
|
| + elm_names = elm_name.split(',')
|
| + names = []
|
| + for elm_name in elm_names:
|
| + if elm_name in self.__keys__:
|
| + search_list = [x[:len(elm_name)] for x in self.__keys__]
|
| + occ_count = search_list.count(elm_name)
|
| + elm_name = elm_name+'_'+str(occ_count)
|
| + names.append(elm_name)
|
| + # Some PE header structures have unions on them, so a certain
|
| + # value might have different names, so each key has a list of
|
| + # all the possible members referring to the data.
|
| + self.__keys__.append(names)
|
| +
|
| + self.__format_length__ = struct.calcsize(self.__format__)
|
| +
|
| +
|
| + def sizeof(self):
|
| + """Return size of the structure."""
|
| +
|
| + return self.__format_length__
|
| +
|
| +
|
| + def __unpack__(self, data):
|
| +
|
| + if len(data)>self.__format_length__:
|
| + data = data[:self.__format_length__]
|
| +
|
| + # OC Patch:
|
| + # Some malware have incorrect header lengths.
|
| + # Fail gracefully if this occurs
|
| + # Buggy malware: a29b0118af8b7408444df81701ad5a7f
|
| + #
|
| + elif len(data)<self.__format_length__:
|
| + raise PEFormatError('Data length less than expected header length.')
|
| +
|
| +
|
| + if data.count(chr(0)) == len(data):
|
| + self._all_zeroes = True
|
| +
|
| + self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
|
| + for i in xrange(len(self.__unpacked_data_elms__)):
|
| + for key in self.__keys__[i]:
|
| +# self.values[key] = self.__unpacked_data_elms__[i]
|
| + setattr(self, key, self.__unpacked_data_elms__[i])
|
| +
|
| +
|
| + def __pack__(self):
|
| +
|
| + new_values = []
|
| +
|
| + for i in xrange(len(self.__unpacked_data_elms__)):
|
| +
|
| + for key in self.__keys__[i]:
|
| + new_val = getattr(self, key)
|
| + old_val = self.__unpacked_data_elms__[i]
|
| +
|
| + # In the case of Unions, when the first changed value
|
| + # is picked the loop is exited
|
| + if new_val != old_val:
|
| + break
|
| +
|
| + new_values.append(new_val)
|
| +
|
| + return struct.pack(self.__format__, *new_values)
|
| +
|
| +
|
| + def __str__(self):
|
| + return '\n'.join( self.dump() )
|
| +
|
| + def __repr__(self):
|
| + return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
|
| +
|
| +
|
| + def dump(self, indentation=0):
|
| + """Returns a string representation of the structure."""
|
| +
|
| + dump = []
|
| +
|
| + dump.append('[%s]' % self.name)
|
| +
|
| + # Refer to the __set_format__ method for an explanation
|
| + # of the following construct.
|
| + for keys in self.__keys__:
|
| + for key in keys:
|
| +
|
| + val = getattr(self, key)
|
| + if isinstance(val, int) or isinstance(val, long):
|
| + val_str = '0x%-8X' % (val)
|
| + if key == 'TimeDateStamp' or key == 'dwTimeStamp':
|
| + try:
|
| + val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
|
| + except exceptions.ValueError, e:
|
| + val_str += ' [INVALID TIME]'
|
| + else:
|
| + val_str = ''.join(filter(lambda c:c != '\0', str(val)))
|
| +
|
| + dump.append('%-30s %s' % (key+':', val_str))
|
| +
|
| + return dump
|
| +
|
| +
|
| +
|
| +class SectionStructure(Structure):
|
| + """Convenience section handling class."""
|
| +
|
| + def get_data(self, start, length=None):
|
| + """Get data chunk from a section.
|
| +
|
| + Allows to query data from the section by passing the
|
| + addresses where the PE file would be loaded by default.
|
| + It is then possible to retrieve code and data by its real
|
| + addresses as it would be if loaded.
|
| + """
|
| +
|
| + offset = start - self.VirtualAddress
|
| +
|
| + if length:
|
| + end = offset+length
|
| + else:
|
| + end = len(self.data)
|
| +
|
| + return self.data[offset:end]
|
| +
|
| +
|
| + def get_rva_from_offset(self, offset):
|
| + return offset - self.PointerToRawData + self.VirtualAddress
|
| +
|
| +
|
| + def get_offset_from_rva(self, rva):
|
| + return (rva - self.VirtualAddress) + self.PointerToRawData
|
| +
|
| +
|
| + def contains_offset(self, offset):
|
| + """Check whether the section contains the file offset provided."""
|
| +
|
| + if not self.PointerToRawData:
|
| + # bss and other sections containing only uninitialized data must have 0
|
| + # and do not take space in the file
|
| + return False
|
| + return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData
|
| +
|
| +
|
| + def contains_rva(self, rva):
|
| + """Check whether the section contains the address provided."""
|
| +
|
| + # PECOFF documentation v8 says:
|
| + # The total size of the section when loaded into memory.
|
| + # If this value is greater than SizeOfRawData, the section is zero-padded.
|
| + # This field is valid only for executable images and should be set to zero
|
| + # for object files.
|
| +
|
| + if len(self.data) < self.SizeOfRawData:
|
| + size = self.Misc_VirtualSize
|
| + else:
|
| + size = max(self.SizeOfRawData, self.Misc_VirtualSize)
|
| +
|
| + return self.VirtualAddress <= rva < self.VirtualAddress + size
|
| +
|
| + def contains(self, rva):
|
| + #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
|
| + return self.contains_rva(rva)
|
| +
|
| +
|
| + def set_data(self, data):
|
| + """Set the data belonging to the section."""
|
| +
|
| + self.data = data
|
| +
|
| +
|
| + def get_entropy(self):
|
| + """Calculate and return the entropy for the section."""
|
| +
|
| + return self.entropy_H( self.data )
|
| +
|
| +
|
| + def get_hash_sha1(self):
|
| + """Get the SHA-1 hex-digest of the section's data."""
|
| +
|
| + if sha1 is not None:
|
| + return sha1( self.data ).hexdigest()
|
| +
|
| +
|
| + def get_hash_sha256(self):
|
| + """Get the SHA-256 hex-digest of the section's data."""
|
| +
|
| + if sha256 is not None:
|
| + return sha256( self.data ).hexdigest()
|
| +
|
| +
|
| + def get_hash_sha512(self):
|
| + """Get the SHA-512 hex-digest of the section's data."""
|
| +
|
| + if sha512 is not None:
|
| + return sha512( self.data ).hexdigest()
|
| +
|
| +
|
| + def get_hash_md5(self):
|
| + """Get the MD5 hex-digest of the section's data."""
|
| +
|
| + if md5 is not None:
|
| + return md5( self.data ).hexdigest()
|
| +
|
| +
|
| + def entropy_H(self, data):
|
| + """Calculate the entropy of a chunk of data."""
|
| +
|
| + if len(data) == 0:
|
| + return 0.0
|
| +
|
| + occurences = array.array('L', [0]*256)
|
| +
|
| + for x in data:
|
| + occurences[ord(x)] += 1
|
| +
|
| + entropy = 0
|
| + for x in occurences:
|
| + if x:
|
| + p_x = float(x) / len(data)
|
| + entropy -= p_x*math.log(p_x, 2)
|
| +
|
| + return entropy
|
| +
|
| +
|
| +
|
| +class DataContainer:
|
| + """Generic data container."""
|
| +
|
| + def __init__(self, **args):
|
| + for key, value in args.items():
|
| + setattr(self, key, value)
|
| +
|
| +
|
| +
|
| +class ImportDescData(DataContainer):
|
| + """Holds import descriptor information.
|
| +
|
| + dll: name of the imported DLL
|
| + imports: list of imported symbols (ImportData instances)
|
| + struct: IMAGE_IMPORT_DESCRIPTOR sctruture
|
| + """
|
| +
|
| +class ImportData(DataContainer):
|
| + """Holds imported symbol's information.
|
| +
|
| + ordinal: Ordinal of the symbol
|
| + name: Name of the symbol
|
| + bound: If the symbol is bound, this contains
|
| + the address.
|
| + """
|
| +
|
| +class ExportDirData(DataContainer):
|
| + """Holds export directory information.
|
| +
|
| + struct: IMAGE_EXPORT_DIRECTORY structure
|
| + symbols: list of exported symbols (ExportData instances)
|
| +"""
|
| +
|
| +class ExportData(DataContainer):
|
| + """Holds exported symbols' information.
|
| +
|
| + ordinal: ordinal of the symbol
|
| + address: address of the symbol
|
| + name: name of the symbol (None if the symbol is
|
| + exported by ordinal only)
|
| + forwarder: if the symbol is forwarded it will
|
| + contain the name of the target symbol,
|
| + None otherwise.
|
| + """
|
| +
|
| +
|
| +class ResourceDirData(DataContainer):
|
| + """Holds resource directory information.
|
| +
|
| + struct: IMAGE_RESOURCE_DIRECTORY structure
|
| + entries: list of entries (ResourceDirEntryData instances)
|
| + """
|
| +
|
| +class ResourceDirEntryData(DataContainer):
|
| + """Holds resource directory entry data.
|
| +
|
| + struct: IMAGE_RESOURCE_DIRECTORY_ENTRY structure
|
| + name: If the resource is identified by name this
|
| + attribute will contain the name string. None
|
| + otherwise. If identified by id, the id is
|
| + availabe at 'struct.Id'
|
| + id: the id, also in struct.Id
|
| + directory: If this entry has a lower level directory
|
| + this attribute will point to the
|
| + ResourceDirData instance representing it.
|
| + data: If this entry has no futher lower directories
|
| + and points to the actual resource data, this
|
| + attribute will reference the corresponding
|
| + ResourceDataEntryData instance.
|
| + (Either of the 'directory' or 'data' attribute will exist,
|
| + but not both.)
|
| + """
|
| +
|
| +class ResourceDataEntryData(DataContainer):
|
| + """Holds resource data entry information.
|
| +
|
| + struct: IMAGE_RESOURCE_DATA_ENTRY structure
|
| + lang: Primary language ID
|
| + sublang: Sublanguage ID
|
| + """
|
| +
|
| +class DebugData(DataContainer):
|
| + """Holds debug information.
|
| +
|
| + struct: IMAGE_DEBUG_DIRECTORY structure
|
| + """
|
| +
|
| +class BaseRelocationData(DataContainer):
|
| + """Holds base relocation information.
|
| +
|
| + struct: IMAGE_BASE_RELOCATION structure
|
| + entries: list of relocation data (RelocationData instances)
|
| + """
|
| +
|
| +class RelocationData(DataContainer):
|
| + """Holds relocation information.
|
| +
|
| + type: Type of relocation
|
| + The type string is can be obtained by
|
| + RELOCATION_TYPE[type]
|
| + rva: RVA of the relocation
|
| + """
|
| +
|
| +class TlsData(DataContainer):
|
| + """Holds TLS information.
|
| +
|
| + struct: IMAGE_TLS_DIRECTORY structure
|
| + """
|
| +
|
| +class BoundImportDescData(DataContainer):
|
| + """Holds bound import descriptor data.
|
| +
|
| + This directory entry will provide with information on the
|
| + DLLs this PE files has been bound to (if bound at all).
|
| + The structure will contain the name and timestamp of the
|
| + DLL at the time of binding so that the loader can know
|
| + whether it differs from the one currently present in the
|
| + system and must, therefore, re-bind the PE's imports.
|
| +
|
| + struct: IMAGE_BOUND_IMPORT_DESCRIPTOR structure
|
| + name: DLL name
|
| + entries: list of entries (BoundImportRefData instances)
|
| + the entries will exist if this DLL has forwarded
|
| + symbols. If so, the destination DLL will have an
|
| + entry in this list.
|
| + """
|
| +
|
| +class BoundImportRefData(DataContainer):
|
| + """Holds bound import forwader reference data.
|
| +
|
| + Contains the same information as the bound descriptor but
|
| + for forwarded DLLs, if any.
|
| +
|
| + struct: IMAGE_BOUND_FORWARDER_REF structure
|
| + name: dll name
|
| + """
|
| +
|
| +
|
| +class PE:
|
| + """A Portable Executable representation.
|
| +
|
| + This class provides access to most of the information in a PE file.
|
| +
|
| + It expects to be supplied the name of the file to load or PE data
|
| + to process and an optional argument 'fast_load' (False by default)
|
| + which controls whether to load all the directories information,
|
| + which can be quite time consuming.
|
| +
|
| + pe = pefile.PE('module.dll')
|
| + pe = pefile.PE(name='module.dll')
|
| +
|
| + would load 'module.dll' and process it. If the data would be already
|
| + available in a buffer the same could be achieved with:
|
| +
|
| + pe = pefile.PE(data=module_dll_data)
|
| +
|
| + The "fast_load" can be set to a default by setting its value in the
|
| + module itself by means,for instance, of a "pefile.fast_load = True".
|
| + That will make all the subsequent instances not to load the
|
| + whole PE structure. The "full_load" method can be used to parse
|
| + the missing data at a later stage.
|
| +
|
| + Basic headers information will be available in the attributes:
|
| +
|
| + DOS_HEADER
|
| + NT_HEADERS
|
| + FILE_HEADER
|
| + OPTIONAL_HEADER
|
| +
|
| + All of them will contain among their attrbitues the members of the
|
| + corresponding structures as defined in WINNT.H
|
| +
|
| + The raw data corresponding to the header (from the beginning of the
|
| + file up to the start of the first section) will be avaiable in the
|
| + instance's attribute 'header' as a string.
|
| +
|
| + The sections will be available as a list in the 'sections' attribute.
|
| + Each entry will contain as attributes all the structure's members.
|
| +
|
| + Directory entries will be available as attributes (if they exist):
|
| + (no other entries are processed at this point)
|
| +
|
| + DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
|
| + DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
|
| + DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
|
| + DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
|
| + DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
|
| + DIRECTORY_ENTRY_TLS
|
| + DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
|
| +
|
| + The following dictionary attributes provide ways of mapping different
|
| + constants. They will accept the numeric value and return the string
|
| + representation and the opposite, feed in the string and get the
|
| + numeric constant:
|
| +
|
| + DIRECTORY_ENTRY
|
| + IMAGE_CHARACTERISTICS
|
| + SECTION_CHARACTERISTICS
|
| + DEBUG_TYPE
|
| + SUBSYSTEM_TYPE
|
| + MACHINE_TYPE
|
| + RELOCATION_TYPE
|
| + RESOURCE_TYPE
|
| + LANG
|
| + SUBLANG
|
| + """
|
| +
|
| + #
|
| + # Format specifications for PE structures.
|
| + #
|
| +
|
| + __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
|
| + ('H,e_magic', 'H,e_cblp', 'H,e_cp',
|
| + 'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
|
| + 'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
|
| + 'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
|
| + 'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
|
| + 'L,e_lfanew'))
|
| +
|
| + __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
|
| + ('H,Machine', 'H,NumberOfSections',
|
| + 'L,TimeDateStamp', 'L,PointerToSymbolTable',
|
| + 'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
|
| + 'H,Characteristics'))
|
| +
|
| + __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
|
| + ('L,VirtualAddress', 'L,Size'))
|
| +
|
| +
|
| + __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
|
| + ('H,Magic', 'B,MajorLinkerVersion',
|
| + 'B,MinorLinkerVersion', 'L,SizeOfCode',
|
| + 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
|
| + 'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
|
| + 'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
|
| + 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
|
| + 'H,MajorImageVersion', 'H,MinorImageVersion',
|
| + 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
|
| + 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
|
| + 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
|
| + 'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
|
| + 'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
|
| + 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
|
| +
|
| +
|
| + __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
|
| + ('H,Magic', 'B,MajorLinkerVersion',
|
| + 'B,MinorLinkerVersion', 'L,SizeOfCode',
|
| + 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
|
| + 'L,AddressOfEntryPoint', 'L,BaseOfCode',
|
| + 'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
|
| + 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
|
| + 'H,MajorImageVersion', 'H,MinorImageVersion',
|
| + 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
|
| + 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
|
| + 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
|
| + 'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
|
| + 'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
|
| + 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
|
| +
|
| +
|
| + __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
|
| +
|
| + __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
|
| + ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
|
| + 'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
|
| + 'L,PointerToRelocations', 'L,PointerToLinenumbers',
|
| + 'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
|
| + 'L,Characteristics'))
|
| +
|
| + __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
|
| + ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
|
| + 'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))
|
| +
|
| + __IMAGE_IMPORT_DESCRIPTOR_format__ = ('IMAGE_IMPORT_DESCRIPTOR',
|
| + ('L,OriginalFirstThunk,Characteristics',
|
| + 'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))
|
| +
|
| + __IMAGE_EXPORT_DIRECTORY_format__ = ('IMAGE_EXPORT_DIRECTORY',
|
| + ('L,Characteristics',
|
| + 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
|
| + 'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
|
| + 'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))
|
| +
|
| + __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
|
| + ('L,Characteristics',
|
| + 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
|
| + 'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))
|
| +
|
| + __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
|
| + ('L,Name',
|
| + 'L,OffsetToData'))
|
| +
|
| + __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
|
| + ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
|
| +
|
| + __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
|
| + ('H,Length', 'H,ValueLength', 'H,Type' ))
|
| +
|
| + __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
|
| + ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
|
| + 'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
|
| + 'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
|
| +
|
| + __StringFileInfo_format__ = ( 'StringFileInfo',
|
| + ('H,Length', 'H,ValueLength', 'H,Type' ))
|
| +
|
| + __StringTable_format__ = ( 'StringTable',
|
| + ('H,Length', 'H,ValueLength', 'H,Type' ))
|
| +
|
| + __String_format__ = ( 'String',
|
| + ('H,Length', 'H,ValueLength', 'H,Type' ))
|
| +
|
| + __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))
|
| +
|
| + __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
|
| + ('L,ForwarderString,Function,Ordinal,AddressOfData',))
|
| +
|
| + __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
|
| + ('Q,ForwarderString,Function,Ordinal,AddressOfData',))
|
| +
|
| + __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
|
| + ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
|
| + 'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
|
| + 'L,PointerToRawData'))
|
| +
|
| + __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
|
| + ('L,VirtualAddress', 'L,SizeOfBlock') )
|
| +
|
| + __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
|
| + ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
|
| + 'L,AddressOfIndex', 'L,AddressOfCallBacks',
|
| + 'L,SizeOfZeroFill', 'L,Characteristics' ) )
|
| +
|
| + __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
|
| + ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
|
| + 'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
|
| + 'L,SizeOfZeroFill', 'L,Characteristics' ) )
|
| +
|
| + __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
|
| + ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))
|
| +
|
| + __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
|
| + ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )
|
| +
|
| +
|
| + def __init__(self, name=None, data=None, fast_load=None):
|
| +
|
| + self.sections = []
|
| +
|
| + self.__warnings = []
|
| +
|
| + self.PE_TYPE = None
|
| +
|
| + if not name and not data:
|
| + return
|
| +
|
| + # This list will keep track of all the structures created.
|
| + # That will allow for an easy iteration through the list
|
| + # in order to save the modifications made
|
| + self.__structures__ = []
|
| +
|
| + if not fast_load:
|
| + fast_load = globals()['fast_load']
|
| + self.__parse__(name, data, fast_load)
|
| +
|
| +
|
| +
|
| + def __unpack_data__(self, format, data, file_offset):
|
| + """Apply structure format to raw data.
|
| +
|
| + Returns and unpacked structure object if successful, None otherwise.
|
| + """
|
| +
|
| + structure = Structure(format, file_offset=file_offset)
|
| + #if len(data) < structure.sizeof():
|
| + # return None
|
| +
|
| + try:
|
| + structure.__unpack__(data)
|
| + except PEFormatError, err:
|
| + self.__warnings.append(
|
| + 'Corrupt header "%s" at file offset %d. Exception: %s' % (
|
| + format[0], file_offset, str(err)) )
|
| + return None
|
| +
|
| + self.__structures__.append(structure)
|
| +
|
| + return structure
|
| +
|
| +
|
| +
|
| + def __parse__(self, fname, data, fast_load):
|
| + """Parse a Portable Executable file.
|
| +
|
| + Loads a PE file, parsing all its structures and making them available
|
| + through the instance's attributes.
|
| + """
|
| +
|
| + if fname:
|
| + fd = file(fname, 'rb')
|
| + self.__data__ = fd.read()
|
| + fd.close()
|
| + elif data:
|
| + self.__data__ = data
|
| +
|
| +
|
| + self.DOS_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_DOS_HEADER_format__,
|
| + self.__data__, file_offset=0)
|
| +
|
| + if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
|
| + raise PEFormatError('DOS Header magic not found.')
|
| +
|
| + # OC Patch:
|
| + # Check for sane value in e_lfanew
|
| + #
|
| + if self.DOS_HEADER.e_lfanew > len(self.__data__):
|
| + raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
|
| +
|
| + nt_headers_offset = self.DOS_HEADER.e_lfanew
|
| +
|
| + self.NT_HEADERS = self.__unpack_data__(
|
| + self.__IMAGE_NT_HEADERS_format__,
|
| + self.__data__[nt_headers_offset:],
|
| + file_offset = nt_headers_offset)
|
| +
|
| + # We better check the signature right here, before the file screws
|
| + # around with sections:
|
| + # OC Patch:
|
| + # Some malware will cause the Signature value to not exist at all
|
| + if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
|
| + raise PEFormatError('NT Headers not found.')
|
| +
|
| + if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
|
| + raise PEFormatError('Invalid NT Headers signature.')
|
| +
|
| + self.FILE_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_FILE_HEADER_format__,
|
| + self.__data__[nt_headers_offset+4:],
|
| + file_offset = nt_headers_offset+4)
|
| + image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
|
| +
|
| + if not self.FILE_HEADER:
|
| + raise PEFormatError('File Header missing')
|
| +
|
| + # Set the image's flags according the the Characteristics member
|
| + self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
|
| +
|
| + optional_header_offset = \
|
| + nt_headers_offset+4+self.FILE_HEADER.sizeof()
|
| +
|
| + # Note: location of sections can be controlled from PE header:
|
| + sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
|
| +
|
| + self.OPTIONAL_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_OPTIONAL_HEADER_format__,
|
| + self.__data__[optional_header_offset:],
|
| + file_offset = optional_header_offset)
|
| +
|
| + # According to solardesigner's findings for his
|
| + # Tiny PE project, the optional header does not
|
| + # need fields beyond "Subsystem" in order to be
|
| + # loadable by the Windows loader (given that zeroes
|
| + # are acceptable values and the header is loaded
|
| + # in a zeroed memory page)
|
| + # If trying to parse a full Optional Header fails
|
| + # we try to parse it again with some 0 padding
|
| + #
|
| + MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
|
| +
|
| + if ( self.OPTIONAL_HEADER is None and
|
| + len(self.__data__[optional_header_offset:])
|
| + >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
|
| +
|
| + # Add enough zeroes to make up for the unused fields
|
| + #
|
| + padding_length = 128
|
| +
|
| + # Create padding
|
| + #
|
| + padded_data = self.__data__[optional_header_offset:] + (
|
| + '\0' * padding_length)
|
| +
|
| + self.OPTIONAL_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_OPTIONAL_HEADER_format__,
|
| + padded_data,
|
| + file_offset = optional_header_offset)
|
| +
|
| +
|
| + # Check the Magic in the OPTIONAL_HEADER and set the PE file
|
| + # type accordingly
|
| + #
|
| + if self.OPTIONAL_HEADER is not None:
|
| +
|
| + if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
|
| +
|
| + self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
|
| +
|
| + elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
| +
|
| + self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
|
| +
|
| + self.OPTIONAL_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_OPTIONAL_HEADER64_format__,
|
| + self.__data__[optional_header_offset:],
|
| + file_offset = optional_header_offset)
|
| +
|
| + # Again, as explained above, we try to parse
|
| + # a reduced form of the Optional Header which
|
| + # is still valid despite not including all
|
| + # structure members
|
| + #
|
| + MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4
|
| +
|
| + if ( self.OPTIONAL_HEADER is None and
|
| + len(self.__data__[optional_header_offset:])
|
| + >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
|
| +
|
| + padding_length = 128
|
| + padded_data = self.__data__[optional_header_offset:] + (
|
| + '\0' * padding_length)
|
| + self.OPTIONAL_HEADER = self.__unpack_data__(
|
| + self.__IMAGE_OPTIONAL_HEADER64_format__,
|
| + padded_data,
|
| + file_offset = optional_header_offset)
|
| +
|
| +
|
| + if not self.FILE_HEADER:
|
| + raise PEFormatError('File Header missing')
|
| +
|
| +
|
| + # OC Patch:
|
| + # Die gracefully if there is no OPTIONAL_HEADER field
|
| + # 975440f5ad5e2e4a92c4d9a5f22f75c1
|
| + if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
|
| + raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
|
| +
|
| + dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
|
| +
|
| + # Set the Dll Characteristics flags according the the DllCharacteristics member
|
| + self.set_flags(
|
| + self.OPTIONAL_HEADER,
|
| + self.OPTIONAL_HEADER.DllCharacteristics,
|
| + dll_characteristics_flags)
|
| +
|
| +
|
| + self.OPTIONAL_HEADER.DATA_DIRECTORY = []
|
| + #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
|
| + offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
|
| +
|
| +
|
| + self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
|
| + self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
|
| +
|
| +
|
| + # The NumberOfRvaAndSizes is sanitized to stay within
|
| + # reasonable limits so can be casted to an int
|
| + #
|
| + if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
|
| + self.__warnings.append(
|
| + 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
|
| + 'Normal values are never larger than 0x10, the value is: 0x%x' %
|
| + self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
|
| +
|
| + for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
|
| +
|
| + if len(self.__data__[offset:]) == 0:
|
| + break
|
| +
|
| + if len(self.__data__[offset:]) < 8:
|
| + data = self.__data__[offset:]+'\0'*8
|
| + else:
|
| + data = self.__data__[offset:]
|
| +
|
| + dir_entry = self.__unpack_data__(
|
| + self.__IMAGE_DATA_DIRECTORY_format__,
|
| + data,
|
| + file_offset = offset)
|
| +
|
| + if dir_entry is None:
|
| + break
|
| +
|
| + # Would fail if missing an entry
|
| + # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
|
| + try:
|
| + dir_entry.name = DIRECTORY_ENTRY[i]
|
| + except (KeyError, AttributeError):
|
| + break
|
| +
|
| + offset += dir_entry.sizeof()
|
| +
|
| + self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
|
| +
|
| + # If the offset goes outside the optional header,
|
| + # the loop is broken, regardless of how many directories
|
| + # NumberOfRvaAndSizes says there are
|
| + #
|
| + # We assume a normally sized optional header, hence that we do
|
| + # a sizeof() instead of reading SizeOfOptionalHeader.
|
| + # Then we add a default number of drectories times their size,
|
| + # if we go beyond that, we assume the number of directories
|
| + # is wrong and stop processing
|
| + if offset >= (optional_header_offset +
|
| + self.OPTIONAL_HEADER.sizeof() + 8*16) :
|
| +
|
| + break
|
| +
|
| +
|
| + offset = self.parse_sections(sections_offset)
|
| +
|
| + # OC Patch:
|
| + # There could be a problem if there are no raw data sections
|
| + # greater than 0
|
| + # fc91013eb72529da005110a3403541b6 example
|
| + # Should this throw an exception in the minimum header offset
|
| + # can't be found?
|
| + #
|
| + rawDataPointers = [
|
| + s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
|
| +
|
| + if len(rawDataPointers) > 0:
|
| + lowest_section_offset = min(rawDataPointers)
|
| + else:
|
| + lowest_section_offset = None
|
| +
|
| + if not lowest_section_offset or lowest_section_offset<offset:
|
| + self.header = self.__data__[:offset]
|
| + else:
|
| + self.header = self.__data__[:lowest_section_offset]
|
| +
|
| +
|
| + # Check whether the entry point lies within a section
|
| + #
|
| + if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
|
| +
|
| + # Check whether the entry point lies within the file
|
| + #
|
| + ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
|
| + if ep_offset > len(self.__data__):
|
| +
|
| + self.__warnings.append(
|
| + 'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
|
| + 'AddressOfEntryPoint: 0x%x' %
|
| + self.OPTIONAL_HEADER.AddressOfEntryPoint )
|
| +
|
| + else:
|
| +
|
| + self.__warnings.append(
|
| + 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
|
| + 'AddressOfEntryPoint: 0x%x' %
|
| + self.OPTIONAL_HEADER.AddressOfEntryPoint )
|
| +
|
| +
|
| + if not fast_load:
|
| + self.parse_data_directories()
|
| +
|
| +
|
| + def get_warnings(self):
|
| + """Return the list of warnings.
|
| +
|
| + Non-critical problems found when parsing the PE file are
|
| + appended to a list of warnings. This method returns the
|
| + full list.
|
| + """
|
| +
|
| + return self.__warnings
|
| +
|
| +
|
| + def show_warnings(self):
|
| + """Print the list of warnings.
|
| +
|
| + Non-critical problems found when parsing the PE file are
|
| + appended to a list of warnings. This method prints the
|
| + full list to standard output.
|
| + """
|
| +
|
| + for warning in self.__warnings:
|
| + print '>', warning
|
| +
|
| +
|
| + def full_load(self):
|
| + """Process the data directories.
|
| +
|
| + This mathod will load the data directories which might not have
|
| + been loaded if the "fast_load" option was used.
|
| + """
|
| +
|
| + self.parse_data_directories()
|
| +
|
| +
|
| + def write(self, filename=None):
|
| + """Write the PE file.
|
| +
|
| + This function will process all headers and components
|
| + of the PE file and include all changes made (by just
|
| + assigning to attributes in the PE objects) and write
|
| + the changes back to a file whose name is provided as
|
| + an argument. The filename is optional.
|
| + The data to be written to the file will be returned
|
| + as a 'str' object.
|
| + """
|
| +
|
| + file_data = list(self.__data__)
|
| + for struct in self.__structures__:
|
| +
|
| + struct_data = list(struct.__pack__())
|
| + offset = struct.get_file_offset()
|
| +
|
| + file_data[offset:offset+len(struct_data)] = struct_data
|
| +
|
| + if hasattr(self, 'VS_VERSIONINFO'):
|
| + if hasattr(self, 'FileInfo'):
|
| + for entry in self.FileInfo:
|
| + if hasattr(entry, 'StringTable'):
|
| + for st_entry in entry.StringTable:
|
| + for key, entry in st_entry.entries.items():
|
| +
|
| + offsets = st_entry.entries_offsets[key]
|
| + lengths = st_entry.entries_lengths[key]
|
| +
|
| + if len( entry ) > lengths[1]:
|
| +
|
| + uc = zip(
|
| + list(entry[:lengths[1]]), ['\0'] * lengths[1] )
|
| + l = list()
|
| + map(l.extend, uc)
|
| +
|
| + file_data[
|
| + offsets[1] : offsets[1] + lengths[1]*2 ] = l
|
| +
|
| + else:
|
| +
|
| + uc = zip(
|
| + list(entry), ['\0'] * len(entry) )
|
| + l = list()
|
| + map(l.extend, uc)
|
| +
|
| + file_data[
|
| + offsets[1] : offsets[1] + len(entry)*2 ] = l
|
| +
|
| + remainder = lengths[1] - len(entry)
|
| + file_data[
|
| + offsets[1] + len(entry)*2 :
|
| + offsets[1] + lengths[1]*2 ] = [
|
| + u'\0' ] * remainder*2
|
| +
|
| + new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )
|
| +
|
| + if filename:
|
| + f = file(filename, 'wb+')
|
| + f.write(new_file_data)
|
| + f.close()
|
| +
|
| + return new_file_data
|
| +
|
| +
|
| +
|
| + def parse_sections(self, offset):
|
| + """Fetch the PE file sections.
|
| +
|
| + The sections will be readily available in the "sections" attribute.
|
| + Its attributes will contain all the section information plus "data"
|
| + a buffer containing the section's data.
|
| +
|
| + The "Characteristics" member will be processed and attributes
|
| + representing the section characteristics (with the 'IMAGE_SCN_'
|
| + string trimmed from the constant's names) will be added to the
|
| + section instance.
|
| +
|
| + Refer to the SectionStructure class for additional info.
|
| + """
|
| +
|
| + self.sections = []
|
| +
|
| + for i in xrange(self.FILE_HEADER.NumberOfSections):
|
| + section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
|
| + if not section:
|
| + break
|
| + section_offset = offset + section.sizeof() * i
|
| + section.set_file_offset(section_offset)
|
| + section.__unpack__(self.__data__[section_offset:])
|
| + self.__structures__.append(section)
|
| +
|
| + if section.SizeOfRawData > len(self.__data__):
|
| + self.__warnings.append(
|
| + ('Error parsing section %d. ' % i) +
|
| + 'SizeOfRawData is larger than file.')
|
| +
|
| + if section.PointerToRawData > len(self.__data__):
|
| + self.__warnings.append(
|
| + ('Error parsing section %d. ' % i) +
|
| + 'PointerToRawData points beyond the end of the file.')
|
| +
|
| + if section.Misc_VirtualSize > 0x10000000:
|
| + self.__warnings.append(
|
| + ('Suspicious value found parsing section %d. ' % i) +
|
| + 'VirtualSize is extremely large > 256MiB.')
|
| +
|
| + if section.VirtualAddress > 0x10000000:
|
| + self.__warnings.append(
|
| + ('Suspicious value found parsing section %d. ' % i) +
|
| + 'VirtualAddress is beyond 0x10000000.')
|
| +
|
| + #
|
| + # Some packer used a non-aligned PointerToRawData in the sections,
|
| + # which causes several common tools not to load the section data
|
| + # properly as they blindly read from the indicated offset.
|
| + # It seems that Windows will round the offset down to the largest
|
| + # offset multiple of FileAlignment which is smaller than
|
| + # PointerToRawData. The following code will do the same.
|
| + #
|
| +
|
| + #alignment = self.OPTIONAL_HEADER.FileAlignment
|
| + section_data_start = section.PointerToRawData
|
| +
|
| + if ( self.OPTIONAL_HEADER.FileAlignment != 0 and
|
| + (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
|
| + self.__warnings.append(
|
| + ('Error parsing section %d. ' % i) +
|
| + 'Suspicious value for FileAlignment in the Optional Header. ' +
|
| + 'Normally the PointerToRawData entry of the sections\' structures ' +
|
| + 'is a multiple of FileAlignment, this might imply the file ' +
|
| + 'is trying to confuse tools which parse this incorrectly')
|
| +
|
| + section_data_end = section_data_start+section.SizeOfRawData
|
| + section.set_data(self.__data__[section_data_start:section_data_end])
|
| +
|
| + section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
|
| +
|
| + # Set the section's flags according the the Characteristics member
|
| + self.set_flags(section, section.Characteristics, section_flags)
|
| +
|
| + if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and
|
| + section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
|
| +
|
| + self.__warnings.append(
|
| + ('Suspicious flags set for section %d. ' % i) +
|
| + 'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
|
| + 'This might indicate a packed executable.')
|
| +
|
| + self.sections.append(section)
|
| +
|
| + if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
|
| + return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
|
| + else:
|
| + return offset
|
| +
|
| +
|
| + def retrieve_flags(self, flag_dict, flag_filter):
|
| + """Read the flags from a dictionary and return them in a usable form.
|
| +
|
| + Will return a list of (flag, value) for all flags in "flag_dict"
|
| + matching the filter "flag_filter".
|
| + """
|
| +
|
| + return [(f[0], f[1]) for f in flag_dict.items() if
|
| + isinstance(f[0], str) and f[0].startswith(flag_filter)]
|
| +
|
| +
|
| + def set_flags(self, obj, flag_field, flags):
|
| + """Will process the flags and set attributes in the object accordingly.
|
| +
|
| + The object "obj" will gain attritutes named after the flags provided in
|
| + "flags" and valued True/False, matching the results of applyin each
|
| + flag value from "flags" to flag_field.
|
| + """
|
| +
|
| + for flag in flags:
|
| + if flag[1] & flag_field:
|
| + setattr(obj, flag[0], True)
|
| + else:
|
| + setattr(obj, flag[0], False)
|
| +
|
| +
|
| +
|
| + def parse_data_directories(self):
|
| + """Parse and process the PE file's data directories."""
|
| +
|
| + directory_parsing = (
|
| + ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
|
| + ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
|
| + ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
|
| +
|
| + for entry in directory_parsing:
|
| + # OC Patch:
|
| + #
|
| + try:
|
| + dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
|
| + DIRECTORY_ENTRY[entry[0]]]
|
| + except IndexError:
|
| + break
|
| + if dir_entry.VirtualAddress:
|
| + value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
|
| + if value:
|
| + setattr(self, entry[0][6:], value)
|
| +
|
| +
|
| + def parse_directory_bound_imports(self, rva, size):
|
| + """"""
|
| +
|
| + bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
|
| + bnd_descr_size = bnd_descr.sizeof()
|
| + start = rva
|
| +
|
| + bound_imports = []
|
| + while True:
|
| +
|
| + bnd_descr = self.__unpack_data__(
|
| + self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
|
| + self.__data__[rva:rva+bnd_descr_size],
|
| + file_offset = rva)
|
| + if bnd_descr is None:
|
| + # If can't parse directory then silently return.
|
| + # This directory does not necesarily have to be valid to
|
| + # still have a valid PE file
|
| +
|
| + self.__warnings.append(
|
| + 'The Bound Imports directory exists but can\'t be parsed.')
|
| +
|
| + return
|
| +
|
| + if bnd_descr.all_zeroes():
|
| + break
|
| +
|
| + rva += bnd_descr.sizeof()
|
| +
|
| + forwarder_refs = []
|
| + for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
|
| + # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
|
| + # IMAGE_BOUND_FORWARDER_REF have the same size.
|
| + bnd_frwd_ref = self.__unpack_data__(
|
| + self.__IMAGE_BOUND_FORWARDER_REF_format__,
|
| + self.__data__[rva:rva+bnd_descr_size],
|
| + file_offset = rva)
|
| + # OC Patch:
|
| + if not bnd_frwd_ref:
|
| + raise PEFormatError(
|
| + "IMAGE_BOUND_FORWARDER_REF cannot be read")
|
| + rva += bnd_frwd_ref.sizeof()
|
| +
|
| + name_str = self.get_string_from_data(
|
| + start+bnd_frwd_ref.OffsetModuleName, self.__data__)
|
| +
|
| + if not name_str:
|
| + break
|
| + forwarder_refs.append(BoundImportRefData(
|
| + struct = bnd_frwd_ref,
|
| + name = name_str))
|
| +
|
| + name_str = self.get_string_from_data(
|
| + start+bnd_descr.OffsetModuleName, self.__data__)
|
| +
|
| + if not name_str:
|
| + break
|
| + bound_imports.append(
|
| + BoundImportDescData(
|
| + struct = bnd_descr,
|
| + name = name_str,
|
| + entries = forwarder_refs))
|
| +
|
| + return bound_imports
|
| +
|
| +
|
| + def parse_directory_tls(self, rva, size):
|
| + """"""
|
| +
|
| + if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
|
| + format = self.__IMAGE_TLS_DIRECTORY_format__
|
| +
|
| + elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
| + format = self.__IMAGE_TLS_DIRECTORY64_format__
|
| +
|
| + tls_struct = self.__unpack_data__(
|
| + format,
|
| + self.get_data(rva),
|
| + file_offset = self.get_offset_from_rva(rva))
|
| +
|
| + if not tls_struct:
|
| + return None
|
| +
|
| + return TlsData( struct = tls_struct )
|
| +
|
| +
|
| + def parse_relocations_directory(self, rva, size):
|
| + """"""
|
| +
|
| + rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
|
| + rlc_size = rlc.sizeof()
|
| + end = rva+size
|
| +
|
| + relocations = []
|
| + while rva<end:
|
| +
|
| + # OC Patch:
|
| + # Malware that has bad rva entries will cause an error.
|
| + # Just continue on after an exception
|
| + #
|
| + try:
|
| + rlc = self.__unpack_data__(
|
| + self.__IMAGE_BASE_RELOCATION_format__,
|
| + self.get_data(rva, rlc_size),
|
| + file_offset = self.get_offset_from_rva(rva) )
|
| + except PEFormatError:
|
| + self.__warnings.append(
|
| + 'Invalid relocation information. Can\'t read ' +
|
| + 'data at RVA: 0x%x' % rva)
|
| + rlc = None
|
| +
|
| + if not rlc:
|
| + break
|
| +
|
| + reloc_entries = self.parse_relocations(
|
| + rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
|
| +
|
| + relocations.append(
|
| + BaseRelocationData(
|
| + struct = rlc,
|
| + entries = reloc_entries))
|
| +
|
| + if not rlc.SizeOfBlock:
|
| + break
|
| + rva += rlc.SizeOfBlock
|
| +
|
| + return relocations
|
| +
|
| +
|
| + def parse_relocations(self, data_rva, rva, size):
|
| + """"""
|
| +
|
| + data = self.get_data(data_rva, size)
|
| +
|
| + entries = []
|
| + for idx in xrange(len(data)/2):
|
| + word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
|
| + reloc_type = (word>>12)
|
| + reloc_offset = (word&0x0fff)
|
| + entries.append(
|
| + RelocationData(
|
| + type = reloc_type,
|
| + rva = reloc_offset+rva))
|
| +
|
| + return entries
|
| +
|
| +
|
| + def parse_debug_directory(self, rva, size):
|
| + """"""
|
| +
|
| + dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
|
| + dbg_size = dbg.sizeof()
|
| +
|
| + debug = []
|
| + for idx in xrange(size/dbg_size):
|
| + try:
|
| + data = self.get_data(rva+dbg_size*idx, dbg_size)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Invalid debug information. Can\'t read ' +
|
| + 'data at RVA: 0x%x' % rva)
|
| + return None
|
| +
|
| + dbg = self.__unpack_data__(
|
| + self.__IMAGE_DEBUG_DIRECTORY_format__,
|
| + data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
|
| +
|
| + if not dbg:
|
| + return None
|
| +
|
| + debug.append(
|
| + DebugData(
|
| + struct = dbg))
|
| +
|
| + return debug
|
| +
|
| +
|
| + def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
|
| + """Parse the resources directory.
|
| +
|
| + Given the rva of the resources directory, it will process all
|
| + its entries.
|
| +
|
| + The root will have the corresponding member of its structure,
|
| + IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
|
| + entries in the directory.
|
| +
|
| + Those entries will have, correspondingly, all the structure's
|
| + members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
|
| + "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
|
| + representing upper layers of the tree. This one will also have
|
| + an 'entries' attribute, pointing to the 3rd, and last, level.
|
| + Another directory with more entries. Those last entries will
|
| + have a new atribute (both 'leaf' or 'data_entry' can be used to
|
| + access it). This structure finally points to the resource data.
|
| + All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
|
| + are available as its attributes.
|
| + """
|
| +
|
| + # OC Patch:
|
| + original_rva = rva
|
| +
|
| + if base_rva is None:
|
| + base_rva = rva
|
| +
|
| + resources_section = self.get_section_by_rva(rva)
|
| +
|
| + try:
|
| + # If the RVA is invalid all would blow up. Some EXEs seem to be
|
| + # specially nasty and have an invalid RVA.
|
| + data = self.get_data(rva)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Invalid resources directory. Can\'t read ' +
|
| + 'directory data at RVA: 0x%x' % rva)
|
| + return None
|
| +
|
| + # Get the resource directory structure, that is, the header
|
| + # of the table preceding the actual entries
|
| + #
|
| + resource_dir = self.__unpack_data__(
|
| + self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
|
| + file_offset = self.get_offset_from_rva(rva) )
|
| + if resource_dir is None:
|
| + # If can't parse resources directory then silently return.
|
| + # This directory does not necesarily have to be valid to
|
| + # still have a valid PE file
|
| + self.__warnings.append(
|
| + 'Invalid resources directory. Can\'t parse ' +
|
| + 'directory data at RVA: 0x%x' % rva)
|
| + return None
|
| +
|
| + dir_entries = []
|
| +
|
| + # Advance the rva to the positon immediately following the directory
|
| + # table header and pointing to the first entry in the table
|
| + #
|
| + rva += resource_dir.sizeof()
|
| +
|
| + number_of_entries = (
|
| + resource_dir.NumberOfNamedEntries +
|
| + resource_dir.NumberOfIdEntries )
|
| +
|
| + strings_to_postprocess = list()
|
| +
|
| + for idx in xrange(number_of_entries):
|
| +
|
| + res = self.parse_resource_entry(rva)
|
| + if res is None:
|
| + self.__warnings.append(
|
| + 'Error parsing the resources directory, ' +
|
| + 'Entry %d is invalid, RVA = 0x%x. ' %
|
| + (idx, rva) )
|
| + break
|
| +
|
| +
|
| + entry_name = None
|
| + entry_id = None
|
| +
|
| + # If all named entries have been processed, only Id ones
|
| + # remain
|
| +
|
| + if idx >= resource_dir.NumberOfNamedEntries:
|
| + entry_id = res.Name
|
| + else:
|
| + ustr_offset = base_rva+res.NameOffset
|
| + try:
|
| + #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
|
| + entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
|
| + strings_to_postprocess.append(entry_name)
|
| +
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the resources directory, ' +
|
| + 'attempting to read entry name. ' +
|
| + 'Can\'t read unicode string at offset 0x%x' %
|
| + (ustr_offset) )
|
| +
|
| +
|
| + if res.DataIsDirectory:
|
| + # OC Patch:
|
| + #
|
| + # One trick malware can do is to recursively reference
|
| + # the next directory. This causes hilarity to ensue when
|
| + # trying to parse everything correctly.
|
| + # If the original RVA given to this function is equal to
|
| + # the next one to parse, we assume that it's a trick.
|
| + # Instead of raising a PEFormatError this would skip some
|
| + # reasonable data so we just break.
|
| + #
|
| + # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
|
| + if original_rva == (base_rva + res.OffsetToDirectory):
|
| +
|
| + break
|
| +
|
| + else:
|
| + entry_directory = self.parse_resources_directory(
|
| + base_rva+res.OffsetToDirectory,
|
| + base_rva=base_rva, level = level+1)
|
| +
|
| + if not entry_directory:
|
| + break
|
| + dir_entries.append(
|
| + ResourceDirEntryData(
|
| + struct = res,
|
| + name = entry_name,
|
| + id = entry_id,
|
| + directory = entry_directory))
|
| +
|
| + else:
|
| + struct = self.parse_resource_data_entry(
|
| + base_rva + res.OffsetToDirectory)
|
| +
|
| + if struct:
|
| + entry_data = ResourceDataEntryData(
|
| + struct = struct,
|
| + lang = res.Name & 0xff,
|
| + sublang = (res.Name>>8) & 0xff)
|
| +
|
| + dir_entries.append(
|
| + ResourceDirEntryData(
|
| + struct = res,
|
| + name = entry_name,
|
| + id = entry_id,
|
| + data = entry_data))
|
| +
|
| + else:
|
| + break
|
| +
|
| +
|
| +
|
| + # Check if this entry contains version information
|
| + #
|
| + if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
|
| + if len(dir_entries)>0:
|
| + last_entry = dir_entries[-1]
|
| +
|
| + rt_version_struct = None
|
| + try:
|
| + rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
|
| + except:
|
| + # Maybe a malformed directory structure...?
|
| + # Lets ignore it
|
| + pass
|
| +
|
| + if rt_version_struct is not None:
|
| + self.parse_version_information(rt_version_struct)
|
| +
|
| + rva += res.sizeof()
|
| +
|
| +
|
| + string_rvas = [s.get_rva() for s in strings_to_postprocess]
|
| + string_rvas.sort()
|
| +
|
| + for idx, s in enumerate(strings_to_postprocess):
|
| + s.render_pascal_16()
|
| +
|
| +
|
| + resource_directory_data = ResourceDirData(
|
| + struct = resource_dir,
|
| + entries = dir_entries)
|
| +
|
| + return resource_directory_data
|
| +
|
| +
|
| + def parse_resource_data_entry(self, rva):
|
| + """Parse a data entry from the resources directory."""
|
| +
|
| + try:
|
| + # If the RVA is invalid all would blow up. Some EXEs seem to be
|
| + # specially nasty and have an invalid RVA.
|
| + data = self.get_data(rva)
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing a resource directory data entry, ' +
|
| + 'the RVA is invalid: 0x%x' % ( rva ) )
|
| + return None
|
| +
|
| + data_entry = self.__unpack_data__(
|
| + self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
|
| + file_offset = self.get_offset_from_rva(rva) )
|
| +
|
| + return data_entry
|
| +
|
| +
|
| + def parse_resource_entry(self, rva):
|
| + """Parse a directory entry from the resources directory."""
|
| +
|
| + resource = self.__unpack_data__(
|
| + self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
|
| + file_offset = self.get_offset_from_rva(rva) )
|
| +
|
| + if resource is None:
|
| + return None
|
| +
|
| + #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
|
| + resource.NameOffset = resource.Name & 0x7FFFFFFFL
|
| +
|
| + resource.__pad = resource.Name & 0xFFFF0000L
|
| + resource.Id = resource.Name & 0x0000FFFFL
|
| +
|
| + resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
|
| + resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
|
| +
|
| + return resource
|
| +
|
| +
|
| + def parse_version_information(self, version_struct):
|
| + """Parse version information structure.
|
| +
|
| + The date will be made available in three attributes of the PE object.
|
| +
|
| + VS_VERSIONINFO will contain the first three fields of the main structure:
|
| + 'Length', 'ValueLength', and 'Type'
|
| +
|
| + VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes:
|
| + 'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
|
| + 'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
|
| + 'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
|
| +
|
| + FileInfo is a list of all StringFileInfo and VarFileInfo structures.
|
| +
|
| + StringFileInfo structures will have a list as an attribute named 'StringTable'
|
| + containing all the StringTable structures. Each of those structures contains a
|
| + dictionary 'entries' with all the key/value version information string pairs.
|
| +
|
| + VarFileInfo structures will have a list as an attribute named 'Var' containing
|
| + all Var structures. Each Var structure will have a dictionary as an attribute
|
| + named 'entry' which will contain the name and value of the Var.
|
| + """
|
| +
|
| +
|
| + # Retrieve the data for the version info resource
|
| + #
|
| + start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
|
| + raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
|
| +
|
| +
|
| + # Map the main structure and the subsequent string
|
| + #
|
| + versioninfo_struct = self.__unpack_data__(
|
| + self.__VS_VERSIONINFO_format__, raw_data,
|
| + file_offset = start_offset )
|
| +
|
| + if versioninfo_struct is None:
|
| + return
|
| +
|
| + ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
|
| + try:
|
| + versioninfo_string = self.get_string_u_at_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read VS_VERSION_INFO string. Can\'t ' +
|
| + 'read unicode string at offset 0x%x' % (
|
| + ustr_offset ) )
|
| +
|
| + versioninfo_string = None
|
| +
|
| + # If the structure does not contain the expected name, it's assumed to be invalid
|
| + #
|
| + if versioninfo_string != u'VS_VERSION_INFO':
|
| +
|
| + self.__warnings.append('Invalid VS_VERSION_INFO block')
|
| + return
|
| +
|
| +
|
| + # Set the PE object's VS_VERSIONINFO to this one
|
| + #
|
| + self.VS_VERSIONINFO = versioninfo_struct
|
| +
|
| + # The the Key attribute to point to the unicode string identifying the structure
|
| + #
|
| + self.VS_VERSIONINFO.Key = versioninfo_string
|
| +
|
| +
|
| + # Process the fixed version information, get the offset and structure
|
| + #
|
| + fixedfileinfo_offset = self.dword_align(
|
| + versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
|
| + version_struct.OffsetToData)
|
| + fixedfileinfo_struct = self.__unpack_data__(
|
| + self.__VS_FIXEDFILEINFO_format__,
|
| + raw_data[fixedfileinfo_offset:],
|
| + file_offset = start_offset+fixedfileinfo_offset )
|
| +
|
| + if not fixedfileinfo_struct:
|
| + return
|
| +
|
| +
|
| + # Set the PE object's VS_FIXEDFILEINFO to this one
|
| + #
|
| + self.VS_FIXEDFILEINFO = fixedfileinfo_struct
|
| +
|
| +
|
| + # Start parsing all the StringFileInfo and VarFileInfo structures
|
| + #
|
| +
|
| + # Get the first one
|
| + #
|
| + stringfileinfo_offset = self.dword_align(
|
| + fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
|
| + version_struct.OffsetToData)
|
| + original_stringfileinfo_offset = stringfileinfo_offset
|
| +
|
| +
|
| + # Set the PE object's attribute that will contain them all.
|
| + #
|
| + self.FileInfo = list()
|
| +
|
| +
|
| + while True:
|
| +
|
| + # Process the StringFileInfo/VarFileInfo struct
|
| + #
|
| + stringfileinfo_struct = self.__unpack_data__(
|
| + self.__StringFileInfo_format__,
|
| + raw_data[stringfileinfo_offset:],
|
| + file_offset = start_offset+stringfileinfo_offset )
|
| +
|
| + if stringfileinfo_struct is None:
|
| + self.__warnings.append(
|
| + 'Error parsing StringFileInfo/VarFileInfo struct' )
|
| + return None
|
| +
|
| + # Get the subsequent string defining the structure.
|
| + #
|
| + ustr_offset = ( version_struct.OffsetToData +
|
| + stringfileinfo_offset + versioninfo_struct.sizeof() )
|
| + try:
|
| + stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read StringFileInfo string. Can\'t ' +
|
| + 'read unicode string at offset 0x%x' % ( ustr_offset ) )
|
| + break
|
| +
|
| + # Set such string as the Key attribute
|
| + #
|
| + stringfileinfo_struct.Key = stringfileinfo_string
|
| +
|
| +
|
| + # Append the structure to the PE object's list
|
| + #
|
| + self.FileInfo.append(stringfileinfo_struct)
|
| +
|
| +
|
| + # Parse a StringFileInfo entry
|
| + #
|
| + if stringfileinfo_string == u'StringFileInfo':
|
| +
|
| + if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
|
| +
|
| + stringtable_offset = self.dword_align(
|
| + stringfileinfo_offset + stringfileinfo_struct.sizeof() +
|
| + 2*(len(stringfileinfo_string)+1),
|
| + version_struct.OffsetToData)
|
| +
|
| + stringfileinfo_struct.StringTable = list()
|
| +
|
| + # Process the String Table entries
|
| + #
|
| + while True:
|
| + stringtable_struct = self.__unpack_data__(
|
| + self.__StringTable_format__,
|
| + raw_data[stringtable_offset:],
|
| + file_offset = start_offset+stringtable_offset )
|
| +
|
| + if not stringtable_struct:
|
| + break
|
| +
|
| + ustr_offset = ( version_struct.OffsetToData + stringtable_offset +
|
| + stringtable_struct.sizeof() )
|
| + try:
|
| + stringtable_string = self.get_string_u_at_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read StringTable string. Can\'t ' +
|
| + 'read unicode string at offset 0x%x' % ( ustr_offset ) )
|
| + break
|
| +
|
| + stringtable_struct.LangID = stringtable_string
|
| + stringtable_struct.entries = dict()
|
| + stringtable_struct.entries_offsets = dict()
|
| + stringtable_struct.entries_lengths = dict()
|
| + stringfileinfo_struct.StringTable.append(stringtable_struct)
|
| +
|
| + entry_offset = self.dword_align(
|
| + stringtable_offset + stringtable_struct.sizeof() +
|
| + 2*(len(stringtable_string)+1),
|
| + version_struct.OffsetToData)
|
| +
|
| + # Process all entries in the string table
|
| + #
|
| +
|
| + while entry_offset < stringtable_offset + stringtable_struct.Length:
|
| +
|
| + string_struct = self.__unpack_data__(
|
| + self.__String_format__, raw_data[entry_offset:],
|
| + file_offset = start_offset+entry_offset )
|
| +
|
| + if not string_struct:
|
| + break
|
| +
|
| + ustr_offset = ( version_struct.OffsetToData + entry_offset +
|
| + string_struct.sizeof() )
|
| + try:
|
| + key = self.get_string_u_at_rva( ustr_offset )
|
| + key_offset = self.get_offset_from_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read StringTable Key string. Can\'t ' +
|
| + 'read unicode string at offset 0x%x' % ( ustr_offset ) )
|
| + break
|
| +
|
| + value_offset = self.dword_align(
|
| + 2*(len(key)+1) + entry_offset + string_struct.sizeof(),
|
| + version_struct.OffsetToData)
|
| +
|
| + ustr_offset = version_struct.OffsetToData + value_offset
|
| + try:
|
| + value = self.get_string_u_at_rva( ustr_offset,
|
| + max_length = string_struct.ValueLength )
|
| + value_offset = self.get_offset_from_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read StringTable Value string. ' +
|
| + 'Can\'t read unicode string at offset 0x%x' % (
|
| + ustr_offset ) )
|
| + break
|
| +
|
| + if string_struct.Length == 0:
|
| + entry_offset = stringtable_offset + stringtable_struct.Length
|
| + else:
|
| + entry_offset = self.dword_align(
|
| + string_struct.Length+entry_offset, version_struct.OffsetToData)
|
| +
|
| + key_as_char = []
|
| + for c in key:
|
| + if ord(c)>128:
|
| + key_as_char.append('\\x%02x' %ord(c))
|
| + else:
|
| + key_as_char.append(c)
|
| +
|
| + key_as_char = ''.join(key_as_char)
|
| +
|
| + setattr(stringtable_struct, key_as_char, value)
|
| + stringtable_struct.entries[key] = value
|
| + stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
|
| + stringtable_struct.entries_lengths[key] = (len(key), len(value))
|
| +
|
| +
|
| + stringtable_offset = self.dword_align(
|
| + stringtable_struct.Length + stringtable_offset,
|
| + version_struct.OffsetToData)
|
| + if stringtable_offset >= stringfileinfo_struct.Length:
|
| + break
|
| +
|
| + # Parse a VarFileInfo entry
|
| + #
|
| + elif stringfileinfo_string == u'VarFileInfo':
|
| +
|
| + varfileinfo_struct = stringfileinfo_struct
|
| + varfileinfo_struct.name = 'VarFileInfo'
|
| +
|
| + if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
|
| +
|
| + var_offset = self.dword_align(
|
| + stringfileinfo_offset + varfileinfo_struct.sizeof() +
|
| + 2*(len(stringfileinfo_string)+1),
|
| + version_struct.OffsetToData)
|
| +
|
| + varfileinfo_struct.Var = list()
|
| +
|
| + # Process all entries
|
| + #
|
| +
|
| + while True:
|
| + var_struct = self.__unpack_data__(
|
| + self.__Var_format__,
|
| + raw_data[var_offset:],
|
| + file_offset = start_offset+var_offset )
|
| +
|
| + if not var_struct:
|
| + break
|
| +
|
| + ustr_offset = ( version_struct.OffsetToData + var_offset +
|
| + var_struct.sizeof() )
|
| + try:
|
| + var_string = self.get_string_u_at_rva( ustr_offset )
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the version information, ' +
|
| + 'attempting to read VarFileInfo Var string. ' +
|
| + 'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
|
| + break
|
| +
|
| +
|
| + varfileinfo_struct.Var.append(var_struct)
|
| +
|
| + varword_offset = self.dword_align(
|
| + 2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
|
| + version_struct.OffsetToData)
|
| + orig_varword_offset = varword_offset
|
| +
|
| + while varword_offset < orig_varword_offset + var_struct.ValueLength:
|
| + word1 = self.get_word_from_data(
|
| + raw_data[varword_offset:varword_offset+2], 0)
|
| + word2 = self.get_word_from_data(
|
| + raw_data[varword_offset+2:varword_offset+4], 0)
|
| + varword_offset += 4
|
| +
|
| + var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}
|
| +
|
| + var_offset = self.dword_align(
|
| + var_offset+var_struct.Length, version_struct.OffsetToData)
|
| +
|
| + if var_offset <= var_offset+var_struct.Length:
|
| + break
|
| +
|
| +
|
| +
|
| + # Increment and align the offset
|
| + #
|
| + stringfileinfo_offset = self.dword_align(
|
| + stringfileinfo_struct.Length+stringfileinfo_offset,
|
| + version_struct.OffsetToData)
|
| +
|
| + # Check if all the StringFileInfo and VarFileInfo items have been processed
|
| + #
|
| + if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
|
| + break
|
| +
|
| +
|
| +
|
| + def parse_export_directory(self, rva, size):
|
| + """Parse the export directory.
|
| +
|
| + Given the rva of the export directory, it will process all
|
| + its entries.
|
| +
|
| + The exports will be made available through a list "exports"
|
| + containing a tuple with the following elements:
|
| +
|
| + (ordinal, symbol_address, symbol_name)
|
| +
|
| + And also through a dicionary "exports_by_ordinal" whose keys
|
| + will be the ordinals and the values tuples of the from:
|
| +
|
| + (symbol_address, symbol_name)
|
| +
|
| + The symbol addresses are relative, not absolute.
|
| + """
|
| +
|
| + try:
|
| + export_dir = self.__unpack_data__(
|
| + self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
|
| + file_offset = self.get_offset_from_rva(rva) )
|
| + except PEFormatError:
|
| + self.__warnings.append(
|
| + 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
|
| + return
|
| +
|
| + if not export_dir:
|
| + return
|
| +
|
| + try:
|
| + address_of_names = self.get_data(
|
| + export_dir.AddressOfNames, export_dir.NumberOfNames*4)
|
| + address_of_name_ordinals = self.get_data(
|
| + export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
|
| + address_of_functions = self.get_data(
|
| + export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
|
| + except PEFormatError:
|
| + self.__warnings.append(
|
| + 'Error parsing export directory at RVA: 0x%x' % ( rva ) )
|
| + return
|
| +
|
| + exports = []
|
| +
|
| + for i in xrange(export_dir.NumberOfNames):
|
| +
|
| +
|
| + symbol_name = self.get_string_at_rva(
|
| + self.get_dword_from_data(address_of_names, i))
|
| +
|
| + symbol_ordinal = self.get_word_from_data(
|
| + address_of_name_ordinals, i)
|
| +
|
| +
|
| + if symbol_ordinal*4<len(address_of_functions):
|
| + symbol_address = self.get_dword_from_data(
|
| + address_of_functions, symbol_ordinal)
|
| + else:
|
| + # Corrupt? a bad pointer... we assume it's all
|
| + # useless, no exports
|
| + return None
|
| +
|
| + # If the funcion's rva points within the export directory
|
| + # it will point to a string with the forwarded symbol's string
|
| + # instead of pointing the the function start address.
|
| +
|
| + if symbol_address>=rva and symbol_address<rva+size:
|
| + forwarder_str = self.get_string_at_rva(symbol_address)
|
| + else:
|
| + forwarder_str = None
|
| +
|
| +
|
| + exports.append(
|
| + ExportData(
|
| + ordinal = export_dir.Base+symbol_ordinal,
|
| + address = symbol_address,
|
| + name = symbol_name,
|
| + forwarder = forwarder_str))
|
| +
|
| + ordinals = [exp.ordinal for exp in exports]
|
| +
|
| + for idx in xrange(export_dir.NumberOfFunctions):
|
| +
|
| + if not idx+export_dir.Base in ordinals:
|
| + symbol_address = self.get_dword_from_data(
|
| + address_of_functions,
|
| + idx)
|
| +
|
| + #
|
| + # Checking for forwarder again.
|
| + #
|
| + if symbol_address>=rva and symbol_address<rva+size:
|
| + forwarder_str = self.get_string_at_rva(symbol_address)
|
| + else:
|
| + forwarder_str = None
|
| +
|
| + exports.append(
|
| + ExportData(
|
| + ordinal = export_dir.Base+idx,
|
| + address = symbol_address,
|
| + name = None,
|
| + forwarder = forwarder_str))
|
| +
|
| + return ExportDirData(
|
| + struct = export_dir,
|
| + symbols = exports)
|
| +
|
| +
|
| + def dword_align(self, offset, base):
|
| + offset += base
|
| + return (offset+3) - ((offset+3)%4) - base
|
| +
|
| +
|
| +
|
| + def parse_delay_import_directory(self, rva, size):
|
| + """Walk and parse the delay import directory."""
|
| +
|
| + import_descs = []
|
| + while True:
|
| + try:
|
| + # If the RVA is invalid all would blow up. Some PEs seem to be
|
| + # specially nasty and have an invalid RVA.
|
| + data = self.get_data(rva)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
|
| + break
|
| +
|
| + import_desc = self.__unpack_data__(
|
| + self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
|
| + data, file_offset = self.get_offset_from_rva(rva) )
|
| +
|
| +
|
| + # If the structure is all zeores, we reached the end of the list
|
| + if not import_desc or import_desc.all_zeroes():
|
| + break
|
| +
|
| +
|
| + rva += import_desc.sizeof()
|
| +
|
| + try:
|
| + import_data = self.parse_imports(
|
| + import_desc.pINT,
|
| + import_desc.pIAT,
|
| + None)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Error parsing the Delay import directory. ' +
|
| + 'Invalid import data at RVA: 0x%x' % ( rva ) )
|
| + break
|
| +
|
| + if not import_data:
|
| + continue
|
| +
|
| +
|
| + dll = self.get_string_at_rva(import_desc.szName)
|
| + if dll:
|
| + import_descs.append(
|
| + ImportDescData(
|
| + struct = import_desc,
|
| + imports = import_data,
|
| + dll = dll))
|
| +
|
| + return import_descs
|
| +
|
| +
|
| +
|
| + def parse_import_directory(self, rva, size):
|
| + """Walk and parse the import directory."""
|
| +
|
| + import_descs = []
|
| + while True:
|
| + try:
|
| + # If the RVA is invalid all would blow up. Some EXEs seem to be
|
| + # specially nasty and have an invalid RVA.
|
| + data = self.get_data(rva)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
|
| + break
|
| +
|
| + import_desc = self.__unpack_data__(
|
| + self.__IMAGE_IMPORT_DESCRIPTOR_format__,
|
| + data, file_offset = self.get_offset_from_rva(rva) )
|
| +
|
| + # If the structure is all zeores, we reached the end of the list
|
| + if not import_desc or import_desc.all_zeroes():
|
| + break
|
| +
|
| + rva += import_desc.sizeof()
|
| +
|
| + try:
|
| + import_data = self.parse_imports(
|
| + import_desc.OriginalFirstThunk,
|
| + import_desc.FirstThunk,
|
| + import_desc.ForwarderChain)
|
| + except PEFormatError, excp:
|
| + self.__warnings.append(
|
| + 'Error parsing the Import directory. ' +
|
| + 'Invalid Import data at RVA: 0x%x' % ( rva ) )
|
| + break
|
| + #raise excp
|
| +
|
| + if not import_data:
|
| + continue
|
| +
|
| + dll = self.get_string_at_rva(import_desc.Name)
|
| + if dll:
|
| + import_descs.append(
|
| + ImportDescData(
|
| + struct = import_desc,
|
| + imports = import_data,
|
| + dll = dll))
|
| +
|
| + return import_descs
|
| +
|
| +
|
| +
|
| + def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
|
| + """Parse the imported symbols.
|
| +
|
| + It will fill a list, which will be avalable as the dictionary
|
| + attribute "imports". Its keys will be the DLL names and the values
|
| + all the symbols imported from that object.
|
| + """
|
| +
|
| + imported_symbols = []
|
| + imports_section = self.get_section_by_rva(first_thunk)
|
| + if not imports_section:
|
| + raise PEFormatError, 'Invalid/corrupt imports.'
|
| +
|
| +
|
| + # Import Lookup Table. Contains ordinals or pointers to strings.
|
| + ilt = self.get_import_table(original_first_thunk)
|
| + # Import Address Table. May have identical content to ILT if
|
| + # PE file is not bounded, Will contain the address of the
|
| + # imported symbols once the binary is loaded or if it is already
|
| + # bound.
|
| + iat = self.get_import_table(first_thunk)
|
| +
|
| + # OC Patch:
|
| + # Would crash if iat or ilt had None type
|
| + if not iat and not ilt:
|
| + raise PEFormatError(
|
| + 'Invalid Import Table information. ' +
|
| + 'Both ILT and IAT appear to be broken.')
|
| +
|
| + if not iat and ilt:
|
| + table = ilt
|
| + elif iat and not ilt:
|
| + table = iat
|
| + elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
|
| + table = ilt
|
| + elif (ilt and len(ilt))==0 and (iat and len(iat)):
|
| + table = iat
|
| + else:
|
| + return None
|
| +
|
| + for idx in xrange(len(table)):
|
| +
|
| + imp_ord = None
|
| + imp_hint = None
|
| + imp_name = None
|
| + hint_name_table_rva = None
|
| +
|
| + if table[idx].AddressOfData:
|
| +
|
| + if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
|
| + ordinal_flag = IMAGE_ORDINAL_FLAG
|
| + elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
| + ordinal_flag = IMAGE_ORDINAL_FLAG64
|
| +
|
| + # If imported by ordinal, we will append the ordinal number
|
| + #
|
| + if table[idx].AddressOfData & ordinal_flag:
|
| + import_by_ordinal = True
|
| + imp_ord = table[idx].AddressOfData & 0xffff
|
| + imp_name = None
|
| + else:
|
| + import_by_ordinal = False
|
| + try:
|
| + hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
|
| + data = self.get_data(hint_name_table_rva, 2)
|
| + # Get the Hint
|
| + imp_hint = self.get_word_from_data(data, 0)
|
| + imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
|
| + except PEFormatError, e:
|
| + pass
|
| +
|
| + imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
|
| +
|
| + if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
|
| + imp_bound = iat[idx].AddressOfData
|
| + else:
|
| + imp_bound = None
|
| +
|
| + if imp_name != '' and (imp_ord or imp_name):
|
| + imported_symbols.append(
|
| + ImportData(
|
| + import_by_ordinal = import_by_ordinal,
|
| + ordinal = imp_ord,
|
| + hint = imp_hint,
|
| + name = imp_name,
|
| + bound = imp_bound,
|
| + address = imp_address,
|
| + hint_name_table_rva = hint_name_table_rva))
|
| +
|
| + return imported_symbols
|
| +
|
| +
|
| +
|
| + def get_import_table(self, rva):
|
| +
|
| + table = []
|
| +
|
| + while True and rva:
|
| + try:
|
| + data = self.get_data(rva)
|
| + except PEFormatError, e:
|
| + self.__warnings.append(
|
| + 'Error parsing the import table. ' +
|
| + 'Invalid data at RVA: 0x%x' % ( rva ) )
|
| + return None
|
| +
|
| + if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
|
| + format = self.__IMAGE_THUNK_DATA_format__
|
| + elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
| + format = self.__IMAGE_THUNK_DATA64_format__
|
| +
|
| + thunk_data = self.__unpack_data__(
|
| + format, data, file_offset=self.get_offset_from_rva(rva) )
|
| +
|
| + if not thunk_data or thunk_data.all_zeroes():
|
| + break
|
| +
|
| + rva += thunk_data.sizeof()
|
| +
|
| + table.append(thunk_data)
|
| +
|
| + return table
|
| +
|
| +
|
| + def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
|
| + """Returns the data corresponding to the memory layout of the PE file.
|
| +
|
| + The data includes the PE header and the sections loaded at offsets
|
| + corresponding to their relative virtual addresses. (the VirtualAddress
|
| + section header member).
|
| + Any offset in this data corresponds to the absolute memory address
|
| + ImageBase+offset.
|
| +
|
| + The optional argument 'max_virtual_address' provides with means of limiting
|
| + which section are processed.
|
| + Any section with their VirtualAddress beyond this value will be skipped.
|
| + Normally, sections with values beyond this range are just there to confuse
|
| + tools. It's a common trick to see in packed executables.
|
| +
|
| + If the 'ImageBase' optional argument is supplied, the file's relocations
|
| + will be applied to the image by calling the 'relocate_image()' method.
|
| + """
|
| +
|
| + # Collect all sections in one code block
|
| + data = self.header
|
| + for section in self.sections:
|
| +
|
| + # Miscellanous integrity tests.
|
| + # Some packer will set these to bogus values to
|
| + # make tools go nuts.
|
| + #
|
| + if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
|
| + continue
|
| +
|
| + if section.SizeOfRawData > len(self.__data__):
|
| + continue
|
| +
|
| + if section.PointerToRawData > len(self.__data__):
|
| + continue
|
| +
|
| + if section.VirtualAddress >= max_virtual_address:
|
| + continue
|
| +
|
| + padding_length = section.VirtualAddress - len(data)
|
| +
|
| + if padding_length>0:
|
| + data += '\0'*padding_length
|
| + elif padding_length<0:
|
| + data = data[:padding_length]
|
| +
|
| + data += section.data
|
| +
|
| + return data
|
| +
|
| +
|
| + def get_data(self, rva, length=None):
|
| + """Get data regardless of the section where it lies on.
|
| +
|
| + Given a rva and the size of the chunk to retrieve, this method
|
| + will find the section where the data lies and return the data.
|
| + """
|
| +
|
| + s = self.get_section_by_rva(rva)
|
| +
|
| + if not s:
|
| + if rva<len(self.header):
|
| + if length:
|
| + end = rva+length
|
| + else:
|
| + end = None
|
| + return self.header[rva:end]
|
| +
|
| + raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
|
| +
|
| + return s.get_data(rva, length)
|
| +
|
| +
|
| + def get_rva_from_offset(self, offset):
|
| + """Get the rva corresponding to this file offset. """
|
| +
|
| + s = self.get_section_by_offset(offset)
|
| + if not s:
|
| + raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
|
| + return s.get_rva_from_offset(offset)
|
| +
|
| + def get_offset_from_rva(self, rva):
|
| + """Get the file offset corresponding to this rva.
|
| +
|
| + Given a rva , this method will find the section where the
|
| + data lies and return the offset within the file.
|
| + """
|
| +
|
| + s = self.get_section_by_rva(rva)
|
| + if not s:
|
| +
|
| + raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
|
| +
|
| + return s.get_offset_from_rva(rva)
|
| +
|
| +
|
| + def get_string_at_rva(self, rva):
|
| + """Get an ASCII string located at the given address."""
|
| +
|
| + s = self.get_section_by_rva(rva)
|
| + if not s:
|
| + if rva<len(self.header):
|
| + return self.get_string_from_data(rva, self.header)
|
| + return None
|
| +
|
| + return self.get_string_from_data(rva-s.VirtualAddress, s.data)
|
| +
|
| +
|
| + def get_string_from_data(self, offset, data):
|
| + """Get an ASCII string from within the data."""
|
| +
|
| + # OC Patch
|
| + b = None
|
| +
|
| + try:
|
| + b = data[offset]
|
| + except IndexError:
|
| + return ''
|
| +
|
| + s = ''
|
| + while ord(b):
|
| + s += b
|
| + offset += 1
|
| + try:
|
| + b = data[offset]
|
| + except IndexError:
|
| + break
|
| +
|
| + return s
|
| +
|
| +
|
| + def get_string_u_at_rva(self, rva, max_length = 2**16):
|
| + """Get an Unicode string located at the given address."""
|
| +
|
| + try:
|
| + # If the RVA is invalid all would blow up. Some EXEs seem to be
|
| + # specially nasty and have an invalid RVA.
|
| + data = self.get_data(rva, 2)
|
| + except PEFormatError, e:
|
| + return None
|
| +
|
| + #length = struct.unpack('<H', data)[0]
|
| +
|
| + s = u''
|
| + for idx in xrange(max_length):
|
| + try:
|
| + uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
|
| + except struct.error:
|
| + break
|
| +
|
| + if unichr(uchr) == u'\0':
|
| + break
|
| + s += unichr(uchr)
|
| +
|
| + return s
|
| +
|
| +
|
| + def get_section_by_offset(self, offset):
|
| + """Get the section containing the given file offset."""
|
| +
|
| + sections = [s for s in self.sections if s.contains_offset(offset)]
|
| +
|
| + if sections:
|
| + return sections[0]
|
| +
|
| + return None
|
| +
|
| +
|
| + def get_section_by_rva(self, rva):
|
| + """Get the section containing the given address."""
|
| +
|
| + sections = [s for s in self.sections if s.contains_rva(rva)]
|
| +
|
| + if sections:
|
| + return sections[0]
|
| +
|
| + return None
|
| +
|
| + def __str__(self):
|
| + return self.dump_info()
|
| +
|
| +
|
| + def print_info(self):
|
| + """Print all the PE header information in a human readable from."""
|
| + print self.dump_info()
|
| +
|
| +
|
| + def dump_info(self, dump=None):
|
| + """Dump all the PE header information into human readable string."""
|
| +
|
| +
|
| + if dump is None:
|
| + dump = Dump()
|
| +
|
| + warnings = self.get_warnings()
|
| + if warnings:
|
| + dump.add_header('Parsing Warnings')
|
| + for warning in warnings:
|
| + dump.add_line(warning)
|
| + dump.add_newline()
|
| +
|
| +
|
| + dump.add_header('DOS_HEADER')
|
| + dump.add_lines(self.DOS_HEADER.dump())
|
| + dump.add_newline()
|
| +
|
| + dump.add_header('NT_HEADERS')
|
| + dump.add_lines(self.NT_HEADERS.dump())
|
| + dump.add_newline()
|
| +
|
| + dump.add_header('FILE_HEADER')
|
| + dump.add_lines(self.FILE_HEADER.dump())
|
| +
|
| + image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
|
| +
|
| + dump.add('Flags: ')
|
| + flags = []
|
| + for flag in image_flags:
|
| + if getattr(self.FILE_HEADER, flag[0]):
|
| + flags.append(flag[0])
|
| + dump.add_line(', '.join(flags))
|
| + dump.add_newline()
|
| +
|
| + if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
|
| + dump.add_header('OPTIONAL_HEADER')
|
| + dump.add_lines(self.OPTIONAL_HEADER.dump())
|
| +
|
| + dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
|
| +
|
| + dump.add('DllCharacteristics: ')
|
| + flags = []
|
| + for flag in dll_characteristics_flags:
|
| + if getattr(self.OPTIONAL_HEADER, flag[0]):
|
| + flags.append(flag[0])
|
| + dump.add_line(', '.join(flags))
|
| + dump.add_newline()
|
| +
|
| +
|
| + dump.add_header('PE Sections')
|
| +
|
| + section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
|
| +
|
| + for section in self.sections:
|
| + dump.add_lines(section.dump())
|
| + dump.add('Flags: ')
|
| + flags = []
|
| + for flag in section_flags:
|
| + if getattr(section, flag[0]):
|
| + flags.append(flag[0])
|
| + dump.add_line(', '.join(flags))
|
| + dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
|
| + if md5 is not None:
|
| + dump.add_line('MD5 hash: %s' % section.get_hash_md5() )
|
| + if sha1 is not None:
|
| + dump.add_line('SHA-1 hash: %s' % section.get_hash_sha1() )
|
| + if sha256 is not None:
|
| + dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
|
| + if sha512 is not None:
|
| + dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
|
| + dump.add_newline()
|
| +
|
| +
|
| +
|
| + if (hasattr(self, 'OPTIONAL_HEADER') and
|
| + hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
|
| +
|
| + dump.add_header('Directories')
|
| + for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
|
| + directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
|
| + dump.add_lines(directory.dump())
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'VS_VERSIONINFO'):
|
| + dump.add_header('Version Information')
|
| + dump.add_lines(self.VS_VERSIONINFO.dump())
|
| + dump.add_newline()
|
| +
|
| + if hasattr(self, 'VS_FIXEDFILEINFO'):
|
| + dump.add_lines(self.VS_FIXEDFILEINFO.dump())
|
| + dump.add_newline()
|
| +
|
| + if hasattr(self, 'FileInfo'):
|
| + for entry in self.FileInfo:
|
| + dump.add_lines(entry.dump())
|
| + dump.add_newline()
|
| +
|
| + if hasattr(entry, 'StringTable'):
|
| + for st_entry in entry.StringTable:
|
| + [dump.add_line(' '+line) for line in st_entry.dump()]
|
| + dump.add_line(' LangID: '+st_entry.LangID)
|
| + dump.add_newline()
|
| + for str_entry in st_entry.entries.items():
|
| + dump.add_line(' '+str_entry[0]+': '+str_entry[1])
|
| + dump.add_newline()
|
| +
|
| + elif hasattr(entry, 'Var'):
|
| + for var_entry in entry.Var:
|
| + if hasattr(var_entry, 'entry'):
|
| + [dump.add_line(' '+line) for line in var_entry.dump()]
|
| + dump.add_line(
|
| + ' ' + var_entry.entry.keys()[0] +
|
| + ': ' + var_entry.entry.values()[0])
|
| +
|
| + dump.add_newline()
|
| +
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
|
| + dump.add_header('Exported symbols')
|
| + dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
|
| + dump.add_newline()
|
| + dump.add_line('%-10s %-10s %s' % ('Ordinal', 'RVA', 'Name'))
|
| + for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
|
| + dump.add('%-10d 0x%08Xh %s' % (
|
| + export.ordinal, export.address, export.name))
|
| + if export.forwarder:
|
| + dump.add_line(' forwarder: %s' % export.forwarder)
|
| + else:
|
| + dump.add_newline()
|
| +
|
| + dump.add_newline()
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
|
| + dump.add_header('Imported symbols')
|
| + for module in self.DIRECTORY_ENTRY_IMPORT:
|
| + dump.add_lines(module.struct.dump())
|
| + dump.add_newline()
|
| + for symbol in module.imports:
|
| +
|
| + if symbol.import_by_ordinal is True:
|
| + dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
|
| + module.dll, str(symbol.ordinal)))
|
| + else:
|
| + dump.add('%s.%s Hint[%s]' % (
|
| + module.dll, symbol.name, str(symbol.hint)))
|
| +
|
| + if symbol.bound:
|
| + dump.add_line(' Bound: 0x%08X' % (symbol.bound))
|
| + else:
|
| + dump.add_newline()
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
|
| + dump.add_header('Bound imports')
|
| + for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
|
| +
|
| + dump.add_lines(bound_imp_desc.struct.dump())
|
| + dump.add_line('DLL: %s' % bound_imp_desc.name)
|
| + dump.add_newline()
|
| +
|
| + for bound_imp_ref in bound_imp_desc.entries:
|
| + dump.add_lines(bound_imp_ref.struct.dump(), 4)
|
| + dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
|
| + dump.add_header('Delay Imported symbols')
|
| + for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
|
| +
|
| + dump.add_lines(module.struct.dump())
|
| + dump.add_newline()
|
| +
|
| + for symbol in module.imports:
|
| + if symbol.import_by_ordinal is True:
|
| + dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
|
| + module.dll, str(symbol.ordinal)))
|
| + else:
|
| + dump.add('%s.%s Hint[%s]' % (
|
| + module.dll, symbol.name, str(symbol.hint)))
|
| +
|
| + if symbol.bound:
|
| + dump.add_line(' Bound: 0x%08X' % (symbol.bound))
|
| + else:
|
| + dump.add_newline()
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
|
| + dump.add_header('Resource directory')
|
| +
|
| + dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
|
| +
|
| + for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
|
| +
|
| + if resource_type.name is not None:
|
| + dump.add_line('Name: [%s]' % resource_type.name, 2)
|
| + else:
|
| + dump.add_line('Id: [0x%X] (%s)' % (
|
| + resource_type.struct.Id, RESOURCE_TYPE.get(
|
| + resource_type.struct.Id, '-')),
|
| + 2)
|
| +
|
| + dump.add_lines(resource_type.struct.dump(), 2)
|
| +
|
| + if hasattr(resource_type, 'directory'):
|
| +
|
| + dump.add_lines(resource_type.directory.struct.dump(), 4)
|
| +
|
| + for resource_id in resource_type.directory.entries:
|
| +
|
| + if resource_id.name is not None:
|
| + dump.add_line('Name: [%s]' % resource_id.name, 6)
|
| + else:
|
| + dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
|
| +
|
| + dump.add_lines(resource_id.struct.dump(), 6)
|
| +
|
| + if hasattr(resource_id, 'directory'):
|
| + dump.add_lines(resource_id.directory.struct.dump(), 8)
|
| +
|
| + for resource_lang in resource_id.directory.entries:
|
| + # dump.add_line('\\--- LANG [%d,%d][%s]' % (
|
| + # resource_lang.data.lang,
|
| + # resource_lang.data.sublang,
|
| + # LANG[resource_lang.data.lang]), 8)
|
| + dump.add_lines(resource_lang.struct.dump(), 10)
|
| + dump.add_lines(resource_lang.data.struct.dump(), 12)
|
| + dump.add_newline()
|
| +
|
| + dump.add_newline()
|
| +
|
| +
|
| + if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and
|
| + self.DIRECTORY_ENTRY_TLS and
|
| + self.DIRECTORY_ENTRY_TLS.struct ):
|
| +
|
| + dump.add_header('TLS')
|
| + dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
|
| + dump.add_header('Debug information')
|
| + for dbg in self.DIRECTORY_ENTRY_DEBUG:
|
| + dump.add_lines(dbg.struct.dump())
|
| + try:
|
| + dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
|
| + except KeyError:
|
| + dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
|
| + dump.add_newline()
|
| +
|
| +
|
| + if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
|
| + dump.add_header('Base relocations')
|
| + for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
|
| + dump.add_lines(base_reloc.struct.dump())
|
| + for reloc in base_reloc.entries:
|
| + try:
|
| + dump.add_line('%08Xh %s' % (
|
| + reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
|
| + except KeyError:
|
| + dump.add_line('0x%08X 0x%x(Unknown)' % (
|
| + reloc.rva, reloc.type), 4)
|
| + dump.add_newline()
|
| +
|
| +
|
| + return dump.get_text()
|
| +
|
| + # OC Patch
|
| + def get_physical_by_rva(self, rva):
|
| + """Gets the physical address in the PE file from an RVA value."""
|
| + try:
|
| + return self.get_offset_from_rva(rva)
|
| + except Exception:
|
| + return None
|
| +
|
| +
|
| + ##
|
| + # Double-Word get/set
|
| + ##
|
| +
|
| + def get_data_from_dword(self, dword):
|
| + """Return a four byte string representing the double word value. (little endian)."""
|
| + return struct.pack('<L', dword)
|
| +
|
| +
|
| + def get_dword_from_data(self, data, offset):
|
| + """Convert four bytes of data to a double word (little endian)
|
| +
|
| + 'offset' is assumed to index into a dword array. So setting it to
|
| + N will return a dword out of the data sarting at offset N*4.
|
| +
|
| + Returns None if the data can't be turned into a double word.
|
| + """
|
| +
|
| + if (offset+1)*4 > len(data):
|
| + return None
|
| +
|
| + return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
|
| +
|
| +
|
| + def get_dword_at_rva(self, rva):
|
| + """Return the double word value at the given RVA.
|
| +
|
| + Returns None if the value can't be read, i.e. the RVA can't be mapped
|
| + to a file offset.
|
| + """
|
| +
|
| + try:
|
| + return self.get_dword_from_data(self.get_data(rva)[:4], 0)
|
| + except PEFormatError:
|
| + return None
|
| +
|
| +
|
| + def get_dword_from_offset(self, offset):
|
| + """Return the double word value at the given file offset. (little endian)"""
|
| +
|
| + if offset+4 > len(self.__data__):
|
| + return None
|
| +
|
| + return self.get_dword_from_data(self.__data__[offset:offset+4], 0)
|
| +
|
| +
|
| + def set_dword_at_rva(self, rva, dword):
|
| + """Set the double word value at the file offset corresponding to the given RVA."""
|
| + return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
|
| +
|
| +
|
| + def set_dword_at_offset(self, offset, dword):
|
| + """Set the double word value at the given file offset."""
|
| + return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
|
| +
|
| +
|
| +
|
| + ##
|
| + # Word get/set
|
| + ##
|
| +
|
| + def get_data_from_word(self, word):
|
| + """Return a two byte string representing the word value. (little endian)."""
|
| + return struct.pack('<H', word)
|
| +
|
| +
|
| + def get_word_from_data(self, data, offset):
|
| + """Convert two bytes of data to a word (little endian)
|
| +
|
| + 'offset' is assumed to index into a word array. So setting it to
|
| + N will return a dword out of the data sarting at offset N*2.
|
| +
|
| + Returns None if the data can't be turned into a word.
|
| + """
|
| +
|
| + if (offset+1)*2 > len(data):
|
| + return None
|
| +
|
| + return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]
|
| +
|
| +
|
| + def get_word_at_rva(self, rva):
|
| + """Return the word value at the given RVA.
|
| +
|
| + Returns None if the value can't be read, i.e. the RVA can't be mapped
|
| + to a file offset.
|
| + """
|
| +
|
| + try:
|
| + return self.get_word_from_data(self.get_data(rva)[:2], 0)
|
| + except PEFormatError:
|
| + return None
|
| +
|
| +
|
| + def get_word_from_offset(self, offset):
|
| + """Return the word value at the given file offset. (little endian)"""
|
| +
|
| + if offset+2 > len(self.__data__):
|
| + return None
|
| +
|
| + return self.get_word_from_data(self.__data__[offset:offset+2], 0)
|
| +
|
| +
|
| + def set_word_at_rva(self, rva, word):
|
| + """Set the word value at the file offset corresponding to the given RVA."""
|
| + return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
|
| +
|
| +
|
| + def set_word_at_offset(self, offset, word):
|
| + """Set the word value at the given file offset."""
|
| + return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
|
| +
|
| +
|
| + ##
|
| + # Quad-Word get/set
|
| + ##
|
| +
|
| + def get_data_from_qword(self, word):
|
| + """Return a eight byte string representing the quad-word value. (little endian)."""
|
| + return struct.pack('<Q', word)
|
| +
|
| +
|
| + def get_qword_from_data(self, data, offset):
|
| + """Convert eight bytes of data to a word (little endian)
|
| +
|
| + 'offset' is assumed to index into a word array. So setting it to
|
| + N will return a dword out of the data sarting at offset N*8.
|
| +
|
| + Returns None if the data can't be turned into a quad word.
|
| + """
|
| +
|
| + if (offset+1)*8 > len(data):
|
| + return None
|
| +
|
| + return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]
|
| +
|
| +
|
| + def get_qword_at_rva(self, rva):
|
| + """Return the quad-word value at the given RVA.
|
| +
|
| + Returns None if the value can't be read, i.e. the RVA can't be mapped
|
| + to a file offset.
|
| + """
|
| +
|
| + try:
|
| + return self.get_qword_from_data(self.get_data(rva)[:8], 0)
|
| + except PEFormatError:
|
| + return None
|
| +
|
| +
|
| + def get_qword_from_offset(self, offset):
|
| + """Return the quad-word value at the given file offset. (little endian)"""
|
| +
|
| + if offset+8 > len(self.__data__):
|
| + return None
|
| +
|
| + return self.get_qword_from_data(self.__data__[offset:offset+8], 0)
|
| +
|
| +
|
| + def set_qword_at_rva(self, rva, qword):
|
| + """Set the quad-word value at the file offset corresponding to the given RVA."""
|
| + return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
|
| +
|
| +
|
| + def set_qword_at_offset(self, offset, qword):
|
| + """Set the quad-word value at the given file offset."""
|
| + return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
|
| +
|
| +
|
| +
|
| + ##
|
| + # Set bytes
|
| + ##
|
| +
|
| +
|
| + def set_bytes_at_rva(self, rva, data):
|
| + """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
|
| +
|
| + Return True if successful, False otherwise. It can fail if the
|
| + offset is outside the file's boundaries.
|
| + """
|
| +
|
| + offset = self.get_physical_by_rva(rva)
|
| + if not offset:
|
| + raise False
|
| +
|
| + return self.set_bytes_at_offset(offset, data)
|
| +
|
| +
|
| + def set_bytes_at_offset(self, offset, data):
|
| + """Overwrite the bytes at the given file offset with the given string.
|
| +
|
| + Return True if successful, False otherwise. It can fail if the
|
| + offset is outside the file's boundaries.
|
| + """
|
| +
|
| + if not isinstance(data, str):
|
| + raise TypeError('data should be of type: str')
|
| +
|
| + if offset >= 0 and offset < len(self.__data__):
|
| + self.__data__ = ( self.__data__[:offset] +
|
| + data +
|
| + self.__data__[offset+len(data):] )
|
| + else:
|
| + return False
|
| +
|
| + # Refresh the section's data with the modified information
|
| + #
|
| + for section in self.sections:
|
| + section_data_start = section.PointerToRawData
|
| + section_data_end = section_data_start+section.SizeOfRawData
|
| + section.data = self.__data__[section_data_start:section_data_end]
|
| +
|
| + return True
|
| +
|
| +
|
| +
|
| + def relocate_image(self, new_ImageBase):
|
| + """Apply the relocation information to the image using the provided new image base.
|
| +
|
| + This method will apply the relocation information to the image. Given the new base,
|
| + all the relocations will be processed and both the raw data and the section's data
|
| + will be fixed accordingly.
|
| + The resulting image can be retrieved as well through the method:
|
| +
|
| + get_memory_mapped_image()
|
| +
|
| + In order to get something that would more closely match what could be found in memory
|
| + once the Windows loader finished its work.
|
| + """
|
| +
|
| + relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
|
| +
|
| +
|
| + for reloc in self.DIRECTORY_ENTRY_BASERELOC:
|
| +
|
| + virtual_address = reloc.struct.VirtualAddress
|
| + size_of_block = reloc.struct.SizeOfBlock
|
| +
|
| + # We iterate with an index because if the relocation is of type
|
| + # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
|
| + # at once and skip it for the next interation
|
| + #
|
| + entry_idx = 0
|
| + while entry_idx<len(reloc.entries):
|
| +
|
| + entry = reloc.entries[entry_idx]
|
| + entry_idx += 1
|
| +
|
| + if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
|
| + # Nothing to do for this type of relocation
|
| + pass
|
| +
|
| + elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
|
| + # Fix the high 16bits of a relocation
|
| + #
|
| + # Add high 16bits of relocation_difference to the
|
| + # 16bit value at RVA=entry.rva
|
| +
|
| + self.set_word_at_rva(
|
| + entry.rva,
|
| + ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
|
| +
|
| + elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
|
| + # Fix the low 16bits of a relocation
|
| + #
|
| + # Add low 16 bits of relocation_difference to the 16bit value
|
| + # at RVA=entry.rva
|
| +
|
| + self.set_word_at_rva(
|
| + entry.rva,
|
| + ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
|
| +
|
| + elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
|
| + # Handle all high and low parts of a 32bit relocation
|
| + #
|
| + # Add relocation_difference to the value at RVA=entry.rva
|
| +
|
| + self.set_dword_at_rva(
|
| + entry.rva,
|
| + self.get_dword_at_rva(entry.rva)+relocation_difference)
|
| +
|
| + elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
|
| + # Fix the high 16bits of a relocation and adjust
|
| + #
|
| + # Add high 16bits of relocation_difference to the 32bit value
|
| + # composed from the (16bit value at RVA=entry.rva)<<16 plus
|
| + # the 16bit value at the next relocation entry.
|
| + #
|
| +
|
| + # If the next entry is beyond the array's limits,
|
| + # abort... the table is corrupt
|
| + #
|
| + if entry_idx == len(reloc.entries):
|
| + break
|
| +
|
| + next_entry = reloc.entries[entry_idx]
|
| + entry_idx += 1
|
| + self.set_word_at_rva( entry.rva,
|
| + ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
|
| + relocation_difference & 0xffff0000) >> 16 )
|
| +
|
| + elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
|
| + # Apply the difference to the 64bit value at the offset
|
| + # RVA=entry.rva
|
| +
|
| + self.set_qword_at_rva(
|
| + entry.rva,
|
| + self.get_qword_at_rva(entry.rva) + relocation_difference)
|
| +
|
| +
|
| + def verify_checksum(self):
|
| +
|
| + return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
|
| +
|
| +
|
| + def generate_checksum(self):
|
| +
|
| + # Get the offset to the CheckSum field in the OptionalHeader
|
| + #
|
| + checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
|
| +
|
| + checksum = 0
|
| +
|
| + for i in range( len(self.__data__) / 4 ):
|
| +
|
| + # Skip the checksum field
|
| + #
|
| + if i == checksum_offset / 4:
|
| + continue
|
| +
|
| + dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
|
| + checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
|
| + if checksum > 2**32:
|
| + checksum = (checksum & 0xffffffff) + (checksum >> 32)
|
| +
|
| + checksum = (checksum & 0xffff) + (checksum >> 16)
|
| + checksum = (checksum) + (checksum >> 16)
|
| + checksum = checksum & 0xffff
|
| +
|
| + return checksum + len(self.__data__)
|
|
|