tools/symsrc/pefile.py - Issue 155136: Add the symbol and source server scripts.

Side by Side Diff: tools/symsrc/pefile.py

Issue 155136: Add the symbol and source server scripts. (Closed)

Patch Set: review feedback Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # -*- coding: Latin-1 -*-

	2 """pefile, Portable Executable reader module

	3

	4

	5 All the PE file basic structures are available with their default names

	6 as attributes of the instance returned.

	7

	8 Processed elements such as the import table are made available with lowercase

	9 names, to differentiate them from the upper case basic structure names.

	10

	11 pefile has been tested against the limits of valid PE headers, that is, malware.

	12 Lots of packed malware attempt to abuse the format way beyond its standard use.

	13 To the best of my knowledge most of the abuses are handled gracefully.

	14

	15 Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>

	16

	17 All rights reserved.

	18

	19 For detailed copyright information see the file COPYING in

	20 the root of the distribution archive.

	21 """

	22

	23 __author__ = 'Ero Carrera'

	24 __version__ = '1.2.9.1'

	25 __contact__ = 'ero@dkbza.org'

	26

	27

	28 import os

	29 import struct

	30 import time

	31 import math

	32 import re

	33 import exceptions

	34 import string

	35 import array

	36

	37 sha1, sha256, sha512, md5 = None, None, None, None

	38

	39 try:

	40 import hashlib

	41 sha1 = hashlib.sha1

	42 sha256 = hashlib.sha256

	43 sha512 = hashlib.sha512

	44 md5 = hashlib.md5

	45 except ImportError:

	46 try:

	47 import sha

	48 sha1 = sha.new

	49 except ImportError:

	50 pass

	51 try:

	52 import md5

	53 md5 = md5.new

	54 except ImportError:

	55 pass

	56

	57

	58 fast_load = False

	59

	60 IMAGE_DOS_SIGNATURE = 0x5A4D

	61 IMAGE_OS2_SIGNATURE = 0x454E

	62 IMAGE_OS2_SIGNATURE_LE = 0x454C

	63 IMAGE_VXD_SIGNATURE = 0x454C

	64 IMAGE_NT_SIGNATURE = 0x00004550

	65 IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16

	66 IMAGE_ORDINAL_FLAG = 0x80000000L

	67 IMAGE_ORDINAL_FLAG64 = 0x8000000000000000L

	68 OPTIONAL_HEADER_MAGIC_PE = 0x10b

	69 OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20b

	70

	71

	72 directory_entry_types = [

	73 ('IMAGE_DIRECTORY_ENTRY_EXPORT', 0),

	74 ('IMAGE_DIRECTORY_ENTRY_IMPORT', 1),

	75 ('IMAGE_DIRECTORY_ENTRY_RESOURCE', 2),

	76 ('IMAGE_DIRECTORY_ENTRY_EXCEPTION', 3),

	77 ('IMAGE_DIRECTORY_ENTRY_SECURITY', 4),

	78 ('IMAGE_DIRECTORY_ENTRY_BASERELOC', 5),

	79 ('IMAGE_DIRECTORY_ENTRY_DEBUG', 6),

	80 ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT', 7),

	81 ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR', 8),

	82 ('IMAGE_DIRECTORY_ENTRY_TLS', 9),

	83 ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG', 10),

	84 ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', 11),

	85 ('IMAGE_DIRECTORY_ENTRY_IAT', 12),

	86 ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', 13),

	87 ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),

	88 ('IMAGE_DIRECTORY_ENTRY_RESERVED', 15) ]

	89

	90 DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_e ntry_types)

	91

	92

	93 image_characteristics = [

	94 ('IMAGE_FILE_RELOCS_STRIPPED', 0x0001),

	95 ('IMAGE_FILE_EXECUTABLE_IMAGE', 0x0002),

	96 ('IMAGE_FILE_LINE_NUMS_STRIPPED', 0x0004),

	97 ('IMAGE_FILE_LOCAL_SYMS_STRIPPED', 0x0008),

	98 ('IMAGE_FILE_AGGRESIVE_WS_TRIM', 0x0010),

	99 ('IMAGE_FILE_LARGE_ADDRESS_AWARE', 0x0020),

	100 ('IMAGE_FILE_16BIT_MACHINE', 0x0040),

	101 ('IMAGE_FILE_BYTES_REVERSED_LO', 0x0080),

	102 ('IMAGE_FILE_32BIT_MACHINE', 0x0100),

	103 ('IMAGE_FILE_DEBUG_STRIPPED', 0x0200),

	104 ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', 0x0400),

	105 ('IMAGE_FILE_NET_RUN_FROM_SWAP', 0x0800),

	106 ('IMAGE_FILE_SYSTEM', 0x1000),

	107 ('IMAGE_FILE_DLL', 0x2000),

	108 ('IMAGE_FILE_UP_SYSTEM_ONLY', 0x4000),

	109 ('IMAGE_FILE_BYTES_REVERSED_HI', 0x8000) ]

	110

	111 IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in

	112 image_characteristics]+image_characteristics)

	113

	114

	115 section_characteristics = [

	116 ('IMAGE_SCN_CNT_CODE', 0x00000020),

	117 ('IMAGE_SCN_CNT_INITIALIZED_DATA', 0x00000040),

	118 ('IMAGE_SCN_CNT_UNINITIALIZED_DATA', 0x00000080),

	119 ('IMAGE_SCN_LNK_OTHER', 0x00000100),

	120 ('IMAGE_SCN_LNK_INFO', 0x00000200),

	121 ('IMAGE_SCN_LNK_REMOVE', 0x00000800),

	122 ('IMAGE_SCN_LNK_COMDAT', 0x00001000),

	123 ('IMAGE_SCN_MEM_FARDATA', 0x00008000),

	124 ('IMAGE_SCN_MEM_PURGEABLE', 0x00020000),

	125 ('IMAGE_SCN_MEM_16BIT', 0x00020000),

	126 ('IMAGE_SCN_MEM_LOCKED', 0x00040000),

	127 ('IMAGE_SCN_MEM_PRELOAD', 0x00080000),

	128 ('IMAGE_SCN_ALIGN_1BYTES', 0x00100000),

	129 ('IMAGE_SCN_ALIGN_2BYTES', 0x00200000),

	130 ('IMAGE_SCN_ALIGN_4BYTES', 0x00300000),

	131 ('IMAGE_SCN_ALIGN_8BYTES', 0x00400000),

	132 ('IMAGE_SCN_ALIGN_16BYTES', 0x00500000),

	133 ('IMAGE_SCN_ALIGN_32BYTES', 0x00600000),

	134 ('IMAGE_SCN_ALIGN_64BYTES', 0x00700000),

	135 ('IMAGE_SCN_ALIGN_128BYTES', 0x00800000),

	136 ('IMAGE_SCN_ALIGN_256BYTES', 0x00900000),

	137 ('IMAGE_SCN_ALIGN_512BYTES', 0x00A00000),

	138 ('IMAGE_SCN_ALIGN_1024BYTES', 0x00B00000),

	139 ('IMAGE_SCN_ALIGN_2048BYTES', 0x00C00000),

	140 ('IMAGE_SCN_ALIGN_4096BYTES', 0x00D00000),

	141 ('IMAGE_SCN_ALIGN_8192BYTES', 0x00E00000),

	142 ('IMAGE_SCN_ALIGN_MASK', 0x00F00000),

	143 ('IMAGE_SCN_LNK_NRELOC_OVFL', 0x01000000),

	144 ('IMAGE_SCN_MEM_DISCARDABLE', 0x02000000),

	145 ('IMAGE_SCN_MEM_NOT_CACHED', 0x04000000),

	146 ('IMAGE_SCN_MEM_NOT_PAGED', 0x08000000),

	147 ('IMAGE_SCN_MEM_SHARED', 0x10000000),

	148 ('IMAGE_SCN_MEM_EXECUTE', 0x20000000),

	149 ('IMAGE_SCN_MEM_READ', 0x40000000),

	150 ('IMAGE_SCN_MEM_WRITE', 0x80000000L) ]

	151

	152 SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in

	153 section_characteristics]+section_characteristics)

	154

	155

	156 debug_types = [

	157 ('IMAGE_DEBUG_TYPE_UNKNOWN', 0),

	158 ('IMAGE_DEBUG_TYPE_COFF', 1),

	159 ('IMAGE_DEBUG_TYPE_CODEVIEW', 2),

	160 ('IMAGE_DEBUG_TYPE_FPO', 3),

	161 ('IMAGE_DEBUG_TYPE_MISC', 4),

	162 ('IMAGE_DEBUG_TYPE_EXCEPTION', 5),

	163 ('IMAGE_DEBUG_TYPE_FIXUP', 6),

	164 ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC', 7),

	165 ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC', 8),

	166 ('IMAGE_DEBUG_TYPE_BORLAND', 9),

	167 ('IMAGE_DEBUG_TYPE_RESERVED10', 10) ]

	168

	169 DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)

	170

	171

	172 subsystem_types = [

	173 ('IMAGE_SUBSYSTEM_UNKNOWN', 0),

	174 ('IMAGE_SUBSYSTEM_NATIVE', 1),

	175 ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),

	176 ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),

	177 ('IMAGE_SUBSYSTEM_OS2_CUI', 5),

	178 ('IMAGE_SUBSYSTEM_POSIX_CUI', 7),

	179 ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI', 9),

	180 ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),

	181 ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),

	182 ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER', 12),

	183 ('IMAGE_SUBSYSTEM_EFI_ROM', 13),

	184 ('IMAGE_SUBSYSTEM_XBOX', 14)]

	185

	186 SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)

	187

	188

	189 machine_types = [

	190 ('IMAGE_FILE_MACHINE_UNKNOWN', 0),

	191 ('IMAGE_FILE_MACHINE_AM33', 0x1d3),

	192 ('IMAGE_FILE_MACHINE_AMD64', 0x8664),

	193 ('IMAGE_FILE_MACHINE_ARM', 0x1c0),

	194 ('IMAGE_FILE_MACHINE_EBC', 0xebc),

	195 ('IMAGE_FILE_MACHINE_I386', 0x14c),

	196 ('IMAGE_FILE_MACHINE_IA64', 0x200),

	197 ('IMAGE_FILE_MACHINE_MR32', 0x9041),

	198 ('IMAGE_FILE_MACHINE_MIPS16', 0x266),

	199 ('IMAGE_FILE_MACHINE_MIPSFPU', 0x366),

	200 ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),

	201 ('IMAGE_FILE_MACHINE_POWERPC', 0x1f0),

	202 ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),

	203 ('IMAGE_FILE_MACHINE_R4000', 0x166),

	204 ('IMAGE_FILE_MACHINE_SH3', 0x1a2),

	205 ('IMAGE_FILE_MACHINE_SH3DSP', 0x1a3),

	206 ('IMAGE_FILE_MACHINE_SH4', 0x1a6),

	207 ('IMAGE_FILE_MACHINE_SH5', 0x1a8),

	208 ('IMAGE_FILE_MACHINE_THUMB', 0x1c2),

	209 ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),

	210 ]

	211

	212 MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)

	213

	214

	215 relocation_types = [

	216 ('IMAGE_REL_BASED_ABSOLUTE', 0),

	217 ('IMAGE_REL_BASED_HIGH', 1),

	218 ('IMAGE_REL_BASED_LOW', 2),

	219 ('IMAGE_REL_BASED_HIGHLOW', 3),

	220 ('IMAGE_REL_BASED_HIGHADJ', 4),

	221 ('IMAGE_REL_BASED_MIPS_JMPADDR', 5),

	222 ('IMAGE_REL_BASED_SECTION', 6),

	223 ('IMAGE_REL_BASED_REL', 7),

	224 ('IMAGE_REL_BASED_MIPS_JMPADDR16', 9),

	225 ('IMAGE_REL_BASED_IA64_IMM64', 9),

	226 ('IMAGE_REL_BASED_DIR64', 10),

	227 ('IMAGE_REL_BASED_HIGH3ADJ', 11) ]

	228

	229 RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types )

	230

	231

	232 dll_characteristics = [

	233 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),

	234 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),

	235 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),

	236 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),

	237 ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE', 0x0040),

	238 ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY', 0x0080),

	239 ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT', 0x0100),

	240 ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION', 0x0200),

	241 ('IMAGE_DLL_CHARACTERISTICS_NO_SEH', 0x0400),

	242 ('IMAGE_DLL_CHARACTERISTICS_NO_BIND', 0x0800),

	243 ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),

	244 ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER', 0x2000),

	245 ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]

	246

	247 DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_chara cteristics)

	248

	249

	250 # Resource types

	251 resource_type = [

	252 ('RT_CURSOR', 1),

	253 ('RT_BITMAP', 2),

	254 ('RT_ICON', 3),

	255 ('RT_MENU', 4),

	256 ('RT_DIALOG', 5),

	257 ('RT_STRING', 6),

	258 ('RT_FONTDIR', 7),

	259 ('RT_FONT', 8),

	260 ('RT_ACCELERATOR', 9),

	261 ('RT_RCDATA', 10),

	262 ('RT_MESSAGETABLE', 11),

	263 ('RT_GROUP_CURSOR', 12),

	264 ('RT_GROUP_ICON', 14),

	265 ('RT_VERSION', 16),

	266 ('RT_DLGINCLUDE', 17),

	267 ('RT_PLUGPLAY', 19),

	268 ('RT_VXD', 20),

	269 ('RT_ANICURSOR', 21),

	270 ('RT_ANIICON', 22),

	271 ('RT_HTML', 23),

	272 ('RT_MANIFEST', 24) ]

	273

	274 RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)

	275

	276

	277 # Language definitions

	278 lang = [

	279 ('LANG_NEUTRAL', 0x00),

	280 ('LANG_INVARIANT', 0x7f),

	281 ('LANG_AFRIKAANS', 0x36),

	282 ('LANG_ALBANIAN', 0x1c),

	283 ('LANG_ARABIC', 0x01),

	284 ('LANG_ARMENIAN', 0x2b),

	285 ('LANG_ASSAMESE', 0x4d),

	286 ('LANG_AZERI', 0x2c),

	287 ('LANG_BASQUE', 0x2d),

	288 ('LANG_BELARUSIAN', 0x23),

	289 ('LANG_BENGALI', 0x45),

	290 ('LANG_BULGARIAN', 0x02),

	291 ('LANG_CATALAN', 0x03),

	292 ('LANG_CHINESE', 0x04),

	293 ('LANG_CROATIAN', 0x1a),

	294 ('LANG_CZECH', 0x05),

	295 ('LANG_DANISH', 0x06),

	296 ('LANG_DIVEHI', 0x65),

	297 ('LANG_DUTCH', 0x13),

	298 ('LANG_ENGLISH', 0x09),

	299 ('LANG_ESTONIAN', 0x25),

	300 ('LANG_FAEROESE', 0x38),

	301 ('LANG_FARSI', 0x29),

	302 ('LANG_FINNISH', 0x0b),

	303 ('LANG_FRENCH', 0x0c),

	304 ('LANG_GALICIAN', 0x56),

	305 ('LANG_GEORGIAN', 0x37),

	306 ('LANG_GERMAN', 0x07),

	307 ('LANG_GREEK', 0x08),

	308 ('LANG_GUJARATI', 0x47),

	309 ('LANG_HEBREW', 0x0d),

	310 ('LANG_HINDI', 0x39),

	311 ('LANG_HUNGARIAN', 0x0e),

	312 ('LANG_ICELANDIC', 0x0f),

	313 ('LANG_INDONESIAN', 0x21),

	314 ('LANG_ITALIAN', 0x10),

	315 ('LANG_JAPANESE', 0x11),

	316 ('LANG_KANNADA', 0x4b),

	317 ('LANG_KASHMIRI', 0x60),

	318 ('LANG_KAZAK', 0x3f),

	319 ('LANG_KONKANI', 0x57),

	320 ('LANG_KOREAN', 0x12),

	321 ('LANG_KYRGYZ', 0x40),

	322 ('LANG_LATVIAN', 0x26),

	323 ('LANG_LITHUANIAN', 0x27),

	324 ('LANG_MACEDONIAN', 0x2f),

	325 ('LANG_MALAY', 0x3e),

	326 ('LANG_MALAYALAM', 0x4c),

	327 ('LANG_MANIPURI', 0x58),

	328 ('LANG_MARATHI', 0x4e),

	329 ('LANG_MONGOLIAN', 0x50),

	330 ('LANG_NEPALI', 0x61),

	331 ('LANG_NORWEGIAN', 0x14),

	332 ('LANG_ORIYA', 0x48),

	333 ('LANG_POLISH', 0x15),

	334 ('LANG_PORTUGUESE', 0x16),

	335 ('LANG_PUNJABI', 0x46),

	336 ('LANG_ROMANIAN', 0x18),

	337 ('LANG_RUSSIAN', 0x19),

	338 ('LANG_SANSKRIT', 0x4f),

	339 ('LANG_SERBIAN', 0x1a),

	340 ('LANG_SINDHI', 0x59),

	341 ('LANG_SLOVAK', 0x1b),

	342 ('LANG_SLOVENIAN', 0x24),

	343 ('LANG_SPANISH', 0x0a),

	344 ('LANG_SWAHILI', 0x41),

	345 ('LANG_SWEDISH', 0x1d),

	346 ('LANG_SYRIAC', 0x5a),

	347 ('LANG_TAMIL', 0x49),

	348 ('LANG_TATAR', 0x44),

	349 ('LANG_TELUGU', 0x4a),

	350 ('LANG_THAI', 0x1e),

	351 ('LANG_TURKISH', 0x1f),

	352 ('LANG_UKRAINIAN', 0x22),

	353 ('LANG_URDU', 0x20),

	354 ('LANG_UZBEK', 0x43),

	355 ('LANG_VIETNAMESE', 0x2a),

	356 ('LANG_GAELIC', 0x3c),

	357 ('LANG_MALTESE', 0x3a),

	358 ('LANG_MAORI', 0x28),

	359 ('LANG_RHAETO_ROMANCE',0x17),

	360 ('LANG_SAAMI', 0x3b),

	361 ('LANG_SORBIAN', 0x2e),

	362 ('LANG_SUTU', 0x30),

	363 ('LANG_TSONGA', 0x31),

	364 ('LANG_TSWANA', 0x32),

	365 ('LANG_VENDA', 0x33),

	366 ('LANG_XHOSA', 0x34),

	367 ('LANG_ZULU', 0x35),

	368 ('LANG_ESPERANTO', 0x8f),

	369 ('LANG_WALON', 0x90),

	370 ('LANG_CORNISH', 0x91),

	371 ('LANG_WELSH', 0x92),

	372 ('LANG_BRETON', 0x93) ]

	373

	374 LANG = dict(lang+[(e[1], e[0]) for e in lang])

	375

	376

	377 # Sublanguage definitions

	378 sublang = [

	379 ('SUBLANG_NEUTRAL', 0x00),

	380 ('SUBLANG_DEFAULT', 0x01),

	381 ('SUBLANG_SYS_DEFAULT', 0x02),

	382 ('SUBLANG_ARABIC_SAUDI_ARABIA', 0x01),

	383 ('SUBLANG_ARABIC_IRAQ', 0x02),

	384 ('SUBLANG_ARABIC_EGYPT', 0x03),

	385 ('SUBLANG_ARABIC_LIBYA', 0x04),

	386 ('SUBLANG_ARABIC_ALGERIA', 0x05),

	387 ('SUBLANG_ARABIC_MOROCCO', 0x06),

	388 ('SUBLANG_ARABIC_TUNISIA', 0x07),

	389 ('SUBLANG_ARABIC_OMAN', 0x08),

	390 ('SUBLANG_ARABIC_YEMEN', 0x09),

	391 ('SUBLANG_ARABIC_SYRIA', 0x0a),

	392 ('SUBLANG_ARABIC_JORDAN', 0x0b),

	393 ('SUBLANG_ARABIC_LEBANON', 0x0c),

	394 ('SUBLANG_ARABIC_KUWAIT', 0x0d),

	395 ('SUBLANG_ARABIC_UAE', 0x0e),

	396 ('SUBLANG_ARABIC_BAHRAIN', 0x0f),

	397 ('SUBLANG_ARABIC_QATAR', 0x10),

	398 ('SUBLANG_AZERI_LATIN', 0x01),

	399 ('SUBLANG_AZERI_CYRILLIC', 0x02),

	400 ('SUBLANG_CHINESE_TRADITIONAL', 0x01),

	401 ('SUBLANG_CHINESE_SIMPLIFIED', 0x02),

	402 ('SUBLANG_CHINESE_HONGKONG', 0x03),

	403 ('SUBLANG_CHINESE_SINGAPORE', 0x04),

	404 ('SUBLANG_CHINESE_MACAU', 0x05),

	405 ('SUBLANG_DUTCH', 0x01),

	406 ('SUBLANG_DUTCH_BELGIAN', 0x02),

	407 ('SUBLANG_ENGLISH_US', 0x01),

	408 ('SUBLANG_ENGLISH_UK', 0x02),

	409 ('SUBLANG_ENGLISH_AUS', 0x03),

	410 ('SUBLANG_ENGLISH_CAN', 0x04),

	411 ('SUBLANG_ENGLISH_NZ', 0x05),

	412 ('SUBLANG_ENGLISH_EIRE', 0x06),

	413 ('SUBLANG_ENGLISH_SOUTH_AFRICA', 0x07),

	414 ('SUBLANG_ENGLISH_JAMAICA', 0x08),

	415 ('SUBLANG_ENGLISH_CARIBBEAN', 0x09),

	416 ('SUBLANG_ENGLISH_BELIZE', 0x0a),

	417 ('SUBLANG_ENGLISH_TRINIDAD', 0x0b),

	418 ('SUBLANG_ENGLISH_ZIMBABWE', 0x0c),

	419 ('SUBLANG_ENGLISH_PHILIPPINES', 0x0d),

	420 ('SUBLANG_FRENCH', 0x01),

	421 ('SUBLANG_FRENCH_BELGIAN', 0x02),

	422 ('SUBLANG_FRENCH_CANADIAN', 0x03),

	423 ('SUBLANG_FRENCH_SWISS', 0x04),

	424 ('SUBLANG_FRENCH_LUXEMBOURG', 0x05),

	425 ('SUBLANG_FRENCH_MONACO', 0x06),

	426 ('SUBLANG_GERMAN', 0x01),

	427 ('SUBLANG_GERMAN_SWISS', 0x02),

	428 ('SUBLANG_GERMAN_AUSTRIAN', 0x03),

	429 ('SUBLANG_GERMAN_LUXEMBOURG', 0x04),

	430 ('SUBLANG_GERMAN_LIECHTENSTEIN', 0x05),

	431 ('SUBLANG_ITALIAN', 0x01),

	432 ('SUBLANG_ITALIAN_SWISS', 0x02),

	433 ('SUBLANG_KASHMIRI_SASIA', 0x02),

	434 ('SUBLANG_KASHMIRI_INDIA', 0x02),

	435 ('SUBLANG_KOREAN', 0x01),

	436 ('SUBLANG_LITHUANIAN', 0x01),

	437 ('SUBLANG_MALAY_MALAYSIA', 0x01),

	438 ('SUBLANG_MALAY_BRUNEI_DARUSSALAM', 0x02),

	439 ('SUBLANG_NEPALI_INDIA', 0x02),

	440 ('SUBLANG_NORWEGIAN_BOKMAL', 0x01),

	441 ('SUBLANG_NORWEGIAN_NYNORSK', 0x02),

	442 ('SUBLANG_PORTUGUESE', 0x02),

	443 ('SUBLANG_PORTUGUESE_BRAZILIAN', 0x01),

	444 ('SUBLANG_SERBIAN_LATIN', 0x02),

	445 ('SUBLANG_SERBIAN_CYRILLIC', 0x03),

	446 ('SUBLANG_SPANISH', 0x01),

	447 ('SUBLANG_SPANISH_MEXICAN', 0x02),

	448 ('SUBLANG_SPANISH_MODERN', 0x03),

	449 ('SUBLANG_SPANISH_GUATEMALA', 0x04),

	450 ('SUBLANG_SPANISH_COSTA_RICA', 0x05),

	451 ('SUBLANG_SPANISH_PANAMA', 0x06),

	452 ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC', 0x07),

	453 ('SUBLANG_SPANISH_VENEZUELA', 0x08),

	454 ('SUBLANG_SPANISH_COLOMBIA', 0x09),

	455 ('SUBLANG_SPANISH_PERU', 0x0a),

	456 ('SUBLANG_SPANISH_ARGENTINA', 0x0b),

	457 ('SUBLANG_SPANISH_ECUADOR', 0x0c),

	458 ('SUBLANG_SPANISH_CHILE', 0x0d),

	459 ('SUBLANG_SPANISH_URUGUAY', 0x0e),

	460 ('SUBLANG_SPANISH_PARAGUAY', 0x0f),

	461 ('SUBLANG_SPANISH_BOLIVIA', 0x10),

	462 ('SUBLANG_SPANISH_EL_SALVADOR', 0x11),

	463 ('SUBLANG_SPANISH_HONDURAS', 0x12),

	464 ('SUBLANG_SPANISH_NICARAGUA', 0x13),

	465 ('SUBLANG_SPANISH_PUERTO_RICO', 0x14),

	466 ('SUBLANG_SWEDISH', 0x01),

	467 ('SUBLANG_SWEDISH_FINLAND', 0x02),

	468 ('SUBLANG_URDU_PAKISTAN', 0x01),

	469 ('SUBLANG_URDU_INDIA', 0x02),

	470 ('SUBLANG_UZBEK_LATIN', 0x01),

	471 ('SUBLANG_UZBEK_CYRILLIC', 0x02),

	472 ('SUBLANG_DUTCH_SURINAM', 0x03),

	473 ('SUBLANG_ROMANIAN', 0x01),

	474 ('SUBLANG_ROMANIAN_MOLDAVIA', 0x02),

	475 ('SUBLANG_RUSSIAN', 0x01),

	476 ('SUBLANG_RUSSIAN_MOLDAVIA', 0x02),

	477 ('SUBLANG_CROATIAN', 0x01),

	478 ('SUBLANG_LITHUANIAN_CLASSIC', 0x02),

	479 ('SUBLANG_GAELIC', 0x01),

	480 ('SUBLANG_GAELIC_SCOTTISH', 0x02),

	481 ('SUBLANG_GAELIC_MANX', 0x03) ]

	482

	483 SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])

	484

	485

	486 class UnicodeStringWrapperPostProcessor:

	487 """This class attemps to help the process of identifying strings

	488 that might be plain Unicode or Pascal. A list of strings will be

	489 wrapped on it with the hope the overlappings will help make the

	490 decission about their type."""

	491

	492 def __init__(self, pe, rva_ptr):

	493 self.pe = pe

	494 self.rva_ptr = rva_ptr

	495 self.string = None

	496

	497

	498 def get_rva(self):

	499 """Get the RVA of the string."""

	500

	501 return self.rva_ptr

	502

	503

	504 def __str__(self):

	505 """Return the escaped ASCII representation of the string."""

	506

	507 def convert_char(char):

	508 if char in string.printable:

	509 return char

	510 else:

	511 return r'\x%02x' % ord(char)

	512

	513 if self.string:

	514 return ''.join([convert_char(c) for c in self.string])

	515

	516 return ''

	517

	518

	519 def invalidate(self):

	520 """Make this instance None, to express it's no known string type."""

	521

	522 self = None

	523

	524

	525 def render_pascal_16(self):

	526

	527 self.string = self.pe.get_string_u_at_rva(

	528 self.rva_ptr+2,

	529 max_length=self.__get_pascal_16_length())

	530

	531

	532 def ask_pascal_16(self, next_rva_ptr):

	533 """The next RVA is taken to be the one immediately following this one.

	534

	535 Such RVA could indicate the natural end of the string and will be checke d

	536 with the possible length contained in the first word.

	537 """

	538

	539 length = self.__get_pascal_16_length()

	540

	541 if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:

	542 self.length = length

	543 return True

	544

	545 return False

	546

	547

	548 def __get_pascal_16_length(self):

	549

	550 return self.__get_word_value_at_rva(self.rva_ptr)

	551

	552

	553 def __get_word_value_at_rva(self, rva):

	554

	555 try:

	556 data = self.pe.get_data(self.rva_ptr, 2)

	557 except PEFormatError, e:

	558 return False

	559

	560 if len(data)<2:

	561 return False

	562

	563 return struct.unpack('<H', data)[0]

	564

	565

	566 #def render_pascal_8(self):

	567 # """"""

	568

	569

	570 def ask_unicode_16(self, next_rva_ptr):

	571 """The next RVA is taken to be the one immediately following this one.

	572

	573 Such RVA could indicate the natural end of the string and will be checke d

	574 to see if there's a Unicode NULL character there.

	575 """

	576

	577 if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:

	578 self.length = next_rva_ptr - self.rva_ptr

	579 return True

	580

	581 return False

	582

	583

	584 def render_unicode_16(self):

	585 """"""

	586

	587 self.string = self.pe.get_string_u_at_rva(self.rva_ptr)

	588

	589

	590 class PEFormatError(Exception):

	591 """Generic PE format error exception."""

	592

	593 def __init__(self, value):

	594 self.value = value

	595

	596 def __str__(self):

	597 return repr(self.value)

	598

	599

	600 class Dump:

	601 """Convenience class for dumping the PE information."""

	602

	603 def __init__(self):

	604 self.text = ''

	605

	606

	607 def add_lines(self, txt, indent=0):

	608 """Adds a list of lines.

	609

	610 The list can be indented with the optional argument 'indent'.

	611 """

	612 for line in txt:

	613 self.add_line(line, indent)

	614

	615

	616 def add_line(self, txt, indent=0):

	617 """Adds a line.

	618

	619 The line can be indented with the optional argument 'indent'.

	620 """

	621

	622 self.add(txt+'\n', indent)

	623

	624

	625 def add(self, txt, indent=0):

	626 """Adds some text, no newline will be appended.

	627

	628 The text can be indented with the optional argument 'indent'.

	629 """

	630

	631 if isinstance(txt, unicode):

	632 s = []

	633 for c in txt:

	634 try:

	635 s.append(str(c))

	636 except UnicodeEncodeError, e:

	637 s.append(repr(c))

	638

	639 txt = ''.join(s)

	640

	641 self.text += ' '*indent+txt

	642

	643

	644 def add_header(self, txt):

	645 """Adds a header element."""

	646

	647 self.add_line('-'10+txt+'-'10+'\n')

	648

	649

	650 def add_newline(self):

	651 """Adds a newline."""

	652

	653 self.text += '\n'

	654

	655

	656 def get_text(self):

	657 """Get the text in its current state."""

	658

	659 return self.text

	660

	661

	662

	663 class Structure:

	664 """Prepare structure object to extract members from data.

	665

	666 Format is a list containing definitions for the elements

	667 of the structure.

	668 """

	669

	670

	671 def __init__(self, format, name=None, file_offset=None):

	672 # Format is forced little endian, for big endian non Intel platforms

	673 self.__format__ = '<'

	674 self.__keys__ = []

	675 # self.values = {}

	676 self.__format_length__ = 0

	677 self.__set_format__(format[1])

	678 self._all_zeroes = False

	679 self.__unpacked_data_elms__ = None

	680 self.__file_offset__ = file_offset

	681 if name:

	682 self.name = name

	683 else:

	684 self.name = format[0]

	685

	686

	687 def __get_format__(self):

	688 return self.__format__

	689

	690

	691 def get_file_offset(self):

	692 return self.__file_offset__

	693

	694 def set_file_offset(self, offset):

	695 self.__file_offset__ = offset

	696

	697 def all_zeroes(self):

	698 """Returns true is the unpacked data is all zeroes."""

	699

	700 return self._all_zeroes

	701

	702

	703 def __set_format__(self, format):

	704

	705 for elm in format:

	706 if ',' in elm:

	707 elm_type, elm_name = elm.split(',', 1)

	708 self.__format__ += elm_type

	709

	710 elm_names = elm_name.split(',')

	711 names = []

	712 for elm_name in elm_names:

	713 if elm_name in self.__keys__:

	714 search_list = [x[:len(elm_name)] for x in self.__keys__]

	715 occ_count = search_list.count(elm_name)

	716 elm_name = elm_name+'_'+str(occ_count)

	717 names.append(elm_name)

	718 # Some PE header structures have unions on them, so a certain

	719 # value might have different names, so each key has a list of

	720 # all the possible members referring to the data.

	721 self.__keys__.append(names)

	722

	723 self.__format_length__ = struct.calcsize(self.__format__)

	724

	725

	726 def sizeof(self):

	727 """Return size of the structure."""

	728

	729 return self.__format_length__

	730

	731

	732 def __unpack__(self, data):

	733

	734 if len(data)>self.__format_length__:

	735 data = data[:self.__format_length__]

	736

	737 # OC Patch:

	738 # Some malware have incorrect header lengths.

	739 # Fail gracefully if this occurs

	740 # Buggy malware: a29b0118af8b7408444df81701ad5a7f

	741 #

	742 elif len(data)<self.__format_length__:

	743 raise PEFormatError('Data length less than expected header length.')

	744

	745

	746 if data.count(chr(0)) == len(data):

	747 self._all_zeroes = True

	748

	749 self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)

	750 for i in xrange(len(self.__unpacked_data_elms__)):

	751 for key in self.__keys__[i]:

	752 # self.values[key] = self.__unpacked_data_elms__[i]

	753 setattr(self, key, self.__unpacked_data_elms__[i])

	754

	755

	756 def __pack__(self):

	757

	758 new_values = []

	759

	760 for i in xrange(len(self.__unpacked_data_elms__)):

	761

	762 for key in self.__keys__[i]:

	763 new_val = getattr(self, key)

	764 old_val = self.__unpacked_data_elms__[i]

	765

	766 # In the case of Unions, when the first changed value

	767 # is picked the loop is exited

	768 if new_val != old_val:

	769 break

	770

	771 new_values.append(new_val)

	772

	773 return struct.pack(self.__format__, *new_values)

	774

	775

	776 def __str__(self):

	777 return '\n'.join( self.dump() )

	778

	779 def __repr__(self):

	780 return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self .dump()] ))

	781

	782

	783 def dump(self, indentation=0):

	784 """Returns a string representation of the structure."""

	785

	786 dump = []

	787

	788 dump.append('[%s]' % self.name)

	789

	790 # Refer to the __set_format__ method for an explanation

	791 # of the following construct.

	792 for keys in self.__keys__:

	793 for key in keys:

	794

	795 val = getattr(self, key)

	796 if isinstance(val, int) or isinstance(val, long):

	797 val_str = '0x%-8X' % (val)

	798 if key == 'TimeDateStamp' or key == 'dwTimeStamp':

	799 try:

	800 val_str += ' [%s UTC]' % time.asctime(time.gmtime(va l))

	801 except exceptions.ValueError, e:

	802 val_str += ' [INVALID TIME]'

	803 else:

	804 val_str = ''.join(filter(lambda c:c != '\0', str(val)))

	805

	806 dump.append('%-30s %s' % (key+':', val_str))

	807

	808 return dump

	809

	810

	811

	812 class SectionStructure(Structure):

	813 """Convenience section handling class."""

	814

	815 def get_data(self, start, length=None):

	816 """Get data chunk from a section.

	817

	818 Allows to query data from the section by passing the

	819 addresses where the PE file would be loaded by default.

	820 It is then possible to retrieve code and data by its real

	821 addresses as it would be if loaded.

	822 """

	823

	824 offset = start - self.VirtualAddress

	825

	826 if length:

	827 end = offset+length

	828 else:

	829 end = len(self.data)

	830

	831 return self.data[offset:end]

	832

	833

	834 def get_rva_from_offset(self, offset):

	835 return offset - self.PointerToRawData + self.VirtualAddress

	836

	837

	838 def get_offset_from_rva(self, rva):

	839 return (rva - self.VirtualAddress) + self.PointerToRawData

	840

	841

	842 def contains_offset(self, offset):

	843 """Check whether the section contains the file offset provided."""

	844

	845 if not self.PointerToRawData:

	846 # bss and other sections containing only uninitialized data must have 0

	847 # and do not take space in the file

	848 return False

	849 return self.PointerToRawData <= offset < self.VirtualAddress + self.Size OfRawData

	850

	851

	852 def contains_rva(self, rva):

	853 """Check whether the section contains the address provided."""

	854

	855 # PECOFF documentation v8 says:

	856 # The total size of the section when loaded into memory.

	857 # If this value is greater than SizeOfRawData, the section is zero-padde d.

	858 # This field is valid only for executable images and should be set to ze ro

	859 # for object files.

	860

	861 if len(self.data) < self.SizeOfRawData:

	862 size = self.Misc_VirtualSize

	863 else:

	864 size = max(self.SizeOfRawData, self.Misc_VirtualSize)

	865

	866 return self.VirtualAddress <= rva < self.VirtualAddress + size

	867

	868 def contains(self, rva):

	869 #print "DEPRECATION WARNING: you should use contains_rva() instead of co ntains()"

	870 return self.contains_rva(rva)

	871

	872

	873 def set_data(self, data):

	874 """Set the data belonging to the section."""

	875

	876 self.data = data

	877

	878

	879 def get_entropy(self):

	880 """Calculate and return the entropy for the section."""

	881

	882 return self.entropy_H( self.data )

	883

	884

	885 def get_hash_sha1(self):

	886 """Get the SHA-1 hex-digest of the section's data."""

	887

	888 if sha1 is not None:

	889 return sha1( self.data ).hexdigest()

	890

	891

	892 def get_hash_sha256(self):

	893 """Get the SHA-256 hex-digest of the section's data."""

	894

	895 if sha256 is not None:

	896 return sha256( self.data ).hexdigest()

	897

	898

	899 def get_hash_sha512(self):

	900 """Get the SHA-512 hex-digest of the section's data."""

	901

	902 if sha512 is not None:

	903 return sha512( self.data ).hexdigest()

	904

	905

	906 def get_hash_md5(self):

	907 """Get the MD5 hex-digest of the section's data."""

	908

	909 if md5 is not None:

	910 return md5( self.data ).hexdigest()

	911

	912

	913 def entropy_H(self, data):

	914 """Calculate the entropy of a chunk of data."""

	915

	916 if len(data) == 0:

	917 return 0.0

	918

	919 occurences = array.array('L', [0]*256)

	920

	921 for x in data:

	922 occurences[ord(x)] += 1

	923

	924 entropy = 0

	925 for x in occurences:

	926 if x:

	927 p_x = float(x) / len(data)

	928 entropy -= p_x*math.log(p_x, 2)

	929

	930 return entropy

	931

	932

	933

	934 class DataContainer:

	935 """Generic data container."""

	936

	937 def __init__(self, **args):

	938 for key, value in args.items():

	939 setattr(self, key, value)

	940

	941

	942

	943 class ImportDescData(DataContainer):

	944 """Holds import descriptor information.

	945

	946 dll: name of the imported DLL

	947 imports: list of imported symbols (ImportData instances)

	948 struct: IMAGE_IMPORT_DESCRIPTOR sctruture

	949 """

	950

	951 class ImportData(DataContainer):

	952 """Holds imported symbol's information.

	953

	954 ordinal: Ordinal of the symbol

	955 name: Name of the symbol

	956 bound: If the symbol is bound, this contains

	957 the address.

	958 """

	959

	960 class ExportDirData(DataContainer):

	961 """Holds export directory information.

	962

	963 struct: IMAGE_EXPORT_DIRECTORY structure

	964 symbols: list of exported symbols (ExportData instances)

	965 """

	966

	967 class ExportData(DataContainer):

	968 """Holds exported symbols' information.

	969

	970 ordinal: ordinal of the symbol

	971 address: address of the symbol

	972 name: name of the symbol (None if the symbol is

	973 exported by ordinal only)

	974 forwarder: if the symbol is forwarded it will

	975 contain the name of the target symbol,

	976 None otherwise.

	977 """

	978

	979

	980 class ResourceDirData(DataContainer):

	981 """Holds resource directory information.

	982

	983 struct: IMAGE_RESOURCE_DIRECTORY structure

	984 entries: list of entries (ResourceDirEntryData instances)

	985 """

	986

	987 class ResourceDirEntryData(DataContainer):

	988 """Holds resource directory entry data.

	989

	990 struct: IMAGE_RESOURCE_DIRECTORY_ENTRY structure

	991 name: If the resource is identified by name this

	992 attribute will contain the name string. None

	993 otherwise. If identified by id, the id is

	994 availabe at 'struct.Id'

	995 id: the id, also in struct.Id

	996 directory: If this entry has a lower level directory

	997 this attribute will point to the

	998 ResourceDirData instance representing it.

	999 data: If this entry has no futher lower directories

	1000 and points to the actual resource data, this

	1001 attribute will reference the corresponding

	1002 ResourceDataEntryData instance.

	1003 (Either of the 'directory' or 'data' attribute will exist,

	1004 but not both.)

	1005 """

	1006

	1007 class ResourceDataEntryData(DataContainer):

	1008 """Holds resource data entry information.

	1009

	1010 struct: IMAGE_RESOURCE_DATA_ENTRY structure

	1011 lang: Primary language ID

	1012 sublang: Sublanguage ID

	1013 """

	1014

class DebugData(DataContainer):
    """Holds debug information.

    struct:     IMAGE_DEBUG_DIRECTORY structure
    """

	1020

class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """

	1027

class RelocationData(DataContainer):
    """Holds relocation information.

    type:       Type of relocation.
                The type string can be obtained by
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """

	1036

class TlsData(DataContainer):
    """Holds TLS information.

    struct:     IMAGE_TLS_DIRECTORY structure
    """

	1042

class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry provides information on the DLLs this
    PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """

	1060

class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """

	1070

	1071

	1072 class PE:

	1073 """A Portable Executable representation.

	1074

	1075 This class provides access to most of the information in a PE file.

	1076

	1077 It expects to be supplied the name of the file to load or PE data

	1078 to process and an optional argument 'fast_load' (False by default)

	1079 which controls whether to load all the directories information,

	1080 which can be quite time consuming.

	1081

	1082 pe = pefile.PE('module.dll')

	1083 pe = pefile.PE(name='module.dll')

	1084

	1085 would load 'module.dll' and process it. If the data would be already

	1086 available in a buffer the same could be achieved with:

	1087

	1088 pe = pefile.PE(data=module_dll_data)

	1089

	1090 The "fast_load" can be set to a default by setting its value in the

	1091 module itself by means, for instance, of a "pefile.fast_load = True".

	1092 That will make all the subsequent instances not to load the

	1093 whole PE structure. The "full_load" method can be used to parse

	1094 the missing data at a later stage.

	1095

	1096 Basic headers information will be available in the attributes:

	1097

	1098 DOS_HEADER

	1099 NT_HEADERS

	1100 FILE_HEADER

	1101 OPTIONAL_HEADER

	1102

	1103 All of them will contain among their attributes the members of the

	1104 corresponding structures as defined in WINNT.H

	1105

	1106 The raw data corresponding to the header (from the beginning of the

	1107 file up to the start of the first section) will be available in the

	1108 instance's attribute 'header' as a string.

	1109

	1110 The sections will be available as a list in the 'sections' attribute.

	1111 Each entry will contain as attributes all the structure's members.

	1112

	1113 Directory entries will be available as attributes (if they exist):

	1114 (no other entries are processed at this point)

	1115

	1116 DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)

	1117 DIRECTORY_ENTRY_EXPORT (ExportDirData instance)

	1118 DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)

	1119 DIRECTORY_ENTRY_DEBUG (list of DebugData instances)

	1120 DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)

	1121 DIRECTORY_ENTRY_TLS

	1122 DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportDescData instances)

	1123

	1124 The following dictionary attributes provide ways of mapping different

	1125 constants. They will accept the numeric value and return the string

	1126 representation and the opposite, feed in the string and get the

	1127 numeric constant:

	1128

	1129 DIRECTORY_ENTRY

	1130 IMAGE_CHARACTERISTICS

	1131 SECTION_CHARACTERISTICS

	1132 DEBUG_TYPE

	1133 SUBSYSTEM_TYPE

	1134 MACHINE_TYPE

	1135 RELOCATION_TYPE

	1136 RESOURCE_TYPE

	1137 LANG

	1138 SUBLANG

	1139 """

	1140

	1141 #

	1142 # Format specifications for PE structures.

	1143 #

	1144

	1145 __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',

	1146 ('H,e_magic', 'H,e_cblp', 'H,e_cp',

	1147 'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',

	1148 'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',

	1149 'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',

	1150 'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',

	1151 'L,e_lfanew'))

	1152

	1153 __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',

	1154 ('H,Machine', 'H,NumberOfSections',

	1155 'L,TimeDateStamp', 'L,PointerToSymbolTable',

	1156 'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',

	1157 'H,Characteristics'))

	1158

	1159 __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',

	1160 ('L,VirtualAddress', 'L,Size'))

	1161

	1162

	1163 __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',

	1164 ('H,Magic', 'B,MajorLinkerVersion',

	1165 'B,MinorLinkerVersion', 'L,SizeOfCode',

	1166 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',

	1167 'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',

	1168 'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',

	1169 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',

	1170 'H,MajorImageVersion', 'H,MinorImageVersion',

	1171 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',

	1172 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',

	1173 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',

	1174 'L,SizeOfStackReserve', 'L,SizeOfStackCommit',

	1175 'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',

	1176 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

	1177

	1178

	1179 __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',

	1180 ('H,Magic', 'B,MajorLinkerVersion',

	1181 'B,MinorLinkerVersion', 'L,SizeOfCode',

	1182 'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',

	1183 'L,AddressOfEntryPoint', 'L,BaseOfCode',

	1184 'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',

	1185 'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',

	1186 'H,MajorImageVersion', 'H,MinorImageVersion',

	1187 'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',

	1188 'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',

	1189 'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',

	1190 'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',

	1191 'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',

	1192 'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

	1193

	1194

	1195 __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))

	1196

	1197 __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',

	1198 ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',

	1199 'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',

	1200 'L,PointerToRelocations', 'L,PointerToLinenumbers',

	1201 'H,NumberOfRelocations', 'H,NumberOfLinenumbers',

	1202 'L,Characteristics'))

	1203

	1204 __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',

	1205 ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',

	1206 'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

	1207

	1208 __IMAGE_IMPORT_DESCRIPTOR_format__ = ('IMAGE_IMPORT_DESCRIPTOR',

	1209 ('L,OriginalFirstThunk,Characteristics',

	1210 'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

	1211

	1212 __IMAGE_EXPORT_DIRECTORY_format__ = ('IMAGE_EXPORT_DIRECTORY',

	1213 ('L,Characteristics',

	1214 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',

	1215 'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',

	1216 'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

	1217

	1218 __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',

	1219 ('L,Characteristics',

	1220 'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',

	1221 'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

	1222

	1223 __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY ',

	1224 ('L,Name',

	1225 'L,OffsetToData'))

	1226

	1227 __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',

	1228 ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))

	1229

	1230 __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',

	1231 ('H,Length', 'H,ValueLength', 'H,Type' ))

	1232

	1233 __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',

	1234 ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',

	1235 'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileF lags',

	1236 'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateL S'))

	1237

	1238 __StringFileInfo_format__ = ( 'StringFileInfo',

	1239 ('H,Length', 'H,ValueLength', 'H,Type' ))

	1240

	1241 __StringTable_format__ = ( 'StringTable',

	1242 ('H,Length', 'H,ValueLength', 'H,Type' ))

	1243

	1244 __String_format__ = ( 'String',

	1245 ('H,Length', 'H,ValueLength', 'H,Type' ))

	1246

	1247 __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))

	1248

	1249 __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',

	1250 ('L,ForwarderString,Function,Ordinal,AddressOfData',))

	1251

	1252 __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',

	1253 ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

	1254

	1255 __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',

	1256 ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',

	1257 'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',

	1258 'L,PointerToRawData'))

	1259

	1260 __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',

	1261 ('L,VirtualAddress', 'L,SizeOfBlock') )

	1262

	1263 __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',

	1264 ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',

	1265 'L,AddressOfIndex', 'L,AddressOfCallBacks',

	1266 'L,SizeOfZeroFill', 'L,Characteristics' ) )

	1267

	1268 __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',

	1269 ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',

	1270 'Q,AddressOfIndex', 'Q,AddressOfCallBacks',

	1271 'L,SizeOfZeroFill', 'L,Characteristics' ) )

	1272

	1273 __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',

	1274 ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs '))

	1275

	1276 __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',

	1277 ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )

	1278

	1279

	1280 def __init__(self, name=None, data=None, fast_load=None):

	1281

	1282 self.sections = []

	1283

	1284 self.__warnings = []

	1285

	1286 self.PE_TYPE = None

	1287

	1288 if not name and not data:

	1289 return

	1290

	1291 # This list will keep track of all the structures created.

	1292 # That will allow for an easy iteration through the list

	1293 # in order to save the modifications made

	1294 self.__structures__ = []

	1295

	1296 if not fast_load:

	1297 fast_load = globals()['fast_load']

	1298 self.__parse__(name, data, fast_load)

	1299

	1300

	1301

	1302 def __unpack_data__(self, format, data, file_offset):

	1303 """Apply structure format to raw data.

	1304

	1305 Returns and unpacked structure object if successful, None otherwise.

	1306 """

	1307

	1308 structure = Structure(format, file_offset=file_offset)

	1309 #if len(data) < structure.sizeof():

	1310 # return None

	1311

	1312 try:

	1313 structure.__unpack__(data)

	1314 except PEFormatError, err:

	1315 self.__warnings.append(

	1316 'Corrupt header "%s" at file offset %d. Exception: %s' % (

	1317 format[0], file_offset, str(err)) )

	1318 return None

	1319

	1320 self.__structures__.append(structure)

	1321

	1322 return structure

	1323

	1324

	1325

	1326 def __parse__(self, fname, data, fast_load):

	1327 """Parse a Portable Executable file.

	1328

	1329 Loads a PE file, parsing all its structures and making them available

	1330 through the instance's attributes.

	1331 """

	1332

	1333 if fname:

	1334 fd = file(fname, 'rb')

	1335 self.__data__ = fd.read()

	1336 fd.close()

	1337 elif data:

	1338 self.__data__ = data

	1339

	1340

	1341 self.DOS_HEADER = self.__unpack_data__(

	1342 self.__IMAGE_DOS_HEADER_format__,

	1343 self.__data__, file_offset=0)

	1344

	1345 if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE :

	1346 raise PEFormatError('DOS Header magic not found.')

	1347

	1348 # OC Patch:

	1349 # Check for sane value in e_lfanew

	1350 #

	1351 if self.DOS_HEADER.e_lfanew > len(self.__data__):

	1352 raise PEFormatError('Invalid e_lfanew value, probably not a PE file' )

	1353

	1354 nt_headers_offset = self.DOS_HEADER.e_lfanew

	1355

	1356 self.NT_HEADERS = self.__unpack_data__(

	1357 self.__IMAGE_NT_HEADERS_format__,

	1358 self.__data__[nt_headers_offset:],

	1359 file_offset = nt_headers_offset)

	1360

	1361 # We better check the signature right here, before the file screws

	1362 # around with sections:

	1363 # OC Patch:

	1364 # Some malware will cause the Signature value to not exist at all

	1365 if not self.NT_HEADERS or not self.NT_HEADERS.Signature:

	1366 raise PEFormatError('NT Headers not found.')

	1367

	1368 if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:

	1369 raise PEFormatError('Invalid NT Headers signature.')

	1370

	1371 self.FILE_HEADER = self.__unpack_data__(

	1372 self.__IMAGE_FILE_HEADER_format__,

	1373 self.__data__[nt_headers_offset+4:],

	1374 file_offset = nt_headers_offset+4)

	1375 image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')

	1376

	1377 if not self.FILE_HEADER:

	1378 raise PEFormatError('File Header missing')

	1379

	1380 # Set the image's flags according the the Characteristics member

	1381 self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image _flags)

	1382

	1383 optional_header_offset = \

	1384 nt_headers_offset+4+self.FILE_HEADER.sizeof()

	1385

	1386 # Note: location of sections can be controlled from PE header:

	1387 sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOption alHeader

	1388

	1389 self.OPTIONAL_HEADER = self.__unpack_data__(

	1390 self.__IMAGE_OPTIONAL_HEADER_format__,

	1391 self.__data__[optional_header_offset:],

	1392 file_offset = optional_header_offset)

	1393

	1394 # According to solardesigner's findings for his

	1395 # Tiny PE project, the optional header does not

	1396 # need fields beyond "Subsystem" in order to be

	1397 # loadable by the Windows loader (given that zeroes

	1398 # are acceptable values and the header is loaded

	1399 # in a zeroed memory page)

	1400 # If trying to parse a full Optional Header fails

	1401 # we try to parse it again with some 0 padding

	1402 #

	1403 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69

	1404

	1405 if ( self.OPTIONAL_HEADER is None and

	1406 len(self.__data__[optional_header_offset:])

	1407 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):

	1408

	1409 # Add enough zeroes to make up for the unused fields

	1410 #

	1411 padding_length = 128

	1412

	1413 # Create padding

	1414 #

	1415 padded_data = self.__data__[optional_header_offset:] + (

	1416 '\0' * padding_length)

	1417

	1418 self.OPTIONAL_HEADER = self.__unpack_data__(

	1419 self.__IMAGE_OPTIONAL_HEADER_format__,

	1420 padded_data,

	1421 file_offset = optional_header_offset)

	1422

	1423

	1424 # Check the Magic in the OPTIONAL_HEADER and set the PE file

	1425 # type accordingly

	1426 #

	1427 if self.OPTIONAL_HEADER is not None:

	1428

	1429 if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:

	1430

	1431 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE

	1432

	1433 elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:

	1434

	1435 self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS

	1436

	1437 self.OPTIONAL_HEADER = self.__unpack_data__(

	1438 self.__IMAGE_OPTIONAL_HEADER64_format__,

	1439 self.__data__[optional_header_offset:],

	1440 file_offset = optional_header_offset)

	1441

	1442 # Again, as explained above, we try to parse

	1443 # a reduced form of the Optional Header which

	1444 # is still valid despite not including all

	1445 # structure members

	1446 #

	1447 MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4

	1448

	1449 if ( self.OPTIONAL_HEADER is None and

	1450 len(self.__data__[optional_header_offset:])

	1451 >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):

	1452

	1453 padding_length = 128

	1454 padded_data = self.__data__[optional_header_offset:] + (

	1455 '\0' * padding_length)

	1456 self.OPTIONAL_HEADER = self.__unpack_data__(

	1457 self.__IMAGE_OPTIONAL_HEADER64_format__,

	1458 padded_data,

	1459 file_offset = optional_header_offset)

	1460

	1461

	1462 if not self.FILE_HEADER:

	1463 raise PEFormatError('File Header missing')

	1464

	1465

	1466 # OC Patch:

	1467 # Die gracefully if there is no OPTIONAL_HEADER field

	1468 # 975440f5ad5e2e4a92c4d9a5f22f75c1

	1469 if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:

	1470 raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")

	1471

	1472 dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IM AGE_DLL_CHARACTERISTICS_')

	1473

	1474 # Set the Dll Characteristics flags according the the DllCharacteristics member

	1475 self.set_flags(

	1476 self.OPTIONAL_HEADER,

	1477 self.OPTIONAL_HEADER.DllCharacteristics,

	1478 dll_characteristics_flags)

	1479

	1480

	1481 self.OPTIONAL_HEADER.DATA_DIRECTORY = []

	1482 #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeade r)

	1483 offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())

	1484

	1485

	1486 self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER

	1487 self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER

	1488

	1489

	1490 # The NumberOfRvaAndSizes is sanitized to stay within

	1491 # reasonable limits so can be casted to an int

	1492 #

	1493 if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:

	1494 self.__warnings.append(

	1495 'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +

	1496 'Normal values are never larger than 0x10, the value is: 0x%x' %

	1497 self.OPTIONAL_HEADER.NumberOfRvaAndSizes )

	1498

	1499 for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSiz es)):

	1500

	1501 if len(self.__data__[offset:]) == 0:

	1502 break

	1503

	1504 if len(self.__data__[offset:]) < 8:

	1505 data = self.__data__[offset:]+'\0'*8

	1506 else:

	1507 data = self.__data__[offset:]

	1508

	1509 dir_entry = self.__unpack_data__(

	1510 self.__IMAGE_DATA_DIRECTORY_format__,

	1511 data,

	1512 file_offset = offset)

	1513

	1514 if dir_entry is None:

	1515 break

	1516

	1517 # Would fail if missing an entry

	1518 # 1d4937b2fa4d84ad1bce0309857e70ca offending sample

	1519 try:

	1520 dir_entry.name = DIRECTORY_ENTRY[i]

	1521 except (KeyError, AttributeError):

	1522 break

	1523

	1524 offset += dir_entry.sizeof()

	1525

	1526 self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)

	1527

	1528 # If the offset goes outside the optional header,

	1529 # the loop is broken, regardless of how many directories

	1530 # NumberOfRvaAndSizes says there are

	1531 #

	1532 # We assume a normally sized optional header, hence that we do

	1533 # a sizeof() instead of reading SizeOfOptionalHeader.

	1534 # Then we add a default number of drectories times their size,

	1535 # if we go beyond that, we assume the number of directories

	1536 # is wrong and stop processing

	1537 if offset >= (optional_header_offset +

	1538 self.OPTIONAL_HEADER.sizeof() + 8*16) :

	1539

	1540 break

	1541

	1542

	1543 offset = self.parse_sections(sections_offset)

	1544

	1545 # OC Patch:

	1546 # There could be a problem if there are no raw data sections

	1547 # greater than 0

	1548 # fc91013eb72529da005110a3403541b6 example

	1549 # Should this throw an exception in the minimum header offset

	1550 # can't be found?

	1551 #

	1552 rawDataPointers = [

	1553 s.PointerToRawData for s in self.sections if s.PointerToRawData>0]

	1554

	1555 if len(rawDataPointers) > 0:

	1556 lowest_section_offset = min(rawDataPointers)

	1557 else:

	1558 lowest_section_offset = None

	1559

	1560 if not lowest_section_offset or lowest_section_offset<offset:

	1561 self.header = self.__data__[:offset]

	1562 else:

	1563 self.header = self.__data__[:lowest_section_offset]

	1564

	1565

	1566 # Check whether the entry point lies within a section

	1567 #

	1568 if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:

	1569

	1570 # Check whether the entry point lies within the file

	1571 #

	1572 ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfE ntryPoint)

	1573 if ep_offset > len(self.__data__):

	1574

	1575 self.__warnings.append(

	1576 'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +

	1577 'AddressOfEntryPoint: 0x%x' %

	1578 self.OPTIONAL_HEADER.AddressOfEntryPoint )

	1579

	1580 else:

	1581

	1582 self.__warnings.append(

	1583 'AddressOfEntryPoint lies outside the sections\' boundaries. ' +

	1584 'AddressOfEntryPoint: 0x%x' %

	1585 self.OPTIONAL_HEADER.AddressOfEntryPoint )

	1586

	1587

	1588 if not fast_load:

	1589 self.parse_data_directories()

	1590

	1591

	1592 def get_warnings(self):

	1593 """Return the list of warnings.

	1594

	1595 Non-critical problems found when parsing the PE file are

	1596 appended to a list of warnings. This method returns the

	1597 full list.

	1598 """

	1599

	1600 return self.__warnings

	1601

	1602

	1603 def show_warnings(self):

	1604 """Print the list of warnings.

	1605

	1606 Non-critical problems found when parsing the PE file are

	1607 appended to a list of warnings. This method prints the

	1608 full list to standard output.

	1609 """

	1610

	1611 for warning in self.__warnings:

	1612 print '>', warning

	1613

	1614

	1615 def full_load(self):

	1616 """Process the data directories.

	1617

	1618 This mathod will load the data directories which might not have

	1619 been loaded if the "fast_load" option was used.

	1620 """

	1621

	1622 self.parse_data_directories()

	1623

	1624

	1625 def write(self, filename=None):

	1626 """Write the PE file.

	1627

	1628 This function will process all headers and components

	1629 of the PE file and include all changes made (by just

	1630 assigning to attributes in the PE objects) and write

	1631 the changes back to a file whose name is provided as

	1632 an argument. The filename is optional.

	1633 The data to be written to the file will be returned

	1634 as a 'str' object.

	1635 """

	1636

	1637 file_data = list(self.__data__)

	1638 for struct in self.__structures__:

	1639

	1640 struct_data = list(struct.__pack__())

	1641 offset = struct.get_file_offset()

	1642

	1643 file_data[offset:offset+len(struct_data)] = struct_data

	1644

	1645 if hasattr(self, 'VS_VERSIONINFO'):

	1646 if hasattr(self, 'FileInfo'):

	1647 for entry in self.FileInfo:

	1648 if hasattr(entry, 'StringTable'):

	1649 for st_entry in entry.StringTable:

	1650 for key, entry in st_entry.entries.items():

	1651

	1652 offsets = st_entry.entries_offsets[key]

	1653 lengths = st_entry.entries_lengths[key]

	1654

	1655 if len( entry ) > lengths[1]:

	1656

	1657 uc = zip(

	1658 list(entry[:lengths[1]]), ['\0'] * l engths[1] )

	1659 l = list()

	1660 map(l.extend, uc)

	1661

	1662 file_data[

	1663 offsets[1] : offsets[1] + lengths[1]*2 ] = l

	1664

	1665 else:

	1666

	1667 uc = zip(

	1668 list(entry), ['\0'] * len(entry) )

	1669 l = list()

	1670 map(l.extend, uc)

	1671

	1672 file_data[

	1673 offsets[1] : offsets[1] + len(entry)*2 ] = l

	1674

	1675 remainder = lengths[1] - len(entry)

	1676 file_data[

	1677 offsets[1] + len(entry)*2 :

	1678 offsets[1] + lengths[1]*2 ] = [

	1679 u'\0' ] * remainder*2

	1680

	1681 new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )

	1682

	1683 if filename:

	1684 f = file(filename, 'wb+')

	1685 f.write(new_file_data)

	1686 f.close()

	1687

	1688 return new_file_data

	1689

	1690

	1691

	1692 def parse_sections(self, offset):

	1693 """Fetch the PE file sections.

	1694

	1695 The sections will be readily available in the "sections" attribute.

	1696 Its attributes will contain all the section information plus "data"

	1697 a buffer containing the section's data.

	1698

	1699 The "Characteristics" member will be processed and attributes

	1700 representing the section characteristics (with the 'IMAGE_SCN_'

	1701 string trimmed from the constant's names) will be added to the

	1702 section instance.

	1703

	1704 Refer to the SectionStructure class for additional info.

	1705 """

	1706

	1707 self.sections = []

	1708

	1709 for i in xrange(self.FILE_HEADER.NumberOfSections):

	1710 section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)

	1711 if not section:

	1712 break

	1713 section_offset = offset + section.sizeof() * i

	1714 section.set_file_offset(section_offset)

	1715 section.__unpack__(self.__data__[section_offset:])

	1716 self.__structures__.append(section)

	1717

	1718 if section.SizeOfRawData > len(self.__data__):

	1719 self.__warnings.append(

	1720 ('Error parsing section %d. ' % i) +

	1721 'SizeOfRawData is larger than file.')

	1722

	1723 if section.PointerToRawData > len(self.__data__):

	1724 self.__warnings.append(

	1725 ('Error parsing section %d. ' % i) +

	1726 'PointerToRawData points beyond the end of the file.')

	1727

	1728 if section.Misc_VirtualSize > 0x10000000:

	1729 self.__warnings.append(

	1730 ('Suspicious value found parsing section %d. ' % i) +

	1731 'VirtualSize is extremely large > 256MiB.')

	1732

	1733 if section.VirtualAddress > 0x10000000:

	1734 self.__warnings.append(

	1735 ('Suspicious value found parsing section %d. ' % i) +

	1736 'VirtualAddress is beyond 0x10000000.')

	1737

	1738 #

	1739 # Some packer used a non-aligned PointerToRawData in the sections,

	1740 # which causes several common tools not to load the section data

	1741 # properly as they blindly read from the indicated offset.

	1742 # It seems that Windows will round the offset down to the largest

	1743 # offset multiple of FileAlignment which is smaller than

	1744 # PointerToRawData. The following code will do the same.

	1745 #

	1746

	1747 #alignment = self.OPTIONAL_HEADER.FileAlignment

	1748 section_data_start = section.PointerToRawData

	1749

	1750 if ( self.OPTIONAL_HEADER.FileAlignment != 0 and

	1751 (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):

	1752 self.__warnings.append(

	1753 ('Error parsing section %d. ' % i) +

	1754 'Suspicious value for FileAlignment in the Optional Header. ' +

	1755 'Normally the PointerToRawData entry of the sections\' struc tures ' +

	1756 'is a multiple of FileAlignment, this might imply the file ' +

	1757 'is trying to confuse tools which parse this incorrectly')

	1758

	1759 section_data_end = section_data_start+section.SizeOfRawData

	1760 section.set_data(self.__data__[section_data_start:section_data_end])

	1761

	1762 section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_ SCN_')

	1763

	1764 # Set the section's flags according the the Characteristics member

	1765 self.set_flags(section, section.Characteristics, section_flags)

	1766

	1767 if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and

	1768 section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):

	1769

	1770 self.__warnings.append(

	1771 ('Suspicious flags set for section %d. ' % i) +

	1772 'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set. ' +

	1773 'This might indicate a packed executable.')

	1774

	1775 self.sections.append(section)

	1776

	1777 if self.FILE_HEADER.NumberOfSections > 0 and self.sections:

	1778 return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfS ections

	1779 else:

	1780 return offset

	1781

	1782

	1783 def retrieve_flags(self, flag_dict, flag_filter):

	1784 """Read the flags from a dictionary and return them in a usable form.

	1785

	1786 Will return a list of (flag, value) for all flags in "flag_dict"

	1787 matching the filter "flag_filter".

	1788 """

	1789

	1790 return [(f[0], f[1]) for f in flag_dict.items() if

	1791 isinstance(f[0], str) and f[0].startswith(flag_filter)]

	1792

	1793

	1794 def set_flags(self, obj, flag_field, flags):

	1795 """Will process the flags and set attributes in the object accordingly.

	1796

	1797 The object "obj" will gain attritutes named after the flags provided in

	1798 "flags" and valued True/False, matching the results of applyin each

	1799 flag value from "flags" to flag_field.

	1800 """

	1801

	1802 for flag in flags:

	1803 if flag[1] & flag_field:

	1804 setattr(obj, flag[0], True)

	1805 else:

	1806 setattr(obj, flag[0], False)

	1807

	1808

	1809

	1810 def parse_data_directories(self):

	1811 """Parse and process the PE file's data directories."""

	1812

	1813 directory_parsing = (

	1814 ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),

	1815 ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),

	1816 ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),

	1817 ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),

	1818 ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory ),

	1819 ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),

	1820 ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_direc tory),

	1821 ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_im ports) )

	1822

	1823 for entry in directory_parsing:

	1824 # OC Patch:

	1825 #

	1826 try:

	1827 dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[

	1828 DIRECTORY_ENTRY[entry[0]]]

	1829 except IndexError:

	1830 break

	1831 if dir_entry.VirtualAddress:

	1832 value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)

	1833 if value:

	1834 setattr(self, entry[0][6:], value)

	1835

	1836

	1837 def parse_directory_bound_imports(self, rva, size):

	1838 """"""

	1839

	1840 bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)

	1841 bnd_descr_size = bnd_descr.sizeof()

	1842 start = rva

	1843

	1844 bound_imports = []

	1845 while True:

	1846

	1847 bnd_descr = self.__unpack_data__(

	1848 self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,

	1849 self.__data__[rva:rva+bnd_descr_size],

	1850 file_offset = rva)

	1851 if bnd_descr is None:

	1852 # If can't parse directory then silently return.

	1853 # This directory does not necesarily have to be valid to

	1854 # still have a valid PE file

	1855

	1856 self.__warnings.append(

	1857 'The Bound Imports directory exists but can\'t be parsed.')

	1858

	1859 return

	1860

	1861 if bnd_descr.all_zeroes():

	1862 break

	1863

	1864 rva += bnd_descr.sizeof()

	1865

	1866 forwarder_refs = []

	1867 for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):

	1868 # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and

	1869 # IMAGE_BOUND_FORWARDER_REF have the same size.

	1870 bnd_frwd_ref = self.__unpack_data__(

	1871 self.__IMAGE_BOUND_FORWARDER_REF_format__,

	1872 self.__data__[rva:rva+bnd_descr_size],

	1873 file_offset = rva)

	1874 # OC Patch:

	1875 if not bnd_frwd_ref:

	1876 raise PEFormatError(

	1877 "IMAGE_BOUND_FORWARDER_REF cannot be read")

	1878 rva += bnd_frwd_ref.sizeof()

	1879

	1880 name_str = self.get_string_from_data(

	1881 start+bnd_frwd_ref.OffsetModuleName, self.__data__)

	1882

	1883 if not name_str:

	1884 break

	1885 forwarder_refs.append(BoundImportRefData(

	1886 struct = bnd_frwd_ref,

	1887 name = name_str))

	1888

	1889 name_str = self.get_string_from_data(

	1890 start+bnd_descr.OffsetModuleName, self.__data__)

	1891

	1892 if not name_str:

	1893 break

	1894 bound_imports.append(

	1895 BoundImportDescData(

	1896 struct = bnd_descr,

	1897 name = name_str,

	1898 entries = forwarder_refs))

	1899

	1900 return bound_imports

	1901

	1902

	1903 def parse_directory_tls(self, rva, size):

	1904 """"""

	1905

	1906 if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:

	1907 format = self.__IMAGE_TLS_DIRECTORY_format__

	1908

	1909 elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:

	1910 format = self.__IMAGE_TLS_DIRECTORY64_format__

	1911

	1912 tls_struct = self.__unpack_data__(

	1913 format,

	1914 self.get_data(rva),

	1915 file_offset = self.get_offset_from_rva(rva))

	1916

	1917 if not tls_struct:

	1918 return None

	1919

	1920 return TlsData( struct = tls_struct )

	1921

	1922

	1923 def parse_relocations_directory(self, rva, size):

	1924 """"""

	1925

	1926 rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)

	1927 rlc_size = rlc.sizeof()

	1928 end = rva+size

	1929

	1930 relocations = []

	1931 while rva<end:

	1932

	1933 # OC Patch:

	1934 # Malware that has bad rva entries will cause an error.

	1935 # Just continue on after an exception

	1936 #

	1937 try:

	1938 rlc = self.__unpack_data__(

	1939 self.__IMAGE_BASE_RELOCATION_format__,

	1940 self.get_data(rva, rlc_size),

	1941 file_offset = self.get_offset_from_rva(rva) )

	1942 except PEFormatError:

	1943 self.__warnings.append(

	1944 'Invalid relocation information. Can\'t read ' +

	1945 'data at RVA: 0x%x' % rva)

	1946 rlc = None

	1947

	1948 if not rlc:

	1949 break

	1950

	1951 reloc_entries = self.parse_relocations(

	1952 rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)

	1953

	1954 relocations.append(

	1955 BaseRelocationData(

	1956 struct = rlc,

	1957 entries = reloc_entries))

	1958

	1959 if not rlc.SizeOfBlock:

	1960 break

	1961 rva += rlc.SizeOfBlock

	1962

	1963 return relocations

	1964

	1965

	1966 def parse_relocations(self, data_rva, rva, size):

	1967 """"""

	1968

	1969 data = self.get_data(data_rva, size)

	1970

	1971 entries = []

	1972 for idx in xrange(len(data)/2):

	1973 word = struct.unpack('<H', data[idx2:(idx+1)2])[0]

	1974 reloc_type = (word>>12)

	1975 reloc_offset = (word&0x0fff)

	1976 entries.append(

	1977 RelocationData(

	1978 type = reloc_type,

	1979 rva = reloc_offset+rva))

	1980

	1981 return entries

	1982

	1983

	1984 def parse_debug_directory(self, rva, size):

	1985 """"""

	1986

	1987 dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)

	1988 dbg_size = dbg.sizeof()

	1989

	1990 debug = []

	1991 for idx in xrange(size/dbg_size):

	1992 try:

	1993 data = self.get_data(rva+dbg_size*idx, dbg_size)

	1994 except PEFormatError, e:

	1995 self.__warnings.append(

	1996 'Invalid debug information. Can\'t read ' +

	1997 'data at RVA: 0x%x' % rva)

	1998 return None

	1999

	2000 dbg = self.__unpack_data__(

	2001 self.__IMAGE_DEBUG_DIRECTORY_format__,

	2002 data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))

	2003

	2004 if not dbg:

	2005 return None

	2006

	2007 debug.append(

	2008 DebugData(

	2009 struct = dbg))

	2010

	2011 return debug

	2012

	2013

    def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
        """Parse the resources directory.

        Given the RVA of a resource directory table, process all of its
        entries, recursing into sub-directories.

        The returned object holds the IMAGE_RESOURCE_DIRECTORY structure
        ('struct') plus 'entries', a list with one item per directory entry.

        Each entry carries its IMAGE_RESOURCE_DIRECTORY_ENTRY structure
        ('struct'), its 'name' or 'id', and either 'directory' (the result
        of parsing the sub-directory it points to) or 'data' (a
        ResourceDataEntryData wrapping the IMAGE_RESOURCE_DATA_ENTRY
        structure together with 'lang' and 'sublang').

        When a top-level entry (level 0) has the RT_VERSION id, the version
        information it leads to is parsed via parse_version_information().

        Arguments:
            rva      -- RVA of the directory table to parse.
            size     -- not used by this method.
            base_rva -- RVA that name and data offsets are added to;
                        defaults to 'rva' for the outermost call.
            level    -- nesting depth, 0 for the outermost call and
                        incremented on every recursion.

        Returns a ResourceDirData object, or None (after recording a
        warning) when the directory data cannot be read or unpacked.
        """

        # OC Patch:
        # Remember the RVA this call was given so that an entry pointing
        # straight back at this same directory can be detected below.
        original_rva = rva

        if base_rva is None:
            base_rva = rva

        # NOTE(review): this value is not read again in this method.
        resources_section = self.get_section_by_rva(rva)

        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, e:
            self.__warnings.append(
                'Invalid resources directory. Can\'t read ' +
                'directory data at RVA: 0x%x' % rva)
            return None

        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
        if resource_dir is None:
            # If the resources directory can't be parsed, record a warning
            # and return. This directory does not necessarily have to be
            # valid for the PE file itself to be valid.
            self.__warnings.append(
                'Invalid resources directory. Can\'t parse ' +
                'directory data at RVA: 0x%x' % rva)
            return None

        dir_entries = []

        # Advance the rva to the position immediately following the directory
        # table header and pointing to the first entry in the table
        #
        rva += resource_dir.sizeof()

        number_of_entries = (
            resource_dir.NumberOfNamedEntries +
            resource_dir.NumberOfIdEntries )

        # Name wrappers collected here are rendered after the loop.
        strings_to_postprocess = list()

        for idx in xrange(number_of_entries):

            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'Entry %d is invalid, RVA = 0x%x. ' %
                    (idx, rva) )
                break


            entry_name = None
            entry_id = None

            # The first NumberOfNamedEntries entries are treated as named;
            # once those have been processed, only Id ones remain.

            if idx >= resource_dir.NumberOfNamedEntries:
                entry_id = res.Name
            else:
                ustr_offset = base_rva+res.NameOffset
                try:
                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    strings_to_postprocess.append(entry_name)

                except PEFormatError, excp:
                    self.__warnings.append(
                        'Error parsing the resources directory, ' +
                        'attempting to read entry name. ' +
                        'Can\'t read unicode string at offset 0x%x' %
                        (ustr_offset) )


            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Raising a PEFormatError here would skip some
                # reasonable data, so we just break instead.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if original_rva == (base_rva + res.OffsetToDirectory):

                    break

                else:
                    entry_directory = self.parse_resources_directory(
                        base_rva+res.OffsetToDirectory,
                        base_rva=base_rva, level = level+1)

                # A sub-directory that failed to parse ends this table too.
                if not entry_directory:
                    break
                dir_entries.append(
                    ResourceDirEntryData(
                        struct = res,
                        name = entry_name,
                        id = entry_id,
                        directory = entry_directory))

            else:
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory)

                if struct:
                    # For a data entry the low byte of Name is stored as
                    # 'lang' and the next byte as 'sublang'.
                    entry_data = ResourceDataEntryData(
                        struct = struct,
                        lang = res.Name & 0xff,
                        sublang = (res.Name>>8) & 0xff)

                    dir_entries.append(
                        ResourceDirEntryData(
                            struct = res,
                            name = entry_name,
                            id = entry_id,
                            data = entry_data))

                else:
                    break



            # Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
                if len(dir_entries)>0:
                    last_entry = dir_entries[-1]

                rt_version_struct = None
                try:
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except:
                    # Maybe a malformed directory structure...?
                    # Let's ignore it.
                    # NOTE(review): this bare except also hides the error
                    # raised when 'last_entry' was never assigned because
                    # dir_entries is empty.
                    pass

                if rt_version_struct is not None:
                    self.parse_version_information(rt_version_struct)

            rva += res.sizeof()


        # NOTE(review): string_rvas is built and sorted but not read again
        # in this method.
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()

        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()


        resource_directory_data = ResourceDirData(
            struct = resource_dir,
            entries = dir_entries)

        return resource_directory_data

	2200

	2201

	2202 def parse_resource_data_entry(self, rva):

	2203 """Parse a data entry from the resources directory."""

	2204

	2205 try:

	2206 # If the RVA is invalid all would blow up. Some EXEs seem to be

	2207 # specially nasty and have an invalid RVA.

	2208 data = self.get_data(rva)

	2209 except PEFormatError, excp:

	2210 self.__warnings.append(

	2211 'Error parsing a resource directory data entry, ' +

	2212 'the RVA is invalid: 0x%x' % ( rva ) )

	2213 return None

	2214

	2215 data_entry = self.__unpack_data__(

	2216 self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,

	2217 file_offset = self.get_offset_from_rva(rva) )

	2218

	2219 return data_entry

	2220

	2221

	2222 def parse_resource_entry(self, rva):

	2223 """Parse a directory entry from the resources directory."""

	2224

	2225 resource = self.__unpack_data__(

	2226 self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),

	2227 file_offset = self.get_offset_from_rva(rva) )

	2228

	2229 if resource is None:

	2230 return None

	2231

	2232 #resource.NameIsString = (resource.Name & 0x80000000L) >> 31

	2233 resource.NameOffset = resource.Name & 0x7FFFFFFFL

	2234

	2235 resource.__pad = resource.Name & 0xFFFF0000L

	2236 resource.Id = resource.Name & 0x0000FFFFL

	2237

	2238 resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31

	2239 resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL

	2240

	2241 return resource

	2242

	2243

    def parse_version_information(self, version_struct):
        """Parse version information structure.

        The data will be made available in three attributes of the PE object.

        VS_VERSIONINFO will contain the first three fields of the main structure:
            'Length', 'ValueLength', and 'Type'

        VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes:
            'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
            'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
            'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'

        FileInfo is a list of all StringFileInfo and VarFileInfo structures.

        StringFileInfo structures will have a list as an attribute named 'StringTable'
        containing all the StringTable structures. Each of those structures contains a
        dictionary 'entries' with all the key/value version information string pairs.

        VarFileInfo structures will have a list as an attribute named 'Var' containing
        all Var structures. Each Var structure will have a dictionary as an attribute
        named 'entry' which will contain the name and value of the Var.

        The 'version_struct' argument is the resource data entry structure of
        the version resource; its 'OffsetToData' and 'Size' members locate the
        raw version data. Problems found while parsing are recorded as
        warnings and end the parsing; nothing is raised for them here.
        """


        # Retrieve the data for the version info resource
        #
        start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
        raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]


        # Map the main structure and the subsequent string
        #
        versioninfo_struct = self.__unpack_data__(
            self.__VS_VERSIONINFO_format__, raw_data,
            file_offset = start_offset )

        if versioninfo_struct is None:
            return

        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
        try:
            versioninfo_string = self.get_string_u_at_rva( ustr_offset )
        except PEFormatError, excp:
            self.__warnings.append(
                'Error parsing the version information, ' +
                'attempting to read VS_VERSION_INFO string. Can\'t ' +
                'read unicode string at offset 0x%x' % (
                ustr_offset ) )

            versioninfo_string = None

        # If the structure does not contain the expected name, it's assumed to be invalid
        #
        if versioninfo_string != u'VS_VERSION_INFO':

            self.__warnings.append('Invalid VS_VERSION_INFO block')
            return


        # Set the PE object's VS_VERSIONINFO to this one
        #
        self.VS_VERSIONINFO = versioninfo_struct

        # Set the Key attribute to point to the unicode string identifying the structure
        #
        self.VS_VERSIONINFO.Key = versioninfo_string


        # Process the fixed version information, get the offset and structure.
        # The offset skips the header and the key string, counted as two bytes
        # per character plus a terminator, and is then dword aligned.
        #
        fixedfileinfo_offset = self.dword_align(
            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
            version_struct.OffsetToData)
        fixedfileinfo_struct = self.__unpack_data__(
            self.__VS_FIXEDFILEINFO_format__,
            raw_data[fixedfileinfo_offset:],
            file_offset = start_offset+fixedfileinfo_offset )

        if not fixedfileinfo_struct:
            return


        # Set the PE object's VS_FIXEDFILEINFO to this one
        #
        self.VS_FIXEDFILEINFO = fixedfileinfo_struct


        # Start parsing all the StringFileInfo and VarFileInfo structures
        #

        # Get the first one
        #
        stringfileinfo_offset = self.dword_align(
            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
            version_struct.OffsetToData)
        # NOTE(review): this value is not read again in this method.
        original_stringfileinfo_offset = stringfileinfo_offset


        # Set the PE object's attribute that will contain them all.
        #
        self.FileInfo = list()


        while True:

            # Process the StringFileInfo/VarFileInfo struct
            #
            stringfileinfo_struct = self.__unpack_data__(
                self.__StringFileInfo_format__,
                raw_data[stringfileinfo_offset:],
                file_offset = start_offset+stringfileinfo_offset )

            if stringfileinfo_struct is None:
                self.__warnings.append(
                    'Error parsing StringFileInfo/VarFileInfo struct' )
                return None

            # Get the subsequent string defining the structure.
            # NOTE(review): the header size added here is taken from
            # versioninfo_struct rather than stringfileinfo_struct --
            # presumably both have the same size; confirm.
            #
            ustr_offset = ( version_struct.OffsetToData +
                stringfileinfo_offset + versioninfo_struct.sizeof() )
            try:
                stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
            except PEFormatError, excp:
                self.__warnings.append(
                    'Error parsing the version information, ' +
                    'attempting to read StringFileInfo string. Can\'t ' +
                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
                break

            # Set such string as the Key attribute
            #
            stringfileinfo_struct.Key = stringfileinfo_string


            # Append the structure to the PE object's list
            #
            self.FileInfo.append(stringfileinfo_struct)


            # Parse a StringFileInfo entry
            #
            if stringfileinfo_string == u'StringFileInfo':

                if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:

                    stringtable_offset = self.dword_align(
                        stringfileinfo_offset + stringfileinfo_struct.sizeof() +
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)

                    stringfileinfo_struct.StringTable = list()

                    # Process the String Table entries
                    #
                    while True:
                        stringtable_struct = self.__unpack_data__(
                            self.__StringTable_format__,
                            raw_data[stringtable_offset:],
                            file_offset = start_offset+stringtable_offset )

                        if not stringtable_struct:
                            break

                        ustr_offset = ( version_struct.OffsetToData + stringtable_offset +
                            stringtable_struct.sizeof() )
                        try:
                            stringtable_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read StringTable string. Can\'t ' +
                                'read unicode string at offset 0x%x' % ( ustr_offset ) )
                            break

                        # The string following the header is stored as LangID;
                        # the three dictionaries are keyed by entry key.
                        stringtable_struct.LangID = stringtable_string
                        stringtable_struct.entries = dict()
                        stringtable_struct.entries_offsets = dict()
                        stringtable_struct.entries_lengths = dict()
                        stringfileinfo_struct.StringTable.append(stringtable_struct)

                        entry_offset = self.dword_align(
                            stringtable_offset + stringtable_struct.sizeof() +
                                2*(len(stringtable_string)+1),
                            version_struct.OffsetToData)

                        # Process all entries in the string table
                        #

                        while entry_offset < stringtable_offset + stringtable_struct.Length:

                            string_struct = self.__unpack_data__(
                                self.__String_format__, raw_data[entry_offset:],
                                file_offset = start_offset+entry_offset )

                            if not string_struct:
                                break

                            ustr_offset = ( version_struct.OffsetToData + entry_offset +
                                string_struct.sizeof() )
                            try:
                                key = self.get_string_u_at_rva( ustr_offset )
                                key_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Key string. Can\'t ' +
                                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
                                break

                            # The value follows the key, dword aligned.
                            value_offset = self.dword_align(
                                2*(len(key)+1) + entry_offset + string_struct.sizeof(),
                                version_struct.OffsetToData)

                            ustr_offset = version_struct.OffsetToData + value_offset
                            try:
                                value = self.get_string_u_at_rva( ustr_offset,
                                    max_length = string_struct.ValueLength )
                                # From here on value_offset holds a file offset.
                                value_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Value string. ' +
                                    'Can\'t read unicode string at offset 0x%x' % (
                                    ustr_offset ) )
                                break

                            # A zero-length entry would never advance the
                            # offset, so jump to the end of the table instead.
                            if string_struct.Length == 0:
                                entry_offset = stringtable_offset + stringtable_struct.Length
                            else:
                                entry_offset = self.dword_align(
                                    string_struct.Length+entry_offset, version_struct.OffsetToData)

                            # Build the attribute name: characters with a code
                            # above 128 are replaced by a '\xNN' escape.
                            key_as_char = []
                            for c in key:
                                if ord(c)>128:
                                    key_as_char.append('\\x%02x' %ord(c))
                                else:
                                    key_as_char.append(c)

                            key_as_char = ''.join(key_as_char)

                            setattr(stringtable_struct, key_as_char, value)
                            stringtable_struct.entries[key] = value
                            stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
                            stringtable_struct.entries_lengths[key] = (len(key), len(value))


                        stringtable_offset = self.dword_align(
                            stringtable_struct.Length + stringtable_offset,
                            version_struct.OffsetToData)
                        # NOTE(review): stringtable_offset is measured from the
                        # start of raw_data while Length is the size of this
                        # StringFileInfo block -- confirm this is the intended
                        # end-of-block test.
                        if stringtable_offset >= stringfileinfo_struct.Length:
                            break

            # Parse a VarFileInfo entry
            #
            elif stringfileinfo_string == u'VarFileInfo':

                varfileinfo_struct = stringfileinfo_struct
                varfileinfo_struct.name = 'VarFileInfo'

                if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:

                    var_offset = self.dword_align(
                        stringfileinfo_offset + varfileinfo_struct.sizeof() +
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)

                    varfileinfo_struct.Var = list()

                    # Process all entries
                    #

                    while True:
                        var_struct = self.__unpack_data__(
                            self.__Var_format__,
                            raw_data[var_offset:],
                            file_offset = start_offset+var_offset )

                        if not var_struct:
                            break

                        ustr_offset = ( version_struct.OffsetToData + var_offset +
                            var_struct.sizeof() )
                        try:
                            var_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read VarFileInfo Var string. ' +
                                'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
                            break


                        varfileinfo_struct.Var.append(var_struct)

                        varword_offset = self.dword_align(
                            2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
                            version_struct.OffsetToData)
                        orig_varword_offset = varword_offset

                        # The value is read as consecutive pairs of words;
                        # 'entry' is overwritten on each pass, so it ends up
                        # holding the last pair read.
                        while varword_offset < orig_varword_offset + var_struct.ValueLength:
                            word1 = self.get_word_from_data(
                                raw_data[varword_offset:varword_offset+2], 0)
                            word2 = self.get_word_from_data(
                                raw_data[varword_offset+2:varword_offset+4], 0)
                            varword_offset += 4

                            var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}

                        var_offset = self.dword_align(
                            var_offset+var_struct.Length, version_struct.OffsetToData)

                        # NOTE(review): for a non-negative Length this
                        # comparison is always true, so the loop stops after
                        # the first Var structure -- confirm this is intended.
                        if var_offset <= var_offset+var_struct.Length:
                            break



            # Increment and align the offset
            #
            stringfileinfo_offset = self.dword_align(
                stringfileinfo_struct.Length+stringfileinfo_offset,
                version_struct.OffsetToData)

            # Check if all the StringFileInfo and VarFileInfo items have been processed
            #
            if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
                break

	2573

	2574

	2575

	2576 def parse_export_directory(self, rva, size):

	2577 """Parse the export directory.

	2578

	2579 Given the rva of the export directory, it will process all

	2580 its entries.

	2581

	2582 The exports will be made available through a list "exports"

	2583 containing a tuple with the following elements:

	2584

	2585 (ordinal, symbol_address, symbol_name)

	2586

	2587 And also through a dicionary "exports_by_ordinal" whose keys

	2588 will be the ordinals and the values tuples of the from:

	2589

	2590 (symbol_address, symbol_name)

	2591

	2592 The symbol addresses are relative, not absolute.

	2593 """

	2594

	2595 try:

	2596 export_dir = self.__unpack_data__(

	2597 self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),

	2598 file_offset = self.get_offset_from_rva(rva) )

	2599 except PEFormatError:

	2600 self.__warnings.append(

	2601 'Error parsing export directory at RVA: 0x%x' % ( rva ) )

	2602 return

	2603

	2604 if not export_dir:

	2605 return

	2606

	2607 try:

	2608 address_of_names = self.get_data(

	2609 export_dir.AddressOfNames, export_dir.NumberOfNames*4)

	2610 address_of_name_ordinals = self.get_data(

	2611 export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)

	2612 address_of_functions = self.get_data(

	2613 export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)

	2614 except PEFormatError:

	2615 self.__warnings.append(

	2616 'Error parsing export directory at RVA: 0x%x' % ( rva ) )

	2617 return

	2618

	2619 exports = []

	2620

	2621 for i in xrange(export_dir.NumberOfNames):

	2622

	2623

	2624 symbol_name = self.get_string_at_rva(

	2625 self.get_dword_from_data(address_of_names, i))

	2626

	2627 symbol_ordinal = self.get_word_from_data(

	2628 address_of_name_ordinals, i)

	2629

	2630

	2631 if symbol_ordinal*4<len(address_of_functions):

	2632 symbol_address = self.get_dword_from_data(

	2633 address_of_functions, symbol_ordinal)

	2634 else:

	2635 # Corrupt? a bad pointer... we assume it's all

	2636 # useless, no exports

	2637 return None

	2638

	2639 # If the funcion's rva points within the export directory

	2640 # it will point to a string with the forwarded symbol's string

	2641 # instead of pointing the the function start address.

	2642

	2643 if symbol_address>=rva and symbol_address<rva+size:

	2644 forwarder_str = self.get_string_at_rva(symbol_address)

	2645 else:

	2646 forwarder_str = None

	2647

	2648

	2649 exports.append(

	2650 ExportData(

	2651 ordinal = export_dir.Base+symbol_ordinal,

	2652 address = symbol_address,

	2653 name = symbol_name,

	2654 forwarder = forwarder_str))

	2655

	2656 ordinals = [exp.ordinal for exp in exports]

	2657

	2658 for idx in xrange(export_dir.NumberOfFunctions):

	2659

	2660 if not idx+export_dir.Base in ordinals:

	2661 symbol_address = self.get_dword_from_data(

	2662 address_of_functions,

	2663 idx)

	2664

	2665 #

	2666 # Checking for forwarder again.

	2667 #

	2668 if symbol_address>=rva and symbol_address<rva+size:

	2669 forwarder_str = self.get_string_at_rva(symbol_address)

	2670 else:

	2671 forwarder_str = None

	2672

	2673 exports.append(

	2674 ExportData(

	2675 ordinal = export_dir.Base+idx,

	2676 address = symbol_address,

	2677 name = None,

	2678 forwarder = forwarder_str))

	2679

	2680 return ExportDirData(

	2681 struct = export_dir,

	2682 symbols = exports)

	2683

	2684

	2685 def dword_align(self, offset, base):

	2686 offset += base

	2687 return (offset+3) - ((offset+3)%4) - base

	2688

	2689

	2690

	2691 def parse_delay_import_directory(self, rva, size):

	2692 """Walk and parse the delay import directory."""

	2693

	2694 import_descs = []

	2695 while True:

	2696 try:

	2697 # If the RVA is invalid all would blow up. Some PEs seem to be

	2698 # specially nasty and have an invalid RVA.

	2699 data = self.get_data(rva)

	2700 except PEFormatError, e:

	2701 self.__warnings.append(

	2702 'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )

	2703 break

	2704

	2705 import_desc = self.__unpack_data__(

	2706 self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,

	2707 data, file_offset = self.get_offset_from_rva(rva) )

	2708

	2709

	2710 # If the structure is all zeores, we reached the end of the list

	2711 if not import_desc or import_desc.all_zeroes():

	2712 break

	2713

	2714

	2715 rva += import_desc.sizeof()

	2716

	2717 try:

	2718 import_data = self.parse_imports(

	2719 import_desc.pINT,

	2720 import_desc.pIAT,

	2721 None)

	2722 except PEFormatError, e:

	2723 self.__warnings.append(

	2724 'Error parsing the Delay import directory. ' +

	2725 'Invalid import data at RVA: 0x%x' % ( rva ) )

	2726 break

	2727

	2728 if not import_data:

	2729 continue

	2730

	2731

	2732 dll = self.get_string_at_rva(import_desc.szName)

	2733 if dll:

	2734 import_descs.append(

	2735 ImportDescData(

	2736 struct = import_desc,

	2737 imports = import_data,

	2738 dll = dll))

	2739

	2740 return import_descs

	2741

	2742

	2743

	2744 def parse_import_directory(self, rva, size):

	2745 """Walk and parse the import directory."""

	2746

	2747 import_descs = []

	2748 while True:

	2749 try:

	2750 # If the RVA is invalid all would blow up. Some EXEs seem to be

	2751 # specially nasty and have an invalid RVA.

	2752 data = self.get_data(rva)

	2753 except PEFormatError, e:

	2754 self.__warnings.append(

	2755 'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )

	2756 break

	2757

	2758 import_desc = self.__unpack_data__(

	2759 self.__IMAGE_IMPORT_DESCRIPTOR_format__,

	2760 data, file_offset = self.get_offset_from_rva(rva) )

	2761

	2762 # If the structure is all zeores, we reached the end of the list

	2763 if not import_desc or import_desc.all_zeroes():

	2764 break

	2765

	2766 rva += import_desc.sizeof()

	2767

	2768 try:

	2769 import_data = self.parse_imports(

	2770 import_desc.OriginalFirstThunk,

	2771 import_desc.FirstThunk,

	2772 import_desc.ForwarderChain)

	2773 except PEFormatError, excp:

	2774 self.__warnings.append(

	2775 'Error parsing the Import directory. ' +

	2776 'Invalid Import data at RVA: 0x%x' % ( rva ) )

	2777 break

	2778 #raise excp

	2779

	2780 if not import_data:

	2781 continue

	2782

	2783 dll = self.get_string_at_rva(import_desc.Name)

	2784 if dll:

	2785 import_descs.append(

	2786 ImportDescData(

	2787 struct = import_desc,

	2788 imports = import_data,

	2789 dll = dll))

	2790

	2791 return import_descs

	2792

	2793

	2794

	2795 def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):

	2796 """Parse the imported symbols.

	2797

	2798 It will fill a list, which will be avalable as the dictionary

	2799 attribute "imports". Its keys will be the DLL names and the values

	2800 all the symbols imported from that object.

	2801 """

	2802

	2803 imported_symbols = []

	2804 imports_section = self.get_section_by_rva(first_thunk)

	2805 if not imports_section:

	2806 raise PEFormatError, 'Invalid/corrupt imports.'

	2807

	2808

	2809 # Import Lookup Table. Contains ordinals or pointers to strings.

	2810 ilt = self.get_import_table(original_first_thunk)

	2811 # Import Address Table. May have identical content to ILT if

	2812 # PE file is not bounded, Will contain the address of the

	2813 # imported symbols once the binary is loaded or if it is already

	2814 # bound.

	2815 iat = self.get_import_table(first_thunk)

	2816

	2817 # OC Patch:

	2818 # Would crash if iat or ilt had None type

	2819 if not iat and not ilt:

	2820 raise PEFormatError(

	2821 'Invalid Import Table information. ' +

	2822 'Both ILT and IAT appear to be broken.')

	2823

	2824 if not iat and ilt:

	2825 table = ilt

	2826 elif iat and not ilt:

	2827 table = iat

	2828 elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):

	2829 table = ilt

	2830 elif (ilt and len(ilt))==0 and (iat and len(iat)):

	2831 table = iat

	2832 else:

	2833 return None

	2834

	2835 for idx in xrange(len(table)):

	2836

	2837 imp_ord = None

	2838 imp_hint = None

	2839 imp_name = None

	2840 hint_name_table_rva = None

	2841

	2842 if table[idx].AddressOfData:

	2843

	2844 if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:

	2845 ordinal_flag = IMAGE_ORDINAL_FLAG

	2846 elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:

	2847 ordinal_flag = IMAGE_ORDINAL_FLAG64

	2848

	2849 # If imported by ordinal, we will append the ordinal number

	2850 #

	2851 if table[idx].AddressOfData & ordinal_flag:

	2852 import_by_ordinal = True

	2853 imp_ord = table[idx].AddressOfData & 0xffff

	2854 imp_name = None

	2855 else:

	2856 import_by_ordinal = False

	2857 try:

	2858 hint_name_table_rva = table[idx].AddressOfData & 0x7ffff fff

	2859 data = self.get_data(hint_name_table_rva, 2)

	2860 # Get the Hint

	2861 imp_hint = self.get_word_from_data(data, 0)

	2862 imp_name = self.get_string_at_rva(table[idx].AddressOfDa ta+2)

	2863 except PEFormatError, e:

	2864 pass

	2865

	2866 imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4

	2867

	2868 if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:

	2869 imp_bound = iat[idx].AddressOfData

	2870 else:

	2871 imp_bound = None

	2872

	2873 if imp_name != '' and (imp_ord or imp_name):

	2874 imported_symbols.append(

	2875 ImportData(

	2876 import_by_ordinal = import_by_ordinal,

	2877 ordinal = imp_ord,

	2878 hint = imp_hint,

	2879 name = imp_name,

	2880 bound = imp_bound,

	2881 address = imp_address,

	2882 hint_name_table_rva = hint_name_table_rva))

	2883

	2884 return imported_symbols

	2885

	2886

	2887

	def get_import_table(self, rva):
		"""Collect the thunk entries of an import table starting at 'rva'.

		Thunks are unpacked one after another until an all-zero (or
		unparseable) entry is found. The 32-bit or 64-bit thunk layout is
		chosen from self.PE_TYPE.

		Returns the list of unpacked thunk structures (empty when 'rva' is
		zero/None), or None when the data at the current RVA cannot be
		fetched; in that case a warning is recorded.
		"""

		thunks = []

		while rva:
			try:
				raw = self.get_data(rva)
			except PEFormatError:
				self.__warnings.append(
					'Error parsing the import table. '
					'Invalid data at RVA: 0x%x' % rva)
				return None

			if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
				thunk_format = self.__IMAGE_THUNK_DATA_format__
			elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
				thunk_format = self.__IMAGE_THUNK_DATA64_format__

			thunk = self.__unpack_data__(
				thunk_format, raw,
				file_offset=self.get_offset_from_rva(rva))

			# An empty/all-zero thunk terminates the table.
			if not thunk or thunk.all_zeroes():
				break

			rva += thunk.sizeof()
			thunks.append(thunk)

		return thunks

	2917

	2918

	def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
		"""Return the data laid out as the PE file would be in memory.

		The result starts with the PE header; each section's data is then
		placed at the offset given by its VirtualAddress, so any offset in
		the returned data corresponds to the address ImageBase+offset.

		'max_virtual_address' limits which sections are processed: a section
		whose VirtualAddress is at or beyond this value is skipped. Such
		sections are a common trick in packed executables to confuse tools.

		NOTE(review): 'ImageBase' is accepted but is not read anywhere in
		this method, so no relocations are applied here. Call
		relocate_image() beforehand if a rebased image is wanted.
		"""

		image = self.header
		file_size = len(self.__data__)

		for section in self.sections:

			# Integrity checks: some packers set these fields to bogus
			# values to make tools misbehave.
			if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
				continue

			if (section.SizeOfRawData > file_size or
					section.PointerToRawData > file_size or
					section.VirtualAddress >= max_virtual_address):
				continue

			gap = section.VirtualAddress - len(image)

			if gap > 0:
				# Zero-fill up to the section's virtual address.
				image += '\0' * gap
			elif gap < 0:
				# Negative gap: drop that many bytes from the end of what
				# has been collected so far before appending the section.
				image = image[:gap]

			image += section.data

		return image

	2968

	2969

	def get_data(self, rva, length=None):
		"""Fetch data at 'rva' regardless of where it lives.

		If a section contains the RVA, the request is delegated to that
		section. Otherwise, if the RVA falls inside the header, the header
		bytes from 'rva' on are returned ('length' bytes when a non-zero
		length is given, everything to the end of the header otherwise).

		Raises PEFormatError when the RVA is in neither a section nor the
		header.
		"""

		section = self.get_section_by_rva(rva)
		if section:
			return section.get_data(rva, length)

		if rva < len(self.header):
			end = None
			if length:
				end = rva + length
			return self.header[rva:end]

		raise PEFormatError('data at RVA can\'t be fetched. Corrupt header?')

	2990

	2991

	def get_rva_from_offset(self, offset):
		"""Translate a file offset into the corresponding RVA.

		Raises PEFormatError when no section contains the offset.
		"""

		section = self.get_section_by_offset(offset)
		if section:
			return section.get_rva_from_offset(offset)

		raise PEFormatError(
			"specified offset (0x%x) doesn't belong to any section." % offset)

	2999

	def get_offset_from_rva(self, rva):
		"""Translate an RVA into the corresponding file offset.

		The section containing the RVA is looked up and asked for the
		offset. Raises PEFormatError when no section contains the RVA.
		"""

		section = self.get_section_by_rva(rva)
		if section:
			return section.get_offset_from_rva(rva)

		raise PEFormatError('data at RVA can\'t be fetched. Corrupt header?')

	3013

	3014

	def get_string_at_rva(self, rva):
		"""Read a NUL-terminated ASCII string located at 'rva'.

		The string is read from the section containing the RVA or, failing
		that, from the header. Returns None when the RVA is in neither.
		"""

		section = self.get_section_by_rva(rva)
		if section:
			return self.get_string_from_data(
				rva - section.VirtualAddress, section.data)

		if rva < len(self.header):
			return self.get_string_from_data(rva, self.header)

		return None

	3025

	3026

	def get_string_from_data(self, offset, data):
		"""Read an ASCII string from 'data' starting at index 'offset'.

		Characters are collected until a NUL character or the end of the
		data is reached. Returns '' when 'offset' is past the end.
		"""

		chars = []
		while True:
			try:
				char = data[offset]
			except IndexError:
				# Ran off the end of the data: keep what was collected.
				break
			if not ord(char):
				break
			chars.append(char)
			offset += 1

		return ''.join(chars)

	3048

	3049

	def get_string_u_at_rva(self, rva, max_length = 2**16):
		"""Read a string of 16-bit little-endian characters located at 'rva'.

		Reading stops at a zero character, after 'max_length' characters, or
		when two bytes can no longer be unpacked. Returns None when the
		initial RVA cannot be read at all.
		"""

		try:
			# Probe the RVA first; some executables carry an invalid one
			# and everything below would blow up.
			self.get_data(rva, 2)
		except PEFormatError:
			return None

		chars = []
		idx = 0
		while idx < max_length:
			try:
				code_unit = struct.unpack(
					'<H', self.get_data(rva + 2 * idx, 2))[0]
			except struct.error:
				break

			if code_unit == 0:
				break

			chars.append(unichr(code_unit))
			idx += 1

		return u''.join(chars)

	3074

	3075

	def get_section_by_offset(self, offset):
		"""Return the first section containing the file offset, or None."""

		for section in self.sections:
			if section.contains_offset(offset):
				return section

		return None

	3085

	3086

	def get_section_by_rva(self, rva):
		"""Return the first section containing the given RVA, or None."""

		for section in self.sections:
			if section.contains_rva(rva):
				return section

		return None

	3096

	def __str__(self):
		"""Return the same text that dump_info() produces."""
		text = self.dump_info()
		return text

	3099

	3100

	def print_info(self):
		"""Print all the PE header information in a human readable form."""
		# Parenthesised single argument: prints the same text whether print
		# is a statement or a function.
		print(self.dump_info())

	3104

	3105

	def dump_info(self, dump=None):
		"""Render all the parsed PE information as human readable text.

		'dump' is the collector the lines are appended to; a fresh Dump()
		is created when it is not supplied. Sections of the report are
		emitted only for the structures/directories present on the
		instance. Returns dump.get_text().
		"""

		# NOTE(review): string literals below are reproduced as they appear
		# in the reviewed text; confirm their exact spacing against the
		# repository copy.

		if dump is None:
			dump = Dump()

		def add_flag_line(label, owner, flag_table):
			# Emit 'label' followed by the names of the flags set on 'owner'.
			dump.add(label)
			names = [flag[0] for flag in flag_table if getattr(owner, flag[0])]
			dump.add_line(', '.join(names))

		def add_imported_symbols(module):
			# One line per imported symbol, with the bound address if any.
			for symbol in module.imports:
				if symbol.import_by_ordinal is True:
					dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
						module.dll, str(symbol.ordinal)))
				else:
					dump.add('%s.%s Hint[%s]' % (
						module.dll, symbol.name, str(symbol.hint)))

				if symbol.bound:
					dump.add_line(' Bound: 0x%08X' % (symbol.bound))
				else:
					dump.add_newline()

		# Warnings collected while parsing.
		warnings = self.get_warnings()
		if warnings:
			dump.add_header('Parsing Warnings')
			for warning in warnings:
				dump.add_line(warning)
				dump.add_newline()

		# Basic headers.
		dump.add_header('DOS_HEADER')
		dump.add_lines(self.DOS_HEADER.dump())
		dump.add_newline()

		dump.add_header('NT_HEADERS')
		dump.add_lines(self.NT_HEADERS.dump())
		dump.add_newline()

		dump.add_header('FILE_HEADER')
		dump.add_lines(self.FILE_HEADER.dump())

		image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
		add_flag_line('Flags: ', self.FILE_HEADER, image_flags)
		dump.add_newline()

		if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
			dump.add_header('OPTIONAL_HEADER')
			dump.add_lines(self.OPTIONAL_HEADER.dump())

		dll_characteristics_flags = self.retrieve_flags(
			DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
		add_flag_line(
			'DllCharacteristics: ', self.OPTIONAL_HEADER,
			dll_characteristics_flags)
		dump.add_newline()

		# Sections, with flags, entropy and whichever hashes are available.
		dump.add_header('PE Sections')

		section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')

		for section in self.sections:
			dump.add_lines(section.dump())
			add_flag_line('Flags: ', section, section_flags)
			dump.add_line(
				'Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy())
			if md5 is not None:
				dump.add_line('MD5 hash: %s' % section.get_hash_md5())
			if sha1 is not None:
				dump.add_line('SHA-1 hash: %s' % section.get_hash_sha1())
			if sha256 is not None:
				dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256())
			if sha512 is not None:
				dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512())
			dump.add_newline()

		# Data directories.
		if (hasattr(self, 'OPTIONAL_HEADER') and
				hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY')):

			dump.add_header('Directories')
			for directory in self.OPTIONAL_HEADER.DATA_DIRECTORY:
				dump.add_lines(directory.dump())
			dump.add_newline()

		# Version information resources.
		if hasattr(self, 'VS_VERSIONINFO'):
			dump.add_header('Version Information')
			dump.add_lines(self.VS_VERSIONINFO.dump())
			dump.add_newline()

			if hasattr(self, 'VS_FIXEDFILEINFO'):
				dump.add_lines(self.VS_FIXEDFILEINFO.dump())
				dump.add_newline()

			if hasattr(self, 'FileInfo'):
				for entry in self.FileInfo:
					dump.add_lines(entry.dump())
					dump.add_newline()

					if hasattr(entry, 'StringTable'):
						for st_entry in entry.StringTable:
							for line in st_entry.dump():
								dump.add_line(' ' + line)
							dump.add_line(' LangID: ' + st_entry.LangID)
							dump.add_newline()
							for key, value in st_entry.entries.items():
								dump.add_line(' ' + key + ': ' + value)
						dump.add_newline()

					elif hasattr(entry, 'Var'):
						for var_entry in entry.Var:
							if hasattr(var_entry, 'entry'):
								for line in var_entry.dump():
									dump.add_line(' ' + line)
								dump.add_line(
									' ' + var_entry.entry.keys()[0] +
									': ' + var_entry.entry.values()[0])

						dump.add_newline()

		# Exports.
		if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
			dump.add_header('Exported symbols')
			dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
			dump.add_newline()
			dump.add_line('%-10s %-10s %s' % ('Ordinal', 'RVA', 'Name'))
			for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
				dump.add('%-10d 0x%08Xh %s' % (
					export.ordinal, export.address, export.name))
				if export.forwarder:
					dump.add_line(' forwarder: %s' % export.forwarder)
				else:
					dump.add_newline()

			dump.add_newline()

		# Imports.
		if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
			dump.add_header('Imported symbols')
			for module in self.DIRECTORY_ENTRY_IMPORT:
				dump.add_lines(module.struct.dump())
				dump.add_newline()
				add_imported_symbols(module)
				dump.add_newline()

		# Bound imports.
		if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
			dump.add_header('Bound imports')
			for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:

				dump.add_lines(bound_imp_desc.struct.dump())
				dump.add_line('DLL: %s' % bound_imp_desc.name)
				dump.add_newline()

				for bound_imp_ref in bound_imp_desc.entries:
					dump.add_lines(bound_imp_ref.struct.dump(), 4)
					dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
					dump.add_newline()

		# Delay-loaded imports.
		if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
			dump.add_header('Delay Imported symbols')
			for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:

				dump.add_lines(module.struct.dump())
				dump.add_newline()
				add_imported_symbols(module)
				dump.add_newline()

		# Resource tree: type -> id -> language, indented per level.
		if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
			dump.add_header('Resource directory')

			dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())

			for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:

				if resource_type.name is not None:
					dump.add_line('Name: [%s]' % resource_type.name, 2)
				else:
					dump.add_line('Id: [0x%X] (%s)' % (
						resource_type.struct.Id, RESOURCE_TYPE.get(
							resource_type.struct.Id, '-')),
						2)

				dump.add_lines(resource_type.struct.dump(), 2)

				if hasattr(resource_type, 'directory'):

					dump.add_lines(resource_type.directory.struct.dump(), 4)

					for resource_id in resource_type.directory.entries:

						if resource_id.name is not None:
							dump.add_line('Name: [%s]' % resource_id.name, 6)
						else:
							dump.add_line(
								'Id: [0x%X]' % resource_id.struct.Id, 6)

						dump.add_lines(resource_id.struct.dump(), 6)

						if hasattr(resource_id, 'directory'):
							dump.add_lines(
								resource_id.directory.struct.dump(), 8)

							for resource_lang in resource_id.directory.entries:
								dump.add_lines(
									resource_lang.struct.dump(), 10)
								dump.add_lines(
									resource_lang.data.struct.dump(), 12)
				dump.add_newline()

			dump.add_newline()

		# Thread local storage.
		if (hasattr(self, 'DIRECTORY_ENTRY_TLS') and
				self.DIRECTORY_ENTRY_TLS and
				self.DIRECTORY_ENTRY_TLS.struct):

			dump.add_header('TLS')
			dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
			dump.add_newline()

		# Debug directory.
		if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
			dump.add_header('Debug information')
			for dbg in self.DIRECTORY_ENTRY_DEBUG:
				dump.add_lines(dbg.struct.dump())
				try:
					dump.add_line('Type: ' + DEBUG_TYPE[dbg.struct.Type])
				except KeyError:
					dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
				dump.add_newline()

		# Base relocations.
		if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
			dump.add_header('Base relocations')
			for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
				dump.add_lines(base_reloc.struct.dump())
				for reloc in base_reloc.entries:
					try:
						dump.add_line('%08Xh %s' % (
							reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
					except KeyError:
						dump.add_line('0x%08X 0x%x(Unknown)' % (
							reloc.rva, reloc.type), 4)
				dump.add_newline()

		return dump.get_text()

	3379

	3380 # OC Patch

	def get_physical_by_rva(self, rva):
		"""Return the file offset for an RVA, or None if it cannot be mapped.

		Any exception raised by get_offset_from_rva() is turned into a
		None result.
		"""
		try:
			offset = self.get_offset_from_rva(rva)
		except Exception:
			return None
		return offset

	3387

	3388

	3389 ##

	3390 # Double-Word get/set

	3391 ##

	3392

	def get_data_from_dword(self, dword):
		"""Pack a double word into its four-byte little-endian string."""
		packed = struct.pack('<L', dword)
		return packed

	3396

	3397

	def get_dword_from_data(self, data, offset):
		"""Convert four bytes of data to a double word (little endian).

		'offset' indexes into 'data' as if it were an array of dwords, so
		passing N reads the dword starting at byte N*4.

		Returns None if the data is too short to hold that dword.
		"""

		start = offset * 4
		end = (offset + 1) * 4

		if end > len(data):
			return None

		return struct.unpack('<L', data[start:end])[0]

	3411

	3412

	def get_dword_at_rva(self, rva):
		"""Return the double word value stored at the given RVA.

		Returns None when the data at the RVA cannot be fetched
		(PEFormatError) or is too short to hold a dword.
		"""

		try:
			raw = self.get_data(rva)
		except PEFormatError:
			return None

		return self.get_dword_from_data(raw[:4], 0)

	3424

	3425

	def get_dword_from_offset(self, offset):
		"""Return the little-endian double word at the given file offset.

		Returns None when fewer than four bytes are available there.
		"""

		end = offset + 4
		if end > len(self.__data__):
			return None

		return self.get_dword_from_data(self.__data__[offset:end], 0)

	3433

	3434

	def set_dword_at_rva(self, rva, dword):
		"""Write a double word at the file offset corresponding to 'rva'.

		Returns whatever set_bytes_at_rva() returns.
		"""
		packed = self.get_data_from_dword(dword)
		return self.set_bytes_at_rva(rva, packed)

	3438

	3439

	def set_dword_at_offset(self, offset, dword):
		"""Write a double word at the given file offset.

		Returns whatever set_bytes_at_offset() returns.
		"""
		packed = self.get_data_from_dword(dword)
		return self.set_bytes_at_offset(offset, packed)

	3443

	3444

	3445

	3446 ##

	3447 # Word get/set

	3448 ##

	3449

	def get_data_from_word(self, word):
		"""Pack a word into its two-byte little-endian string."""
		packed = struct.pack('<H', word)
		return packed

	3453

	3454

	def get_word_from_data(self, data, offset):
		"""Convert two bytes of data to a word (little endian).

		'offset' indexes into 'data' as if it were an array of words, so
		passing N reads the word starting at byte N*2.

		Returns None if the data is too short to hold that word.
		"""

		start = offset * 2
		end = (offset + 1) * 2

		if end > len(data):
			return None

		return struct.unpack('<H', data[start:end])[0]

	3468

	3469

	def get_word_at_rva(self, rva):
		"""Return the word value stored at the given RVA.

		Returns None when the data at the RVA cannot be fetched
		(PEFormatError) or is too short to hold a word.
		"""

		try:
			raw = self.get_data(rva)
		except PEFormatError:
			return None

		return self.get_word_from_data(raw[:2], 0)

	3481

	3482

	def get_word_from_offset(self, offset):
		"""Return the little-endian word at the given file offset.

		Returns None when fewer than two bytes are available there.
		"""

		end = offset + 2
		if end > len(self.__data__):
			return None

		return self.get_word_from_data(self.__data__[offset:end], 0)

	3490

	3491

	def set_word_at_rva(self, rva, word):
		"""Write a word at the file offset corresponding to 'rva'.

		Returns whatever set_bytes_at_rva() returns.
		"""
		packed = self.get_data_from_word(word)
		return self.set_bytes_at_rva(rva, packed)

	3495

	3496

	def set_word_at_offset(self, offset, word):
		"""Write a word at the given file offset.

		Returns whatever set_bytes_at_offset() returns.
		"""
		packed = self.get_data_from_word(word)
		return self.set_bytes_at_offset(offset, packed)

	3500

	3501

	3502 ##

	3503 # Quad-Word get/set

	3504 ##

	3505

	def get_data_from_qword(self, word):
		"""Pack a quad-word into its eight-byte little-endian string."""
		packed = struct.pack('<Q', word)
		return packed

	3509

	3510

	def get_qword_from_data(self, data, offset):
		"""Convert eight bytes of data to a quad-word (little endian).

		'offset' indexes into 'data' as if it were an array of quad-words,
		so passing N reads the quad-word starting at byte N*8.

		Returns None if the data is too short to hold that quad-word.
		"""

		start = offset * 8
		end = (offset + 1) * 8

		if end > len(data):
			return None

		return struct.unpack('<Q', data[start:end])[0]

	3524

	3525

	def get_qword_at_rva(self, rva):
		"""Return the quad-word value stored at the given RVA.

		Returns None when the data at the RVA cannot be fetched
		(PEFormatError) or is too short to hold a quad-word.
		"""

		try:
			raw = self.get_data(rva)
		except PEFormatError:
			return None

		return self.get_qword_from_data(raw[:8], 0)

	3537

	3538

	def get_qword_from_offset(self, offset):
		"""Return the little-endian quad-word at the given file offset.

		Returns None when fewer than eight bytes are available there.
		"""

		end = offset + 8
		if end > len(self.__data__):
			return None

		return self.get_qword_from_data(self.__data__[offset:end], 0)

	3546

	3547

	def set_qword_at_rva(self, rva, qword):
		"""Write a quad-word at the file offset corresponding to 'rva'.

		Returns whatever set_bytes_at_rva() returns.
		"""
		packed = self.get_data_from_qword(qword)
		return self.set_bytes_at_rva(rva, packed)

	3551

	3552

	def set_qword_at_offset(self, offset, qword):
		"""Write a quad-word at the given file offset.

		Returns whatever set_bytes_at_offset() returns.
		"""
		packed = self.get_data_from_qword(qword)
		return self.set_bytes_at_offset(offset, packed)

	3556

	3557

	3558

	3559 ##

	3560 # Set bytes

	3561 ##

	3562

	3563

	def set_bytes_at_rva(self, rva, data):
		"""Overwrite, with the given string, the bytes at the file offset
		corresponding to the given RVA.

		Return True if successful, False otherwise. It fails when the RVA
		cannot be mapped to a file offset or when the offset is outside the
		file's boundaries.
		"""

		offset = self.get_physical_by_rva(rva)

		# get_physical_by_rva() signals failure with None; a file offset of
		# zero is a legitimate result and must not be treated as a failure.
		if offset is None:
			return False

		return self.set_bytes_at_offset(offset, data)

	3576

	3577

	def set_bytes_at_offset(self, offset, data):
		"""Overwrite the bytes at the given file offset with the given string.

		Return True if successful, False when 'offset' is negative or not
		inside the file's data. Raises TypeError when 'data' is not a str.
		After a successful write every section's data is refreshed from the
		modified file data.
		"""

		if not isinstance(data, str):
			raise TypeError('data should be of type: str')

		if offset < 0 or offset >= len(self.__data__):
			return False

		head = self.__data__[:offset]
		tail = self.__data__[offset + len(data):]
		self.__data__ = head + data + tail

		# Re-slice each section so it reflects the modified file data.
		for section in self.sections:
			start = section.PointerToRawData
			stop = start + section.SizeOfRawData
			section.data = self.__data__[start:stop]

		return True

	3603

	3604

	3605

	def relocate_image(self, new_ImageBase):
		"""Apply the relocation information to the image using the provided
		new image base.

		Every entry of every block in DIRECTORY_ENTRY_BASERELOC is processed
		and the value at its RVA is adjusted by the difference between
		'new_ImageBase' and OPTIONAL_HEADER.ImageBase. The writes go through
		the set_*_at_rva() methods.

		The resulting image can then be retrieved through
		get_memory_mapped_image() to get something closer to what would be
		found in memory once the Windows loader finished its work.
		"""

		relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase

		for reloc in self.DIRECTORY_ENTRY_BASERELOC:

			# Iterate with an index: an IMAGE_REL_BASED_HIGHADJ relocation
			# also consumes the next entry, which must then be skipped.
			entry_idx = 0
			while entry_idx < len(reloc.entries):

				entry = reloc.entries[entry_idx]
				entry_idx += 1

				if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
					# Nothing to do for this type of relocation
					pass

				elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
					# Add the high 16 bits of relocation_difference to the
					# 16-bit value at RVA=entry.rva. The shift must be
					# parenthesised: '+' binds tighter than '>>', so
					# without it the sum would be shifted instead.
					self.set_word_at_rva(
						entry.rva,
						(self.get_word_at_rva(entry.rva) +
							(relocation_difference >> 16)) & 0xffff)

				elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
					# Add the low 16 bits of relocation_difference to the
					# 16-bit value at RVA=entry.rva.
					self.set_word_at_rva(
						entry.rva,
						(self.get_word_at_rva(entry.rva) +
							relocation_difference) & 0xffff)

				elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
					# Add relocation_difference to the 32-bit value at
					# RVA=entry.rva.
					self.set_dword_at_rva(
						entry.rva,
						self.get_dword_at_rva(entry.rva) + relocation_difference)

				elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
					# The 32-bit value is composed from the 16-bit value at
					# RVA=entry.rva (high half) plus the next relocation
					# entry (low half); only the adjusted high half is
					# written back.

					# If the next entry is beyond the array's limits,
					# abort this block: the table is corrupt.
					if entry_idx == len(reloc.entries):
						break

					next_entry = reloc.entries[entry_idx]
					entry_idx += 1

					# Evaluation order is unchanged from the original; the
					# parentheses only make it explicit.
					# NOTE(review): next_entry.rva is used as the low half
					# and no rounding term is added - confirm against the
					# PE specification.
					adjusted = ((self.get_word_at_rva(entry.rva) << 16) +
						next_entry.rva + relocation_difference)
					self.set_word_at_rva(
						entry.rva, (adjusted & 0xffff0000) >> 16)

				elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
					# Add relocation_difference to the 64-bit value at
					# RVA=entry.rva.
					self.set_qword_at_rva(
						entry.rva,
						self.get_qword_at_rva(entry.rva) + relocation_difference)

	3698

	3699

	def verify_checksum(self):
		"""Tell whether the CheckSum stored in the optional header equals
		the value computed by generate_checksum()."""

		stored = self.OPTIONAL_HEADER.CheckSum
		return stored == self.generate_checksum()

	3703

	3704

	def generate_checksum(self):
		"""Compute the PE checksum of the file data.

		The data is summed as little-endian 32-bit values with end-around
		carry, skipping the dword that holds the CheckSum field itself. The
		sum is then folded to 16 bits and the length of the data is added.

		Returns the computed checksum as an integer.
		"""

		# Offset of the CheckSum field inside the optional header.
		checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
		checksum_index = checksum_offset // 4

		# Zero-pad to a dword boundary so that trailing bytes of a file
		# whose length is not a multiple of four are still summed.
		# struct.pack() yields the same string type as the data.
		data = self.__data__
		padded = data + struct.pack('B', 0) * (-len(data) % 4)

		checksum = 0

		for i in range(len(padded) // 4):

			# Skip the checksum field
			if i == checksum_index:
				continue

			# Explicit little-endian, standard-size format: the native 'L'
			# is eight bytes wide on LP64 platforms and cannot unpack a
			# four-byte slice there.
			dword = struct.unpack('<L', padded[i * 4:i * 4 + 4])[0]
			checksum = (checksum & 0xffffffff) + dword + (checksum >> 32)
			if checksum > 2**32:
				checksum = (checksum & 0xffffffff) + (checksum >> 32)

		checksum = (checksum & 0xffff) + (checksum >> 16)
		checksum = (checksum) + (checksum >> 16)
		checksum = checksum & 0xffff

		# The length added is that of the original data, not the padded one.
		return checksum + len(data)

OLD	NEW

« no previous file with comments | « tools/symsrc/pdb_fingerprint_from_img.py ('k') | tools/symsrc/source_index.py » ('j') | no next file with comments »