OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (c) 2016 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """ | |
6 Parse information about a PE file to summarize the on-disk and | |
7 in-memory sizes of the sections, in decimal MB instead of in hex. This | |
8 script will also automatically display diffs between two files if they | |
9 have the same name. This script relies on having VS 2015 installed and is used | |
10 to help investigate binary size regressions and improvements. | |
11 | |
12 Section information printed by dumpbin looks like this: | |
13 | |
14 SECTION HEADER #2 | |
15 .rdata name | |
16 5CCD56 virtual size | |
17 1CEF000 virtual address (11CEF000 to 122BBD55) | |
18 5CCE00 size of raw data | |
19 1CEE000 file pointer to raw data (01CEE000 to 022BADFF) | |
20 0 file pointer to relocation table | |
21 0 file pointer to line numbers | |
22 0 number of relocations | |
23 0 number of line numbers | |
24 40000040 flags | |
25 Initialized Data | |
26 Read Only | |
27 | |
28 The reports generated by this script look like this: | |
29 | |
30 > python tools\win\pe_summarize.py out\release\chrome.dll | |
31 Size of out\release\chrome.dll is 41.190912 MB | |
32 name: mem size , disk size | |
33 .text: 33.199959 MB | |
34 .rdata: 6.170416 MB | |
35 .data: 0.713864 MB, 0.270336 MB | |
36 .tls: 0.000025 MB | |
37 CPADinfo: 0.000036 MB | |
38 .rodata: 0.003216 MB | |
39 .crthunk: 0.000064 MB | |
40 .gfids: 0.001052 MB | |
41 _RDATA: 0.000288 MB | |
42 .rsrc: 0.130808 MB | |
43 .reloc: 1.410172 MB | |
44 | |
45 Note that the .data section has separate in-memory and on-disk sizes due to | |
46 zero-initialized data. Other sections have smaller discrepancies - the disk size | |
47 is only printed if it differs from the memory size by 512 bytes or more. | |
48 | |
49 Note that many of the sections - such as .text, .rdata, and .rsrc - are shared | |
50 between processes. Some sections - such as .reloc - are discarded after a | |
51 process is loaded. Other sections, such as .data, produce private pages and are | |
52 therefore objectively 'worse' than the others. | |
53 """ | |
54 | |
55 import os | |
56 import subprocess | |
57 import sys | |
58 | |
59 | |
def _parse_section_sizes(dumpbin_output):
  """Extract section sizes from the text printed by 'dumpbin /headers'.

  Only the four lines that follow each 'SECTION HEADER #' line are examined:
  the name, the virtual size, the virtual address (ignored) and the size of
  raw data.

  Args:
    dumpbin_output: The complete dumpbin output as a text string.

  Returns:
    A list of (name, memory_size, disk_size) tuples in the order the sections
    were printed, with both sizes converted from hex to integers (bytes).

  Raises:
    ValueError: If a section header does not have the expected layout.
  """
  sections = []
  # None means "not inside a section header"; a list collects header lines.
  header_lines = None
  for line in dumpbin_output.splitlines():
    if line.startswith('SECTION HEADER #'):
      header_lines = []
    elif header_lines is not None:
      header_lines.append(line.strip())
      # When we've accumulated four lines of data, process them.
      if len(header_lines) == 4:
        name, memory_size, _, disk_size = header_lines
        # Check the trailing labels rather than counting substrings so that a
        # section whose own name contains the word 'name' is still accepted.
        if not (name.endswith('name') and
                memory_size.endswith('virtual size') and
                disk_size.endswith('size of raw data')):
          raise ValueError('Unexpected section header layout: %r' %
                           (header_lines,))
        sections.append((name.split()[0],
                         int(memory_size.split()[0], 16),
                         int(disk_size.split()[0], 16)))
        header_lines = None
  return sections


def _print_memory_size_changes(last_pe_path, last_results, pe_path, results):
  """Print the per-section change in memory size between two PE files.

  Args:
    last_pe_path: Path of the previously processed PE file.
    last_results: List of (name, memory_size) tuples for that file.
    pe_path: Path of the PE file that was just processed.
    results: List of (name, memory_size) tuples for pe_path.
  """
  print('Memory size change from %s to %s' % (last_pe_path, pe_path))
  names = [name for name, _ in results]
  last_names = [name for name, _ in last_results]
  # The comparison pairs sections up by position, so it is only meaningful
  # when both files contain the same sections in the same order.
  if names != last_names:
    print('Sections differ (%s vs. %s) - cannot compare sizes.' %
          (', '.join(last_names), ', '.join(names)))
    return
  total_delta = 0
  for (name, memory_size), (_, last_memory_size) in zip(results, last_results):
    delta = memory_size - last_memory_size
    total_delta += delta
    if delta:
      print('%12s: %7d bytes change' % (name, delta))
  print('Total change: %7d bytes' % total_delta)


def main():
  """Summarize the section sizes of each PE file named on the command line.

  For every existing file this prints the file size and then the in-memory
  size of each section in decimal MB, adding the on-disk size when the two
  differ by 512 bytes or more. When a file has the same name (ignoring case
  and directory) as the file processed just before it, the per-section change
  in memory size is printed as well.

  The print calls take a single pre-formatted string so that the script
  behaves the same under Python 2 and Python 3.

  Returns:
    0 when only the usage message was printed, otherwise None.

  Raises:
    OSError: If dumpbin.exe cannot be started.
    subprocess.CalledProcessError: If dumpbin.exe exits with an error.
    ValueError: If the dumpbin output does not have the expected layout.
  """
  if len(sys.argv) < 2:
    print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
    print(r'Sample: %s chrome.dll' % sys.argv[0])
    print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
    return 0

  # Add to the path so that dumpbin can run.
  vs_dir = r'C:\Program Files (x86)\Microsoft Visual Studio 14.0'
  os.environ['PATH'] = os.pathsep.join([
      os.path.join(vs_dir, r'VC\bin\amd64_x86'),
      os.path.join(vs_dir, r'VC\bin\amd64'),
      os.environ.get('PATH', ''),
  ])

  # Track the last PE (Portable Executable) file to be processed. The file
  # part is the file name only, without the path.
  last_pe_filepart = ''
  last_pe_path = None
  last_results = None

  for pe_path in sys.argv[1:]:
    if not os.path.exists(pe_path):
      print('%s does not exist!' % pe_path)
      continue

    print('Size of %s is %1.6f MB' % (pe_path, os.path.getsize(pe_path) / 1e6))
    print('%10s: %9s , %9s' % ('name', 'mem size', 'disk size'))

    # An argument list avoids hand-quoting the path, and universal_newlines
    # makes the output text rather than bytes on Python 3.
    output = subprocess.check_output(['dumpbin.exe', '/headers', pe_path],
                                     universal_newlines=True)
    results = []
    for name, memory_size, disk_size in _parse_section_sizes(output):
      # Print the sizes in decimal MB. This makes large numbers easier to
      # understand - 33.199959 is easier to read than 33199959. Decimal MB
      # is used to allow simple conversions to a precise number of bytes.
      if abs(memory_size - disk_size) < 512:
        print('%10s: %9.6f MB' % (name, memory_size / 1e6))
      else:
        print('%10s: %9.6f MB, %9.6f MB' % (name, memory_size / 1e6,
                                            disk_size / 1e6))
      results.append((name, memory_size))

    print('')
    pe_filepart = os.path.split(pe_path)[1]
    if pe_filepart.lower() == last_pe_filepart.lower():
      # Print out the section-by-section size changes, for memory sizes only.
      _print_memory_size_changes(last_pe_path, last_results, pe_path, results)
    last_pe_filepart = pe_filepart
    last_pe_path = pe_path
    last_results = results
131 | |
# Script entry point: run main() and use its return value as the process exit
# status.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)
OLD | NEW |