Index: tools/win/pe_summarize.py |
diff --git a/tools/win/pe_summarize.py b/tools/win/pe_summarize.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..95becaa7d415f9b2db460f5b1b80805690341425 |
--- /dev/null |
+++ b/tools/win/pe_summarize.py |
@@ -0,0 +1,133 @@ |
+# Copyright (c) 2016 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+""" |
+Parse information about a PE file to summarize the on-disk and |
+in-memory sizes of the sections, in decimal MB instead of in hex. This |
+script will also display memory-size diffs between consecutive files if they |
+have the same file name. This script relies on having VS 2015 installed and is |
+used to help investigate binary size regressions and improvements. |
+ |
+Section information printed by dumpbin looks like this: |
+ |
+SECTION HEADER #2 |
+ .rdata name |
+ 5CCD56 virtual size |
+ 1CEF000 virtual address (11CEF000 to 122BBD55) |
+ 5CCE00 size of raw data |
+ 1CEE000 file pointer to raw data (01CEE000 to 022BADFF) |
+ 0 file pointer to relocation table |
+ 0 file pointer to line numbers |
+ 0 number of relocations |
+ 0 number of line numbers |
+40000040 flags |
+ Initialized Data |
+ Read Only |
+ |
+The reports generated by this script look like this: |
+ |
+> python tools\win\pe_summarize.py out\release\chrome.dll |
+Size of out\release\chrome.dll is 41.190912 MB |
+ name: mem size , disk size |
+ .text: 33.199959 MB |
+ .rdata: 6.170416 MB |
+ .data: 0.713864 MB, 0.270336 MB |
+ .tls: 0.000025 MB |
+ CPADinfo: 0.000036 MB |
+ .rodata: 0.003216 MB |
+ .crthunk: 0.000064 MB |
+ .gfids: 0.001052 MB |
+ _RDATA: 0.000288 MB |
+ .rsrc: 0.130808 MB |
+ .reloc: 1.410172 MB |
+ |
+Note that the .data section has separate in-memory and on-disk sizes due to |
+zero-initialized data. Other sections have smaller discrepancies - the disk size |
+is only printed if it differs from the memory size by 512 bytes or more. |
+ |
+Note that many of the sections - such as .text, .rdata, and .rsrc - are shared |
+between processes. Some sections - such as .reloc - are discarded after a |
+process is loaded. Other sections, such as .data, produce private pages and are |
+therefore objectively 'worse' than the others. |
+""" |
+ |
+import os |
+import subprocess |
+import sys |
+ |
+ |
+def main(): |
+  """Print section sizes for each PE file named on the command line. |
+ |
+  Prepends the VS 2015 compiler directories to PATH (a process-wide change), |
+  then runs "dumpbin.exe /headers" on every existing path in sys.argv[1:]. |
+  For each section the in-memory size is printed in decimal MB, followed by |
+  the on-disk size when the two differ by 512 bytes or more. When a file has |
+  the same file name (case-insensitive, directory ignored) as the previously |
+  processed file, the per-section memory size changes are printed as well. |
+ |
+  Returns: |
+    0 after printing usage when no file names are given, otherwise None. |
+  """ |
+  if len(sys.argv) < 2: |
+    print r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0] |
+    print r'Sample: %s chrome.dll' % sys.argv[0] |
+    print r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0] |
+    return 0 |
+ |
+  # Add to the path so that dumpbin can run. |
+  vs_dir = r'C:\Program Files (x86)\Microsoft Visual Studio 14.0' |
stanisc
2016/12/13 22:58:53
Does this assume VS is installed or it just adds a
brucedawson
2016/12/14 00:30:23
I'm not sure why I was adding two directories. I t
|
+  os.environ['PATH'] = (os.path.join(vs_dir, r'VC\bin\amd64_x86;') + |
+                        os.path.join(vs_dir, r'VC\bin\amd64;') + |
+                        os.environ["PATH"]) |
+ |
+  # Track the name of the last PE (Portable Executable) file to be processed - |
+  # file name only, without the path - together with its full path and its |
+  # list of (section name, memory size) tuples, for the size change report. |
+  last_pe_filepart = "" |
+  last_pe_path = None |
+  last_results = [] |
+ |
+  for pe_path in sys.argv[1:]: |
+    results = [] |
+    if not os.path.exists(pe_path): |
+      print '%s does not exist!' % pe_path |
+      continue |
+ |
+    print 'Size of %s is %1.6f MB' % (pe_path, os.path.getsize(pe_path) / 1e6) |
+    print '%10s: %9s , %9s' % ('name', 'mem size', 'disk size') |
+ |
+    # sections is None while outside a section header, and a list of the |
+    # stripped lines seen so far while inside one. |
+    sections = None |
+    # Pass the arguments as a list so that subprocess does the quoting of |
+    # paths that contain spaces. |
+    command = ['dumpbin.exe', '/headers', pe_path] |
+    for line in subprocess.check_output(command).splitlines(): |
+      if line.startswith('SECTION HEADER #'): |
+        sections = [] |
+      elif sections is not None: |
+        # We must be processing a section header. |
+        sections.append(line.strip()) |
+        # When we've accumulated four lines of data, process them. |
+        if len(sections) == 4: |
+          # The third line (virtual address) is not needed. |
+          name, memory_size, _, disk_size = sections |
+          assert name.count('name') == 1 |
+          assert memory_size.count('virtual size') == 1 |
+          assert disk_size.count('size of raw data') == 1 |
+          # The value is the first token on each line; sizes are in hex. |
+          name = name.split()[0] |
+          memory_size = int(memory_size.split()[0], 16) |
+          disk_size = int(disk_size.split()[0], 16) |
+          # Print the sizes in decimal MB. This makes large numbers easier to |
stanisc
2016/12/13 22:58:53
This is a bit unusual, but I guess it doesn't matt
brucedawson
2016/12/14 00:30:23
Yeah, I'm a pretty strong believer in decimal for
|
+          # understand - 33.199959 is easier to read than 33199959. Decimal MB |
+          # is used to allow simple conversions to a precise number of bytes. |
+          if abs(memory_size - disk_size) < 512: |
+            print '%10s: %9.6f MB' % (name, memory_size / 1e6) |
+          else: |
+            print '%10s: %9.6f MB, %9.6f MB' % (name, memory_size / 1e6, |
+                                                disk_size / 1e6) |
+          results.append((name, memory_size)) |
+          # Ignore the rest of this section header. |
+          sections = None |
+ |
+    pe_filepart = os.path.split(pe_path)[1] |
+    if pe_filepart.lower() == last_pe_filepart.lower(): |
+      # Print out the section-by-section size changes, for memory sizes only. |
+      print 'Memory size change from %s to %s' % (last_pe_path, pe_path) |
+      # Sections are paired up by position, so the comparison is only |
+      # meaningful when both files have the same sections in the same order. |
+      if [r[0] for r in results] != [r[0] for r in last_results]: |
stanisc
2016/12/13 22:58:53
Why is this?
brucedawson
2016/12/14 00:30:23
Comment and error message added.
|
+        print 'Sections differ between the two files - cannot compare sizes.' |
+      else: |
+        total_delta = 0 |
+        for new, old in zip(results, last_results): |
+          delta = new[1] - old[1] |
+          total_delta += delta |
+          if delta: |
+            print '%12s: %7d bytes change' % (new[0], delta) |
+        print 'Total change: %7d bytes' % total_delta |
+    last_pe_filepart = pe_filepart |
+    last_pe_path = pe_path |
+    last_results = results |
+ |
+ |
+# Run only when executed as a script. main() returns 0 after printing usage |
+# and None otherwise, so a run that completes normally exits with status 0. |
+if __name__ == '__main__': |
+  sys.exit(main()) |