Chromium Code Reviews| Index: tools/win/pe_summarize.py |
| diff --git a/tools/win/pe_summarize.py b/tools/win/pe_summarize.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..95becaa7d415f9b2db460f5b1b80805690341425 |
| --- /dev/null |
| +++ b/tools/win/pe_summarize.py |
| @@ -0,0 +1,133 @@ |
| +# Copyright (c) 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +""" |
| +Parse information about a PE file to summarize the on-disk and |
| +in-memory sizes of its sections, in decimal MB instead of in hex. When two |
| +consecutive files on the command line have the same file name (ignoring the |
| +path and case), this script also displays the per-section memory size changes |
| +between them. This script relies on having VS 2015 installed and is used |
| +to help investigate binary size regressions and improvements. |
| + |
| +Section information printed by dumpbin looks like this: |
| + |
| +SECTION HEADER #2 |
| + .rdata name |
| + 5CCD56 virtual size |
| + 1CEF000 virtual address (11CEF000 to 122BBD55) |
| + 5CCE00 size of raw data |
| + 1CEE000 file pointer to raw data (01CEE000 to 022BADFF) |
| + 0 file pointer to relocation table |
| + 0 file pointer to line numbers |
| + 0 number of relocations |
| + 0 number of line numbers |
| +40000040 flags |
| + Initialized Data |
| + Read Only |
| + |
| +The reports generated by this script look like this: |
| + |
| +> python tools\win\pe_summarize.py out\release\chrome.dll |
| +Size of out\release\chrome.dll is 41.190912 MB |
| + name: mem size , disk size |
| + .text: 33.199959 MB |
| + .rdata: 6.170416 MB |
| + .data: 0.713864 MB, 0.270336 MB |
| + .tls: 0.000025 MB |
| + CPADinfo: 0.000036 MB |
| + .rodata: 0.003216 MB |
| + .crthunk: 0.000064 MB |
| + .gfids: 0.001052 MB |
| + _RDATA: 0.000288 MB |
| + .rsrc: 0.130808 MB |
| + .reloc: 1.410172 MB |
| + |
| +Note that the .data section has separate in-memory and on-disk sizes due to |
| +zero-initialized data. Other sections have smaller discrepancies - the disk size |
| +is only printed if it differs from the memory size by 512 bytes or more. |
| + |
| +Note that many of the sections - such as .text, .rdata, and .rsrc - are shared |
| +between processes. Some sections - such as .reloc - are discarded after a |
| +process is loaded. Other sections, such as .data, produce private pages and are |
| +therefore objectively 'worse' than the others. |
| +""" |
| + |
| +import os |
| +import subprocess |
| +import sys |
| + |
| + |
| +def main(): |
| + if len(sys.argv) < 2: |
| + print r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0] |
| + print r'Sample: %s chrome.dll' % sys.argv[0] |
| + print r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0] |
| + return 0 |
| + |
| + # Prepend the VS 2015 compiler directories to PATH so that dumpbin.exe can run. |
| + vs_dir = r'C:\Program Files (x86)\Microsoft Visual Studio 14.0' |
|
stanisc
2016/12/13 22:58:53
Does this assume VS is installed or it just adds a
brucedawson
2016/12/14 00:30:23
I'm not sure why I was adding two directories. I t
|
| + os.environ['PATH'] = (os.path.join(vs_dir, r'VC\bin\amd64_x86;') + |
| + os.path.join(vs_dir, r'VC\bin\amd64;') + |
| + os.environ["PATH"]) |
| + |
| + # Track the name of the last PE (Portable Executable) file to be processed - |
| + # file name only, without the path. |
| + last_pe_filepart = "" |
| + |
| + for pe_path in sys.argv[1:]: |
| + results = [] |
| + if not os.path.exists(pe_path): |
| + print '%s does not exist!' % pe_path |
| + continue |
| + |
| + print 'Size of %s is %1.6f MB' % (pe_path, os.path.getsize(pe_path) / 1e6) |
| + print '%10s: %9s , %9s' % ('name', 'mem size', 'disk size') |
| + |
| + sections = None |
| + command = 'dumpbin.exe /headers "%s"' % pe_path |
| + for line in subprocess.check_output(command).splitlines(): |
| + if line.startswith('SECTION HEADER #'): |
| + sections = [] |
| + elif type(sections) == type([]): |
| + # We must be processing a section header. |
| + sections.append(line.strip()) |
| + # When we've accumulated four lines of data, process them. |
| + if len(sections) == 4: |
| + name, memory_size, _, disk_size = sections |
| + assert name.count('name') == 1 |
| + assert memory_size.count('virtual size') == 1 |
| + assert disk_size.count('size of raw data') == 1 |
| + name = name.split()[0] |
| + memory_size = int(memory_size.split()[0], 16) |
| + disk_size = int(disk_size.split()[0], 16) |
| + # Print the sizes in decimal MB. This makes large numbers easier to |
|
stanisc
2016/12/13 22:58:53
This is a bit unusual, but I guess it doesn't matt
brucedawson
2016/12/14 00:30:23
Yeah, I'm a pretty strong believer in decimal for
|
| + # understand - 33.199959 is easier to read than 33199959. Decimal MB |
| + # is used to allow simple conversions to a precise number of bytes. |
| + if abs(memory_size - disk_size) < 512: |
| + print '%10s: %9.6f MB' % (name, memory_size / 1e6) |
| + else: |
| + print '%10s: %9.6f MB, %9.6f MB' % (name, memory_size / 1e6, |
| + disk_size / 1e6) |
| + results.append((name, memory_size)) |
| + sections = None |
| + |
| + pe_filepart = os.path.split(pe_path)[1] |
| + if pe_filepart.lower() == last_pe_filepart.lower(): |
| + # Print out the section-by-section size changes, for memory sizes only. |
| + print 'Memory size change from %s to %s' % (last_pe_path, pe_path) |
| + total_delta = 0 |
| + for i in range(len(results)): |
| + assert(results[i][0] == last_results[i][0]) |
|
stanisc
2016/12/13 22:58:53
Why is this?
brucedawson
2016/12/14 00:30:23
Comment and error message added.
|
| + delta = results[i][1] - last_results[i][1] |
| + total_delta += delta |
| + if delta: |
| + print '%12s: %7d bytes change' % (results[i][0], delta) |
| + print 'Total change: %7d bytes' % total_delta |
| + last_pe_filepart = pe_filepart |
| + last_pe_path = pe_path |
| + last_results = results |
| + |
| + |
| +if __name__ == '__main__': |
| + sys.exit(main()) |