Index: tools/binary_size/binary_size_utils.py |
diff --git a/tools/binary_size/binary_size_utils.py b/tools/binary_size/binary_size_utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..86678217c8c77ed7e34dceabb9df085017785f54 |
--- /dev/null |
+++ b/tools/binary_size/binary_size_utils.py |
@@ -0,0 +1,60 @@ |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Common utilities for tools that deal with binary size information. |
+""" |
+ |
+import re |
+import sys |
+ |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Nit: add an extra newline here
Daniel Bratell
2014/05/21 08:42:13
Done.
|
def ParseNm(input):
  """Parse nm output, yielding one tuple per sized, non-BSS symbol.

  Args:
    input: an iterable over lines of nm output. Trailing whitespace
        (including the newline) is stripped from each line before matching.

  Yields:
    (symbol name, symbol type, symbol size, source file path).
    The size is the integer value of the hexadecimal size column.
    Path may be None if nm couldn't figure out the source file.

  Lines that carry no size (address-only symbols, symbols without an
  address, or lines with only an address and a type) are skipped silently,
  as are BSS symbols (type 'b' or 'B'). Any line that matches none of the
  known layouts is reported on stderr as "unparsed: <repr of line>" and
  otherwise ignored.
  """

  # Every pattern fragment is a raw string so that \t, \d and \? reach the
  # regex engine verbatim instead of being (mis)handled as string escapes.

  # Match lines with size, symbol, optional location, optional discriminator
  sym_re = re.compile(r'^[0-9a-f]{8,} '  # address (8+ hex digits)
                      r'([0-9a-f]{8,}) '  # size (8+ hex digits)
                      r'(.) '  # symbol type, one character
                      r'([^\t]+)'  # symbol name, separated from next by tab
                      r'(?:\t(.*):[\d\?]+)?.*$')  # location
  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]{8,} (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ {8,} (.) (.*)$')
  # Match lines with no symbol name, only addr and type
  addr_only_re = re.compile(r'^[0-9a-f]{8,} (.)$')

  for line in input:
    line = line.rstrip()

    match = sym_re.match(line)
    if match:
      size_hex, sym_type, sym, path = match.groups()
      if sym_type.lower() != 'b':  # skip all BSS for now
        yield sym, sym_type, int(size_hex, 16), path
      continue

    # Recognised layouts without a size: there is nothing to account for, so
    # the captured type/name are deliberately not inspected.
    if (addr_re.match(line) or noaddr_re.match(line) or
        addr_only_re.match(line)):
      continue

    # Written with sys.stderr.write rather than the Python 2-only
    # "print >>" statement so the diagnostic works on any interpreter; the
    # emitted text is unchanged.
    sys.stderr.write('unparsed: %r\n' % (line,))
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
What about using logging.error()?
Most of the cod
|