OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Dump functions called by static initializers in a Linux Release binary. |
| 7 |
| 8 Usage example: |
| 9 tools/linux/dump-static-initializers.py out/Release/chrome |
| 10 |
| 11 A brief overview of static initialization: |
| 12 1) the compiler writes out, per object file, a function that contains |
| 13 the static initializers for that file. |
| 14 2) the compiler also writes out a pointer to that function in a special |
| 15 section. |
| 16 3) at link time, the linker concatenates the function pointer sections |
| 17 into a single list of all initializers. |
| 18 4) at run time, on startup the binary runs all function pointers. |
| 19 |
| 20 The functions in (1) all have mangled names of the form |
| 21 _GLOBAL__I_foobar.cc |
| 22 using objdump, we can disassemble those functions and dump all symbols that |
| 23 they reference. |
| 24 """ |
| 25 |
| 26 import optparse |
| 27 import re |
| 28 import subprocess |
| 29 import sys |
| 30 |
# Lookup table: demangled symbol name -> explanatory remark that is printed in
# brackets next to that symbol in the report.
NOTES = dict([
  ('__cxa_atexit@plt', 'registers a dtor to run at exit'),
  ('std::__ioinit', '#includes <iostream>, use <ostream> instead'),
])
| 36 |
def _IsGitWorkspace():
  """Return True if `git rev-parse` succeeds in the current directory.

  A missing or unrunnable git executable is treated as "not a git checkout"
  rather than aborting the whole script with an OSError at import time.
  """
  try:
    git = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
  except OSError:
    return False
  # communicate() drains stderr while waiting, so git can never block on a
  # full pipe.
  git.communicate()
  return git.returncode == 0


# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _IsGitWorkspace()
| 40 |
class Demangler(object):
  """A wrapper around c++filt to provide a function to demangle symbols.

  One c++filt child process is started when the object is constructed and is
  reused for every Demangle() call: each call writes one line to its stdin
  and reads one line back from its stdout.
  """

  def __init__(self):
    # universal_newlines=True opens the pipes in text mode, so Demangle() can
    # write and read plain strings regardless of the Python major version.
    self.cppfilt = subprocess.Popen(['c++filt'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)

  def Demangle(self, sym):
    """Given mangled symbol |sym|, return its demangled form.

    Args:
      sym: the mangled symbol name, without a trailing newline.

    Returns:
      The line c++filt printed in response, stripped of surrounding
      whitespace.
    """
    self.cppfilt.stdin.write(sym + '\n')
    # If the pipe is buffered the symbol would otherwise sit in the buffer and
    # the readline() below would wait forever for an answer.
    self.cppfilt.stdin.flush()
    return self.cppfilt.stdout.readline().strip()
| 52 |
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')
def QualifyFilenameAsProto(filename):
  """Attempt to qualify a bare |filename| with a src-relative path, assuming it
  is a protoc-generated file.

  The lookup asks `git ls-files` for "*/<basename>.proto", so a successful
  result is the path of the matching .proto file.

  Args:
    filename: bare file name, e.g. "cert_logger.pb.cc".

  Returns:
    The single path reported by git if exactly one file matches. Otherwise
    (not a git checkout, not a *.pb.cc name, no hit, or several hits) the
    original |filename| unchanged.
  """
  if not IS_GIT_WORKSPACE:
    return filename
  match = protobuf_filename_re.match(filename)
  if not match:
    return filename
  # group(1) is the captured stem as a string. (groups() would return a tuple,
  # which only formats correctly by accident.)
  basename = match.group(1)
  gitlsfiles = subprocess.Popen(
      ['git', 'ls-files', '--', '*/%s.proto' % basename],
      stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads everything and waits for git, so neither the pipe nor
  # the child process is left behind.
  output, _ = gitlsfiles.communicate()
  hits = output.splitlines()
  if len(hits) != 1:
    return filename  # No hit, or multiple hits: can't help.
  return hits[0].strip()
| 74 |
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')
def QualifyFilename(filename, symbol):
  """Given a bare filename and a symbol that occurs in it, attempt to qualify
  it with a src-relative path.

  The lookup runs `git grep -l` for the unqualified symbol name, restricted to
  paths matching "*/<filename>".

  Args:
    filename: bare file name to qualify.
    symbol: demangled symbol text referenced from that file.

  Returns:
    The single path reported by git if exactly one file matches. Otherwise
    (not a git checkout, unparsable symbol, no hit, or more than one hit) the
    original |filename| unchanged.
  """
  if not IS_GIT_WORKSPACE:
    return filename
  match = symbol_code_name_re.match(symbol)
  if not match:
    return filename
  symbol = match.group(1)
  gitgrep = subprocess.Popen(
      ['git', 'grep', '-l', symbol, '--', '*/%s' % filename],
      stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() reads everything and waits for git, so neither the pipe nor
  # the child process is left behind, even when there are many hits.
  output, _ = gitgrep.communicate()
  hits = output.splitlines()
  if len(hits) != 1:
    return filename  # Zero or more than one candidate; return bare filename.
  return hits[0].strip()
| 101 |
# Pattern for the nm output lines that describe static-initializer functions:
# "<hex address> <hex size> t <initializer symbol>", where the symbol is
# _GLOBAL__I_<file> or _GLOBAL__sub_I_<file>, optionally prefixed with _ZN12.
# test_ParseNmLine has sample lines.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')
def ParseNmLine(line):
  """Parse one line of nm output describing a static initializer.

  Returns a (file, start, size) tuple with start and size converted from
  hexadecimal, or None when the line is not a static-initializer entry.
  """
  found = nm_re.match(line)
  if found is None:
    return None
  start_hex, size_hex, source_file = found.group(1, 2, 3)
  return (source_file, int(start_hex, 16), int(size_hex, 16))
| 112 |
| 113 |
def test_ParseNmLine():
  """Check ParseNmLine against sample nm lines for both symbol spellings."""
  samples = (
      ('0000000001919920 0000000000000008 t '
       '_ZN12_GLOBAL__I_safe_browsing_service.cc',
       ('safe_browsing_service.cc', 26319136, 8)),
      ('00000000026b9eb0 0000000000000024 t '
       '_GLOBAL__sub_I_extension_specifics.pb.cc',
       ('extension_specifics.pb.cc', 40607408, 36)),
  )
  for nm_line, expected in samples:
    parse = ParseNmLine(nm_line)
    # On failure the assertion message shows what was actually parsed.
    assert parse == expected, parse
| 125 |
# Just always run the test; it is fast enough.
# This is a module-level call, so the self-check executes every time the file
# is loaded and a failing assertion stops the script before main() runs.
test_ParseNmLine()
| 128 |
| 129 |
def ParseNm(binary):
  """Given a binary, yield static initializers as (file, start, size) tuples.

  Runs `nm -S` on |binary| and passes each output line to ParseNmLine; lines
  that are not static-initializer entries are skipped.

  Args:
    binary: path of the binary to inspect.

  Yields:
    One (file, start, size) tuple per static-initializer symbol, in the order
    nm prints them.
  """
  # Text mode so each line is a str that the str-based nm_re can match.
  nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                        universal_newlines=True)
  try:
    for line in nm.stdout:
      parse = ParseNmLine(line)
      if parse:
        yield parse
  finally:
    # Runs when the output is exhausted and also when the consumer abandons
    # the generator early: release the pipe and reap the child process.
    nm.stdout.close()
    nm.wait()
| 137 |
# Regex matching objdump output for the symbols we're interested in.
# Example line:
#     12354ab: (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')
# Matches a reference that points back into a static-initializer function, in
# every spelling the nm parsing accepts: _GLOBAL__I_*, _GLOBAL__sub_I_*, each
# optionally prefixed with _ZN12.
initializer_symbol_re = re.compile(r'(?:_ZN12)?_GLOBAL__(?:sub_)?I_')
def ExtractSymbolReferences(binary, start, end):
  """Given a span of addresses, returns symbol references from disassembly.

  Args:
    binary: path of the binary to disassemble.
    start: first address of the span (passed as --start-address).
    end: address at which to stop (passed as --stop-address).

  Returns:
    A sorted list of the distinct symbol names found between <...> in the
    disassembly, minus uninformative ones (.LC*, _DYNAMIC*) and references
    back into a static-initializer function.

  Raises:
    RuntimeError: the disassembly mentions
        __static_initialization_and_destruction, which suggests a Debug
        binary.
  """
  cmd = ['objdump', binary, '--disassemble',
         '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
  # Text mode so the substring test and the str regexes below work on lines.
  objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)

  refs = set()
  try:
    for line in objdump.stdout:
      if '__static_initialization_and_destruction' in line:
        raise RuntimeError('code mentions '
                           '__static_initialization_and_destruction; '
                           'did you accidentally run this on a Debug binary?')
      match = disassembly_re.search(line)
      if not match:
        continue
      ref = match.group(1)
      if ref.startswith(('.LC', '_DYNAMIC')):
        # Ignore these, they are uninformative.
        continue
      if initializer_symbol_re.match(ref):
        # Probably a relative jump within this function.
        continue
      refs.add(ref)
  finally:
    # Release the pipe and reap objdump on both the normal and the error path.
    objdump.stdout.close()
    objdump.wait()

  return sorted(refs)
| 166 |
def main():
  """Command-line entry point: report the static initializers of a binary.

  Expects exactly one positional argument, the binary to inspect. With
  -d/--diffable the files are sorted and every output line is prefixed with
  '# ' and the file name. Otherwise each file gets a header line followed by
  its references.

  Returns:
    0 after printing the report.
  """
  option_parser = optparse.OptionParser(usage='%prog [option] filename')
  option_parser.add_option(
      '-d', '--diffable', dest='diffable', action='store_true', default=False,
      help='Prints the filename on each line, for more easily '
           'diff-able output. (Used by sizes.py)')
  opts, args = option_parser.parse_args()
  if len(args) != 1:
    option_parser.error('missing filename argument')
    return 1
  binary = args[0]

  demangler = Demangler()
  total_files = 0
  total_refs = 0

  initializers = ParseNm(binary)
  if opts.diffable:
    # A stable order keeps successive runs comparable.
    initializers = sorted(initializers)

  for filename, addr, size in initializers:
    total_files += 1
    qualified_filename = QualifyFilenameAsProto(filename)
    report_lines = []

    if size != 2:
      for mangled in ExtractSymbolReferences(binary, addr, addr + size):
        total_refs += 1
        ref = demangler.Demangle(mangled)

        # Keep trying to find a src-relative path until one symbol succeeds.
        if qualified_filename == filename:
          qualified_filename = QualifyFilename(filename, ref)

        remark = NOTES.get(ref, '')
        if ref not in NOTES and ref.endswith('_2eproto()'):
          remark = 'protocol compiler bug: crbug.com/105626'

        report_lines.append('%s [%s]' % (ref, remark) if remark else ref)
    else:
      # gcc generates a two-byte 'repz retq' initializer when there is a
      # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
      # Android uses gcc 4.4.
      report_lines.append('[empty ctor, but it still has cost on gcc <4.6]')

    if not opts.diffable:
      print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
                                                        addr, size))
      print(''.join(' %s\n' % r for r in report_lines))
    elif report_lines:
      print('\n'.join('# ' + qualified_filename + ' ' + r
                      for r in report_lines))
    else:
      print('# %s: (empty initializer list)' % qualified_filename)

  summary = 'Found %d static initializers in %d files.' % (total_refs,
                                                           total_files)
  if opts.diffable:
    summary = '# ' + summary
  print(summary)

  return 0
| 232 |
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |