Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Dump functions called by static initializers in a Linux binary. | |
|
Nico
2011/09/06 22:45:00
"in a Linux release binary"?
| |
| 7 | |
| 8 A brief overview of static initialization: | |
| 9 1) the compiler writes out, per object file, a function that contains | |
|
Nico
2011/09/06 22:45:00
nit: move "per object file" to the beginning or en
| |
| 10 the static initializers for that file. | |
| 11 2) the compiler also writes out a pointer to that function in a special | |
| 12 section. | |
| 13 3) at link time, the linker concatenates the function pointer sections | |
| 14 into a single list of all initializers. | |
| 15 4) at run time, on startup the binary runs all function pointers. | |
| 16 | |
| 17 The functions in (1) all have mangled names of the form | |
| 18 _GLOBAL__I_foobar.cc | |
| 19 Using objdump, we can disassemble those functions and dump all symbols that | |
| 20 they reference. | |
|
Nico
2011/09/06 22:45:00
Example invocation command line?
| |
| 21 """ | |
| 22 | |
| 23 import re | |
| 24 import subprocess | |
| 25 import sys | |
| 26 | |
| 27 # A map of symbol => informative text about it. | |
| 28 NOTES = { | |
| 29 '__cxa_atexit@plt': 'registers a dtor to run at exit', | |
| 30 'std::__ioinit': '#includes <iostream>, use <ostream> instead', | |
| 31 } | |
| 32 | |
| 33 class Demangler(object): | |
| 34 """A wrapper around c++filt to provide a function to demangle symbols.""" | |
| 35 def __init__(self): | |
| 36 self.cppfilt = subprocess.Popen(['c++filt'], | |
| 37 stdin=subprocess.PIPE, | |
| 38 stdout=subprocess.PIPE) | |
| 39 | |
| 40 def Demangle(self, sym): | |
| 41 """Given mangled symbol |sym|, return its demangled form.""" | |
| 42 self.cppfilt.stdin.write(sym + '\n') | |
| 43 return self.cppfilt.stdout.readline().strip() | |
| 44 | |
| 45 | |
| 46 # Regex matching nm output for the symbols we're interested in. | |
|
Nico
2011/09/06 22:45:00
nit: Add "# Matches e.g. '+923afe3 5647 t _GLOBAL_
| |
| 47 nm_re = re.compile(r'(\S+) (\S+) t _GLOBAL__I_(.*)') | |
|
Nico
2011/09/06 22:45:00
(Unactionable remark: I like how a script that fig
| |
| 48 def ParseNm(binary): | |
| 49 """Given a binary, yield static initializers as (start, size, file) tuples.""" | |
| 50 | |
| 51 nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE) | |
| 52 for line in nm.stdout: | |
| 53 match = nm_re.match(line) | |
| 54 if match: | |
| 55 addr, size, filename = match.groups() | |
| 56 yield int(addr, 16), int(size, 16), filename | |
| 57 | |
| 58 | |
| 59 # Regex matching objdump output for the symbols we're interested in. | |
|
Nico
2011/09/06 22:45:00
nit: also add an example matching line
| |
| 60 disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>') | |
| 61 def ExtractSymbolReferences(binary, start, end): | |
| 62 """Given a span of addresses, yields symbol references from disassembly.""" | |
| 63 cmd = ['objdump', binary, '--disassemble', | |
| 64 '--start-address=0x%x' % start, '--stop-address=0x%x' % end] | |
| 65 objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE) | |
| 66 | |
| 67 refs = set() | |
| 68 for line in objdump.stdout: | |
| 69 #print line, | |
|
Nico
2011/09/06 22:45:00
remove?
| |
| 70 match = disassembly_re.search(line) | |
| 71 if match: | |
| 72 (ref,) = match.groups() | |
| 73 if ref.startswith('.LC') or ref.startswith('_DYNAMIC'): | |
| 74 # Ignore these, they are uninformative. | |
| 75 continue | |
| 76 if ref.startswith('_GLOBAL__I_'): | |
| 77 # Probably a relative jump within this function. | |
| 78 continue | |
| 79 refs.add(ref) | |
| 80 continue | |
| 81 if '__static_initialization_and_destruction' in line: | |
| 82 raise RuntimeError, ('code mentions ' | |
| 83 '__static_initialization_and_destruction; ' | |
| 84 'did you accidentally use a Debug binary?') | |
| 85 | |
| 86 for ref in sorted(refs): | |
| 87 yield ref | |
| 88 | |
| 89 | |
| 90 (binary,) = sys.argv[1:] | |
|
Lei Zhang
2011/09/06 23:02:37
print a help message when len(argv) != 2.
| |
| 91 demangler = Demangler() | |
| 92 for addr, size, filename in ParseNm(binary): | |
| 93 if size == 2: | |
| 94 # gcc generates a two-byte 'repz retq' initializer when there is nothing | |
| 95 # to do. jyasskin tells me this is fixed in gcc 4.6. | |
| 96 # Two bytes is too small to do anything, so just ignore it. | |
| 97 continue | |
| 98 | |
| 99 print '%s (0x%x 0x%x)' % (filename, addr, addr+size) | |
| 100 for ref in ExtractSymbolReferences(binary, addr, addr+size): | |
| 101 ref = demangler.Demangle(ref) | |
| 102 if ref in NOTES: | |
| 103 print ' ', '%s [%s]' % (ref, NOTES[ref]) | |
| 104 else: | |
| 105 print ' ', ref | |
| 106 print | |
| 107 | |
| OLD | NEW |