OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Dump functions called by static initializers in a Linux binary. | |
Nico
2011/09/06 22:45:00
"in a Linux release binary"?
| |
7 | |
8 A brief overview of static initialization: | |
9 1) the compiler writes out, per object file, a function that contains | |
Nico
2011/09/06 22:45:00
nit: move "per object file" to the beginning or en
| |
10 the static initializers for that file. | |
11 2) the compiler also writes out a pointer to that function in a special | |
12 section. | |
13 3) at link time, the linker concatenates the function pointer sections | |
14 into a single list of all initializers. | |
15 4) at run time, on startup the binary runs all function pointers. | |
16 | |
17 The functions in (1) all have mangled names of the form | |
18 _GLOBAL__I_foobar.cc | |
19 Using objdump, we can disassemble those functions and dump all symbols that | |
20 they reference. | |
Nico
2011/09/06 22:45:00
Example invocation command line?
| |
21 """ | |
22 | |
23 import re | |
24 import subprocess | |
25 import sys | |
26 | |
# Lookup table from a symbol name to a short, human-readable remark that is
# shown next to that symbol when it appears in the output.
NOTES = dict([
    ('__cxa_atexit@plt', 'registers a dtor to run at exit'),
    ('std::__ioinit', '#includes <iostream>, use <ostream> instead'),
])
32 | |
class Demangler(object):
    """A wrapper around c++filt to provide a function to demangle symbols.

    One c++filt child process is started when the object is constructed and
    is then fed one symbol per Demangle() call over a pair of pipes.
    """

    def __init__(self):
        # universal_newlines=True opens the pipes in text mode, so str symbols
        # can be written and read back on both Python 2 and Python 3.
        self.cppfilt = subprocess.Popen(['c++filt'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)

    def Demangle(self, sym):
        """Given mangled symbol |sym|, return its demangled form.

        Args:
          sym: the symbol name to send to the child process (one line).

        Returns:
          The next line the child process prints, with surrounding whitespace
          stripped.
        """
        self.cppfilt.stdin.write(sym + '\n')
        # Without an explicit flush the symbol can stay in our side of a
        # buffered pipe while readline() below waits forever for the answer.
        self.cppfilt.stdin.flush()
        return self.cppfilt.stdout.readline().strip()
44 | |
45 | |
46 # Regex matching nm output for the symbols we're interested in. | |
Nico
2011/09/06 22:45:00
nit: Add "# Matches e.g. '+923afe3 5647 t _GLOBAL_
| |
47 nm_re = re.compile(r'(\S+) (\S+) t _GLOBAL__I_(.*)') | |
Nico
2011/09/06 22:45:00
(Unactionable remark: I like how a script that fig
| |
def ParseNm(binary):
    """Given a binary, yield its static initializers.

    Runs `nm -S` on |binary| and matches every output line against nm_re.

    Args:
      binary: path of the binary to inspect; handed to nm unchanged.

    Yields:
      (start, size, file) triples: the start address and the size of the
      initializer function as ints (parsed from the hexadecimal columns), and
      the text that follows the '_GLOBAL__I_' prefix of its symbol name.
    """
    # Text-mode pipe: lines arrive as str and can be matched by the str
    # pattern nm_re on both Python 2 and Python 3.
    nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                          universal_newlines=True)
    try:
        for line in nm.stdout:
            match = nm_re.match(line)
            if match:
                addr, size, filename = match.groups()
                yield int(addr, 16), int(size, 16), filename
    finally:
        # Release the pipe and reap the child, also when the caller stops
        # iterating early, so no zombie process is left behind.
        nm.stdout.close()
        nm.wait()
57 | |
58 | |
59 # Regex matching objdump output for the symbols we're interested in. | |
Nico
2011/09/06 22:45:00
nit: also add an example matching line
| |
# Matches disassembly lines that mention a symbol in angle brackets, e.g.
#   '  4a3f10:\te8 1b 2c 00 00\tcallq  4a6b30 <_ZN3FooC1Ev>'
# and captures the symbol name between the brackets.
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')


def ExtractSymbolReferences(binary, start, end):
    """Given a span of addresses, yields symbol references from disassembly.

    Args:
      binary: path of the binary; handed to objdump unchanged.
      start: integer passed to objdump as --start-address.
      end: integer passed to objdump as --stop-address.

    Yields:
      Each distinct referenced symbol name once, in sorted order. Names
      starting with '.LC', '_DYNAMIC' or '_GLOBAL__I_' are left out.

    Raises:
      RuntimeError: if any disassembly line mentions
        __static_initialization_and_destruction.
    """
    cmd = ['objdump', binary, '--disassemble',
           '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    # Text-mode pipe so the lines are str on both Python 2 and Python 3.
    objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)

    refs = set()
    try:
        for line in objdump.stdout:
            # Check this before looking for symbol references: a call to the
            # helper is itself a line that matches disassembly_re, so testing
            # only the non-matching lines would let it slip through as an
            # ordinary reference.
            if '__static_initialization_and_destruction' in line:
                raise RuntimeError('code mentions '
                                   '__static_initialization_and_destruction; '
                                   'did you accidentally use a Debug binary?')
            match = disassembly_re.search(line)
            if not match:
                continue
            (ref,) = match.groups()
            if ref.startswith(('.LC', '_DYNAMIC')):
                # Ignore these, they are uninformative.
                continue
            if ref.startswith('_GLOBAL__I_'):
                # Probably a relative jump within this function.
                continue
            refs.add(ref)
    finally:
        # Release the pipe and reap the child on every exit path.
        objdump.stdout.close()
        objdump.wait()

    for ref in sorted(refs):
        yield ref
88 | |
89 | |
def main(argv):
    """Print, per static initializer of a binary, the symbols it references.

    Args:
      argv: the full argument vector; argv[0] is the program name and argv[1]
        must be the path of the binary to inspect.

    Returns:
      The process exit status: 0 on success, 1 when the arguments are wrong.
    """
    if len(argv) != 2:
        # Exactly one argument is required; explain the expected invocation
        # instead of failing with an unpacking error.
        sys.stderr.write('usage: %s <binary>\n' % argv[0])
        return 1
    binary = argv[1]

    demangler = Demangler()
    for addr, size, filename in ParseNm(binary):
        if size == 2:
            # gcc generates a two-byte 'repz retq' initializer when there is
            # nothing to do. jyasskin tells me this is fixed in gcc 4.6.
            # Two bytes is too small to do anything, so just ignore it.
            continue

        # Single-argument print(...) calls produce the same output whether
        # print is a statement (Python 2) or a function (Python 3).
        print('%s (0x%x 0x%x)' % (filename, addr, addr + size))
        for ref in ExtractSymbolReferences(binary, addr, addr + size):
            ref = demangler.Demangle(ref)
            if ref in NOTES:
                print('  %s [%s]' % (ref, NOTES[ref]))
            else:
                print('  %s' % ref)
        print('')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
107 | |
OLD | NEW |