OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python2 | |
2 | |
3 import argparse | |
4 import os | |
5 import pipes | |
6 import re | |
7 import sys | |
8 | |
9 from utils import shellcmd | |
10 from utils import FindBaseNaCl | |
11 | |
def BuildRegex(patterns, syms):
    """Creates a regex string from patterns and symbol names.

    Each element in the patterns array is either a regex, or a range
    of entries in the symbol name array, e.g. '2:9'.  Ranges use
    python-style slice notation: 'a:b', 'a:', ':b', or a singleton 'n'
    (treated as 'n:n+1').

    Args:
        patterns: list of strings, each a regex or a range.
        syms: list of symbol names that the ranges index into.

    Returns:
        A regex string that matches a whole symbol name if any of the
        patterns matches it.  If patterns is empty, returns '^$', which
        only matches the empty string.

    Exits the program with status 1 if a range is malformed.
    """
    pattern_list = []
    for pattern in patterns:
        # Slicing (rather than indexing) keeps an empty pattern from
        # raising IndexError; it is then treated as an (empty) regex.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and non-numeric
                # bounds such as '2x'.
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join([re.escape(p) for p in syms[lower:upper]])
        # Group before anchoring so that a top-level '|' inside the
        # pattern cannot escape the '^...$' wrapper.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
37 | |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decides whether a symbol is selected by the include/exclude regexes.

    An exclude match always wins and yields False.  Otherwise an
    include match yields True.  If neither compiled regex matches the
    symbol, default_match is returned unchanged.
    """
    # Excludes take priority, so test them before looking at includes.
    if re_exclude.match(sym):
        return False
    return True if re_include.match(sym) else default_match
50 | |
def main():
    """Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode. Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used. Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'. The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file. Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead. Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments. It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This scripts augments PATH so that various PNaCl and LLVM tools
    can be run. These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # NOTE: the docstring above doubles as the --help description (it
    # is passed to argparse below), so editing it changes user-visible
    # output.
    argparser = argparse.ArgumentParser(
        description=' ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    # Derive the base name for all intermediate files.  The '.pexe'
    # extension is stripped if present; otherwise the full name is used.
    pexe = args.pexe
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Shell commands need quoted paths; open() needs the raw ones.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # The output path is interpolated into a shell command too, so it
    # is quoted like every other path.
    exe = pipes.quote(args.output)

    # Prepend the toolchain directories so the tools invoked below
    # resolve to the native_client tree's copies first.
    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        # First-time setup: translate the pexe with both llc and
        # Subzero, and record each object's symbol list.
        opt_level = args.optlevel
        # pnacl-translate is given '0' for the m1/-1 levels.
        opt_level_map = {'m1': '0', '-1': '0', '0': '0', '1': '1', '2': '2'}
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc))
        # llvm2ice receives the unmapped level (e.g. '-Om1').
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=nacl_root, level=opt_level, pexe=pexe, asm=asm_sz))
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz))
        # Write one symbol name per line for each object file.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz))
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc))
        # NOTE(review): these two globalize steps may be redundant now
        # that both translators are run with -externalize -- confirm
        # before removing.
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_llc, obj=obj_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_sz, obj=obj_sz))

    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # Write the accepted (Subzero) symbols to the whitelist file.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every Subzero symbol, and weaken only the whitelisted
    # symbols in the llc object.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak))
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak))
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak))
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak))
    # Hide everything in the merged object except the entry point.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial))
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial))
    # The doubled braces render as a literal '{a,b}.o', which relies on
    # the shell's brace expansion to name both object files
    # (presumably shellcmd runs a brace-expanding shell -- confirm).
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root))
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
230 | |
# Run the translation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
OLD | NEW |