OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python2 | |
2 | |
3 import argparse | |
4 import os | |
5 import re | |
6 import sys | |
7 | |
8 from utils import shellcmd | |
9 from utils import FindBaseNaCl | |
10 | |
def BuildRegex(patterns, syms):
    """Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    A pattern that starts with a digit or a ':' is interpreted as a
    python-style range ('n', 'a:b', 'a:' or ':b') into syms, and is
    expanded into an alternation of the escaped symbol names in that
    slice.  Any other pattern is used verbatim as a regex.

    Every pattern is wrapped as '^(?:...)$' so that the anchors apply
    to the whole pattern, even when it contains a top-level '|'.  The
    wrapped patterns are joined with '|'.  If patterns is empty, '^$'
    is returned.

    Prints a message and exits with status 1 if a range is malformed
    (more than one ':' or a non-integer bound).
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is
        # handled as an (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and bounds that
                # are not integers (e.g. '2x').
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join([re.escape(p) for p in syms[lower:upper]])
        # The non-capturing group keeps '^' and '$' bound to the whole
        # pattern; without it 'foo|bar' would become '^foo|bar$'.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
36 | |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decides whether a symbol is selected by the include/exclude
    regexes.

    re_include and re_exclude are compiled regex objects; both are
    applied to sym with match().  An exclude match yields False, and
    takes priority over any include match.  Otherwise an include match
    yields True.  If neither regex matches, default_match is returned
    unchanged.
    """
    if not re_exclude.match(sym):
        # Not explicitly excluded: an include match accepts the
        # symbol, otherwise fall back to the caller's default.
        return True if re_include.match(sym) else default_match
    # An explicit exclude always wins, even over a matching include.
    return False
49 | |
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode.  Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used.  Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match).  A symbol that
    matches neither is 'rejected', unless --exclude arguments were
    given and no --include arguments were given, in which case it is
    'accepted'.  The Subzero version is used for an 'accepted' symbol,
    and the llc version is used for a 'rejected' symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file.  Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead.  Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments.  It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run.  These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    desc = 'Create a hybrid translation from Subzero and llc.'
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    pexe = args.pexe
    # Strip only a trailing '.pexe' extension.  A regex substitution
    # would also remove '.pexe' from the middle of the path.
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # TODO(stichnot): Use pipes.quote(pexe_base)?  shlex.quote(pexe_base)?
    exe = args.output

    # Prepend the toolchain directories so the tools invoked below
    # are found before anything else on PATH.
    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])

    # All intermediate files are named after the pexe.
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'

    if args.init:
        opt_level = args.optlevel
        # pnacl-translate is given -O0 for the 'm1'/'-1' levels;
        # llvm2ice is given the level exactly as typed.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        # Produce the llc object file and rename its _start symbol.
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc))
        # Produce the Subzero assembly, assemble it, and rename its
        # _start symbol the same way.
        shellcmd((
            '../llvm2ice -O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(level=opt_level, pexe=pexe, asm=asm_sz))
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz))
        # Write one symbol name per line into the symbol files, then
        # globalize every listed symbol in the matching object file.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz))
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_llc, obj=obj_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_sz, obj=obj_sz))

    # The Subzero symbol file is read on every run; it must already
    # exist from a previous --init when --init is not given.
    with open(sym_sz) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # Write the accepted Subzero symbols to the whitelist file.
    whitelist_has_items = False
    with open(whitelist_sz, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every symbol in the Subzero object, and only the
    # whitelisted symbols in the llc object.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak))
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak))
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak))
    # Combine both objects into one relocatable object, localize all
    # of its symbols, and re-export only _user_start.
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak))
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial))
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial))
    # Final link of the executable.
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root))
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))

if __name__ == '__main__':
    main()
OLD | NEW |