Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(159)

Side by Side Diff: pydir/szbuild.py

Issue 551953002: Subzero: Add a script that builds a hybrid Subzero/llc native executable. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Improve help text. Sanitize inputs Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | runtime/szrt.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
#!/usr/bin/env python2

import argparse
import os
import pipes
import re
import sys

from utils import shellcmd
from utils import FindBaseNaCl
11
def BuildRegex(patterns, syms):
    """Builds a single regex string from a list of patterns.

    Each element of the patterns array is either a regex, or a range
    of entries in the symbol name array using Python slice notation,
    e.g. '2:9', ':9', '2:', or a single index '4' (treated as '4:5').
    A range expands to an alternation of the (escaped) symbol names it
    selects.

    Every element is anchored as a whole, i.e. wrapped as '^(?:...)$',
    so a regex containing a top-level '|' still has to match the
    entire symbol name.

    Args:
      patterns: list of regex or range strings.
      syms: list of symbol names that ranges index into.

    Returns:
      A regex string that is the alternation of all anchored
      patterns, or '^$' if patterns is empty.

    Exits with status 1 (via sys.exit) if a range is malformed.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of index so an empty pattern is treated as an
        # (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable
                    # defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and non-numeric
                # bounds such as '1x:3'.
                sys.stderr.write(
                    'Invalid range syntax: {p}\n'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join(re.escape(s) for s in syms[lower:upper])
        # The non-capturing group keeps '^' and '$' bound to the whole
        # pattern even when it contains a top-level alternation.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
37
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decides whether a symbol is selected by the include/exclude regexes.

    The exclude regex is consulted first and always wins; then the
    include regex; if neither matches, the default is used.

    Args:
      sym: symbol name to test.
      re_include: compiled regex of symbols to accept.
      re_exclude: compiled regex of symbols to reject.
      default_match: result to use when neither regex matches; any
        truthy/falsy value is accepted.

    Returns:
      True or False (always a real bool).
    """
    if re_exclude.match(sym):
        # Always honor an explicit exclude before considering
        # includes.
        return False
    if re_include.match(sym):
        return True
    # Callers may pass a truthy/falsy non-bool (e.g. the int result of
    # 'len(x) and ...'); normalize so the documented contract holds.
    return bool(default_match)
50
def main():
    """Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode. Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used. Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'. The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file. Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead. Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments. It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run. These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # NOTE: the docstring above doubles as the --help description and
    # is rendered verbatim (RawTextHelpFormatter), so its layout is
    # user-visible.  The leading spaces make the first line align with
    # the rest of the indented docstring.
    argparser = argparse.ArgumentParser(
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    # Derive the base name for all intermediate files.  A '.pexe'
    # extension is stripped; any other name is used whole.
    pexe = args.pexe
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Two flavors of each path are needed: shell-quoted for the
    # command strings handed to shellcmd(), and unquoted for files
    # this script opens directly with open().
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # The output path ends up in a shell command too, so quote it like
    # the other user-supplied paths.
    exe = pipes.quote(args.output)

    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        opt_level = args.optlevel
        # pnacl-translate is given a plain numeric level; 'm1'/'-1'
        # map to 0 there, while llvm2ice receives the level as typed.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        # llc-based translation of the pexe into a native object.
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe))
        # Rename the pexe's _start to _user_start.
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc))
        # Subzero translation: llvm2ice emits assembly, which llvm-mc
        # then assembles into an object file.
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=nacl_root,level=opt_level, pexe=pexe, asm=asm_sz))
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz))
        # Write each object's symbol names (the text following nm's
        # one-letter type field) to its symbol file, one per line.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz))
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc))
        # NOTE(review): a reviewer asked whether this globalize step
        # is still necessary now that -externalize is passed to both
        # translators -- TODO confirm before removing.
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_llc, obj=obj_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_sz, obj=obj_sz))

    # The Subzero symbol file is created by --init above; the ranges
    # accepted by --include/--exclude index into its lines.
    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # The whitelist holds the symbols whose Subzero version is wanted.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every Subzero symbol, and weaken only the whitelisted
    # symbols in the llc object.  Presumably the partial link below
    # then resolves whitelisted symbols to the Subzero object (listed
    # first) and everything else to llc's strong definitions -- verify
    # against ld's handling of duplicate weak definitions.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak))
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak))
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak))
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak))
    # Make every symbol in the combined object local, then re-export
    # only the renamed entry point.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial))
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial))
    # NOTE(review): a reviewer asked that the origin of this set of
    # link inputs be documented here -- TODO add the explanation.
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root))
    # Put the extra verbose printing at the end.
    # NOTE(review): a reviewer suggested having shellcmd() take an
    # echo option so command echoing could also be tied to --verbose.
    if args.verbose:
        # Single-argument print(...) behaves the same as the print
        # statement on Python 2 and also parses on Python 3.
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
230
if __name__ == '__main__':
    # Run the build only when executed as a script, not when imported.
    main()
OLDNEW
« no previous file with comments | « no previous file | runtime/szrt.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698