Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python2 | |
| 2 | |
| 3 import argparse | |
| 4 import os | |
| 5 import pipes | |
| 6 import re | |
| 7 import sys | |
| 8 | |
| 9 from utils import shellcmd | |
| 10 from utils import FindBaseNaCl | |
| 11 | |
def BuildRegex(patterns, syms):
    """Builds one anchored regex string from regexes and symbol ranges.

    Each element of patterns is either a regular expression or a range
    of entries in the symbol name array, e.g. '2:9'.  Range syntax is
    python-style: 'a:b', 'a:', ':b', or a singleton 'n' (same as
    'n:n+1').

    Args:
      patterns: Sequence of pattern strings (regexes and/or ranges).
      syms: Sequence of symbol names that ranges index into.

    Returns:
      A regex string in which every pattern is individually wrapped as
      '^(?:...)$' and the results are joined with '|'.  Returns '^$'
      when patterns is empty.

    Raises:
      SystemExit: A pattern that looks like a range (starts with a
        digit or ':') is malformed.  The message goes to stderr and
        the exit status is 1.
    """
    alternatives = []
    for pattern in patterns:
        # Slice rather than index so that an empty pattern is handled
        # as an (empty) regex instead of raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            bounds = pattern.split(':')
            try:
                if len(bounds) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(bounds[0])
                    upper = lower + 1
                elif len(bounds) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable
                    # defaults.
                    lower = int(bounds[0]) if bounds[0] else 0
                    upper = int(bounds[1]) if bounds[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' and non-numeric bounds.
                sys.exit('Invalid range syntax: {p}'.format(p=pattern))
            pattern = '|'.join(re.escape(s) for s in syms[lower:upper])
        # Group before anchoring: without the group, 'a|b' would become
        # '^a|b$', where '^' binds only to 'a' and '$' only to 'b'.
        alternatives.append('^(?:' + pattern + ')$')
    return '|'.join(alternatives) if alternatives else '^$'
| 37 | |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Returns whether a symbol is selected by the include/exclude regexes.

    Args:
      sym: Symbol name to test.
      re_include: Compiled regex; a match selects the symbol.
      re_exclude: Compiled regex; a match rejects the symbol.
      default_match: Result to use when neither regex matches.  Any
        truthy/falsy value is accepted.

    Returns:
      True or False.  False if re_exclude matches, otherwise True if
      re_include matches, otherwise the truth value of default_match.
    """
    if re_exclude.match(sym):
        # Always honor an explicit exclude before considering
        # includes.
        return False
    if re_include.match(sym):
        return True
    # Coerce so the documented True/False contract holds even when the
    # caller passes an int such as 0.
    return bool(default_match)
| 50 | |
# NOTE: main()'s docstring doubles as the --help description (it is
# passed to argparse verbatim), so editing it changes user-visible
# output.  main() takes no parameters (it reads sys.argv through
# argparse) and returns None.
def main():
    """Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode. Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used. Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'. The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file. Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead. Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments. It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run. These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    argparser = argparse.ArgumentParser(
        # The prefix equals the docstring's continuation-line indent so
        # the first line lines up with the rest under
        # RawTextHelpFormatter.
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    pexe = args.pexe
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Keep unquoted copies for open(); the quoted forms are only for
    # strings that are handed to the shell.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # Quote the output path too, consistent with the pexe-derived
    # paths, so names containing spaces or shell metacharacters work.
    exe = pipes.quote(args.output)

    nacl_root = FindBaseNaCl()
    # PATH takes the raw root; shell command lines take the quoted one.
    nacl_root_quoted = pipes.quote(nacl_root)
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        opt_level = args.optlevel
        # pnacl-translate is given a level from this map (m1/-1 become
        # 0); llvm2ice below is given the user's value unchanged.
        opt_level_map = {'m1': '0', '-1': '0', '0': '0', '1': '1', '2': '2'}
        # Translate the pexe with llc.
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc))
        # Translate the pexe with Subzero and assemble the result.
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=nacl_root_quoted, level=opt_level, pexe=pexe,
                     asm=asm_sz))
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz))
        # Dump each object's symbol names, one per line.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz))
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc))
        # NOTE(review): a reviewer questioned whether the two
        # --globalize-symbols steps are still needed now that both
        # translators run with -externalize -- confirm before removing.
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_llc, obj=obj_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_sz, obj=obj_sz))

    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Every Subzero symbol is weakened; in the llc object only the
    # whitelisted symbols are weakened.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak))
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak))
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak))
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak))
    # Localize everything in the combined object, then re-export only
    # the renamed entry point.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial))
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial))
    # The extra link inputs are unsandboxed_irt.o, irt_query_list.o and
    # Subzero's runtime/szrt.c.
    # NOTE(review): a reviewer asked for a note about this set of link
    # inputs; the request is truncated in the captured review.
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root_quoted))
    # Put the extra verbose printing at the end.
    # NOTE(review): a reviewer suggested giving shellcmd an echo option
    # tied to --verbose; shellcmd's signature is not visible here --
    # TODO confirm.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
| 230 | |
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
| OLD | NEW |