Chromium Code Reviews| Index: pydir/szbuild.py |
| diff --git a/pydir/szbuild.py b/pydir/szbuild.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..99383e5cdb4909e92242a9dd77a40649e4f7cdb9 |
| --- /dev/null |
| +++ b/pydir/szbuild.py |
| @@ -0,0 +1,232 @@ |
| +#!/usr/bin/env python2 |
| + |
| +import argparse |
| +import os |
| +import pipes |
| +import re |
| +import sys |
| + |
| +from utils import shellcmd |
| +from utils import FindBaseNaCl |
| + |
def BuildRegex(patterns, syms):
    """Build one regex string from regex patterns and symbol ranges.

    Each element of patterns is either a regular expression or a
    python-style range of indexes into syms, e.g. '2:9'.  A range is
    expanded into an alternation of the (escaped) symbol names that it
    covers.  Every element is anchored as '^(?:...)$' so that it has to
    match a whole symbol name, even when the element itself contains a
    top-level '|'.

    Args:
        patterns: List of regex strings and/or range strings of the
            form 'n', 'a:b', 'a:' or ':b'.
        syms: List of symbol names that the ranges index into.

    Returns:
        The alternation of all anchored elements, or '^$' when
        patterns is empty.

    Prints a message and exits with status 1 if a range is malformed.
    """
    pattern_list = []
    for pattern in patterns:
        # Slicing instead of indexing keeps an empty pattern from
        # raising IndexError; it is then treated as an (empty) regex.
        if pattern[:1].isdigit() or pattern[:1] == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            bounds = pattern.split(':')
            try:
                if len(bounds) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(bounds[0])
                    upper = lower + 1
                elif len(bounds) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(bounds[0]) if bounds[0] else 0
                    upper = int(bounds[1]) if bounds[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and non-numeric
                # bounds such as '2x'.
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join(re.escape(s) for s in syms[lower:upper])
        # The non-capturing group makes the anchors apply to the whole
        # pattern; without it 'foo|bar' would become '^foo|bar$', which
        # accepts any symbol that merely starts with 'foo'.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
| + |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude regexes.

    An exclude match takes priority and yields False.  Otherwise an
    include match yields True.  When neither compiled regex matches,
    default_match is returned unchanged.
    """
    # Always honor an explicit exclude before considering includes.
    if re_exclude.match(sym) is not None:
        return False
    return True if re_include.match(sym) is not None else default_match
| + |
def main():
    """Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode.  Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used.  Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match).  A symbol that
    matches neither is 'rejected', unless --exclude arguments were
    given without any --include arguments, in which case it is
    'accepted'.  The Subzero version is used for an 'accepted'
    symbol, and the llc version is used for a 'rejected' symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file.  Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead.  Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments.  It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run.  These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # The docstring doubles as the --help description.  Its first line
    # has no leading indentation while the remaining lines do, so the
    # prefix lines the first one up with the rest in the raw-text help.
    argparser = argparse.ArgumentParser(
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    # All intermediate files are named after the pexe, minus a trailing
    # '.pexe' extension if there is one.
    pexe = args.pexe
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Two spellings of every path are needed: a shell-quoted one for
    # the command strings handed to shellcmd() (which use shell pipes
    # and redirection), and the raw one for Python's own open() calls.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # The output name and the NaCl root end up in shell command
    # strings too, so they get the same quoting as the pexe paths.
    exe = pipes.quote(args.output)

    nacl_root = FindBaseNaCl()
    root = pipes.quote(nacl_root)
    # PATH is consumed by the OS rather than parsed by a shell, so the
    # unquoted root is used here.
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        # First-time setup: translate the pexe with both llc and
        # Subzero, and record the symbol names of each object file.
        opt_level = args.optlevel
        # pnacl-translate is always given a non-negative level; the
        # 'm1'/'-1' choices are mapped to 0 for it.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc))
        # llvm2ice receives the optimization level exactly as given on
        # the command line (no mapping).
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=root, level=opt_level, pexe=pexe, asm=asm_sz))
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz))
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz))
        # Write one symbol name per line; the line numbers of the
        # Subzero file are what --include/--exclude ranges refer to.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz))
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc))
        # NOTE(review): a reviewer questioned whether the
        # --globalize-symbols passes below are still necessary now that
        # both translators are run with -externalize -- confirm before
        # removing them.
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_llc, obj=obj_llc))
        shellcmd((
            'objcopy --globalize-symbols={sym} {obj}'
            ).format(sym=sym_sz, obj=obj_sz))

    # Without --init, the symbol file from an earlier --init run is
    # expected to exist already.
    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # The whitelist holds the symbols whose Subzero version is wanted.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Every Subzero symbol is weakened, but only the whitelisted llc
    # symbols are, so a non-whitelisted symbol has exactly one strong
    # definition (llc's) when the two objects are combined below.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak))
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak))
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak))
    # Combine both objects into one relocatable object, Subzero first.
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak))
    # Hide every symbol of the combined object, then re-export only
    # the renamed entry point.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial))
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial))
    # Final link against the runtime objects from the PNaCl toolchain
    # and the Subzero runtime source.  The '{{...}}' is a literal brace
    # list that is left for the shell to expand.
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=root))
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
| + |
# Run the build only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()