Index: pydir/szbuild.py |
diff --git a/pydir/szbuild.py b/pydir/szbuild.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..8adc683057acf96df9b23b3d8191c1f53a9d3ef5 |
--- /dev/null |
+++ b/pydir/szbuild.py |
@@ -0,0 +1,239 @@ |
+#!/usr/bin/env python2 |
+ |
+import argparse |
+import os |
+import pipes |
+import re |
+import sys |
+ |
+from utils import shellcmd |
+from utils import FindBaseNaCl |
+ |
+def BuildRegex(patterns, syms): |
+  """Build a regular expression string for inclusion or exclusion. |
+ |
+  Creates a regex string from an array of patterns and an array of |
+  symbol names.  Each element in the patterns array is either a regex, |
+  or a python-style range of entries in the symbol name array: 'n', |
+  'a:b', 'a:' or ':b'.  Every pattern is anchored as a whole, and '^$' |
+  is returned for no patterns.  A malformed range exits with status 1. |
+  """ |
+  pattern_list = [] |
+  for pattern in patterns: |
+    # Legitimate symbols or regexes shouldn't start with a digit or a |
+    # ':', so such a pattern is a range.  [:1] tolerates an empty pattern. |
+    if pattern[:1].isdigit() or pattern[:1] == ':': |
+      first, colon, last = pattern.partition(':') |
+      try: |
+        # 'a:', ':b' and singleton 'n' (meaning 'n:n+1') get defaults; |
+        # int() raises ValueError for anything else, such as 'a:b:c'. |
+        lower = int(first) if first else 0 |
+        upper = (int(last) if last else len(syms)) if colon else lower + 1 |
+      except ValueError: |
+        sys.stderr.write('Invalid range syntax: {p}\n'.format(p=pattern)) |
+        sys.exit(1) |
+      pattern = '|'.join([re.escape(p) for p in syms[lower:upper]]) |
+    # Group before anchoring so '^' and '$' apply to every alternative. |
+    pattern_list.append('^(?:' + pattern + ')$') |
+  return '|'.join(pattern_list) if pattern_list else '^$' |
+ |
+def MatchSymbol(sym, re_include, re_exclude, default_match): |
+  """Decide whether a symbol passes the inclusion/exclusion rules. |
+ |
+  Returns False when the compiled exclude regex matches the symbol, |
+  and otherwise True when the compiled include regex matches it.  A |
+  symbol that matches neither regex gets default_match, exactly as it |
+  was passed in. |
+  """ |
+  # An explicit exclude is always honored first, so the include regex |
+  # is not even consulted for an excluded symbol. |
+  if re_exclude.match(sym) is not None: |
+    return False |
+  included = re_include.match(sym) is not None |
+  return True if included else default_match |
+ |
+def main(): |
+  """Create a hybrid translation from Subzero and llc. |
+ |
+  Takes a finalized pexe and builds a native executable as a |
+  hybrid of Subzero and llc translated bitcode.  Linker tricks are |
+  used to determine whether Subzero or llc generated symbols are |
+  used, on a per-symbol basis. |
+ |
+  By default, for every symbol, its llc version is used.  Subzero |
+  symbols can be enabled by regular expressions on the symbol name, |
+  or by ranges of lines in this program's auto-generated symbol |
+  file. |
+ |
+  For each symbol, the --exclude arguments are first checked (the |
+  symbol is 'rejected' on a match), followed by the --include |
+  arguments (the symbol is 'accepted' on a match), followed by |
+  unconditional 'rejection'.  The Subzero version is used for an |
+  'accepted' symbol, and the llc version is used for a 'rejected' |
+  symbol. |
+ |
+  Each --include and --exclude argument can be a regular expression |
+  or a range of lines in the symbol file.  Each regular expression |
+  is wrapped inside '^$', so if you want a substring match on 'foo', |
+  use '.*foo.*' instead.  Ranges use python-style 'first:last' |
+  notation, so e.g. use '0:10' or ':10' for the first 10 lines of |
+  the file, or '1' for the second line of the file. |
+ |
+  The --init argument does first-time initialization for the pexe, |
+  including creation of the Subzero symbol file that is implicitly |
+  used in the --include and --exclude arguments.  It can be removed |
+  from the command line for subsequent executions if the pexe |
+  doesn't change. |
+ |
+  This script augments PATH so that various PNaCl and LLVM tools |
+  can be run.  These extra paths are within the native_client tree. |
+  When changes are made to these tools, copy them this way: |
+  cd native_client |
+  toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\ |
+  --install=toolchain/linux_x86/pnacl_newlib |
+  """ |
+  argparser = argparse.ArgumentParser( |
+      description=' ' + main.__doc__, |
+      formatter_class=argparse.RawTextHelpFormatter) |
+  argparser.add_argument('pexe', help='Finalized pexe to translate') |
+  argparser.add_argument('--init', dest='init', action='store_true', |
+                         help='Perform first-time setup for the pexe') |
+  argparser.add_argument('--include', '-i', default=[], dest='include', |
+                         action='append', |
+                         help='Subzero symbols to include ' + |
+                         '(regex or line range)') |
+  argparser.add_argument('--exclude', '-e', default=[], dest='exclude', |
+                         action='append', |
+                         help='Subzero symbols to exclude ' + |
+                         '(regex or line range)') |
+  argparser.add_argument('--output', '-o', default='a.out', dest='output', |
+                         action='store', |
+                         help='Output executable (default a.out)') |
+  argparser.add_argument('-O', default='2', dest='optlevel', |
+                         choices=['m1', '-1', '0', '1', '2'], |
+                         help='Optimization level ' + |
+                         '(m1 and -1 are equivalent)') |
+  argparser.add_argument('--verbose', '-v', dest='verbose', |
+                         action='store_true', |
+                         help='Display some extra debugging output') |
+  args = argparser.parse_args() |
+ |
+  pexe = args.pexe |
+  [pexe_base, ext] = os.path.splitext(pexe) |
+  if ext != '.pexe': |
+    pexe_base = pexe |
+  pexe_base_unescaped = pexe_base  # Raw name, for open() in Python. |
+  pexe_base = pipes.quote(pexe_base)  # Quoted, for shellcmd() strings. |
+  pexe = pipes.quote(pexe) |
+  exe = pipes.quote(args.output)  # Quoted like the other shell names. |
+ |
+  nacl_root = FindBaseNaCl() |
+  os.environ['PATH'] = ( |
+      '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' + |
+      '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' + |
+      '{path}' |
+      ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH']) |
+  obj_llc = pexe_base + '.llc.o' |
+  obj_sz = pexe_base + '.sz.o' |
+  asm_sz = pexe_base + '.sz.s' |
+  obj_llc_weak = pexe_base + '.weak.llc.o' |
+  obj_sz_weak = pexe_base + '.weak.sz.o' |
+  obj_partial = pexe_base + '.o' |
+  sym_llc = pexe_base + '.sym.llc.txt' |
+  sym_sz = pexe_base + '.sym.sz.txt' |
+  sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt' |
+  whitelist_sz = pexe_base + '.wl.sz.txt' |
+  whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt' |
+ |
+  if args.init: |
+    # pnacl-translate gets -O0 for m1/-1; llvm2ice gets the level as is. |
+    opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' } |
+    shellcmd(( |
+        'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' + |
+        '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' + |
+        '-o {obj} {pexe}' |
+      ).format(level=opt_level_map[args.optlevel], obj=obj_llc, pexe=pexe), |
+             echo=args.verbose) |
+    shellcmd(( |
+        'objcopy --redefine-sym _start=_user_start {obj}' |
+      ).format(obj=obj_llc), echo=args.verbose) |
+    shellcmd(( |
+        '{root}/toolchain_build/src/subzero/llvm2ice ' + |
+        '-O{level} -bitcode-format=pnacl -disable-globals ' + |
+        '-externalize -ffunction-sections {pexe} -o {asm}' |
+      ).format(root=nacl_root, level=args.optlevel, pexe=pexe, asm=asm_sz), |
+             echo=args.verbose) |
+    shellcmd(( |
+        'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' + |
+        '{asm}' |
+      ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose) |
+    shellcmd(( |
+        'objcopy --redefine-sym _start=_user_start {obj}' |
+      ).format(obj=obj_sz), echo=args.verbose) |
+    shellcmd(( |
+        'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' |
+      ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose) |
+    shellcmd(( |
+        'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' |
+      ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose) |
+ |
+  with open(sym_sz_unescaped) as f: |
+    sz_syms = f.read().splitlines() |
+  re_include_str = BuildRegex(args.include, sz_syms) |
+  re_exclude_str = BuildRegex(args.exclude, sz_syms) |
+  re_include = re.compile(re_include_str) |
+  re_exclude = re.compile(re_exclude_str) |
+  # If a symbol doesn't explicitly match re_include or re_exclude, |
+  # the default MatchSymbol() result is False, unless some --exclude |
+  # args are provided and no --include args are provided. |
+  default_match = bool(args.exclude) and not args.include |
+ |
+  whitelist = [sym for sym in sz_syms |
+               if MatchSymbol(sym, re_include, re_exclude, default_match)] |
+  with open(whitelist_sz_unescaped, 'w') as f: |
+    f.writelines(sym + '\n' for sym in whitelist) |
+  # All Subzero symbols are weakened but only the whitelisted llc ones, |
+  # so that (presumably) llc's strong definitions win everywhere else. |
+  shellcmd(( |
+      'objcopy --weaken {obj} {weak}' |
+    ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose) |
+  if whitelist: |
+    # objcopy returns an error if the --weaken-symbols file is empty. |
+    shellcmd(( |
+        'objcopy --weaken-symbols={whitelist} {obj} {weak}' |
+      ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak), |
+             echo=args.verbose) |
+  else: |
+    shellcmd(( |
+        'objcopy {obj} {weak}' |
+      ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose) |
+  shellcmd(( |
+      'ld -r -m elf_i386 -o {partial} {sz} {llc}' |
+    ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak), |
+           echo=args.verbose) |
+  shellcmd(( |
+      'objcopy -w --localize-symbol="*" {partial}' |
+    ).format(partial=obj_partial), echo=args.verbose) |
+  shellcmd(( |
+      'objcopy --globalize-symbol=_user_start {partial}' |
+    ).format(partial=obj_partial), echo=args.verbose) |
+  shellcmd(( |
+      'gcc -m32 {partial} -o {exe} ' + |
+      # Keep the rest of this command line (except szrt.c) in sync |
+      # with RunHostLD() in pnacl-translate.py. |
+      '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' + |
+      '{{unsandboxed_irt,irt_query_list}}.o ' + |
+      '{root}/toolchain_build/src/subzero/runtime/szrt.c ' + |
+      '-lpthread -lrt' |
+    ).format(partial=obj_partial, exe=exe, root=nacl_root), |
+           echo=args.verbose) |
+  # Put the extra verbose printing at the end. |
+  if args.verbose: |
+    print 'PATH={path}'.format(path=os.environ['PATH']) |
+    print 'include={regex}'.format(regex=re_include_str) |
+    print 'exclude={regex}'.format(regex=re_exclude_str) |
+    print 'default_match={dm}'.format(dm=default_match) |
+    print 'Number of Subzero syms = {num}'.format(num=len(sz_syms)) |
+ |
+if __name__ == '__main__':  # Run main() only when executed as a script. |
+  main() |