| Index: pydir/szbuild.py
|
| diff --git a/pydir/szbuild.py b/pydir/szbuild.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..8adc683057acf96df9b23b3d8191c1f53a9d3ef5
|
| --- /dev/null
|
| +++ b/pydir/szbuild.py
|
| @@ -0,0 +1,239 @@
|
| +#!/usr/bin/env python2
|
| +
|
| +import argparse
|
| +import os
|
| +import pipes
|
| +import re
|
| +import sys
|
| +
|
| +from utils import shellcmd
|
| +from utils import FindBaseNaCl
|
| +
|
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
        patterns: List of strings.  A string starting with a digit or
            ':' is a python-style range ('n', 'a:b', 'a:' or ':b') of
            indexes into syms; anything else is used as a regex.
        syms: List of symbol names that the ranges index into.

    Returns:
        A regex string in which every pattern is anchored as a whole
        ('^(?:...)$') and the patterns are joined with '|'.  If
        patterns is empty the result is '^$', which only matches the
        empty string.

    Prints 'Invalid range syntax' and exits with status 1 if a range
    has more than one ':' or a non-integer bound.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is simply
        # treated as a regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and bounds that
                # are not integers (e.g. '1x:2').
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join([re.escape(p) for p in syms[lower:upper]])
        # The non-capturing group makes the anchors apply to the whole
        # pattern; without it 'foo|bar' would become '^foo|bar$', which
        # also accepts any symbol that merely starts with 'foo'.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
|
| +
|
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude rules.

    The compiled exclude regex is consulted first and wins outright:
    a symbol it matches yields False.  Otherwise a match against the
    compiled include regex yields True.  When neither regex matches,
    default_match is returned unchanged.
    """
    is_excluded = re_exclude.match(sym)
    if not is_excluded:
        # Not explicitly excluded, so an include match decides;
        # failing that, fall back to the caller's default.
        is_included = re_include.match(sym)
        if is_included:
            return True
        return default_match
    # An explicit exclude always takes priority over any include.
    return False
|
| +
|
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode.  Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used.  Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'.  The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file.  Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead.  Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments.  It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This scripts augments PATH so that various PNaCl and LLVM tools
    can be run.  These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # The docstring doubles as the --help text.  Its first line carries
    # no leading indentation, so prepend the indentation the remaining
    # lines have; RawTextHelpFormatter prints the text verbatim.
    argparser = argparse.ArgumentParser(
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    def run(template, **fields):
        # Fill in a command template and hand it to shellcmd (from
        # utils; presumably it runs the string through a shell, since
        # the commands below rely on pipes and redirection).
        shellcmd(template.format(**fields), echo=args.verbose)

    pexe = args.pexe
    [pexe_base, ext] = os.path.splitext(pexe)
    if ext != '.pexe':
        # No '.pexe' suffix to strip; derive file names from the full
        # argument instead.
        pexe_base = pexe
    # Two spellings of each path are needed: shell-quoted ones for
    # command lines, and raw ones for Python's own open() calls.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # Quote the output path too, so that it gets the same protection
    # against spaces and shell metacharacters as the pexe paths.
    exe = pipes.quote(args.output)

    nacl_root = FindBaseNaCl()
    # PATH is not parsed by a shell, so it takes the raw root; command
    # lines take the quoted one.
    nacl_root_quoted = pipes.quote(nacl_root)
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        opt_level = args.optlevel
        # pnacl-translate is given -O0 for the 'm1'/'-1' levels;
        # llvm2ice below receives the level exactly as typed.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        # Translate the pexe with llc.
        run('pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}',
            level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe)
        run('objcopy --redefine-sym _start=_user_start {obj}',
            obj=obj_llc)
        # Translate the pexe with Subzero and assemble the result.
        run('{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}',
            root=nacl_root_quoted, level=opt_level, pexe=pexe, asm=asm_sz)
        run('llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}',
            asm=asm_sz, obj=obj_sz)
        run('objcopy --redefine-sym _start=_user_start {obj}',
            obj=obj_sz)
        # Record the symbol names of both objects, one per line; the
        # Subzero list is what --include/--exclude ranges index into.
        run('nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}',
            obj=obj_sz, sym=sym_sz)
        run('nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}',
            obj=obj_llc, sym=sym_llc)

    try:
        with open(sym_sz_unescaped) as f:
            sz_syms = f.read().splitlines()
    except IOError as err:
        # The symbol file is written by the --init steps above; report
        # its absence as a usage problem rather than a traceback.
        sys.exit('Cannot read Subzero symbol file ({err}); '
                 'rerun with --init to create it.'.format(err=err))
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every symbol in the Subzero object, and only the
    # whitelisted ones in the llc object.  (Presumably the linker then
    # prefers the strong llc definition, and for whitelisted symbols
    # the Subzero one, which is listed first -- confirm against ld
    # weak-symbol resolution rules.)
    run('objcopy --weaken {obj} {weak}',
        obj=obj_sz, weak=obj_sz_weak)
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        run('objcopy --weaken-symbols={whitelist} {obj} {weak}',
            whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak)
    else:
        run('objcopy {obj} {weak}',
            obj=obj_llc, weak=obj_llc_weak)
    run('ld -r -m elf_i386 -o {partial} {sz} {llc}',
        partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak)
    # Make every symbol local, then re-export only the entry point.
    run('objcopy -w --localize-symbol="*" {partial}',
        partial=obj_partial)
    run('objcopy --globalize-symbol=_user_start {partial}',
        partial=obj_partial)
    run('gcc -m32 {partial} -o {exe} ' +
        # Keep the rest of this command line (except szrt.c) in sync
        # with RunHostLD() in pnacl-translate.py.
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt',
        partial=obj_partial, exe=exe, root=nacl_root_quoted)
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
|
| +
|
# Run the build only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|
|
|