Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(268)

Unified Diff: pydir/szbuild.py

Issue 551953002: Subzero: Add a script that builds a hybrid Subzero/llc native executable. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Updates from Jan's code review Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | runtime/szrt.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pydir/szbuild.py
diff --git a/pydir/szbuild.py b/pydir/szbuild.py
new file mode 100755
index 0000000000000000000000000000000000000000..8adc683057acf96df9b23b3d8191c1f53a9d3ef5
--- /dev/null
+++ b/pydir/szbuild.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python2
+
+import argparse
+import os
+import pipes
+import re
+import sys
+
+from utils import shellcmd
+from utils import FindBaseNaCl
+
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
        patterns: Sequence of strings.  A string starting with a digit
            or ':' is read as a python-style slice of syms ('n', 'a:b',
            'a:' or ':b'); any other string is used as a regex.
        syms: List of symbol names that the ranges index into.

    Returns:
        A regex string in which every pattern is anchored so that it
        must match a whole symbol name, with the patterns joined by
        '|'.  Returns '^$' when patterns is empty.

    Prints a message and exits the process with status 1 if a range
    has more than two ':'-separated fields or a non-integer bound.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is treated
        # as an (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            # Symbol names are literals, so escape regex metacharacters.
            pattern = '|'.join(re.escape(s) for s in syms[lower:upper])
        # Group before anchoring: without the group, an alternation such
        # as 'foo|bar' would become '^foo|bar$', which anchors only the
        # outer ends and so matches any symbol merely starting with 'foo'.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
+
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Match a symbol name against inclusion/exclusion rules.

    Args:
        sym: Symbol name to test.
        re_include: Compiled regex of symbols to accept.
        re_exclude: Compiled regex of symbols to reject.
        default_match: Result to use when neither regex matches; any
            truthy/falsy value is accepted.

    Returns:
        False if re_exclude matches, otherwise True if re_include
        matches, otherwise default_match converted to a bool.
    """
    if re_exclude.match(sym):
        # Always honor an explicit exclude before considering
        # includes.
        return False
    if re_include.match(sym):
        return True
    # Coerce so the documented True/False contract holds even when the
    # caller passes a truthy/falsy non-bool (e.g. a length).
    return bool(default_match)
+
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode.  Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used.  Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by a
    default: 'rejection', unless --exclude arguments were given
    without any --include arguments, in which case the default is
    'acceptance'.  The Subzero version is used for an 'accepted'
    symbol, and the llc version is used for a 'rejected' symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file.  Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead.  Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments.  It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run.  These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
        cd native_client
        toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
            --install=toolchain/linux_x86/pnacl_newlib
    """
    argparser = argparse.ArgumentParser(
        # The docstring's first line starts right after the opening
        # quotes, so prefix it with the docstring's indentation to line
        # it up with the remaining lines in the raw-formatted help.
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    # Derive the base name for all intermediate files: the pexe path
    # with a trailing '.pexe' removed, or the full path otherwise.
    pexe = args.pexe
    [pexe_base, ext] = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # The quoted forms are interpolated into shell command strings; the
    # unescaped forms are for files opened directly from Python.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # Quote the output path like every other path handed to the shell,
    # so an output name containing spaces or metacharacters works.
    exe = pipes.quote(args.output)

    # NOTE(review): nacl_root is interpolated into shell commands
    # unquoted below; this assumes the checkout path contains no shell
    # metacharacters.
    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        # First-time setup: translate the pexe with both llc and
        # Subzero, rename each _start to _user_start, and dump the
        # symbol names of both objects.
        opt_level = args.optlevel
        # pnacl-translate is given -O0 for the 'm1'/'-1' levels.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe),
                 echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc), echo=args.verbose)
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=nacl_root,level=opt_level, pexe=pexe, asm=asm_sz),
                 echo=args.verbose)
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    # The Subzero symbol file is what --include/--exclude ranges index.
    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # Write the names of the symbols whose Subzero version is wanted.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every Subzero symbol, and weaken the whitelisted symbols in
    # the llc object, before combining the two objects.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak),
                 echo=args.verbose)
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
             echo=args.verbose)
    # Make every symbol local, then re-export only _user_start.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        # Keep the rest of this command line (except szrt.c) in sync
        # with RunHostLD() in pnacl-translate.py.
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root),
             echo=args.verbose)
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))

# Run the build only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
« no previous file with comments | « no previous file | runtime/szrt.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698