OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python2 |
| 2 |
| 3 import argparse |
| 4 import os |
| 5 import pipes |
| 6 import re |
| 7 import sys |
| 8 |
| 9 from utils import shellcmd |
| 10 from utils import FindBaseNaCl |
| 11 |
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
        patterns: List of strings.  A pattern starting with a digit or
            ':' is a python-style range into syms ('n', 'a:b', 'a:',
            ':b'); anything else is used as a regex.
        syms: List of symbol names that range patterns index into.

    Returns:
        A regex string in which every pattern is anchored as a whole
        ('^(?:...)$') and the anchored patterns are joined with '|'.
        Returns '^$' if patterns is empty.

    Exits with status 1 (SystemExit) after writing a message to stderr
    if a range pattern is malformed.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so an empty pattern is treated as an
        # (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and non-integer
                # bounds such as '1x' or '1:b'.
                sys.stderr.write(
                    'Invalid range syntax: {p}\n'.format(p=pattern))
                sys.exit(1)
            # Symbol names are literals, so escape regex metacharacters.
            pattern = '|'.join(re.escape(p) for p in syms[lower:upper])
        # Group before anchoring so that a top-level '|' inside the
        # pattern cannot escape the '^...$' wrapper.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
| 39 |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Match a symbol name against inclusion/exclusion rules.

    Args:
        sym: Symbol name to test.
        re_include: Compiled regex; a match accepts the symbol.
        re_exclude: Compiled regex; a match rejects the symbol.
        default_match: Result to use when neither regex matches.  Any
            truthy/falsy value is accepted.

    Returns:
        True or False.  An exclude match always wins over an include
        match; default_match (coerced to bool) is used if neither regex
        matches.
    """
    if re_exclude.match(sym):
        # Always honor an explicit exclude before considering
        # includes.
        return False
    if re_include.match(sym):
        return True
    # Coerce so callers always get a real bool, as documented, even if
    # default_match was computed as an int (e.g. a len() result).
    return bool(default_match)
| 54 |
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode. Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used. Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'. The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file. Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead. Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    The --init argument does first-time initialization for the pexe,
    including creation of the Subzero symbol file that is implicitly
    used in the --include and --exclude arguments. It can be removed
    from the command line for subsequent executions if the pexe
    doesn't change.

    This script augments PATH so that various PNaCl and LLVM tools
    can be run. These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_i686_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # The docstring above doubles as the --help text (shown verbatim via
    # RawTextHelpFormatter), so edits to it are user-visible.
    argparser = argparse.ArgumentParser(
        description=' ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    argparser.add_argument('--init', dest='init', action='store_true',
                           help='Perform first-time setup for the pexe')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable (default a.out)')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent)')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    args = argparser.parse_args()

    # Derive the base name for all intermediate files: the pexe path
    # with a trailing '.pexe' removed, or the full path otherwise.
    pexe = args.pexe
    [pexe_base, ext] = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Keep an unquoted copy for files opened directly from Python; the
    # quoted forms are only for interpolation into shell command lines.
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)
    # The output path is interpolated into a shell command as well, so
    # quote it the same way as the pexe-derived paths.
    exe = pipes.quote(args.output)

    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{root}/toolchain/linux_x86/pnacl_newlib/host_x86_32/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'

    if args.init:
        # First-time setup: translate the pexe with both llc and Subzero,
        # rename _start in each object, and dump each object's symbols.
        opt_level = args.optlevel
        # pnacl-translate is given -O0 for the 'm1'/'-1' levels; llvm2ice
        # below is passed the level exactly as given on the command line.
        opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
        shellcmd((
            'pnacl-translate -ffunction-sections -c -arch x86-32-linux ' +
            '-O{level} --pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize ' +
            '-o {obj} {pexe}'
            ).format(level=opt_level_map[opt_level], obj=obj_llc, pexe=pexe),
                 echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc), echo=args.verbose)
        shellcmd((
            '{root}/toolchain_build/src/subzero/llvm2ice ' +
            '-O{level} -bitcode-format=pnacl -disable-globals ' +
            '-externalize -ffunction-sections {pexe} -o {asm}'
            ).format(root=nacl_root,level=opt_level, pexe=pexe, asm=asm_sz),
                 echo=args.verbose)
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    # The Subzero symbol file is written by the --init step above (in
    # this run or an earlier one); range arguments index into its lines.
    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is False, unless some --exclude
    # args are provided and no --include args are provided.
    default_match = bool(args.exclude) and not args.include

    # Write the accepted symbols, one per line; this file is handed to
    # objcopy --weaken-symbols to weaken those symbols in the llc object.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak),
                 echo=args.verbose)
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
    # Combine both weakened objects into one relocatable object, make
    # every symbol local, then re-expose only _user_start.
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
             echo=args.verbose)
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'gcc -m32 {partial} -o {exe} ' +
        # Keep the rest of this command line (except szrt.c) in sync
        # with RunHostLD() in pnacl-translate.py.
        '{root}/toolchain/linux_x86/pnacl_newlib/lib-x86-32-linux/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '-lpthread -lrt'
        ).format(partial=obj_partial, exe=exe, root=nacl_root),
             echo=args.verbose)
    # Put the extra verbose printing at the end.
    if args.verbose:
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as a Python 3 function call.
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
# Run the translation only when this file is executed as a script, so
# that importing it does not parse argv or run any commands.
if __name__ == '__main__':
    main()
OLD | NEW |