 Chromium Code Reviews
 Chromium Code Reviews Issue 693393003:
  Subzero: Allow non-hybrid binaries to be built.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
    
  
    Issue 693393003:
  Subzero: Allow non-hybrid binaries to be built.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

| OLD | NEW | 
|---|---|
| 1 #!/usr/bin/env python2 | 1 #!/usr/bin/env python2 | 
| 2 | 2 | 
| 3 import argparse | 3 import argparse | 
| 4 import os | 4 import os | 
| 5 import pipes | 5 import pipes | 
| 6 import re | 6 import re | 
| 7 import sys | 7 import sys | 
| 8 | 8 | 
| 9 from utils import shellcmd | 9 from utils import shellcmd | 
| 10 from utils import FindBaseNaCl | 10 from utils import FindBaseNaCl | 
| (...skipping 73 matching lines...) | | 
| 84 action='store_true', | 84 action='store_true', | 
| 85 help='Display some extra debugging output') | 85 help='Display some extra debugging output') | 
| 86 argparser.add_argument('--sz', dest='sz_args', action='append', default=[], | 86 argparser.add_argument('--sz', dest='sz_args', action='append', default=[], | 
| 87 help='Extra arguments for Subzero') | 87 help='Extra arguments for Subzero') | 
| 88 argparser.add_argument('--llc', dest='llc_args', action='append', | 88 argparser.add_argument('--llc', dest='llc_args', action='append', | 
| 89 default=[], help='Extra arguments for llc') | 89 default=[], help='Extra arguments for llc') | 
| 90 | 90 | 
| 91 def main(): | 91 def main(): | 
| 92 """Create a hybrid translation from Subzero and llc. | 92 """Create a hybrid translation from Subzero and llc. | 
| 93 | 93 | 
| 94 Takes a finalized pexe and builds a native executable as a | 94 Takes a finalized pexe and builds a native executable as a hybrid of Subzero | 
| 95 hybrid of Subzero and llc translated bitcode. Linker tricks are | 95 and llc translated bitcode. Linker tricks are used to determine whether | 
| 96 used to determine whether Subzero or llc generated symbols are | 96 Subzero or llc generated symbols are used, on a per-symbol basis. | 
| 97 used, on a per-symbol basis. | |
| 98 | 97 | 
| 99 By default, for every symbol, its llc version is used. Subzero | 98 By default, for every symbol, its Subzero version is used. Subzero and llc | 
| **Review comment** (Jim Stichnoth, 2014/11/02 19:11:48): Documentation was out of date -- Subzero has been … [comment truncated in this capture] | | 
| 100 symbols can be enabled by regular expressions on the symbol name, | 99 symbols can be selectively enabled/disabled via regular expressions on the | 
| 101 or by ranges of lines in this program's auto-generated symbol | 100 symbol name, or by ranges of lines in this program's auto-generated symbol | 
| 102 file. | 101 file. | 
| 103 | 102 | 
| 104 For each symbol, the --exclude arguments are first checked (the | 103 For each symbol, the --exclude arguments are first checked (the symbol is | 
| 105 symbol is 'rejected' on a match), followed by the --include | 104 'rejected' on a match), followed by the --include arguments (the symbol is | 
| 106 arguments (the symbol is 'accepted' on a match), followed by | 105 'accepted' on a match), followed by unconditional 'rejection'. The Subzero | 
| 107 unconditional 'rejection'. The Subzero version is used for an | 106 version is used for an 'accepted' symbol, and the llc version is used for a | 
| 108 'accepted' symbol, and the llc version is used for a 'rejected' | 107 'rejected' symbol. | 
| 109 symbol. | |
| 110 | 108 | 
| 111 Each --include and --exclude argument can be a regular expression | 109 Each --include and --exclude argument can be a regular expression or a range | 
| 112 or a range of lines in the symbol file. Each regular expression | 110 of lines in the symbol file. Each regular expression is wrapped inside | 
| 113 is wrapped inside '^$', so if you want a substring match on 'foo', | 111 '^$', so if you want a substring match on 'foo', use '.*foo.*' instead. | 
| 114 use '.*foo.*' instead. Ranges use python-style 'first:last' | 112 Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10' | 
| 115 notation, so e.g. use '0:10' or ':10' for the first 10 lines of | 113 for the first 10 lines of the file, or '1' for the second line of the file. | 
| 116 the file, or '1' for the second line of the file. | |
| 117 | 114 | 
| 118 This script uses file modification timestamps to determine whether | 115 If no --include or --exclude arguments are given, the executable is produced | 
| 119 llc and Subzero re-translation are needed. It checks timestamps | 116 entirely using Subzero, without using llc or linker tricks. | 
| 120 of llc, llvm2ice, and the pexe against the translated object files | |
| 121 to determine the minimal work necessary. The --force option | |
| 122 suppresses those checks and re-translates everything. | |
| 123 | 117 | 
| 124 This script augments PATH so that various PNaCl and LLVM tools can | 118 This script uses file modification timestamps to determine whether llc and | 
| 125 be run. These extra paths are within the native_client tree. | 119 Subzero re-translation are needed. It checks timestamps of llc, llvm2ice, | 
| 126 When changes are made to these tools, copy them this way: | 120 and the pexe against the translated object files to determine the minimal | 
| 121 work necessary. The --force option suppresses those checks and | |
| 122 re-translates everything. | |
| 123 | |
| 124 This script augments PATH so that various PNaCl and LLVM tools can be run. | |
| 125 These extra paths are within the native_client tree. When changes are made | |
| 126 to these tools, copy them this way: | |
| 127 cd native_client | 127 cd native_client | 
| 128 toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\ | 128 toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\ | 
| 129 --install=toolchain/linux_x86/pnacl_newlib | 129 --install=toolchain/linux_x86/pnacl_newlib | 
| 130 """ | 130 """ | 
| 131 argparser = argparse.ArgumentParser( | 131 argparser = argparse.ArgumentParser( | 
| 132 description=' ' + main.__doc__, | 132 description=' ' + main.__doc__, | 
| 133 formatter_class=argparse.RawTextHelpFormatter) | 133 formatter_class=argparse.RawTextHelpFormatter) | 
| 134 AddOptionalArgs(argparser) | 134 AddOptionalArgs(argparser) | 
| 135 argparser.add_argument('pexe', help='Finalized pexe to translate') | 135 argparser.add_argument('pexe', help='Finalized pexe to translate') | 
| 136 args = argparser.parse_args() | 136 args = argparser.parse_args() | 
| (...skipping 12 matching lines...) | | 
| 149 nacl_root = FindBaseNaCl() | 149 nacl_root = FindBaseNaCl() | 
| 150 os.environ['PATH'] = ( | 150 os.environ['PATH'] = ( | 
| 151 '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' + | 151 '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' + | 
| 152 '{path}' | 152 '{path}' | 
| 153 ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH']) | 153 ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH']) | 
| 154 obj_llc = pexe_base + '.llc.o' | 154 obj_llc = pexe_base + '.llc.o' | 
| 155 obj_sz = pexe_base + '.sz.o' | 155 obj_sz = pexe_base + '.sz.o' | 
| 156 asm_sz = pexe_base + '.sz.s' | 156 asm_sz = pexe_base + '.sz.s' | 
| 157 obj_llc_weak = pexe_base + '.weak.llc.o' | 157 obj_llc_weak = pexe_base + '.weak.llc.o' | 
| 158 obj_sz_weak = pexe_base + '.weak.sz.o' | 158 obj_sz_weak = pexe_base + '.weak.sz.o' | 
| 159 obj_partial = pexe_base + '.o' | 159 obj_partial = obj_sz # overridden for hybrid mode | 
| 160 sym_llc = pexe_base + '.sym.llc.txt' | 160 sym_llc = pexe_base + '.sym.llc.txt' | 
| 161 sym_sz = pexe_base + '.sym.sz.txt' | 161 sym_sz = pexe_base + '.sym.sz.txt' | 
| 162 sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt' | 162 sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt' | 
| 163 whitelist_sz = pexe_base + '.wl.sz.txt' | 163 whitelist_sz = pexe_base + '.wl.sz.txt' | 
| 164 whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt' | 164 whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt' | 
| 165 llvm2ice = ( | 165 llvm2ice = ( | 
| 166 '{root}/toolchain_build/src/subzero/llvm2ice' | 166 '{root}/toolchain_build/src/subzero/llvm2ice' | 
| 167 ).format(root=nacl_root) | 167 ).format(root=nacl_root) | 
| 168 llcbin = ( | 168 llcbin = ( | 
| 169 '{root}/toolchain/linux_x86/pnacl_newlib/bin/llc' | 169 '{root}/toolchain/linux_x86/pnacl_newlib/bin/llc' | 
| 170 ).format(root=nacl_root) | 170 ).format(root=nacl_root) | 
| 171 opt_level = args.optlevel | 171 opt_level = args.optlevel | 
| 172 opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' } | 172 opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' } | 
| 173 if args.force or NewerThanOrNotThere(pexe, obj_llc) or \ | 173 hybrid = args.include or args.exclude | 
| 174 NewerThanOrNotThere(llcbin, obj_llc): | 174 | 
| 175 if hybrid and (args.force or | |
| 176 NewerThanOrNotThere(pexe, obj_llc) or | |
| 177 NewerThanOrNotThere(llcbin, obj_llc)): | |
| 178 # Only run pnacl-translate in hybrid mode. | |
| 175 shellcmd(['pnacl-translate', | 179 shellcmd(['pnacl-translate', | 
| 176 '-ffunction-sections', | 180 '-ffunction-sections', | 
| 177 '-fdata-sections', | 181 '-fdata-sections', | 
| 178 '-c', | 182 '-c', | 
| 179 '-arch', 'x86-32-linux', | 183 '-arch', 'x86-32-linux', | 
| 180 '-O' + opt_level_map[opt_level], | 184 '-O' + opt_level_map[opt_level], | 
| 181 '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize', | 185 '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize', | 
| 182 '-o', obj_llc] + | 186 '-o', obj_llc] + | 
| 183 args.llc_args + | 187 args.llc_args + | 
| 184 [pexe], | 188 [pexe], | 
| 185 echo=args.verbose) | 189 echo=args.verbose) | 
| 186 shellcmd(( | 190 shellcmd(( | 
| 187 'objcopy --redefine-sym _start=_user_start {obj}' | 191 'objcopy --redefine-sym _start=_user_start {obj}' | 
| 188 ).format(obj=obj_llc), echo=args.verbose) | 192 ).format(obj=obj_llc), echo=args.verbose) | 
| 193 # Generate llc syms file for consistency, even though it's not used. | |
| 189 shellcmd(( | 194 shellcmd(( | 
| 190 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' | 195 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' | 
| 191 ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose) | 196 ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose) | 
| 192 if args.force or NewerThanOrNotThere(pexe, obj_sz) or \ | 197 | 
| 193 NewerThanOrNotThere(llvm2ice, obj_sz): | 198 if (args.force or | 
| 199 NewerThanOrNotThere(pexe, obj_sz) or | |
| 200 NewerThanOrNotThere(llvm2ice, obj_sz)): | |
| 201 # Run llvm2ice regardless of hybrid mode. | |
| 194 shellcmd([llvm2ice, | 202 shellcmd([llvm2ice, | 
| 195 '-O' + opt_level, | 203 '-O' + opt_level, | 
| 196 '-bitcode-format=pnacl', | 204 '-bitcode-format=pnacl', | 
| 197 '-externalize', | |
| 198 '-ffunction-sections', | |
| 199 '-fdata-sections', | |
| 200 '-o', asm_sz] + | 205 '-o', asm_sz] + | 
| 206 (['-externalize', | |
| 207 '-ffunction-sections', | |
| 208 '-fdata-sections'] if hybrid else []) + | |
| 201 args.sz_args + | 209 args.sz_args + | 
| 202 [pexe], | 210 [pexe], | 
| 203 echo=args.verbose) | 211 echo=args.verbose) | 
| 204 shellcmd(( | 212 shellcmd(( | 
| 205 'llvm-mc -arch=x86 -filetype=obj -o {obj} {asm}' | 213 'llvm-mc -arch=x86 -filetype=obj -o {obj} {asm}' | 
| 206 ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose) | 214 ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose) | 
| 207 shellcmd(( | 215 shellcmd(( | 
| 208 'objcopy --redefine-sym _start=_user_start {obj}' | 216 'objcopy --redefine-sym _start=_user_start {obj}' | 
| 209 ).format(obj=obj_sz), echo=args.verbose) | 217 ).format(obj=obj_sz), echo=args.verbose) | 
| 218 if hybrid: | |
| 219 shellcmd(( | |
| 220 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' | |
| 221 ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose) | |
| 222 | |
| 223 if hybrid: | |
| 224 with open(sym_sz_unescaped) as f: | |
| 225 sz_syms = f.read().splitlines() | |
| 226 re_include_str = BuildRegex(args.include, sz_syms) | |
| 227 re_exclude_str = BuildRegex(args.exclude, sz_syms) | |
| 228 re_include = re.compile(re_include_str) | |
| 229 re_exclude = re.compile(re_exclude_str) | |
| 230 # If a symbol doesn't explicitly match re_include or re_exclude, | |
| 231 # the default MatchSymbol() result is True, unless some --include | |
| 232 # args are provided. | |
| 233 default_match = not args.include | |
| 234 | |
| 235 whitelist_has_items = False | |
| 236 with open(whitelist_sz_unescaped, 'w') as f: | |
| 237 for sym in sz_syms: | |
| 238 if MatchSymbol(sym, re_include, re_exclude, default_match): | |
| 239 f.write(sym + '\n') | |
| 240 whitelist_has_items = True | |
| 210 shellcmd(( | 241 shellcmd(( | 
| 211 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' | 242 'objcopy --weaken {obj} {weak}' | 
| 212 ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose) | 243 ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose) | 
| 244 if whitelist_has_items: | |
| 245 # objcopy returns an error if the --weaken-symbols file is empty. | |
| 246 shellcmd(( | |
| 247 'objcopy --weaken-symbols={whitelist} {obj} {weak}' | |
| 248 ).format(whitelist=whitelist_sz, obj=obj_llc, | |
| 249 weak=obj_llc_weak), | |
| 250 echo=args.verbose) | |
| 251 else: | |
| 252 shellcmd(( | |
| 253 'objcopy {obj} {weak}' | |
| 254 ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose) | |
| 255 obj_partial = pexe_base + '.o' | |
| 256 shellcmd(( | |
| 257 'ld -r -m elf_i386 -o {partial} {sz} {llc}' | |
| 258 ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak), | |
| 259 echo=args.verbose) | |
| 260 shellcmd(( | |
| 261 'objcopy -w --localize-symbol="*" {partial}' | |
| 262 ).format(partial=obj_partial), echo=args.verbose) | |
| 263 shellcmd(( | |
| 264 'objcopy --globalize-symbol=_user_start {partial}' | |
| 265 ).format(partial=obj_partial), echo=args.verbose) | |
| 213 | 266 | 
| 214 with open(sym_sz_unescaped) as f: | 267 # Run the linker regardless of hybrid mode. | 
| 215 sz_syms = f.read().splitlines() | |
| 216 re_include_str = BuildRegex(args.include, sz_syms) | |
| 217 re_exclude_str = BuildRegex(args.exclude, sz_syms) | |
| 218 re_include = re.compile(re_include_str) | |
| 219 re_exclude = re.compile(re_exclude_str) | |
| 220 # If a symbol doesn't explicitly match re_include or re_exclude, | |
| 221 # the default MatchSymbol() result is True, unless some --include | |
| 222 # args are provided. | |
| 223 default_match = not len(args.include) | |
| 224 | |
| 225 whitelist_has_items = False | |
| 226 with open(whitelist_sz_unescaped, 'w') as f: | |
| 227 for sym in sz_syms: | |
| 228 if MatchSymbol(sym, re_include, re_exclude, default_match): | |
| 229 f.write(sym + '\n') | |
| 230 whitelist_has_items = True | |
| 231 shellcmd(( | |
| 232 'objcopy --weaken {obj} {weak}' | |
| 233 ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose) | |
| 234 if whitelist_has_items: | |
| 235 # objcopy returns an error if the --weaken-symbols file is empty. | |
| 236 shellcmd(( | |
| 237 'objcopy --weaken-symbols={whitelist} {obj} {weak}' | |
| 238 ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak), | |
| 239 echo=args.verbose) | |
| 240 else: | |
| 241 shellcmd(( | |
| 242 'objcopy {obj} {weak}' | |
| 243 ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose) | |
| 244 shellcmd(( | |
| 245 'ld -r -m elf_i386 -o {partial} {sz} {llc}' | |
| 246 ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak), | |
| 247 echo=args.verbose) | |
| 248 shellcmd(( | |
| 249 'objcopy -w --localize-symbol="*" {partial}' | |
| 250 ).format(partial=obj_partial), echo=args.verbose) | |
| 251 shellcmd(( | |
| 252 'objcopy --globalize-symbol=_user_start {partial}' | |
| 253 ).format(partial=obj_partial), echo=args.verbose) | |
| 254 linker = ( | 268 linker = ( | 
| 255 '{root}/../third_party/llvm-build/Release+Asserts/bin/clang' | 269 '{root}/../third_party/llvm-build/Release+Asserts/bin/clang' | 
| 256 ).format(root=nacl_root) | 270 ).format(root=nacl_root) | 
| 257 shellcmd(( | 271 shellcmd(( | 
| 258 '{ld} -m32 {partial} -o {exe} -O{opt_level} ' + | 272 '{ld} -m32 {partial} -o {exe} -O{opt_level} ' + | 
| 259 # Keep the rest of this command line (except szrt.c) in sync | 273 # Keep the rest of this command line (except szrt.c) in sync | 
| 260 # with RunHostLD() in pnacl-translate.py. | 274 # with RunHostLD() in pnacl-translate.py. | 
| 261 '{root}/toolchain/linux_x86/pnacl_newlib/translator/x86-32-linux/lib/' + | 275 '{root}/toolchain/linux_x86/pnacl_newlib/translator/x86-32-linux/lib/' + | 
| 262 '{{unsandboxed_irt,irt_query_list}}.o ' + | 276 '{{unsandboxed_irt,irt_query_list}}.o ' + | 
| 263 '{root}/toolchain_build/src/subzero/runtime/szrt.c ' + | 277 '{root}/toolchain_build/src/subzero/runtime/szrt.c ' + | 
| 264 '{root}/toolchain_build/src/subzero/runtime/szrt_i686.ll ' + | 278 '{root}/toolchain_build/src/subzero/runtime/szrt_i686.ll ' + | 
| 265 '-lpthread -lrt' | 279 '-lpthread -lrt' | 
| 266 ).format(ld=linker, partial=obj_partial, exe=exe, | 280 ).format(ld=linker, partial=obj_partial, exe=exe, | 
| 267 opt_level=opt_level_map[opt_level], root=nacl_root), | 281 opt_level=opt_level_map[opt_level], root=nacl_root), | 
| 268 echo=args.verbose) | 282 echo=args.verbose) | 
| 283 | |
| 269 # Put the extra verbose printing at the end. | 284 # Put the extra verbose printing at the end. | 
| 270 if args.verbose: | 285 if args.verbose: | 
| 271 print 'PATH={path}'.format(path=os.environ['PATH']) | 286 print 'PATH={path}'.format(path=os.environ['PATH']) | 
| 272 print 'include={regex}'.format(regex=re_include_str) | 287 if hybrid: | 
| 273 print 'exclude={regex}'.format(regex=re_exclude_str) | 288 print 'include={regex}'.format(regex=re_include_str) | 
| 274 print 'default_match={dm}'.format(dm=default_match) | 289 print 'exclude={regex}'.format(regex=re_exclude_str) | 
| 275 print 'Number of Subzero syms = {num}'.format(num=len(sz_syms)) | 290 print 'default_match={dm}'.format(dm=default_match) | 
| 291 print 'Number of Subzero syms = {num}'.format(num=len(sz_syms)) | |
| 276 | 292 | 
| 277 if __name__ == '__main__': | 293 if __name__ == '__main__': | 
| 278 main() | 294 main() | 
| OLD | NEW |