| OLD | NEW |
| 1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===// | 1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file defines aspects of the compilation that persist across | 10 // This file defines aspects of the compilation that persist across |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 215 } | 215 } |
| 216 | 216 |
| 217 GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, | 217 GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, |
| 218 ELFStreamer *ELFStr, const ClFlags &Flags) | 218 ELFStreamer *ELFStr, const ClFlags &Flags) |
| 219 : ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump), | 219 : ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump), |
| 220 StrEmit(OsEmit), StrError(OsError), Flags(Flags), | 220 StrEmit(OsEmit), StrError(OsError), Flags(Flags), |
| 221 RNG(Flags.getRandomSeed()), ObjectWriter(), | 221 RNG(Flags.getRandomSeed()), ObjectWriter(), |
| 222 OptQ(/*Sequential=*/Flags.isSequential(), | 222 OptQ(/*Sequential=*/Flags.isSequential(), |
| 223 /*MaxSize=*/Flags.getNumTranslationThreads()), | 223 /*MaxSize=*/Flags.getNumTranslationThreads()), |
| 224 // EmitQ is allowed unlimited size. | 224 // EmitQ is allowed unlimited size. |
| 225 EmitQ(/*Sequential=*/Flags.isSequential()) { | 225 EmitQ(/*Sequential=*/Flags.isSequential()), |
| 226 DataLowering(TargetDataLowering::createLowering(this)), |
| 227 HasSeenCode(false), |
| 228 ProfileBlockInfoVarDecl(VariableDeclaration::create()) { |
| 226 assert(OsDump && "OsDump is not defined for GlobalContext"); | 229 assert(OsDump && "OsDump is not defined for GlobalContext"); |
| 227 assert(OsEmit && "OsEmit is not defined for GlobalContext"); | 230 assert(OsEmit && "OsEmit is not defined for GlobalContext"); |
| 228 assert(OsError && "OsError is not defined for GlobalContext"); | 231 assert(OsError && "OsError is not defined for GlobalContext"); |
| 229 // Make sure thread_local fields are properly initialized before any | 232 // Make sure thread_local fields are properly initialized before any |
| 230 // accesses are made. Do this here instead of at the start of | 233 // accesses are made. Do this here instead of at the start of |
| 231 // main() so that all clients (e.g. unit tests) can benefit for | 234 // main() so that all clients (e.g. unit tests) can benefit for |
| 232 // free. | 235 // free. |
| 233 GlobalContext::TlsInit(); | 236 GlobalContext::TlsInit(); |
| 234 Cfg::TlsInit(); | 237 Cfg::TlsInit(); |
| 235 // Create a new ThreadContext for the current thread. No need to | 238 // Create a new ThreadContext for the current thread. No need to |
| (...skipping 11 matching lines...) Expand all Loading... |
| 247 } | 250 } |
| 248 Timers.initInto(MyTLS->Timers); | 251 Timers.initInto(MyTLS->Timers); |
| 249 switch (Flags.getOutFileType()) { | 252 switch (Flags.getOutFileType()) { |
| 250 case FT_Elf: | 253 case FT_Elf: |
| 251 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr)); | 254 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr)); |
| 252 break; | 255 break; |
| 253 case FT_Asm: | 256 case FT_Asm: |
| 254 case FT_Iasm: | 257 case FT_Iasm: |
| 255 break; | 258 break; |
| 256 } | 259 } |
| 260 ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64)); |
| 261 ProfileBlockInfoVarDecl->setIsConstant(true); |
| 262 |
| 263 // Note: if you change this symbol, make sure to update |
| 264 // runtime/szrt_profiler.c as well. |
| 265 ProfileBlockInfoVarDecl->setName("__Sz_block_profile_info"); |
| 266 ProfileBlockInfoVarDecl->setSuppressMangling(); |
| 267 ProfileBlockInfoVarDecl->setLinkage(llvm::GlobalValue::ExternalLinkage); |
| 257 } | 268 } |
| 258 | 269 |
| 259 void GlobalContext::translateFunctions() { | 270 void GlobalContext::translateFunctions() { |
| 260 while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) { | 271 while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) { |
| 261 // Install Func in TLS for Cfg-specific container allocators. | 272 // Install Func in TLS for Cfg-specific container allocators. |
| 262 Cfg::setCurrentCfg(Func.get()); | 273 Cfg::setCurrentCfg(Func.get()); |
| 263 // Reset per-function stats being accumulated in TLS. | 274 // Reset per-function stats being accumulated in TLS. |
| 264 resetStats(); | 275 resetStats(); |
| 265 // Set verbose level to none if the current function does NOT | 276 // Set verbose level to none if the current function does NOT |
| 266 // match the -verbose-focus command-line option. | 277 // match the -verbose-focus command-line option. |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 315 } | 326 } |
| 316 Cfg::setCurrentCfg(nullptr); | 327 Cfg::setCurrentCfg(nullptr); |
| 317 assert(Item); | 328 assert(Item); |
| 318 emitQueueBlockingPush(Item); | 329 emitQueueBlockingPush(Item); |
| 319 // The Cfg now gets deleted as Func goes out of scope. | 330 // The Cfg now gets deleted as Func goes out of scope. |
| 320 } | 331 } |
| 321 } | 332 } |
| 322 | 333 |
| 323 namespace { | 334 namespace { |
| 324 | 335 |
| 325 // Adds an array of pointers to all the profiler-generated globals. The | 336 void addBlockInfoPtrs(const VariableDeclarationList &Globals, |
| 326 // __Sz_profile_summary function iterates over this array for printing the | 337 VariableDeclaration *ProfileBlockInfo) { |
| 327 // profiling counters. | |
| 328 VariableDeclaration *blockProfileInfo(const VariableDeclarationList &Globals) { | |
| 329 auto *Var = VariableDeclaration::create(); | |
| 330 Var->setAlignment(typeWidthInBytes(IceType_i64)); | |
| 331 Var->setIsConstant(true); | |
| 332 | |
| 333 // Note: if you change this symbol, make sure to update | |
| 334 // runtime/szrt_profiler.c as well. | |
| 335 Var->setName("__Sz_block_profile_info"); | |
| 336 Var->setSuppressMangling(); | |
| 337 Var->setLinkage(llvm::GlobalValue::ExternalLinkage); | |
| 338 for (const VariableDeclaration *Global : Globals) { | 338 for (const VariableDeclaration *Global : Globals) { |
| 339 if (Cfg::isProfileGlobal(*Global)) { | 339 if (Cfg::isProfileGlobal(*Global)) { |
| 340 constexpr RelocOffsetT BlockExecutionCounterOffset = 0; | 340 constexpr RelocOffsetT BlockExecutionCounterOffset = 0; |
| 341 Var->addInitializer(new VariableDeclaration::RelocInitializer( | 341 ProfileBlockInfo->addInitializer( |
| 342 Global, BlockExecutionCounterOffset)); | 342 new VariableDeclaration::RelocInitializer( |
| 343 Global, BlockExecutionCounterOffset)); |
| 343 } | 344 } |
| 344 } | 345 } |
| 345 | |
| 346 // This adds a 64-bit sentinel entry to the end of our array. For 32-bit | |
| 347 // architectures this will waste 4 bytes. | |
| 348 const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64); | |
| 349 Var->addInitializer( | |
| 350 new VariableDeclaration::ZeroInitializer(Sizeof64BitNullPtr)); | |
| 351 | |
| 352 return Var; | |
| 353 } | |
| 354 | |
| 355 void addBlockProfileInfoArrayToGlobals(VariableDeclarationList *Globals) { | |
| 356 // Purposefully create the Var temp to prevent bugs in case the compiler | |
| 357 // reorders instructions in a way that Globals is extended before the call | |
| 358 // to profileInfoArray. | |
| 359 VariableDeclaration *Var = blockProfileInfo(*Globals); | |
| 360 Globals->push_back(Var); | |
| 361 } | |
| 362 | |
| 363 void lowerGlobals(GlobalContext *Ctx, | |
| 364 std::unique_ptr<VariableDeclarationList> VariableDeclarations, | |
| 365 TargetDataLowering *DataLowering) { | |
| 366 TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx); | |
| 367 const bool DumpGlobalVariables = ALLOW_DUMP && Ctx->getFlags().getVerbose() && | |
| 368 Ctx->getFlags().getVerboseFocusOn().empty(); | |
| 369 if (DumpGlobalVariables) { | |
| 370 OstreamLocker L(Ctx); | |
| 371 Ostream &Stream = Ctx->getStrDump(); | |
| 372 for (const Ice::VariableDeclaration *Global : *VariableDeclarations) { | |
| 373 Global->dump(Ctx, Stream); | |
| 374 } | |
| 375 } | |
| 376 if (Ctx->getFlags().getDisableTranslation()) | |
| 377 return; | |
| 378 | |
| 379 // There should be no need to emit the block_profile_info array if profiling | |
| 380 // is disabled. In practice, given that szrt_profiler.o will always be | |
| 381 // embedded in the application, we need to add it. In a non-profiled build | |
| 382 // this array will only contain the nullptr terminator. | |
| 383 addBlockProfileInfoArrayToGlobals(VariableDeclarations.get()); | |
| 384 | |
| 385 DataLowering->lowerGlobals(std::move(VariableDeclarations)); | |
| 386 } | 346 } |
| 387 | 347 |
| 388 // Ensure Pending is large enough that Pending[Index] is valid. | 348 // Ensure Pending is large enough that Pending[Index] is valid. |
| 389 void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) { | 349 void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) { |
| 390 if (Index >= Pending.size()) | 350 if (Index >= Pending.size()) |
| 391 Pending.resize(Index + 1); | 351 Pending.resize(Index + 1); |
| 392 } | 352 } |
| 393 | 353 |
| 394 void addAllIfNotNull(std::unique_ptr<VariableDeclarationList> src, | |
| 395 VariableDeclarationList *dst) { | |
| 396 if (src != nullptr) { | |
| 397 dst->insert(dst->end(), src->begin(), src->end()); | |
| 398 } | |
| 399 } | |
| 400 | |
| 401 } // end of anonymous namespace | 354 } // end of anonymous namespace |
| 402 | 355 |
| 403 void GlobalContext::emitFileHeader() { | 356 void GlobalContext::emitFileHeader() { |
| 404 TimerMarker T1(Ice::TimerStack::TT_emit, this); | 357 TimerMarker T1(Ice::TimerStack::TT_emit, this); |
| 405 if (getFlags().getOutFileType() == FT_Elf) { | 358 if (getFlags().getOutFileType() == FT_Elf) { |
| 406 getObjectWriter()->writeInitialELFHeader(); | 359 getObjectWriter()->writeInitialELFHeader(); |
| 407 } else { | 360 } else { |
| 408 if (!ALLOW_DUMP) | 361 if (!ALLOW_DUMP) |
| 409 llvm::report_fatal_error("emitFileHeader for non-ELF"); | 362 llvm::report_fatal_error("emitFileHeader for non-ELF"); |
| 410 TargetHeaderLowering::createLowering(this)->lower(); | 363 TargetHeaderLowering::createLowering(this)->lower(); |
| 411 } | 364 } |
| 412 } | 365 } |
| 413 | 366 |
| 367 void GlobalContext::lowerConstants() { |
| 368 DataLowering->lowerConstants(); |
| 369 } |
| 370 |
| 371 void GlobalContext::lowerGlobals(const IceString &SectionSuffix) { |
| 372 TimerMarker T(TimerStack::TT_emitGlobalInitializers, this); |
| 373 const bool DumpGlobalVariables = |
| 374 ALLOW_DUMP && Flags.getVerbose() && Flags.getVerboseFocusOn().empty(); |
| 375 if (DumpGlobalVariables) { |
| 376 OstreamLocker L(this); |
| 377 Ostream &Stream = getStrDump(); |
| 378 for (const Ice::VariableDeclaration *Global : Globals) { |
| 379 Global->dump(this, Stream); |
| 380 } |
| 381 } |
| 382 if (Flags.getDisableTranslation()) |
| 383 return; |
| 384 |
| 385 addBlockInfoPtrs(Globals, ProfileBlockInfoVarDecl.get()); |
| 386 DataLowering->lowerGlobals(Globals, SectionSuffix); |
| 387 Globals.clear(); |
| 388 } |
| 389 |
| 390 void GlobalContext::lowerProfileData() { |
| 391 // This adds a 64-bit sentinel entry to the end of our array. For 32-bit |
| 392 // architectures this will waste 4 bytes. |
| 393 const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64); |
| 394 ProfileBlockInfoVarDecl->addInitializer( |
| 395 new VariableDeclaration::ZeroInitializer(Sizeof64BitNullPtr)); |
| 396 Globals.push_back(ProfileBlockInfoVarDecl.get()); |
| 397 constexpr char ProfileDataSection[] = "$sz_profiler$"; |
| 398 lowerGlobals(ProfileDataSection); |
| 399 } |
| 400 |
| 414 void GlobalContext::emitItems() { | 401 void GlobalContext::emitItems() { |
| 415 const bool Threaded = !getFlags().isSequential(); | 402 const bool Threaded = !getFlags().isSequential(); |
| 416 // Pending is a vector containing the reassembled, ordered list of | 403 // Pending is a vector containing the reassembled, ordered list of |
| 417 // work items. When we're ready for the next item, we first check | 404 // work items. When we're ready for the next item, we first check |
| 418 // whether it's in the Pending list. If not, we take an item from | 405 // whether it's in the Pending list. If not, we take an item from |
| 419 // the work queue, and if it's not the item we're waiting for, we | 406 // the work queue, and if it's not the item we're waiting for, we |
| 420 // insert it into Pending and repeat. The work item is deleted | 407 // insert it into Pending and repeat. The work item is deleted |
| 421 // after it is processed. | 408 // after it is processed. |
| 422 std::unique_ptr<VariableDeclarationList> GlobalInits( | |
| 423 new VariableDeclarationList()); | |
| 424 std::vector<EmitterWorkItem *> Pending; | 409 std::vector<EmitterWorkItem *> Pending; |
| 425 uint32_t DesiredSequenceNumber = getFirstSequenceNumber(); | 410 uint32_t DesiredSequenceNumber = getFirstSequenceNumber(); |
| 426 while (true) { | 411 while (true) { |
| 427 resizePending(Pending, DesiredSequenceNumber); | 412 resizePending(Pending, DesiredSequenceNumber); |
| 428 // See if Pending contains DesiredSequenceNumber. | 413 // See if Pending contains DesiredSequenceNumber. |
| 429 EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber]; | 414 EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber]; |
| 430 if (RawItem == nullptr) | 415 if (RawItem == nullptr) |
| 431 RawItem = emitQueueBlockingPop(); | 416 RawItem = emitQueueBlockingPop(); |
| 432 if (RawItem == nullptr) | 417 if (RawItem == nullptr) |
| 433 break; | 418 break; |
| 434 uint32_t ItemSeq = RawItem->getSequenceNumber(); | 419 uint32_t ItemSeq = RawItem->getSequenceNumber(); |
| 435 if (Threaded && ItemSeq != DesiredSequenceNumber) { | 420 if (Threaded && ItemSeq != DesiredSequenceNumber) { |
| 436 resizePending(Pending, ItemSeq); | 421 resizePending(Pending, ItemSeq); |
| 437 Pending[ItemSeq] = RawItem; | 422 Pending[ItemSeq] = RawItem; |
| 438 continue; | 423 continue; |
| 439 } | 424 } |
| 440 | 425 |
| 441 std::unique_ptr<EmitterWorkItem> Item(RawItem); | 426 std::unique_ptr<EmitterWorkItem> Item(RawItem); |
| 442 ++DesiredSequenceNumber; | 427 ++DesiredSequenceNumber; |
| 443 switch (Item->getKind()) { | 428 switch (Item->getKind()) { |
| 444 case EmitterWorkItem::WI_Nop: | 429 case EmitterWorkItem::WI_Nop: |
| 445 break; | 430 break; |
| 446 case EmitterWorkItem::WI_GlobalInits: { | 431 case EmitterWorkItem::WI_GlobalInits: { |
| 447 addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get()); | 432 accumulateGlobals(Item->getGlobalInits()); |
| 448 } break; | 433 } break; |
| 449 case EmitterWorkItem::WI_Asm: { | 434 case EmitterWorkItem::WI_Asm: { |
| 450 addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get()); | 435 lowerGlobalsIfNoCodeHasBeenSeen(); |
| 436 accumulateGlobals(Item->getGlobalInits()); |
| 437 |
| 451 std::unique_ptr<Assembler> Asm = Item->getAsm(); | 438 std::unique_ptr<Assembler> Asm = Item->getAsm(); |
| 452 Asm->alignFunction(); | 439 Asm->alignFunction(); |
| 453 IceString MangledName = mangleName(Asm->getFunctionName()); | 440 IceString MangledName = mangleName(Asm->getFunctionName()); |
| 454 switch (getFlags().getOutFileType()) { | 441 switch (getFlags().getOutFileType()) { |
| 455 case FT_Elf: | 442 case FT_Elf: |
| 456 getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(), | 443 getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(), |
| 457 Asm.get()); | 444 Asm.get()); |
| 458 break; | 445 break; |
| 459 case FT_Iasm: { | 446 case FT_Iasm: { |
| 460 OstreamLocker L(this); | 447 OstreamLocker L(this); |
| 461 Cfg::emitTextHeader(MangledName, this, Asm.get()); | 448 Cfg::emitTextHeader(MangledName, this, Asm.get()); |
| 462 Asm->emitIASBytes(this); | 449 Asm->emitIASBytes(this); |
| 463 } break; | 450 } break; |
| 464 case FT_Asm: | 451 case FT_Asm: |
| 465 llvm::report_fatal_error("Unexpected FT_Asm"); | 452 llvm::report_fatal_error("Unexpected FT_Asm"); |
| 466 break; | 453 break; |
| 467 } | 454 } |
| 468 } break; | 455 } break; |
| 469 case EmitterWorkItem::WI_Cfg: { | 456 case EmitterWorkItem::WI_Cfg: { |
| 470 if (!ALLOW_DUMP) | 457 if (!ALLOW_DUMP) |
| 471 llvm::report_fatal_error("WI_Cfg work item created inappropriately"); | 458 llvm::report_fatal_error("WI_Cfg work item created inappropriately"); |
| 472 | 459 lowerGlobalsIfNoCodeHasBeenSeen(); |
| 473 addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get()); | 460 accumulateGlobals(Item->getGlobalInits()); |
| 474 | 461 |
| 475 assert(getFlags().getOutFileType() == FT_Asm); | 462 assert(getFlags().getOutFileType() == FT_Asm); |
| 476 std::unique_ptr<Cfg> Func = Item->getCfg(); | 463 std::unique_ptr<Cfg> Func = Item->getCfg(); |
| 477 // Unfortunately, we have to temporarily install the Cfg in TLS | 464 // Unfortunately, we have to temporarily install the Cfg in TLS |
| 478 // because Variable::asType() uses the allocator to create the | 465 // because Variable::asType() uses the allocator to create the |
| 479 // differently-typed copy. | 466 // differently-typed copy. |
| 480 Cfg::setCurrentCfg(Func.get()); | 467 Cfg::setCurrentCfg(Func.get()); |
| 481 Func->emit(); | 468 Func->emit(); |
| 482 Cfg::setCurrentCfg(nullptr); | 469 Cfg::setCurrentCfg(nullptr); |
| 483 dumpStats(Func->getFunctionName()); | 470 dumpStats(Func->getFunctionName()); |
| 484 } break; | 471 } break; |
| 485 } | 472 } |
| 486 } | 473 } |
| 487 | 474 |
| 488 lowerGlobals(this, std::move(GlobalInits), | 475 // In case there is no code to be generated, we invoke the conditional |
| 489 TargetDataLowering::createLowering(this).get()); | 476 // lowerGlobals again -- this is a no-op if code has been emitted. |
| 477 lowerGlobalsIfNoCodeHasBeenSeen(); |
| 490 } | 478 } |
| 491 | 479 |
| 492 // Scan a string for S[0-9A-Z]*_ patterns and replace them with | 480 // Scan a string for S[0-9A-Z]*_ patterns and replace them with |
| 493 // S<num>_ where <num> is the next base-36 value. If a type name | 481 // S<num>_ where <num> is the next base-36 value. If a type name |
| 494 // legitimately contains that pattern, then the substitution will be | 482 // legitimately contains that pattern, then the substitution will be |
| 495 // made in error and most likely the link will fail. In this case, | 483 // made in error and most likely the link will fail. In this case, |
| 496 // the test classes can be rewritten not to use that pattern, which is | 484 // the test classes can be rewritten not to use that pattern, which is |
| 497 // much simpler and more reliable than implementing a full demangling | 485 // much simpler and more reliable than implementing a full demangling |
| 498 // parser. Another substitution-in-error may occur if a type | 486 // parser. Another substitution-in-error may occur if a type |
| 499 // identifier ends with the pattern S[0-9A-Z]*, because an immediately | 487 // identifier ends with the pattern S[0-9A-Z]*, because an immediately |
| (...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 906 Ctx = Func->getContext(); | 894 Ctx = Func->getContext(); |
| 907 Active = | 895 Active = |
| 908 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled(); | 896 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled(); |
| 909 if (Active) | 897 if (Active) |
| 910 Ctx->pushTimer(ID, StackID); | 898 Ctx->pushTimer(ID, StackID); |
| 911 } | 899 } |
| 912 | 900 |
| 913 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS); | 901 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS); |
| 914 | 902 |
| 915 } // end of namespace Ice | 903 } // end of namespace Ice |
| OLD | NEW |