OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 425 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
436 } | 436 } |
437 | 437 |
438 genTargetHelperCalls(); | 438 genTargetHelperCalls(); |
439 Func->dump("After target helper call insertion"); | 439 Func->dump("After target helper call insertion"); |
440 | 440 |
441 // Merge Alloca instructions, and lay out the stack. | 441 // Merge Alloca instructions, and lay out the stack. |
442 static constexpr bool SortAndCombineAllocas = true; | 442 static constexpr bool SortAndCombineAllocas = true; |
443 Func->processAllocas(SortAndCombineAllocas); | 443 Func->processAllocas(SortAndCombineAllocas); |
444 Func->dump("After Alloca processing"); | 444 Func->dump("After Alloca processing"); |
445 | 445 |
| 446 // Run this early so it can be used to focus optimizations on potentially hot |
| 447 // code. |
| 448 // TODO(stichnot,ascull): currently only used for regalloc, not for the |
| 449 // expensive high-level optimizations that could be focused on potentially |
| 450 // hot code. |
| 451 Func->generateLoopInfo(); |
| 452 Func->dump("After loop analysis"); |
| 453 if (getFlags().getLoopInvariantCodeMotion()) { |
| 454 Func->loopInvariantCodeMotion(); |
| 455 Func->dump("After LICM"); |
| 456 } |
| 457 |
446 if (getFlags().getEnableExperimental()) { | 458 if (getFlags().getEnableExperimental()) { |
447 Func->localCSE(); | 459 Func->localCSE(); |
448 Func->dump("After Local CSE"); | 460 Func->dump("After Local CSE"); |
449 } | 461 } |
450 if (getFlags().getEnableShortCircuit()) { | 462 if (getFlags().getEnableShortCircuit()) { |
451 Func->shortCircuitJumps(); | 463 Func->shortCircuitJumps(); |
452 Func->dump("After Short Circuiting"); | 464 Func->dump("After Short Circuiting"); |
453 } | 465 } |
454 | 466 |
455 if (!getFlags().getEnablePhiEdgeSplit()) { | 467 if (!getFlags().getEnablePhiEdgeSplit()) { |
456 // Lower Phi instructions. | 468 // Lower Phi instructions. |
457 Func->placePhiLoads(); | 469 Func->placePhiLoads(); |
458 if (Func->hasError()) | 470 if (Func->hasError()) |
459 return; | 471 return; |
460 Func->placePhiStores(); | 472 Func->placePhiStores(); |
461 if (Func->hasError()) | 473 if (Func->hasError()) |
462 return; | 474 return; |
463 Func->deletePhis(); | 475 Func->deletePhis(); |
464 if (Func->hasError()) | 476 if (Func->hasError()) |
465 return; | 477 return; |
466 Func->dump("After Phi lowering"); | 478 Func->dump("After Phi lowering"); |
467 } | 479 } |
468 | 480 |
469 // Run this early so it can be used to focus optimizations on potentially hot | |
470 // code. | |
471 // TODO(stichnot,ascull): currently only used for regalloc not | |
472 // expensive high level optimizations which could be focused on potentially | |
473 // hot code. | |
474 Func->computeLoopNestDepth(); | |
475 Func->dump("After loop nest depth analysis"); | |
476 | |
477 // Address mode optimization. | 481 // Address mode optimization. |
478 Func->getVMetadata()->init(VMK_SingleDefs); | 482 Func->getVMetadata()->init(VMK_SingleDefs); |
479 Func->doAddressOpt(); | 483 Func->doAddressOpt(); |
480 Func->materializeVectorShuffles(); | 484 Func->materializeVectorShuffles(); |
481 | 485 |
482 // Find read-modify-write opportunities. Do this after address mode | 486 // Find read-modify-write opportunities. Do this after address mode |
483 // optimization so that doAddressOpt() doesn't need to be applied to RMW | 487 // optimization so that doAddressOpt() doesn't need to be applied to RMW |
484 // instructions as well. | 488 // instructions as well. |
485 findRMW(); | 489 findRMW(); |
486 Func->dump("After RMW transform"); | 490 Func->dump("After RMW transform"); |
(...skipping 4926 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5413 | 5417 |
5414 OptAddr NewAddr; | 5418 OptAddr NewAddr; |
5415 NewAddr.Base = llvm::dyn_cast<Variable>(Addr); | 5419 NewAddr.Base = llvm::dyn_cast<Variable>(Addr); |
5416 if (NewAddr.Base == nullptr) | 5420 if (NewAddr.Base == nullptr) |
5417 return nullptr; | 5421 return nullptr; |
5418 | 5422 |
5419 // If the Base has more than one use or is live across multiple blocks, then | 5423 // If the Base has more than one use or is live across multiple blocks, then |
5420 // don't go further. Alternatively (?), never consider a transformation that | 5424 // don't go further. Alternatively (?), never consider a transformation that |
5421 // would change a variable that is currently *not* live across basic block | 5425 // would change a variable that is currently *not* live across basic block |
5422 // boundaries into one that *is*. | 5426 // boundaries into one that *is*. |
5423 if (Func->getVMetadata()->isMultiBlock( | 5427 if (!getFlags().getLoopInvariantCodeMotion()) { |
5424 NewAddr.Base) /* || Base->getUseCount() > 1*/) | 5428 // Multi-block address optimization is needed when LICM is enabled. |
5425 return nullptr; | 5429 // It might make sense to restrict this to the current node and loop header. |
5426 | 5430 if (Func->getVMetadata()->isMultiBlock( |
| 5431 NewAddr.Base) /* || Base->getUseCount() > 1*/) |
| 5432 return nullptr; |
| 5433 } |
5427 AddressOptimizer AddrOpt(Func); | 5434 AddressOptimizer AddrOpt(Func); |
5428 const bool MockBounds = getFlags().getMockBoundsCheck(); | 5435 const bool MockBounds = getFlags().getMockBoundsCheck(); |
5429 const Inst *Reason = nullptr; | 5436 const Inst *Reason = nullptr; |
5430 bool AddressWasOptimized = false; | 5437 bool AddressWasOptimized = false; |
5431 // The following unnamed struct identifies the address mode formation steps | 5438 // The following unnamed struct identifies the address mode formation steps |
5432 // that could potentially create an invalid memory operand (i.e., no free | 5439 // that could potentially create an invalid memory operand (i.e., no free |
5433 // slots for RebasePtr.) We add all those variables to this struct so that we | 5440 // slots for RebasePtr.) We add all those variables to this struct so that we |
5434 // can use memset() to reset all members to false. | 5441 // can use memset() to reset all members to false. |
5435 struct { | 5442 struct { |
5436 bool AssignBase = false; | 5443 bool AssignBase = false; |
(...skipping 2595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
8032 emitGlobal(*Var, SectionSuffix); | 8039 emitGlobal(*Var, SectionSuffix); |
8033 } | 8040 } |
8034 } | 8041 } |
8035 } break; | 8042 } break; |
8036 } | 8043 } |
8037 } | 8044 } |
8038 } // end of namespace X86NAMESPACE | 8045 } // end of namespace X86NAMESPACE |
8039 } // end of namespace Ice | 8046 } // end of namespace Ice |
8040 | 8047 |
8041 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 8048 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |