OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
75 | 75 |
76 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
77 public: | 77 public: |
78 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
79 PK_None, | 79 PK_None, |
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
81 PK_Icmp32, | 81 PK_Icmp32, |
82 PK_Icmp64, | 82 PK_Icmp64, |
83 PK_Fcmp, | 83 PK_Fcmp, |
84 PK_Trunc, | 84 PK_Trunc, |
85 PK_Arith // A flag-setting arithmetic instruction. | 85 PK_Arith // A flag-setting arithmetic instruction. |
86 }; | 86 }; |
87 | 87 |
88 /// Currently the actual enum values are not used (other than CK_None), but we | 88 /// Currently the actual enum values are not used (other than CK_None), but we |
89 /// go ahead and produce them anyway for symmetry with the | 89 /// go ahead and produce them anyway for symmetry with the |
90 /// BoolFoldingProducerKind. | 90 /// BoolFoldingProducerKind. |
91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
92 | 92 |
93 private: | 93 private: |
94 BoolFolding(const BoolFolding &) = delete; | 94 BoolFolding(const BoolFolding &) = delete; |
95 BoolFolding &operator=(const BoolFolding &) = delete; | 95 BoolFolding &operator=(const BoolFolding &) = delete; |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
295 } | 295 } |
296 } | 296 } |
297 | 297 |
/// One-time static setup for the target: hands Traits::initRegisterSet
/// pointers to TypeToRegisterSet, RegisterAliases and ScratchRegs so that it
/// can fill them in. The function takes no arguments and returns nothing.
/// NOTE(review): the contents written into those three tables are decided by
/// Traits::initRegisterSet, which is not visible here.
298 template <class Machine> void TargetX86Base<Machine>::staticInit() { | 298 template <class Machine> void TargetX86Base<Machine>::staticInit() { |
299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); | 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); |
300 } | 300 } |
301 | 301 |
302 template <class Machine> void TargetX86Base<Machine>::translateO2() { | 302 template <class Machine> void TargetX86Base<Machine>::translateO2() { |
303 TimerMarker T(TimerStack::TT_O2, Func); | 303 TimerMarker T(TimerStack::TT_O2, Func); |
304 | 304 |
| 305 // Merge Alloca instructions, and lay out the stack. |
| 306 static constexpr bool SortAndCombineAllocas = true; |
| 307 Func->processAllocas(SortAndCombineAllocas); |
| 308 Func->dump("After Alloca processing"); |
| 309 |
305 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 310 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
306 // Lower Phi instructions. | 311 // Lower Phi instructions. |
307 Func->placePhiLoads(); | 312 Func->placePhiLoads(); |
308 if (Func->hasError()) | 313 if (Func->hasError()) |
309 return; | 314 return; |
310 Func->placePhiStores(); | 315 Func->placePhiStores(); |
311 if (Func->hasError()) | 316 if (Func->hasError()) |
312 return; | 317 return; |
313 Func->deletePhis(); | 318 Func->deletePhis(); |
314 if (Func->hasError()) | 319 if (Func->hasError()) |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
413 Func->doNopInsertion(); | 418 Func->doNopInsertion(); |
414 | 419 |
415 // Mark nodes that require sandbox alignment | 420 // Mark nodes that require sandbox alignment |
416 if (Ctx->getFlags().getUseSandboxing()) | 421 if (Ctx->getFlags().getUseSandboxing()) |
417 Func->markNodesForSandboxing(); | 422 Func->markNodesForSandboxing(); |
418 } | 423 } |
419 | 424 |
420 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 425 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
421 TimerMarker T(TimerStack::TT_Om1, Func); | 426 TimerMarker T(TimerStack::TT_Om1, Func); |
422 | 427 |
| 428 // Do not merge Alloca instructions, and lay out the stack. |
| 429 static constexpr bool SortAndCombineAllocas = false; |
| 430 Func->processAllocas(SortAndCombineAllocas); |
| 431 Func->dump("After Alloca processing"); |
| 432 |
423 Func->placePhiLoads(); | 433 Func->placePhiLoads(); |
424 if (Func->hasError()) | 434 if (Func->hasError()) |
425 return; | 435 return; |
426 Func->placePhiStores(); | 436 Func->placePhiStores(); |
427 if (Func->hasError()) | 437 if (Func->hasError()) |
428 return; | 438 return; |
429 Func->deletePhis(); | 439 Func->deletePhis(); |
430 if (Func->hasError()) | 440 if (Func->hasError()) |
431 return; | 441 return; |
432 Func->dump("After Phi lowering"); | 442 Func->dump("After Phi lowering"); |
(...skipping 505 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
/// Returns the register set described by the \p Include and \p Exclude masks.
/// This is a pure forwarding wrapper: both masks are passed unchanged to
/// Traits::getRegisterSet and its llvm::SmallBitVector result is returned
/// as-is.
/// NOTE(review): how Include and Exclude are combined is defined by
/// Traits::getRegisterSet, which is not visible here.
938 template <class Machine> | 948 template <class Machine> |
939 llvm::SmallBitVector | 949 llvm::SmallBitVector |
940 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 950 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
941 RegSetMask Exclude) const { | 951 RegSetMask Exclude) const { |
942 return Traits::getRegisterSet(Include, Exclude); | 952 return Traits::getRegisterSet(Include, Exclude); |
943 } | 953 } |
944 | 954 |
945 template <class Machine> | 955 template <class Machine> |
946 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 956 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
947 if (!Inst->getKnownFrameOffset()) | 957 if (!Inst->getKnownFrameOffset()) |
948 IsEbpBasedFrame = true; | 958 setHasFramePointer(); |
949 // Conservatively require the stack to be aligned. Some stack adjustment | 959 // Conservatively require the stack to be aligned. Some stack adjustment |
950 // operations implemented below assume that the stack is aligned before the | 960 // operations implemented below assume that the stack is aligned before the |
951 // alloca. All the alloca code ensures that the stack alignment is preserved | 961 // alloca. All the alloca code ensures that the stack alignment is preserved |
952 // after the alloca. The stack alignment restriction can be relaxed in some | 962 // after the alloca. The stack alignment restriction can be relaxed in some |
953 // cases. | 963 // cases. |
954 NeedsStackAlignment = true; | 964 NeedsStackAlignment = true; |
955 | 965 |
956 // TODO(stichnot): minimize the number of adjustments of esp, etc. | 966 // TODO(stichnot): minimize the number of adjustments of esp, etc. |
957 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 967 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
958 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 968 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
959 Variable *Dest = Inst->getDest(); | 969 Variable *Dest = Inst->getDest(); |
960 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 970 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
961 // For default align=0, set it to the real value 1, to avoid any | 971 // For default align=0, set it to the real value 1, to avoid any |
962 // bit-manipulation problems below. | 972 // bit-manipulation problems below. |
963 AlignmentParam = std::max(AlignmentParam, 1u); | 973 AlignmentParam = std::max(AlignmentParam, 1u); |
964 | 974 |
965 // LLVM enforces power of 2 alignment. | 975 // LLVM enforces power of 2 alignment. |
966 assert(llvm::isPowerOf2_32(AlignmentParam)); | 976 assert(llvm::isPowerOf2_32(AlignmentParam)); |
967 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 977 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
968 | 978 |
969 uint32_t Alignment = | 979 uint32_t Alignment = |
970 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 980 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
971 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { | 981 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| 982 setHasFramePointer(); |
972 _and(esp, Ctx->getConstantInt32(-Alignment)); | 983 _and(esp, Ctx->getConstantInt32(-Alignment)); |
973 } | 984 } |
974 if (const auto *ConstantTotalSize = | 985 if (const auto *ConstantTotalSize = |
975 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 986 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
976 uint32_t Value = ConstantTotalSize->getValue(); | 987 uint32_t Value = ConstantTotalSize->getValue(); |
977 Value = Utils::applyAlignment(Value, Alignment); | 988 Value = Utils::applyAlignment(Value, Alignment); |
978 if (Inst->getKnownFrameOffset()) { | 989 if (Inst->getKnownFrameOffset()) { |
979 _adjust_stack(Value); | 990 _adjust_stack(Value); |
980 FixedAllocaSizeBytes += Value; | 991 FixedAllocaSizeBytes += Value; |
981 } else { | 992 } else { |
(...skipping 4511 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5493 } | 5504 } |
5494 | 5505 |
5495 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5506 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
5496 // Before doing anything with a Mem operand, we need to ensure that the | 5507 // Before doing anything with a Mem operand, we need to ensure that the |
5497 // Base and Index components are in physical registers. | 5508 // Base and Index components are in physical registers. |
5498 Variable *Base = Mem->getBase(); | 5509 Variable *Base = Mem->getBase(); |
5499 Variable *Index = Mem->getIndex(); | 5510 Variable *Index = Mem->getIndex(); |
5500 Variable *RegBase = nullptr; | 5511 Variable *RegBase = nullptr; |
5501 Variable *RegIndex = nullptr; | 5512 Variable *RegIndex = nullptr; |
5502 if (Base) { | 5513 if (Base) { |
5503 RegBase = legalizeToReg(Base); | 5514 RegBase = llvm::cast<Variable>( |
| 5515 legalize(Base, Legal_Reg | Legal_Rematerializable)); |
5504 } | 5516 } |
5505 if (Index) { | 5517 if (Index) { |
5506 RegIndex = legalizeToReg(Index); | 5518 RegIndex = llvm::cast<Variable>( |
| 5519 legalize(Index, Legal_Reg | Legal_Rematerializable)); |
5507 } | 5520 } |
5508 if (Base != RegBase || Index != RegIndex) { | 5521 if (Base != RegBase || Index != RegIndex) { |
5509 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), | 5522 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), |
5510 RegIndex, Mem->getShift(), | 5523 RegIndex, Mem->getShift(), |
5511 Mem->getSegmentRegister()); | 5524 Mem->getSegmentRegister()); |
5512 } | 5525 } |
5513 | 5526 |
5514 // For all Memory Operands, we do randomization/pooling here | 5527 // For all Memory Operands, we do randomization/pooling here |
5515 From = randomizeOrPoolImmediate(Mem); | 5528 From = randomizeOrPoolImmediate(Mem); |
5516 | 5529 |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5568 if (NeedsReg) { | 5581 if (NeedsReg) { |
5569 From = copyToReg(From, RegNum); | 5582 From = copyToReg(From, RegNum); |
5570 } | 5583 } |
5571 return From; | 5584 return From; |
5572 } | 5585 } |
5573 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5586 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5574 // Check if the variable is guaranteed a physical register. This can happen | 5587 // Check if the variable is guaranteed a physical register. This can happen |
5575 // either when the variable is pre-colored or when it is assigned infinite | 5588 // either when the variable is pre-colored or when it is assigned infinite |
5576 // weight. | 5589 // weight. |
5577 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5590 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| 5591 bool MustRematerialize = |
| 5592 (Var->isRematerializable() && !(Allowed & Legal_Rematerializable)); |
5578 // We need a new physical register for the operand if: | 5593 // We need a new physical register for the operand if: |
5579 // Mem is not allowed and Var isn't guaranteed a physical | 5594 // - Mem is not allowed and Var isn't guaranteed a physical register, or |
5580 // register, or | 5595 // - RegNum is required and Var->getRegNum() doesn't match, or |
5581 // RegNum is required and Var->getRegNum() doesn't match. | 5596 // - Var is a rematerializable variable and rematerializable pass-through is |
5582 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5597 // not allowed (in which case we need an lea instruction). |
5583 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5598 if (MustRematerialize) { |
| 5599 assert(Ty == IceType_i32); |
| 5600 Variable *NewVar = makeReg(Ty, RegNum); |
| 5601 // Since Var is rematerializable, the offset will be added when the lea is |
| 5602 // emitted. |
| 5603 constexpr Constant *NoOffset = nullptr; |
| 5604 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset); |
| 5605 _lea(NewVar, Mem); |
| 5606 From = NewVar; |
| 5607 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 5608 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || |
| 5609 MustRematerialize) { |
5584 From = copyToReg(From, RegNum); | 5610 From = copyToReg(From, RegNum); |
5585 } | 5611 } |
5586 return From; | 5612 return From; |
5587 } | 5613 } |
5588 llvm_unreachable("Unhandled operand kind in legalize()"); | 5614 llvm_unreachable("Unhandled operand kind in legalize()"); |
5589 return From; | 5615 return From; |
5590 } | 5616 } |
5591 | 5617 |
5592 /// Provide a trivial wrapper to legalize() for this common usage. | 5618 /// Provide a trivial wrapper to legalize() for this common usage. |
5593 template <class Machine> | 5619 template <class Machine> |
(...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5952 } | 5978 } |
5953 // the offset is not eligible for blinding or pooling, return the original | 5979 // the offset is not eligible for blinding or pooling, return the original |
5954 // mem operand | 5980 // mem operand |
5955 return MemOperand; | 5981 return MemOperand; |
5956 } | 5982 } |
5957 | 5983 |
5958 } // end of namespace X86Internal | 5984 } // end of namespace X86Internal |
5959 } // end of namespace Ice | 5985 } // end of namespace Ice |
5960 | 5986 |
5961 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5987 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |