Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(904)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1411583007: Combine allocas (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Finish fast path. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/fused-alloca.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 75
76 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
77 public: 77 public:
78 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
79 PK_None, 79 PK_None,
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
81 PK_Icmp32, 81 PK_Icmp32,
82 PK_Icmp64, 82 PK_Icmp64,
83 PK_Fcmp, 83 PK_Fcmp,
84 PK_Trunc, 84 PK_Trunc,
85 PK_Arith // A flag-setting arithmetic instruction. 85 PK_Arith // A flag-setting arithmetic instruction.
86 }; 86 };
87 87
88 /// Currently the actual enum values are not used (other than CK_None), but we 88 /// Currently the actual enum values are not used (other than CK_None), but we
89 /// go ahead and produce them anyway for symmetry with the 89 /// go ahead and produce them anyway for symmetry with the
90 /// BoolFoldingProducerKind. 90 /// BoolFoldingProducerKind.
91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
92 92
93 private: 93 private:
94 BoolFolding(const BoolFolding &) = delete; 94 BoolFolding(const BoolFolding &) = delete;
95 BoolFolding &operator=(const BoolFolding &) = delete; 95 BoolFolding &operator=(const BoolFolding &) = delete;
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 } 295 }
296 } 296 }
297 297
298 template <class Machine> void TargetX86Base<Machine>::staticInit() { 298 template <class Machine> void TargetX86Base<Machine>::staticInit() {
299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
300 } 300 }
301 301
302 template <class Machine> void TargetX86Base<Machine>::translateO2() { 302 template <class Machine> void TargetX86Base<Machine>::translateO2() {
303 TimerMarker T(TimerStack::TT_O2, Func); 303 TimerMarker T(TimerStack::TT_O2, Func);
304 304
305 // Merge Alloca instructions, and lay out the stack.
306 static constexpr bool SortAndCombineAllocas = true;
307 Func->processAllocas(SortAndCombineAllocas);
308 Func->dump("After Alloca processing");
309
305 if (!Ctx->getFlags().getPhiEdgeSplit()) { 310 if (!Ctx->getFlags().getPhiEdgeSplit()) {
306 // Lower Phi instructions. 311 // Lower Phi instructions.
307 Func->placePhiLoads(); 312 Func->placePhiLoads();
308 if (Func->hasError()) 313 if (Func->hasError())
309 return; 314 return;
310 Func->placePhiStores(); 315 Func->placePhiStores();
311 if (Func->hasError()) 316 if (Func->hasError())
312 return; 317 return;
313 Func->deletePhis(); 318 Func->deletePhis();
314 if (Func->hasError()) 319 if (Func->hasError())
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
413 Func->doNopInsertion(); 418 Func->doNopInsertion();
414 419
415 // Mark nodes that require sandbox alignment 420 // Mark nodes that require sandbox alignment
416 if (Ctx->getFlags().getUseSandboxing()) 421 if (Ctx->getFlags().getUseSandboxing())
417 Func->markNodesForSandboxing(); 422 Func->markNodesForSandboxing();
418 } 423 }
419 424
420 template <class Machine> void TargetX86Base<Machine>::translateOm1() { 425 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
421 TimerMarker T(TimerStack::TT_Om1, Func); 426 TimerMarker T(TimerStack::TT_Om1, Func);
422 427
428 // Do not merge Alloca instructions, and lay out the stack.
429 static constexpr bool SortAndCombineAllocas = false;
430 Func->processAllocas(SortAndCombineAllocas);
431 Func->dump("After Alloca processing");
432
423 Func->placePhiLoads(); 433 Func->placePhiLoads();
424 if (Func->hasError()) 434 if (Func->hasError())
425 return; 435 return;
426 Func->placePhiStores(); 436 Func->placePhiStores();
427 if (Func->hasError()) 437 if (Func->hasError())
428 return; 438 return;
429 Func->deletePhis(); 439 Func->deletePhis();
430 if (Func->hasError()) 440 if (Func->hasError())
431 return; 441 return;
432 Func->dump("After Phi lowering"); 442 Func->dump("After Phi lowering");
(...skipping 505 matching lines...) Expand 10 before | Expand all | Expand 10 after
938 template <class Machine> 948 template <class Machine>
939 llvm::SmallBitVector 949 llvm::SmallBitVector
940 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 950 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
941 RegSetMask Exclude) const { 951 RegSetMask Exclude) const {
942 return Traits::getRegisterSet(Include, Exclude); 952 return Traits::getRegisterSet(Include, Exclude);
943 } 953 }
944 954
945 template <class Machine> 955 template <class Machine>
946 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 956 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
947 if (!Inst->getKnownFrameOffset()) 957 if (!Inst->getKnownFrameOffset())
948 IsEbpBasedFrame = true; 958 setHasFramePointer();
949 // Conservatively require the stack to be aligned. Some stack adjustment 959 // Conservatively require the stack to be aligned. Some stack adjustment
950 // operations implemented below assume that the stack is aligned before the 960 // operations implemented below assume that the stack is aligned before the
951 // alloca. All the alloca code ensures that the stack alignment is preserved 961 // alloca. All the alloca code ensures that the stack alignment is preserved
952 // after the alloca. The stack alignment restriction can be relaxed in some 962 // after the alloca. The stack alignment restriction can be relaxed in some
953 // cases. 963 // cases.
954 NeedsStackAlignment = true; 964 NeedsStackAlignment = true;
955 965
956 // TODO(stichnot): minimize the number of adjustments of esp, etc. 966 // TODO(stichnot): minimize the number of adjustments of esp, etc.
957 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); 967 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
958 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 968 Operand *TotalSize = legalize(Inst->getSizeInBytes());
959 Variable *Dest = Inst->getDest(); 969 Variable *Dest = Inst->getDest();
960 uint32_t AlignmentParam = Inst->getAlignInBytes(); 970 uint32_t AlignmentParam = Inst->getAlignInBytes();
961 // For default align=0, set it to the real value 1, to avoid any 971 // For default align=0, set it to the real value 1, to avoid any
962 // bit-manipulation problems below. 972 // bit-manipulation problems below.
963 AlignmentParam = std::max(AlignmentParam, 1u); 973 AlignmentParam = std::max(AlignmentParam, 1u);
964 974
965 // LLVM enforces power of 2 alignment. 975 // LLVM enforces power of 2 alignment.
966 assert(llvm::isPowerOf2_32(AlignmentParam)); 976 assert(llvm::isPowerOf2_32(AlignmentParam));
967 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 977 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
968 978
969 uint32_t Alignment = 979 uint32_t Alignment =
970 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 980 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
971 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { 981 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
982 setHasFramePointer();
972 _and(esp, Ctx->getConstantInt32(-Alignment)); 983 _and(esp, Ctx->getConstantInt32(-Alignment));
973 } 984 }
974 if (const auto *ConstantTotalSize = 985 if (const auto *ConstantTotalSize =
975 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 986 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
976 uint32_t Value = ConstantTotalSize->getValue(); 987 uint32_t Value = ConstantTotalSize->getValue();
977 Value = Utils::applyAlignment(Value, Alignment); 988 Value = Utils::applyAlignment(Value, Alignment);
978 if (Inst->getKnownFrameOffset()) { 989 if (Inst->getKnownFrameOffset()) {
979 _adjust_stack(Value); 990 _adjust_stack(Value);
980 FixedAllocaSizeBytes += Value; 991 FixedAllocaSizeBytes += Value;
981 } else { 992 } else {
(...skipping 4511 matching lines...) Expand 10 before | Expand all | Expand 10 after
5493 } 5504 }
5494 5505
5495 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 5506 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5496 // Before doing anything with a Mem operand, we need to ensure that the 5507 // Before doing anything with a Mem operand, we need to ensure that the
5497 // Base and Index components are in physical registers. 5508 // Base and Index components are in physical registers.
5498 Variable *Base = Mem->getBase(); 5509 Variable *Base = Mem->getBase();
5499 Variable *Index = Mem->getIndex(); 5510 Variable *Index = Mem->getIndex();
5500 Variable *RegBase = nullptr; 5511 Variable *RegBase = nullptr;
5501 Variable *RegIndex = nullptr; 5512 Variable *RegIndex = nullptr;
5502 if (Base) { 5513 if (Base) {
5503 RegBase = legalizeToReg(Base); 5514 RegBase = llvm::cast<Variable>(
5515 legalize(Base, Legal_Reg | Legal_Rematerializable));
5504 } 5516 }
5505 if (Index) { 5517 if (Index) {
5506 RegIndex = legalizeToReg(Index); 5518 RegIndex = llvm::cast<Variable>(
5519 legalize(Index, Legal_Reg | Legal_Rematerializable));
5507 } 5520 }
5508 if (Base != RegBase || Index != RegIndex) { 5521 if (Base != RegBase || Index != RegIndex) {
5509 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), 5522 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
5510 RegIndex, Mem->getShift(), 5523 RegIndex, Mem->getShift(),
5511 Mem->getSegmentRegister()); 5524 Mem->getSegmentRegister());
5512 } 5525 }
5513 5526
5514 // For all Memory Operands, we do randomization/pooling here 5527 // For all Memory Operands, we do randomization/pooling here
5515 From = randomizeOrPoolImmediate(Mem); 5528 From = randomizeOrPoolImmediate(Mem);
5516 5529
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
5568 if (NeedsReg) { 5581 if (NeedsReg) {
5569 From = copyToReg(From, RegNum); 5582 From = copyToReg(From, RegNum);
5570 } 5583 }
5571 return From; 5584 return From;
5572 } 5585 }
5573 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 5586 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5574 // Check if the variable is guaranteed a physical register. This can happen 5587 // Check if the variable is guaranteed a physical register. This can happen
5575 // either when the variable is pre-colored or when it is assigned infinite 5588 // either when the variable is pre-colored or when it is assigned infinite
5576 // weight. 5589 // weight.
5577 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5590 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5591 bool MustRematerialize =
5592 (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
5578 // We need a new physical register for the operand if: 5593 // We need a new physical register for the operand if:
5579 // Mem is not allowed and Var isn't guaranteed a physical 5594 // - Mem is not allowed and Var isn't guaranteed a physical register, or
5580 // register, or 5595 // - RegNum is required and Var->getRegNum() doesn't match, or
5581 // RegNum is required and Var->getRegNum() doesn't match. 5596 // - Var is a rematerializable variable and rematerializable pass-through is
5582 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5597 // not allowed (in which case we need an lea instruction).
5583 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5598 if (MustRematerialize) {
5599 assert(Ty == IceType_i32);
5600 Variable *NewVar = makeReg(Ty, RegNum);
5601 // Since Var is rematerializable, the offset will be added when the lea is
5602 // emitted.
5603 constexpr Constant *NoOffset = nullptr;
5604 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset);
5605 _lea(NewVar, Mem);
5606 From = NewVar;
5607 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5608 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) ||
5609 MustRematerialize) {
5584 From = copyToReg(From, RegNum); 5610 From = copyToReg(From, RegNum);
5585 } 5611 }
5586 return From; 5612 return From;
5587 } 5613 }
5588 llvm_unreachable("Unhandled operand kind in legalize()"); 5614 llvm_unreachable("Unhandled operand kind in legalize()");
5589 return From; 5615 return From;
5590 } 5616 }
5591 5617
5592 /// Provide a trivial wrapper to legalize() for this common usage. 5618 /// Provide a trivial wrapper to legalize() for this common usage.
5593 template <class Machine> 5619 template <class Machine>
(...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after
5952 } 5978 }
5953 // the offset is not eligible for blinding or pooling, return the original 5979 // the offset is not eligible for blinding or pooling, return the original
5954 // mem operand 5980 // mem operand
5955 return MemOperand; 5981 return MemOperand;
5956 } 5982 }
5957 5983
5958 } // end of namespace X86Internal 5984 } // end of namespace X86Internal
5959 } // end of namespace Ice 5985 } // end of namespace Ice
5960 5986
5961 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5987 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/fused-alloca.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698