| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.h - x86-32 lowering ---*- C++ -*-===// | 1 //===- subzero/src/IceTargetLoweringX86Base.h - x86 lowering ----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file declares the TargetLoweringX8632 class, which | 10 // This file declares the TargetLoweringX86 template class, which |
| 11 // implements the TargetLowering interface for the x86-32 | 11 // implements the TargetLowering base interface for the x86 |
| 12 // architecture. | 12 // architecture. |
| 13 // | 13 // |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632_H | 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASE_H |
| 17 #define SUBZERO_SRC_ICETARGETLOWERINGX8632_H | 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASE_H |
| 18 | 18 |
| 19 #include <unordered_map> | 19 #include <unordered_map> |
| 20 | 20 |
| 21 #include "IceAssemblerX8632.h" | |
| 22 #include "IceDefs.h" | 21 #include "IceDefs.h" |
| 23 #include "IceInst.h" | 22 #include "IceInst.h" |
| 24 #include "IceInstX8632.h" | |
| 25 #include "IceRegistersX8632.h" | |
| 26 #include "IceTargetLowering.h" | 23 #include "IceTargetLowering.h" |
| 27 | 24 |
| 28 namespace Ice { | 25 namespace Ice { |
| 26 namespace X86Internal { |
| 29 | 27 |
| 30 class BoolFoldingEntry { | 28 template <class MachineTraits> class BoolFolding; |
| 31 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 29 |
| 30 template <class Machine> struct MachineTraits {}; |
| 31 |
| 32 template <class Machine> class TargetX86Base : public Machine { |
| 33 TargetX86Base() = delete; |
| 34 TargetX86Base(const TargetX86Base &) = delete; |
| 35 TargetX86Base &operator=(const TargetX86Base &) = delete; |
| 36 |
| 37 protected: |
| 38 using TargetLowering::H_bitcast_16xi1_i16; |
| 39 using TargetLowering::H_bitcast_8xi1_i8; |
| 40 using TargetLowering::H_bitcast_i16_16xi1; |
| 41 using TargetLowering::H_bitcast_i8_8xi1; |
| 42 using TargetLowering::H_call_ctpop_i32; |
| 43 using TargetLowering::H_call_ctpop_i64; |
| 44 using TargetLowering::H_call_longjmp; |
| 45 using TargetLowering::H_call_memcpy; |
| 46 using TargetLowering::H_call_memmove; |
| 47 using TargetLowering::H_call_memset; |
| 48 using TargetLowering::H_call_read_tp; |
| 49 using TargetLowering::H_call_setjmp; |
| 50 using TargetLowering::H_fptosi_f32_i64; |
| 51 using TargetLowering::H_fptosi_f64_i64; |
| 52 using TargetLowering::H_fptoui_4xi32_f32; |
| 53 using TargetLowering::H_fptoui_f32_i32; |
| 54 using TargetLowering::H_fptoui_f32_i64; |
| 55 using TargetLowering::H_fptoui_f64_i32; |
| 56 using TargetLowering::H_fptoui_f64_i64; |
| 57 using TargetLowering::H_frem_f32; |
| 58 using TargetLowering::H_frem_f64; |
| 59 using TargetLowering::H_sdiv_i64; |
| 60 using TargetLowering::H_sitofp_i64_f32; |
| 61 using TargetLowering::H_sitofp_i64_f64; |
| 62 using TargetLowering::H_srem_i64; |
| 63 using TargetLowering::H_udiv_i64; |
| 64 using TargetLowering::H_uitofp_4xi32_4xf32; |
| 65 using TargetLowering::H_uitofp_i32_f32; |
| 66 using TargetLowering::H_uitofp_i32_f64; |
| 67 using TargetLowering::H_uitofp_i64_f32; |
| 68 using TargetLowering::H_uitofp_i64_f64; |
| 69 using TargetLowering::H_urem_i64; |
| 70 |
| 71 using TargetLowering::alignStackSpillAreas; |
| 72 using TargetLowering::assignVarStackSlots; |
| 73 using TargetLowering::inferTwoAddress; |
| 74 using TargetLowering::makeHelperCall; |
| 75 using TargetLowering::getVarStackSlotParams; |
| 32 | 76 |
| 33 public: | 77 public: |
| 34 BoolFoldingEntry() = default; | 78 using Traits = MachineTraits<Machine>; |
| 35 explicit BoolFoldingEntry(Inst *I); | 79 using BoolFolding = ::Ice::X86Internal::BoolFolding<Traits>; |
| 36 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | |
| 37 // Instr is the instruction producing the i1-type variable of interest. | |
| 38 Inst *Instr = nullptr; | |
| 39 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | |
| 40 bool IsComplex = false; | |
| 41 // IsLiveOut is initialized conservatively to true, and is set to false when | |
| 42 // we encounter an instruction that ends Var's live range. We disable the | |
| 43 // folding optimization when Var is live beyond this basic block. Note that | |
| 44 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | |
| 45 // always be true and the folding optimization will never be performed. | |
| 46 bool IsLiveOut = true; | |
| 47 // NumUses counts the number of times Var is used as a source operand in the | |
| 48 // basic block. If IsComplex is true and there is more than one use of Var, | |
| 49 // then the folding optimization is disabled for Var. | |
| 50 uint32_t NumUses = 0; | |
| 51 }; | |
| 52 | 80 |
| 53 class BoolFolding { | 81 using TargetLowering::RegSet_All; |
| 54 public: | 82 using TargetLowering::RegSet_CalleeSave; |
| 55 enum BoolFoldingProducerKind { | 83 using TargetLowering::RegSet_CallerSave; |
| 56 PK_None, | 84 using TargetLowering::RegSet_FramePointer; |
| 57 PK_Icmp32, | 85 using TargetLowering::RegSet_None; |
| 58 PK_Icmp64, | 86 using TargetLowering::RegSet_StackPointer; |
| 59 PK_Fcmp, | 87 using TargetLowering::Context; |
| 60 PK_Trunc | 88 using TargetLowering::Ctx; |
| 61 }; | 89 using TargetLowering::Func; |
| 90 using TargetLowering::RegSetMask; |
| 62 | 91 |
| 63 // Currently the actual enum values are not used (other than CK_None), but we | 92 using TargetLowering::_bundle_lock; |
| 64 // go | 93 using TargetLowering::_bundle_unlock; |
| 65 // ahead and produce them anyway for symmetry with the | 94 using TargetLowering::getContext; |
| 66 // BoolFoldingProducerKind. | 95 using TargetLowering::getStackAdjustment; |
| 67 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 96 using TargetLowering::regAlloc; |
| 97 using TargetLowering::resetStackAdjustment; |
| 68 | 98 |
| 69 private: | 99 static TargetX86Base *create(Cfg *Func) { return new TargetX86Base(Func); } |
| 70 BoolFolding(const BoolFolding &) = delete; | |
| 71 BoolFolding &operator=(const BoolFolding &) = delete; | |
| 72 | |
| 73 public: | |
| 74 BoolFolding() = default; | |
| 75 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | |
| 76 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | |
| 77 static bool hasComplexLowering(const Inst *Instr); | |
| 78 void init(CfgNode *Node); | |
| 79 const Inst *getProducerFor(const Operand *Opnd) const; | |
| 80 void dump(const Cfg *Func) const; | |
| 81 | |
| 82 private: | |
| 83 // Returns true if Producers contains a valid entry for the given VarNum. | |
| 84 bool containsValid(SizeT VarNum) const { | |
| 85 auto Element = Producers.find(VarNum); | |
| 86 return Element != Producers.end() && Element->second.Instr != nullptr; | |
| 87 } | |
| 88 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | |
| 89 // Producers maps Variable::Number to a BoolFoldingEntry. | |
| 90 std::unordered_map<SizeT, BoolFoldingEntry> Producers; | |
| 91 }; | |
| 92 | |
| 93 class TargetX8632 : public TargetLowering { | |
| 94 TargetX8632() = delete; | |
| 95 TargetX8632(const TargetX8632 &) = delete; | |
| 96 TargetX8632 &operator=(const TargetX8632 &) = delete; | |
| 97 | |
| 98 public: | |
| 99 static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); } | |
| 100 | 100 |
| 101 void translateOm1() override; | 101 void translateOm1() override; |
| 102 void translateO2() override; | 102 void translateO2() override; |
| 103 void doLoadOpt(); | 103 void doLoadOpt(); |
| 104 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; | 104 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; |
| 105 | 105 |
| 106 SizeT getNumRegisters() const override { return RegX8632::Reg_NUM; } | 106 SizeT getNumRegisters() const override { return RegX8632::Reg_NUM; } |
| 107 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override; | 107 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override; |
| 108 IceString getRegName(SizeT RegNum, Type Ty) const override; | 108 IceString getRegName(SizeT RegNum, Type Ty) const override; |
| 109 llvm::SmallBitVector getRegisterSet(RegSetMask Include, | 109 llvm::SmallBitVector getRegisterSet(RegSetMask Include, |
| (...skipping 27 matching lines...) Expand all Loading... |
| 137 // Ensure that a 64-bit Variable has been split into 2 32-bit | 137 // Ensure that a 64-bit Variable has been split into 2 32-bit |
| 138 // Variables, creating them if necessary. This is needed for all | 138 // Variables, creating them if necessary. This is needed for all |
| 139 // I64 operations, and it is needed for pushing F64 arguments for | 139 // I64 operations, and it is needed for pushing F64 arguments for |
| 140 // function calls using the 32-bit push instruction (though the | 140 // function calls using the 32-bit push instruction (though the |
| 141 // latter could be done by directly writing to the stack). | 141 // latter could be done by directly writing to the stack). |
| 142 void split64(Variable *Var); | 142 void split64(Variable *Var); |
| 143 Operand *loOperand(Operand *Operand); | 143 Operand *loOperand(Operand *Operand); |
| 144 Operand *hiOperand(Operand *Operand); | 144 Operand *hiOperand(Operand *Operand); |
| 145 void finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 145 void finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| 146 size_t BasicFrameOffset, size_t &InArgsSizeBytes); | 146 size_t BasicFrameOffset, size_t &InArgsSizeBytes); |
| 147 X8632::Address stackVarToAsmOperand(const Variable *Var) const; | 147 X8632::Address stackVarToAsmOperand(const Variable *Var) const final; |
| 148 | 148 |
| 149 enum X86InstructionSet { | 149 typename Traits::InstructionSet getInstructionSet() const final { |
| 150 Begin, | 150 return InstructionSet; |
| 151 // SSE2 is the PNaCl baseline instruction set. | 151 } |
| 152 SSE2 = Begin, | |
| 153 SSE4_1, | |
| 154 End | |
| 155 }; | |
| 156 | |
| 157 X86InstructionSet getInstructionSet() const { return InstructionSet; } | |
| 158 | 152 |
| 159 protected: | 153 protected: |
| 160 explicit TargetX8632(Cfg *Func); | 154 explicit TargetX86Base(Cfg *Func); |
| 161 | 155 |
| 162 void postLower() override; | 156 void postLower() override; |
| 163 | 157 |
| 164 void lowerAlloca(const InstAlloca *Inst) override; | 158 void lowerAlloca(const InstAlloca *Inst) override; |
| 165 void lowerArithmetic(const InstArithmetic *Inst) override; | 159 void lowerArithmetic(const InstArithmetic *Inst) override; |
| 166 void lowerAssign(const InstAssign *Inst) override; | 160 void lowerAssign(const InstAssign *Inst) override; |
| 167 void lowerBr(const InstBr *Inst) override; | 161 void lowerBr(const InstBr *Inst) override; |
| 168 void lowerCall(const InstCall *Inst) override; | 162 void lowerCall(const InstCall *Inst) override; |
| 169 void lowerCast(const InstCast *Inst) override; | 163 void lowerCast(const InstCast *Inst) override; |
| 170 void lowerExtractElement(const InstExtractElement *Inst) override; | 164 void lowerExtractElement(const InstExtractElement *Inst) override; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 192 void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected, | 186 void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected, |
| 193 Operand *Desired); | 187 Operand *Desired); |
| 194 // Attempt a more optimized lowering of cmpxchg. Returns true if optimized. | 188 // Attempt a more optimized lowering of cmpxchg. Returns true if optimized. |
| 195 bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr, | 189 bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr, |
| 196 Operand *Expected, Operand *Desired); | 190 Operand *Expected, Operand *Desired); |
| 197 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, | 191 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, |
| 198 Operand *Val); | 192 Operand *Val); |
| 199 void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal, | 193 void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal, |
| 200 Operand *SecondVal); | 194 Operand *SecondVal); |
| 201 | 195 |
| 202 typedef void (TargetX8632::*LowerBinOp)(Variable *, Operand *); | 196 typedef void (TargetX86Base::*LowerBinOp)(Variable *, Operand *); |
| 203 void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi, | 197 void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi, |
| 204 Variable *Dest, Operand *Ptr, Operand *Val); | 198 Variable *Dest, Operand *Ptr, Operand *Val); |
| 205 | 199 |
| 206 void eliminateNextVectorSextInstruction(Variable *SignExtendedResult); | 200 void eliminateNextVectorSextInstruction(Variable *SignExtendedResult); |
| 207 | 201 |
| 208 void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, | 202 void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, |
| 209 Operand *Src0, Operand *Src1); | 203 Operand *Src0, Operand *Src1); |
| 210 | 204 |
| 211 // Operand legalization helpers. To deal with address mode | 205 // Operand legalization helpers. To deal with address mode |
| 212 // constraints, the helpers will create a new Operand and emit | 206 // constraints, the helpers will create a new Operand and emit |
| (...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 578 void _xor_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) { | 572 void _xor_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) { |
| 579 Context.insert(InstX8632XorRMW::create(Func, DestSrc0, Src1)); | 573 Context.insert(InstX8632XorRMW::create(Func, DestSrc0, Src1)); |
| 580 } | 574 } |
| 581 void _set_dest_nonkillable() { | 575 void _set_dest_nonkillable() { |
| 582 Context.getLastInserted()->setDestNonKillable(); | 576 Context.getLastInserted()->setDestNonKillable(); |
| 583 } | 577 } |
| 584 | 578 |
| 585 bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1); | 579 bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1); |
| 586 void findRMW(); | 580 void findRMW(); |
| 587 | 581 |
| 588 X86InstructionSet InstructionSet = X86InstructionSet::Begin; | 582 typename Traits::InstructionSet InstructionSet = |
| 583 Traits::InstructionSet::Begin; |
| 589 bool IsEbpBasedFrame = false; | 584 bool IsEbpBasedFrame = false; |
| 590 bool NeedsStackAlignment = false; | 585 bool NeedsStackAlignment = false; |
| 591 size_t SpillAreaSizeBytes = 0; | 586 size_t SpillAreaSizeBytes = 0; |
| 592 llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; | 587 llvm::SmallBitVector TypeToRegisterSet[IceType_NUM]; |
| 593 llvm::SmallBitVector ScratchRegs; | 588 llvm::SmallBitVector ScratchRegs; |
| 594 llvm::SmallBitVector RegsUsed; | 589 llvm::SmallBitVector RegsUsed; |
| 595 VarList PhysicalRegisters[IceType_NUM]; | 590 VarList PhysicalRegisters[IceType_NUM]; |
| 596 static IceString RegNames[]; | 591 static IceString RegNames[]; |
| 597 | 592 |
| 598 // Randomize a given immediate operand | 593 // Randomize a given immediate operand |
| 599 Operand *randomizeOrPoolImmediate(Constant *Immediate, | 594 Operand *randomizeOrPoolImmediate(Constant *Immediate, |
| 600 int32_t RegNum = Variable::NoRegister); | 595 int32_t RegNum = Variable::NoRegister); |
| 601 OperandX8632Mem * | 596 OperandX8632Mem * |
| 602 randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, | 597 randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, |
| 603 int32_t RegNum = Variable::NoRegister); | 598 int32_t RegNum = Variable::NoRegister); |
| 604 bool RandomizationPoolingPaused = false; | 599 bool RandomizationPoolingPaused = false; |
| 605 | 600 |
| 606 private: | 601 private: |
| 607 ~TargetX8632() override {} | 602 ~TargetX86Base() override {} |
| 608 BoolFolding FoldingInfo; | 603 BoolFolding FoldingInfo; |
| 609 }; | 604 }; |
| 610 | 605 } // end of namespace X86Internal |
| 611 class TargetDataX8632 final : public TargetDataLowering { | |
| 612 TargetDataX8632() = delete; | |
| 613 TargetDataX8632(const TargetDataX8632 &) = delete; | |
| 614 TargetDataX8632 &operator=(const TargetDataX8632 &) = delete; | |
| 615 | |
| 616 public: | |
| 617 static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) { | |
| 618 return std::unique_ptr<TargetDataLowering>(new TargetDataX8632(Ctx)); | |
| 619 } | |
| 620 | |
| 621 void lowerGlobals(const VariableDeclarationList &Vars, | |
| 622 const IceString &SectionSuffix) override; | |
| 623 void lowerConstants() override; | |
| 624 | |
| 625 protected: | |
| 626 explicit TargetDataX8632(GlobalContext *Ctx); | |
| 627 | |
| 628 private: | |
| 629 ~TargetDataX8632() override {} | |
| 630 template <typename T> static void emitConstantPool(GlobalContext *Ctx); | |
| 631 }; | |
| 632 | |
| 633 class TargetHeaderX8632 final : public TargetHeaderLowering { | |
| 634 TargetHeaderX8632() = delete; | |
| 635 TargetHeaderX8632(const TargetHeaderX8632 &) = delete; | |
| 636 TargetHeaderX8632 &operator=(const TargetHeaderX8632 &) = delete; | |
| 637 | |
| 638 public: | |
| 639 static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) { | |
| 640 return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderX8632(Ctx)); | |
| 641 } | |
| 642 | |
| 643 protected: | |
| 644 explicit TargetHeaderX8632(GlobalContext *Ctx); | |
| 645 | |
| 646 private: | |
| 647 ~TargetHeaderX8632() = default; | |
| 648 }; | |
| 649 | |
| 650 } // end of namespace Ice | 606 } // end of namespace Ice |
| 651 | 607 |
| 652 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632_H | 608 #include "IceTargetLoweringX86BaseImpl.h" |
| 609 |
| 610 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASE_H |
| OLD | NEW |