| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 /// |
| 10 // This file implements the TargetLoweringX86Base class, which | 10 /// \file |
| 11 // consists almost entirely of the lowering sequence for each | 11 /// This file implements the TargetLoweringX86Base class, which |
| 12 // high-level instruction. | 12 /// consists almost entirely of the lowering sequence for each |
| 13 // | 13 /// high-level instruction. |
| 14 /// |
| 14 //===----------------------------------------------------------------------===// | 15 //===----------------------------------------------------------------------===// |
| 15 | 16 |
| 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 18 | 19 |
| 19 #include "IceCfg.h" | 20 #include "IceCfg.h" |
| 20 #include "IceCfgNode.h" | 21 #include "IceCfgNode.h" |
| 21 #include "IceClFlags.h" | 22 #include "IceClFlags.h" |
| 22 #include "IceDefs.h" | 23 #include "IceDefs.h" |
| 23 #include "IceELFObjectWriter.h" | 24 #include "IceELFObjectWriter.h" |
| 24 #include "IceGlobalInits.h" | 25 #include "IceGlobalInits.h" |
| 25 #include "IceInstX8632.h" | 26 #include "IceInstX8632.h" |
| 26 #include "IceLiveness.h" | 27 #include "IceLiveness.h" |
| 27 #include "IceOperand.h" | 28 #include "IceOperand.h" |
| 28 #include "IceRegistersX8632.h" | 29 #include "IceRegistersX8632.h" |
| 29 #include "IceTargetLoweringX8632.def" | 30 #include "IceTargetLoweringX8632.def" |
| 30 #include "IceTargetLoweringX8632.h" | 31 #include "IceTargetLoweringX8632.h" |
| 31 #include "IceUtils.h" | 32 #include "IceUtils.h" |
| 32 #include "llvm/Support/MathExtras.h" | 33 #include "llvm/Support/MathExtras.h" |
| 33 | 34 |
| 34 namespace Ice { | 35 namespace Ice { |
| 35 namespace X86Internal { | 36 namespace X86Internal { |
| 36 | 37 |
| 37 // A helper class to ease the settings of RandomizationPoolingPause | 38 /// A helper class to ease the settings of RandomizationPoolingPause |
| 38 // to disable constant blinding or pooling for some translation phases. | 39 /// to disable constant blinding or pooling for some translation phases. |
| 39 class BoolFlagSaver { | 40 class BoolFlagSaver { |
| 40 BoolFlagSaver() = delete; | 41 BoolFlagSaver() = delete; |
| 41 BoolFlagSaver(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver(const BoolFlagSaver &) = delete; |
| 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
| 43 | 44 |
| 44 public: | 45 public: |
| 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
| 46 ~BoolFlagSaver() { Flag = OldValue; } | 47 ~BoolFlagSaver() { Flag = OldValue; } |
| 47 | 48 |
| 48 private: | 49 private: |
| 49 const bool OldValue; | 50 const bool OldValue; |
| 50 bool &Flag; | 51 bool &Flag; |
| 51 }; | 52 }; |
| 52 | 53 |
| 53 template <class MachineTraits> class BoolFoldingEntry { | 54 template <class MachineTraits> class BoolFoldingEntry { |
| 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete; |
| 55 | 56 |
| 56 public: | 57 public: |
| 57 BoolFoldingEntry() = default; | 58 BoolFoldingEntry() = default; |
| 58 explicit BoolFoldingEntry(Inst *I); | 59 explicit BoolFoldingEntry(Inst *I); |
| 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
| 60 // Instr is the instruction producing the i1-type variable of interest. | 61 /// Instr is the instruction producing the i1-type variable of interest. |
| 61 Inst *Instr = nullptr; | 62 Inst *Instr = nullptr; |
| 62 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| 63 bool IsComplex = false; | 64 bool IsComplex = false; |
| 64 // IsLiveOut is initialized conservatively to true, and is set to false when | 65 /// IsLiveOut is initialized conservatively to true, and is set to false when |
| 65 // we encounter an instruction that ends Var's live range. We disable the | 66 /// we encounter an instruction that ends Var's live range. We disable the |
| 66 // folding optimization when Var is live beyond this basic block. Note that | 67 /// folding optimization when Var is live beyond this basic block. Note that |
| 67 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| 68 // always be true and the folding optimization will never be performed. | 69 /// always be true and the folding optimization will never be performed. |
| 69 bool IsLiveOut = true; | 70 bool IsLiveOut = true; |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 71 /// NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 72 /// basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 73 /// then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 74 uint32_t NumUses = 0; |
| 74 }; | 75 }; |
| 75 | 76 |
| 76 template <class MachineTraits> class BoolFolding { | 77 template <class MachineTraits> class BoolFolding { |
| 77 public: | 78 public: |
| 78 enum BoolFoldingProducerKind { | 79 enum BoolFoldingProducerKind { |
| 79 PK_None, | 80 PK_None, |
| 80 PK_Icmp32, | 81 PK_Icmp32, |
| 81 PK_Icmp64, | 82 PK_Icmp64, |
| 82 PK_Fcmp, | 83 PK_Fcmp, |
| 83 PK_Trunc | 84 PK_Trunc |
| 84 }; | 85 }; |
| 85 | 86 |
| 86 // Currently the actual enum values are not used (other than CK_None), but we | 87 /// Currently the actual enum values are not used (other than CK_None), but we |
| 87 // go | 88 /// go |
| 88 // ahead and produce them anyway for symmetry with the | 89 /// ahead and produce them anyway for symmetry with the |
| 89 // BoolFoldingProducerKind. | 90 /// BoolFoldingProducerKind. |
| 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| 91 | 92 |
| 92 private: | 93 private: |
| 93 BoolFolding(const BoolFolding &) = delete; | 94 BoolFolding(const BoolFolding &) = delete; |
| 94 BoolFolding &operator=(const BoolFolding &) = delete; | 95 BoolFolding &operator=(const BoolFolding &) = delete; |
| 95 | 96 |
| 96 public: | 97 public: |
| 97 BoolFolding() = default; | 98 BoolFolding() = default; |
| 98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); |
| 99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); |
| 100 static bool hasComplexLowering(const Inst *Instr); | 101 static bool hasComplexLowering(const Inst *Instr); |
| 101 void init(CfgNode *Node); | 102 void init(CfgNode *Node); |
| 102 const Inst *getProducerFor(const Operand *Opnd) const; | 103 const Inst *getProducerFor(const Operand *Opnd) const; |
| 103 void dump(const Cfg *Func) const; | 104 void dump(const Cfg *Func) const; |
| 104 | 105 |
| 105 private: | 106 private: |
| 106 // Returns true if Producers contains a valid entry for the given VarNum. | 107 /// Returns true if Producers contains a valid entry for the given VarNum. |
| 107 bool containsValid(SizeT VarNum) const { | 108 bool containsValid(SizeT VarNum) const { |
| 108 auto Element = Producers.find(VarNum); | 109 auto Element = Producers.find(VarNum); |
| 109 return Element != Producers.end() && Element->second.Instr != nullptr; | 110 return Element != Producers.end() && Element->second.Instr != nullptr; |
| 110 } | 111 } |
| 111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | 112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
| 112 // Producers maps Variable::Number to a BoolFoldingEntry. | 113 /// Producers maps Variable::Number to a BoolFoldingEntry. |
| 113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | 114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; |
| 114 }; | 115 }; |
| 115 | 116 |
| 116 template <class MachineTraits> | 117 template <class MachineTraits> |
| 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) |
| 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
| 119 | 120 |
| 120 template <class MachineTraits> | 121 template <class MachineTraits> |
| 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 155 return CK_None; | 156 return CK_None; |
| 156 case InstCast::Sext: | 157 case InstCast::Sext: |
| 157 return CK_Sext; | 158 return CK_Sext; |
| 158 case InstCast::Zext: | 159 case InstCast::Zext: |
| 159 return CK_Zext; | 160 return CK_Zext; |
| 160 } | 161 } |
| 161 } | 162 } |
| 162 return CK_None; | 163 return CK_None; |
| 163 } | 164 } |
| 164 | 165 |
| 165 // Returns true if the producing instruction has a "complex" lowering | 166 /// Returns true if the producing instruction has a "complex" lowering |
| 166 // sequence. This generally means that its lowering sequence requires | 167 /// sequence. This generally means that its lowering sequence requires |
| 167 // more than one conditional branch, namely 64-bit integer compares | 168 /// more than one conditional branch, namely 64-bit integer compares |
| 168 // and some floating-point compares. When this is true, and there is | 169 /// and some floating-point compares. When this is true, and there is |
| 169 // more than one consumer, we prefer to disable the folding | 170 /// more than one consumer, we prefer to disable the folding |
| 170 // optimization because it minimizes branches. | 171 /// optimization because it minimizes branches. |
| 171 template <class MachineTraits> | 172 template <class MachineTraits> |
| 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| 173 switch (getProducerKind(Instr)) { | 174 switch (getProducerKind(Instr)) { |
| 174 default: | 175 default: |
| 175 return false; | 176 return false; |
| 176 case PK_Icmp64: | 177 case PK_Icmp64: |
| 177 return true; | 178 return true; |
| 178 case PK_Fcmp: | 179 case PK_Fcmp: |
| 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 180 .C2 != MachineTraits::Cond::Br_None; | 181 .C2 != MachineTraits::Cond::Br_None; |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 603 } | 604 } |
| 604 | 605 |
| 605 // Converts a ConstantInteger32 operand into its constant value, or | 606 /// Converts a ConstantInteger32 operand into its constant value, or |
| 606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 607 /// MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 607 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 608 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 609 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 609 return Integer->getValue(); | 610 return Integer->getValue(); |
| 610 return Intrinsics::MemoryOrderInvalid; | 611 return Intrinsics::MemoryOrderInvalid; |
| 611 } | 612 } |
| 612 | 613 |
| 613 // Determines whether the dest of a Load instruction can be folded | 614 /// Determines whether the dest of a Load instruction can be folded |
| 614 // into one of the src operands of a 2-operand instruction. This is | 615 /// into one of the src operands of a 2-operand instruction. This is |
| 615 // true as long as the load dest matches exactly one of the binary | 616 /// true as long as the load dest matches exactly one of the binary |
| 616 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 617 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| 617 // the answer is true. | 618 /// the answer is true. |
| 618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 619 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 619 Operand *&Src0, Operand *&Src1) { | 620 Operand *&Src0, Operand *&Src1) { |
| 620 if (Src0 == LoadDest && Src1 != LoadDest) { | 621 if (Src0 == LoadDest && Src1 != LoadDest) { |
| 621 Src0 = LoadSrc; | 622 Src0 = LoadSrc; |
| 622 return true; | 623 return true; |
| 623 } | 624 } |
| 624 if (Src0 != LoadDest && Src1 == LoadDest) { | 625 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 625 Src1 = LoadSrc; | 626 Src1 = LoadSrc; |
| 626 return true; | 627 return true; |
| 627 } | 628 } |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 845 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 846 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 846 RegisterArg->setRegNum(RegNum); | 847 RegisterArg->setRegNum(RegNum); |
| 847 RegisterArg->setIsArg(); | 848 RegisterArg->setIsArg(); |
| 848 Arg->setIsArg(false); | 849 Arg->setIsArg(false); |
| 849 | 850 |
| 850 Args[I] = RegisterArg; | 851 Args[I] = RegisterArg; |
| 851 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 852 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 852 } | 853 } |
| 853 } | 854 } |
| 854 | 855 |
| 855 // Helper function for addProlog(). | 856 /// Helper function for addProlog(). |
| 856 // | 857 /// |
| 857 // This assumes Arg is an argument passed on the stack. This sets the | 858 /// This assumes Arg is an argument passed on the stack. This sets the |
| 858 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 859 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 859 // width. For an I64 arg that has been split into Lo and Hi components, | 860 /// width. For an I64 arg that has been split into Lo and Hi components, |
| 860 // it calls itself recursively on the components, taking care to handle | 861 /// it calls itself recursively on the components, taking care to handle |
| 861 // Lo first because of the little-endian architecture. Lastly, this | 862 /// Lo first because of the little-endian architecture. Lastly, this |
| 862 // function generates an instruction to copy Arg into its assigned | 863 /// function generates an instruction to copy Arg into its assigned |
| 863 // register if applicable. | 864 /// register if applicable. |
| 864 template <class Machine> | 865 template <class Machine> |
| 865 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 866 Variable *FramePtr, | 867 Variable *FramePtr, |
| 867 size_t BasicFrameOffset, | 868 size_t BasicFrameOffset, |
| 868 size_t &InArgsSizeBytes) { | 869 size_t &InArgsSizeBytes) { |
| 869 Variable *Lo = Arg->getLo(); | 870 Variable *Lo = Arg->getLo(); |
| 870 Variable *Hi = Arg->getHi(); | 871 Variable *Hi = Arg->getHi(); |
| 871 Type Ty = Arg->getType(); | 872 Type Ty = Arg->getType(); |
| 872 if (Lo && Hi && Ty == IceType_i64) { | 873 if (Lo && Hi && Ty == IceType_i64) { |
| 873 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| (...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1340 // multiple of the required alignment at runtime. | 1341 // multiple of the required alignment at runtime. |
| 1341 Variable *T = makeReg(IceType_i32); | 1342 Variable *T = makeReg(IceType_i32); |
| 1342 _mov(T, TotalSize); | 1343 _mov(T, TotalSize); |
| 1343 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1344 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1344 _and(T, Ctx->getConstantInt32(-Alignment)); | 1345 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1345 _sub(esp, T); | 1346 _sub(esp, T); |
| 1346 } | 1347 } |
| 1347 _mov(Dest, esp); | 1348 _mov(Dest, esp); |
| 1348 } | 1349 } |
| 1349 | 1350 |
| 1350 // Strength-reduce scalar integer multiplication by a constant (for | 1351 /// Strength-reduce scalar integer multiplication by a constant (for |
| 1351 // i32 or narrower) for certain constants. The lea instruction can be | 1352 /// i32 or narrower) for certain constants. The lea instruction can be |
| 1352 // used to multiply by 3, 5, or 9, and the lsh instruction can be used | 1353 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| 1353 // to multiply by powers of 2. These can be combined such that | 1354 /// to multiply by powers of 2. These can be combined such that |
| 1354 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 1355 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1355 // combined with left-shifting by 2. | 1356 /// combined with left-shifting by 2. |
| 1356 template <class Machine> | 1357 template <class Machine> |
| 1357 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1358 int32_t Src1) { | 1359 int32_t Src1) { |
| 1359 // Disable this optimization for Om1 and O0, just to keep things | 1360 // Disable this optimization for Om1 and O0, just to keep things |
| 1360 // simple there. | 1361 // simple there. |
| 1361 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1362 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1362 return false; | 1363 return false; |
| 1363 Type Ty = Dest->getType(); | 1364 Type Ty = Dest->getType(); |
| 1364 Variable *T = nullptr; | 1365 Variable *T = nullptr; |
| 1365 if (Src1 == -1) { | 1366 if (Src1 == -1) { |
| (...skipping 1018 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2384 if (DestTy == IceType_v16i8) { | 2385 if (DestTy == IceType_v16i8) { |
| 2385 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 2386 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2386 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 2387 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
| 2387 Variable *T = makeReg(DestTy); | 2388 Variable *T = makeReg(DestTy); |
| 2388 _movp(T, Src0RM); | 2389 _movp(T, Src0RM); |
| 2389 _pand(T, OneMask); | 2390 _pand(T, OneMask); |
| 2390 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 2391 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
| 2391 _pcmpgt(T, Zeros); | 2392 _pcmpgt(T, Zeros); |
| 2392 _movp(Dest, T); | 2393 _movp(Dest, T); |
| 2393 } else { | 2394 } else { |
| 2394 // width = width(elty) - 1; dest = (src << width) >> width | 2395 // width = width(elty) - 1; dest = (src << width) >> width |
| 2395 SizeT ShiftAmount = | 2396 SizeT ShiftAmount = |
| 2396 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 2397 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2397 1; | 2398 1; |
| 2398 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 2399 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2399 Variable *T = makeReg(DestTy); | 2400 Variable *T = makeReg(DestTy); |
| 2400 _movp(T, Src0RM); | 2401 _movp(T, Src0RM); |
| 2401 _psll(T, ShiftConstant); | 2402 _psll(T, ShiftConstant); |
| 2402 _psra(T, ShiftConstant); | 2403 _psra(T, ShiftConstant); |
| 2403 _movp(Dest, T); | 2404 _movp(Dest, T); |
| 2404 } | 2405 } |
| (...skipping 1527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3932 // the end of the loop, since it will be re-used by the loop. | 3933 // the end of the loop, since it will be re-used by the loop. |
| 3933 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3934 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3934 Context.insert(InstFakeUse::create(Func, ValVar)); | 3935 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 3935 } | 3936 } |
| 3936 // The address base (if any) is also reused in the loop. | 3937 // The address base (if any) is also reused in the loop. |
| 3937 if (Variable *Base = Addr->getBase()) | 3938 if (Variable *Base = Addr->getBase()) |
| 3938 Context.insert(InstFakeUse::create(Func, Base)); | 3939 Context.insert(InstFakeUse::create(Func, Base)); |
| 3939 _mov(Dest, T_eax); | 3940 _mov(Dest, T_eax); |
| 3940 } | 3941 } |
| 3941 | 3942 |
| 3942 // Lowers count {trailing, leading} zeros intrinsic. | 3943 /// Lowers count {trailing, leading} zeros intrinsic. |
| 3943 // | 3944 /// |
| 3944 // We could do constant folding here, but that should have | 3945 /// We could do constant folding here, but that should have |
| 3945 // been done by the front-end/middle-end optimizations. | 3946 /// been done by the front-end/middle-end optimizations. |
| 3946 template <class Machine> | 3947 template <class Machine> |
| 3947 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3948 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3948 Operand *FirstVal, | 3949 Operand *FirstVal, |
| 3949 Operand *SecondVal) { | 3950 Operand *SecondVal) { |
| 3950 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3951 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 3951 // Then the instructions will handle the Val == 0 case much more simply | 3952 // Then the instructions will handle the Val == 0 case much more simply |
| 3952 // and won't require conversion from bit position to number of zeros. | 3953 // and won't require conversion from bit position to number of zeros. |
| 3953 // | 3954 // |
| 3954 // Otherwise: | 3955 // Otherwise: |
| 3955 // bsr IF_NOT_ZERO, Val | 3956 // bsr IF_NOT_ZERO, Val |
| (...skipping 645 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4601 | 4602 |
| 4602 // Insert the result into position. | 4603 // Insert the result into position. |
| 4603 Variable *DestT = Func->template makeVariable(Ty); | 4604 Variable *DestT = Func->template makeVariable(Ty); |
| 4604 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 4605 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 4605 T = DestT; | 4606 T = DestT; |
| 4606 } | 4607 } |
| 4607 | 4608 |
| 4608 lowerAssign(InstAssign::create(Func, Dest, T)); | 4609 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 4609 } | 4610 } |
| 4610 | 4611 |
| 4611 // The following pattern occurs often in lowered C and C++ code: | 4612 /// The following pattern occurs often in lowered C and C++ code: |
| 4612 // | 4613 /// |
| 4613 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4614 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4614 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4615 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4615 // | 4616 /// |
| 4616 // We can eliminate the sext operation by copying the result of pcmpeqd, | 4617 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 4617 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 4618 /// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 4618 // of the sext operation. | 4619 /// of the sext operation. |
| 4619 template <class Machine> | 4620 template <class Machine> |
| 4620 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 4621 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
| 4621 Variable *SignExtendedResult) { | 4622 Variable *SignExtendedResult) { |
| 4622 if (InstCast *NextCast = | 4623 if (InstCast *NextCast = |
| 4623 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4624 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 4624 if (NextCast->getCastKind() == InstCast::Sext && | 4625 if (NextCast->getCastKind() == InstCast::Sext && |
| 4625 NextCast->getSrc(0) == SignExtendedResult) { | 4626 NextCast->getSrc(0) == SignExtendedResult) { |
| 4626 NextCast->setDeleted(); | 4627 NextCast->setDeleted(); |
| 4627 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4628 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4628 // Skip over the instruction. | 4629 // Skip over the instruction. |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4714 | 4715 |
| 4715 template <class Machine> | 4716 template <class Machine> |
| 4716 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 4717 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
| 4717 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | 4718 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4718 lowerRMW(RMW); | 4719 lowerRMW(RMW); |
| 4719 } else { | 4720 } else { |
| 4720 TargetLowering::lowerOther(Instr); | 4721 TargetLowering::lowerOther(Instr); |
| 4721 } | 4722 } |
| 4722 } | 4723 } |
| 4723 | 4724 |
| 4724 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4725 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4725 // preserve integrity of liveness analysis. Undef values are also | 4726 /// preserve integrity of liveness analysis. Undef values are also |
| 4726 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4727 /// turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4727 // Undef input. | 4728 /// Undef input. |
| 4728 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4729 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4729 // Pause constant blinding or pooling, blinding or pooling will be done later | 4730 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 4730 // during phi lowering assignments | 4731 // during phi lowering assignments |
| 4731 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4732 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4732 | 4733 |
| 4733 CfgNode *Node = Context.getNode(); | 4734 CfgNode *Node = Context.getNode(); |
| 4734 for (Inst &I : Node->getPhis()) { | 4735 for (Inst &I : Node->getPhis()) { |
| 4735 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4736 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4736 if (Phi->isDeleted()) | 4737 if (Phi->isDeleted()) |
| 4737 continue; | 4738 continue; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 4763 // because they do in fact need a register to materialize the vector | 4764 // because they do in fact need a register to materialize the vector |
| 4764 // of zeroes into. | 4765 // of zeroes into. |
| 4765 if (llvm::isa<ConstantUndef>(Opnd)) | 4766 if (llvm::isa<ConstantUndef>(Opnd)) |
| 4766 return isScalarFloatingType(Opnd->getType()) || | 4767 return isScalarFloatingType(Opnd->getType()) || |
| 4767 isVectorType(Opnd->getType()); | 4768 isVectorType(Opnd->getType()); |
| 4768 if (llvm::isa<Constant>(Opnd)) | 4769 if (llvm::isa<Constant>(Opnd)) |
| 4769 return isScalarFloatingType(Opnd->getType()); | 4770 return isScalarFloatingType(Opnd->getType()); |
| 4770 return true; | 4771 return true; |
| 4771 } | 4772 } |
| 4772 | 4773 |
| 4773 // Lower the pre-ordered list of assignments into mov instructions. | 4774 /// Lower the pre-ordered list of assignments into mov instructions. |
| 4774 // Also has to do some ad-hoc register allocation as necessary. | 4775 /// Also has to do some ad-hoc register allocation as necessary. |
| 4775 template <class Machine> | 4776 template <class Machine> |
| 4776 void TargetX86Base<Machine>::lowerPhiAssignments( | 4777 void TargetX86Base<Machine>::lowerPhiAssignments( |
| 4777 CfgNode *Node, const AssignList &Assignments) { | 4778 CfgNode *Node, const AssignList &Assignments) { |
| 4778 // Check that this is a properly initialized shell of a node. | 4779 // Check that this is a properly initialized shell of a node. |
| 4779 assert(Node->getOutEdges().size() == 1); | 4780 assert(Node->getOutEdges().size() == 1); |
| 4780 assert(Node->getInsts().empty()); | 4781 assert(Node->getInsts().empty()); |
| 4781 assert(Node->getPhis().empty()); | 4782 assert(Node->getPhis().empty()); |
| 4782 CfgNode *Succ = Node->getOutEdges().front(); | 4783 CfgNode *Succ = Node->getOutEdges().front(); |
| 4783 getContext().init(Node); | 4784 getContext().init(Node); |
| 4784 // Register set setup similar to regAlloc(). | 4785 // Register set setup similar to regAlloc(). |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4968 // SSE has no left shift operation for vectors of 8 bit integers. | 4969 // SSE has no left shift operation for vectors of 8 bit integers. |
| 4969 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4970 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 4970 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4971 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 4971 Variable *Reg = makeReg(Ty, RegNum); | 4972 Variable *Reg = makeReg(Ty, RegNum); |
| 4972 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4973 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 4973 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4974 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 4974 return Reg; | 4975 return Reg; |
| 4975 } | 4976 } |
| 4976 } | 4977 } |
| 4977 | 4978 |
| 4978 // Construct a mask in a register that can be and'ed with a | 4979 /// Construct a mask in a register that can be and'ed with a |
| 4979 // floating-point value to mask off its sign bit. The value will be | 4980 /// floating-point value to mask off its sign bit. The value will be |
| 4980 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4981 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| 4981 // for f64. Construct it as vector of ones logically right shifted | 4982 /// for f64. Construct it as vector of ones logically right shifted |
| 4982 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4983 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| 4983 // vector constants in memory. | 4984 /// vector constants in memory. |
| 4984 template <class Machine> | 4985 template <class Machine> |
| 4985 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 4986 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
| 4986 int32_t RegNum) { | 4987 int32_t RegNum) { |
| 4987 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4988 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 4988 _psrl(Reg, Ctx->getConstantInt8(1)); | 4989 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 4989 return Reg; | 4990 return Reg; |
| 4990 } | 4991 } |
| 4991 | 4992 |
| 4992 template <class Machine> | 4993 template <class Machine> |
| 4993 OperandX8632Mem * | 4994 OperandX8632Mem * |
| 4994 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 4995 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 4995 uint32_t Offset) { | 4996 uint32_t Offset) { |
| 4996 // Ensure that Loc is a stack slot. | 4997 // Ensure that Loc is a stack slot. |
| 4997 assert(Slot->getWeight().isZero()); | 4998 assert(Slot->getWeight().isZero()); |
| 4998 assert(Slot->getRegNum() == Variable::NoRegister); | 4999 assert(Slot->getRegNum() == Variable::NoRegister); |
| 4999 // Compute the location of Loc in memory. | 5000 // Compute the location of Loc in memory. |
| 5000 // TODO(wala,stichnot): lea should not be required. The address of | 5001 // TODO(wala,stichnot): lea should not be required. The address of |
| 5001 // the stack slot is known at compile time (although not until after | 5002 // the stack slot is known at compile time (although not until after |
| 5002 // addProlog()). | 5003 // addProlog()). |
| 5003 const Type PointerType = IceType_i32; | 5004 const Type PointerType = IceType_i32; |
| 5004 Variable *Loc = makeReg(PointerType); | 5005 Variable *Loc = makeReg(PointerType); |
| 5005 _lea(Loc, Slot); | 5006 _lea(Loc, Slot); |
| 5006 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5007 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5007 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 5008 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| 5008 } | 5009 } |
| 5009 | 5010 |
| 5010 // Helper for legalize() to emit the right code to lower an operand to a | 5011 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5011 // register of the appropriate type. | 5012 /// register of the appropriate type. |
| 5012 template <class Machine> | 5013 template <class Machine> |
| 5013 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5014 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| 5014 Type Ty = Src->getType(); | 5015 Type Ty = Src->getType(); |
| 5015 Variable *Reg = makeReg(Ty, RegNum); | 5016 Variable *Reg = makeReg(Ty, RegNum); |
| 5016 if (isVectorType(Ty)) { | 5017 if (isVectorType(Ty)) { |
| 5017 _movp(Reg, Src); | 5018 _movp(Reg, Src); |
| 5018 } else { | 5019 } else { |
| 5019 _mov(Reg, Src); | 5020 _mov(Reg, Src); |
| 5020 } | 5021 } |
| 5021 return Reg; | 5022 return Reg; |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5128 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5129 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 5129 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5130 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| 5130 From = copyToReg(From, RegNum); | 5131 From = copyToReg(From, RegNum); |
| 5131 } | 5132 } |
| 5132 return From; | 5133 return From; |
| 5133 } | 5134 } |
| 5134 llvm_unreachable("Unhandled operand kind in legalize()"); | 5135 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 5135 return From; | 5136 return From; |
| 5136 } | 5137 } |
| 5137 | 5138 |
| 5138 // Provide a trivial wrapper to legalize() for this common usage. | 5139 /// Provide a trivial wrapper to legalize() for this common usage. |
| 5139 template <class Machine> | 5140 template <class Machine> |
| 5140 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { | 5141 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { |
| 5141 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5142 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 5142 } | 5143 } |
| 5143 | 5144 |
| 5144 // For the cmp instruction, if Src1 is an immediate, or known to be a | 5145 /// For the cmp instruction, if Src1 is an immediate, or known to be a |
| 5145 // physical register, we can allow Src0 to be a memory operand. | 5146 /// physical register, we can allow Src0 to be a memory operand. |
| 5146 // Otherwise, Src0 must be copied into a physical register. | 5147 /// Otherwise, Src0 must be copied into a physical register. |
| 5147 // (Actually, either Src0 or Src1 can be chosen for the physical | 5148 /// (Actually, either Src0 or Src1 can be chosen for the physical |
| 5148 // register, but unfortunately we have to commit to one or the other | 5149 /// register, but unfortunately we have to commit to one or the other |
| 5149 // before register allocation.) | 5150 /// before register allocation.) |
| 5150 template <class Machine> | 5151 template <class Machine> |
| 5151 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 5152 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
| 5152 Operand *Src1) { | 5153 Operand *Src1) { |
| 5153 bool IsSrc1ImmOrReg = false; | 5154 bool IsSrc1ImmOrReg = false; |
| 5154 if (llvm::isa<Constant>(Src1)) { | 5155 if (llvm::isa<Constant>(Src1)) { |
| 5155 IsSrc1ImmOrReg = true; | 5156 IsSrc1ImmOrReg = true; |
| 5156 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 5157 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 5157 if (Var->hasReg()) | 5158 if (Var->hasReg()) |
| 5158 IsSrc1ImmOrReg = true; | 5159 IsSrc1ImmOrReg = true; |
| 5159 } | 5160 } |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5308 return; | 5309 return; |
| 5309 Ostream &Str = Ctx->getStrEmit(); | 5310 Ostream &Str = Ctx->getStrEmit(); |
| 5310 C->emitPoolLabel(Str); | 5311 C->emitPoolLabel(Str); |
| 5311 } | 5312 } |
| 5312 | 5313 |
| 5313 template <class Machine> | 5314 template <class Machine> |
| 5314 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | 5315 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { |
| 5315 llvm::report_fatal_error("undef value encountered by emitter."); | 5316 llvm::report_fatal_error("undef value encountered by emitter."); |
| 5316 } | 5317 } |
| 5317 | 5318 |
| 5318 // Randomize or pool an Immediate. | 5319 /// Randomize or pool an Immediate. |
| 5319 template <class Machine> | 5320 template <class Machine> |
| 5320 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 5321 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| 5321 int32_t RegNum) { | 5322 int32_t RegNum) { |
| 5322 assert(llvm::isa<ConstantInteger32>(Immediate) || | 5323 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 5323 llvm::isa<ConstantRelocatable>(Immediate)); | 5324 llvm::isa<ConstantRelocatable>(Immediate)); |
| 5324 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5325 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5325 RandomizationPoolingPaused == true) { | 5326 RandomizationPoolingPaused == true) { |
| 5326 // Immediates randomization/pooling off or paused | 5327 // Immediates randomization/pooling off or paused |
| 5327 return Immediate; | 5328 return Immediate; |
| 5328 } | 5329 } |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5507 } | 5508 } |
| 5508 // the offset is not eligible for blinding or pooling, return the original | 5509 // the offset is not eligible for blinding or pooling, return the original |
| 5509 // mem operand | 5510 // mem operand |
| 5510 return MemOperand; | 5511 return MemOperand; |
| 5511 } | 5512 } |
| 5512 | 5513 |
| 5513 } // end of namespace X86Internal | 5514 } // end of namespace X86Internal |
| 5514 } // end of namespace Ice | 5515 } // end of namespace Ice |
| 5515 | 5516 |
| 5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5517 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |