Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX86Base class, which | 10 // This file implements the TargetLoweringX86Base class, which |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 27 #include "IceOperand.h" | 27 #include "IceOperand.h" |
| 28 #include "IceRegistersX8632.h" | 28 #include "IceRegistersX8632.h" |
| 29 #include "IceTargetLoweringX8632.def" | 29 #include "IceTargetLoweringX8632.def" |
| 30 #include "IceTargetLoweringX8632.h" | 30 #include "IceTargetLoweringX8632.h" |
| 31 #include "IceUtils.h" | 31 #include "IceUtils.h" |
| 32 #include "llvm/Support/MathExtras.h" | 32 #include "llvm/Support/MathExtras.h" |
| 33 | 33 |
| 34 namespace Ice { | 34 namespace Ice { |
| 35 namespace X86Internal { | 35 namespace X86Internal { |
| 36 | 36 |
| 37 // A helper class to ease the settings of RandomizationPoolingPause | 37 /// A helper class to ease the settings of RandomizationPoolingPause |
| 38 // to disable constant blinding or pooling for some translation phases. | 38 /// to disable constant blinding or pooling for some translation phases. |
| 39 class BoolFlagSaver { | 39 class BoolFlagSaver { |
| 40 BoolFlagSaver() = delete; | 40 BoolFlagSaver() = delete; |
| 41 BoolFlagSaver(const BoolFlagSaver &) = delete; | 41 BoolFlagSaver(const BoolFlagSaver &) = delete; |
| 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
| 43 | 43 |
| 44 public: | 44 public: |
| 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
| 46 ~BoolFlagSaver() { Flag = OldValue; } | 46 ~BoolFlagSaver() { Flag = OldValue; } |
| 47 | 47 |
| 48 private: | 48 private: |
| 49 const bool OldValue; | 49 const bool OldValue; |
| 50 bool &Flag; | 50 bool &Flag; |
| 51 }; | 51 }; |
| 52 | 52 |
| 53 template <class MachineTraits> class BoolFoldingEntry { | 53 template <class MachineTraits> class BoolFoldingEntry { |
| 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; |
| 55 | 55 |
| 56 public: | 56 public: |
| 57 BoolFoldingEntry() = default; | 57 BoolFoldingEntry() = default; |
| 58 explicit BoolFoldingEntry(Inst *I); | 58 explicit BoolFoldingEntry(Inst *I); |
| 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
| 60 // Instr is the instruction producing the i1-type variable of interest. | 60 /// Instr is the instruction producing the i1-type variable of interest. |
| 61 Inst *Instr = nullptr; | 61 Inst *Instr = nullptr; |
| 62 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| 63 bool IsComplex = false; | 63 bool IsComplex = false; |
| 64 // IsLiveOut is initialized conservatively to true, and is set to false when | 64 /// IsLiveOut is initialized conservatively to true, and is set to false when |
| 65 // we encounter an instruction that ends Var's live range. We disable the | 65 /// we encounter an instruction that ends Var's live range. We disable the |
| 66 // folding optimization when Var is live beyond this basic block. Note that | 66 /// folding optimization when Var is live beyond this basic block. Note that |
| 67 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| 68 // always be true and the folding optimization will never be performed. | 68 /// always be true and the folding optimization will never be performed. |
| 69 bool IsLiveOut = true; | 69 bool IsLiveOut = true; |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 70 // NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 71 // basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 72 // then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 73 uint32_t NumUses = 0; |
| 74 }; | 74 }; |
| 75 | 75 |
| 76 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
| 77 public: | 77 public: |
| 78 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
| 79 PK_None, | 79 PK_None, |
| 80 PK_Icmp32, | 80 PK_Icmp32, |
| 81 PK_Icmp64, | 81 PK_Icmp64, |
| 82 PK_Fcmp, | 82 PK_Fcmp, |
| 83 PK_Trunc | 83 PK_Trunc |
| 84 }; | 84 }; |
| 85 | 85 |
| 86 // Currently the actual enum values are not used (other than CK_None), but we | 86 /// Currently the actual enum values are not used (other than CK_None), but we |
| 87 // go | 87 /// go |
| 88 // ahead and produce them anyway for symmetry with the | 88 /// ahead and produce them anyway for symmetry with the |
| 89 // BoolFoldingProducerKind. | 89 /// BoolFoldingProducerKind. |
| 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| 91 | 91 |
| 92 private: | 92 private: |
| 93 BoolFolding(const BoolFolding &) = delete; | 93 BoolFolding(const BoolFolding &) = delete; |
| 94 BoolFolding &operator=(const BoolFolding &) = delete; | 94 BoolFolding &operator=(const BoolFolding &) = delete; |
| 95 | 95 |
| 96 public: | 96 public: |
| 97 BoolFolding() = default; | 97 BoolFolding() = default; |
| 98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | 98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); |
| 99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | 99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); |
| 100 static bool hasComplexLowering(const Inst *Instr); | 100 static bool hasComplexLowering(const Inst *Instr); |
| 101 void init(CfgNode *Node); | 101 void init(CfgNode *Node); |
| 102 const Inst *getProducerFor(const Operand *Opnd) const; | 102 const Inst *getProducerFor(const Operand *Opnd) const; |
| 103 void dump(const Cfg *Func) const; | 103 void dump(const Cfg *Func) const; |
| 104 | 104 |
| 105 private: | 105 private: |
| 106 // Returns true if Producers contains a valid entry for the given VarNum. | 106 /// Returns true if Producers contains a valid entry for the given VarNum. |
| 107 bool containsValid(SizeT VarNum) const { | 107 bool containsValid(SizeT VarNum) const { |
| 108 auto Element = Producers.find(VarNum); | 108 auto Element = Producers.find(VarNum); |
| 109 return Element != Producers.end() && Element->second.Instr != nullptr; | 109 return Element != Producers.end() && Element->second.Instr != nullptr; |
| 110 } | 110 } |
| 111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | 111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
| 112 // Producers maps Variable::Number to a BoolFoldingEntry. | 112 /// Producers maps Variable::Number to a BoolFoldingEntry. |
| 113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | 113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; |
| 114 }; | 114 }; |
| 115 | 115 |
| 116 template <class MachineTraits> | 116 template <class MachineTraits> |
| 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) |
| 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
| 119 | 119 |
| 120 template <class MachineTraits> | 120 template <class MachineTraits> |
| 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 155 return CK_None; | 155 return CK_None; |
| 156 case InstCast::Sext: | 156 case InstCast::Sext: |
| 157 return CK_Sext; | 157 return CK_Sext; |
| 158 case InstCast::Zext: | 158 case InstCast::Zext: |
| 159 return CK_Zext; | 159 return CK_Zext; |
| 160 } | 160 } |
| 161 } | 161 } |
| 162 return CK_None; | 162 return CK_None; |
| 163 } | 163 } |
| 164 | 164 |
| 165 // Returns true if the producing instruction has a "complex" lowering | 165 /// Returns true if the producing instruction has a "complex" lowering |
| 166 // sequence. This generally means that its lowering sequence requires | 166 /// sequence. This generally means that its lowering sequence requires |
| 167 // more than one conditional branch, namely 64-bit integer compares | 167 /// more than one conditional branch, namely 64-bit integer compares |
| 168 // and some floating-point compares. When this is true, and there is | 168 /// and some floating-point compares. When this is true, and there is |
| 169 // more than one consumer, we prefer to disable the folding | 169 /// more than one consumer, we prefer to disable the folding |
| 170 // optimization because it minimizes branches. | 170 /// optimization because it minimizes branches. |
| 171 template <class MachineTraits> | 171 template <class MachineTraits> |
| 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| 173 switch (getProducerKind(Instr)) { | 173 switch (getProducerKind(Instr)) { |
| 174 default: | 174 default: |
| 175 return false; | 175 return false; |
| 176 case PK_Icmp64: | 176 case PK_Icmp64: |
| 177 return true; | 177 return true; |
| 178 case PK_Fcmp: | 178 case PK_Fcmp: |
| 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 180 .C2 != CondX86::Br_None; | 180 .C2 != CondX86::Br_None; |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 603 } | 603 } |
| 604 | 604 |
| 605 // Converts a ConstantInteger32 operand into its constant value, or | 605 // Converts a ConstantInteger32 operand into its constant value, or |
| 606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 607 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 607 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 609 return Integer->getValue(); | 609 return Integer->getValue(); |
| 610 return Intrinsics::MemoryOrderInvalid; | 610 return Intrinsics::MemoryOrderInvalid; |
| 611 } | 611 } |
| 612 | 612 |
| 613 // Determines whether the dest of a Load instruction can be folded | 613 /// Determines whether the dest of a Load instruction can be folded |
| 614 // into one of the src operands of a 2-operand instruction. This is | 614 /// into one of the src operands of a 2-operand instruction. This is |
| 615 // true as long as the load dest matches exactly one of the binary | 615 /// true as long as the load dest matches exactly one of the binary |
| 616 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 616 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| 617 // the answer is true. | 617 /// the answer is true. |
| 618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 619 Operand *&Src0, Operand *&Src1) { | 619 Operand *&Src0, Operand *&Src1) { |
| 620 if (Src0 == LoadDest && Src1 != LoadDest) { | 620 if (Src0 == LoadDest && Src1 != LoadDest) { |
| 621 Src0 = LoadSrc; | 621 Src0 = LoadSrc; |
| 622 return true; | 622 return true; |
| 623 } | 623 } |
| 624 if (Src0 != LoadDest && Src1 == LoadDest) { | 624 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 625 Src1 = LoadSrc; | 625 Src1 = LoadSrc; |
| 626 return true; | 626 return true; |
| 627 } | 627 } |
| (...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 845 RegisterArg->setRegNum(RegNum); | 845 RegisterArg->setRegNum(RegNum); |
| 846 RegisterArg->setIsArg(); | 846 RegisterArg->setIsArg(); |
| 847 Arg->setIsArg(false); | 847 Arg->setIsArg(false); |
| 848 | 848 |
| 849 Args[I] = RegisterArg; | 849 Args[I] = RegisterArg; |
| 850 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 850 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 851 } | 851 } |
| 852 } | 852 } |
| 853 | 853 |
| 854 // Helper function for addProlog(). | 854 /// Helper function for addProlog(). |
| 855 // | 855 /// |
| 856 // This assumes Arg is an argument passed on the stack. This sets the | 856 /// This assumes Arg is an argument passed on the stack. This sets the |
| 857 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 857 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 858 // width. For an I64 arg that has been split into Lo and Hi components, | 858 /// width. For an I64 arg that has been split into Lo and Hi components, |
| 859 // it calls itself recursively on the components, taking care to handle | 859 /// it calls itself recursively on the components, taking care to handle |
| 860 // Lo first because of the little-endian architecture. Lastly, this | 860 /// Lo first because of the little-endian architecture. Lastly, this |
| 861 // function generates an instruction to copy Arg into its assigned | 861 /// function generates an instruction to copy Arg into its assigned |
| 862 // register if applicable. | 862 /// register if applicable. |
| 863 template <class Machine> | 863 template <class Machine> |
| 864 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 864 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 865 Variable *FramePtr, | 865 Variable *FramePtr, |
| 866 size_t BasicFrameOffset, | 866 size_t BasicFrameOffset, |
| 867 size_t &InArgsSizeBytes) { | 867 size_t &InArgsSizeBytes) { |
| 868 Variable *Lo = Arg->getLo(); | 868 Variable *Lo = Arg->getLo(); |
| 869 Variable *Hi = Arg->getHi(); | 869 Variable *Hi = Arg->getHi(); |
| 870 Type Ty = Arg->getType(); | 870 Type Ty = Arg->getType(); |
| 871 if (Lo && Hi && Ty == IceType_i64) { | 871 if (Lo && Hi && Ty == IceType_i64) { |
| 872 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 872 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 895 // statistics. | 895 // statistics. |
| 896 Ctx->statsUpdateFills(); | 896 Ctx->statsUpdateFills(); |
| 897 } | 897 } |
| 898 } | 898 } |
| 899 | 899 |
| 900 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 900 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 901 return IceType_i32; | 901 return IceType_i32; |
| 902 } | 902 } |
| 903 | 903 |
| 904 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { | 904 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { |
| 905 // Stack frame layout: | 905 // Stack frame layout: |
|
Karl
2015/07/06 18:08:48
Not clear if this should be a doxygen comment.
ascull
2015/07/06 19:29:09
If it should be documented then it needs to be mov
jvoung (off chromium)
2015/07/06 20:07:43
Let's not doxygenize it for now.
This is explaini
| |
| 906 // | 906 // |
| 907 // +------------------------+ | 907 // +------------------------+ |
| 908 // | 1. return address | | 908 // | 1. return address | |
| 909 // +------------------------+ | 909 // +------------------------+ |
| 910 // | 2. preserved registers | | 910 // | 2. preserved registers | |
| 911 // +------------------------+ | 911 // +------------------------+ |
| 912 // | 3. padding | | 912 // | 3. padding | |
| 913 // +------------------------+ | 913 // +------------------------+ |
| 914 // | 4. global spill area | | 914 // | 4. global spill area | |
| 915 // +------------------------+ | 915 // +------------------------+ |
| (...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1139 SizeT j = CalleeSaves.size() - i - 1; | 1139 SizeT j = CalleeSaves.size() - i - 1; |
| 1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) | 1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) |
| 1141 continue; | 1141 continue; |
| 1142 if (CalleeSaves[j] && RegsUsed[j]) { | 1142 if (CalleeSaves[j] && RegsUsed[j]) { |
| 1143 _pop(getPhysicalRegister(j)); | 1143 _pop(getPhysicalRegister(j)); |
| 1144 } | 1144 } |
| 1145 } | 1145 } |
| 1146 | 1146 |
| 1147 if (!Ctx->getFlags().getUseSandboxing()) | 1147 if (!Ctx->getFlags().getUseSandboxing()) |
| 1148 return; | 1148 return; |
| 1149 // Change the original ret instruction into a sandboxed return sequence. | 1149 /// Change the original ret instruction into a sandboxed return sequence. |
|
Karl
2015/07/06 18:08:49
This is in the middle of a method. Should it have
ascull
2015/07/06 19:29:09
Done.
| |
| 1150 // t:ecx = pop | 1150 /// t:ecx = pop |
| 1151 // bundle_lock | 1151 /// bundle_lock |
| 1152 // and t, ~31 | 1152 /// and t, ~31 |
| 1153 // jmp *t | 1153 /// jmp *t |
| 1154 // bundle_unlock | 1154 /// bundle_unlock |
| 1155 // FakeUse <original_ret_operand> | 1155 /// FakeUse <original_ret_operand> |
| 1156 const SizeT BundleSize = | 1156 const SizeT BundleSize = |
| 1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); | 1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| 1159 _pop(T_ecx); | 1159 _pop(T_ecx); |
| 1160 _bundle_lock(); | 1160 _bundle_lock(); |
| 1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | 1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 1162 _jmp(T_ecx); | 1162 _jmp(T_ecx); |
| 1163 _bundle_unlock(); | 1163 _bundle_unlock(); |
| 1164 if (RI->getSrcSize()) { | 1164 if (RI->getSrcSize()) { |
| 1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1339 // multiple of the required alignment at runtime. | 1339 // multiple of the required alignment at runtime. |
| 1340 Variable *T = makeReg(IceType_i32); | 1340 Variable *T = makeReg(IceType_i32); |
| 1341 _mov(T, TotalSize); | 1341 _mov(T, TotalSize); |
| 1342 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1342 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1343 _and(T, Ctx->getConstantInt32(-Alignment)); | 1343 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1344 _sub(esp, T); | 1344 _sub(esp, T); |
| 1345 } | 1345 } |
| 1346 _mov(Dest, esp); | 1346 _mov(Dest, esp); |
| 1347 } | 1347 } |
| 1348 | 1348 |
| 1349 // Strength-reduce scalar integer multiplication by a constant (for | 1349 /// Strength-reduce scalar integer multiplication by a constant (for |
| 1350 // i32 or narrower) for certain constants. The lea instruction can be | 1350 /// i32 or narrower) for certain constants. The lea instruction can be |
| 1351 // used to multiply by 3, 5, or 9, and the lsh instruction can be used | 1351 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| 1352 // to multiply by powers of 2. These can be combined such that | 1352 /// to multiply by powers of 2. These can be combined such that |
| 1353 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 1353 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1354 // combined with left-shifting by 2. | 1354 /// combined with left-shifting by 2. |
| 1355 template <class Machine> | 1355 template <class Machine> |
| 1356 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1356 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1357 int32_t Src1) { | 1357 int32_t Src1) { |
| 1358 // Disable this optimization for Om1 and O0, just to keep things | 1358 // Disable this optimization for Om1 and O0, just to keep things |
| 1359 // simple there. | 1359 // simple there. |
| 1360 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1360 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1361 return false; | 1361 return false; |
| 1362 Type Ty = Dest->getType(); | 1362 Type Ty = Dest->getType(); |
| 1363 Variable *T = nullptr; | 1363 Variable *T = nullptr; |
| 1364 if (Src1 == -1) { | 1364 if (Src1 == -1) { |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1405 ++CountOps; | 1405 ++CountOps; |
| 1406 ++Count2; | 1406 ++Count2; |
| 1407 Src1 /= 2; | 1407 Src1 /= 2; |
| 1408 } else { | 1408 } else { |
| 1409 return false; | 1409 return false; |
| 1410 } | 1410 } |
| 1411 } | 1411 } |
| 1412 // Lea optimization only works for i16 and i32 types, not i8. | 1412 // Lea optimization only works for i16 and i32 types, not i8. |
| 1413 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1413 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| 1414 return false; | 1414 return false; |
| 1415 // Limit the number of lea/shl operations for a single multiply, to | 1415 /// Limit the number of lea/shl operations for a single multiply, to |
|
Karl
2015/07/06 18:08:49
Again, this is within a method. Should it have dox
ascull
2015/07/06 19:29:09
Done.
| |
| 1416 // a somewhat arbitrary choice of 3. | 1416 /// a somewhat arbitrary choice of 3. |
| 1417 const uint32_t MaxOpsForOptimizedMul = 3; | 1417 const uint32_t MaxOpsForOptimizedMul = 3; |
| 1418 if (CountOps > MaxOpsForOptimizedMul) | 1418 if (CountOps > MaxOpsForOptimizedMul) |
| 1419 return false; | 1419 return false; |
| 1420 _mov(T, Src0); | 1420 _mov(T, Src0); |
| 1421 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1421 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1422 for (uint32_t i = 0; i < Count9; ++i) { | 1422 for (uint32_t i = 0; i < Count9; ++i) { |
| 1423 const uint16_t Shift = 3; // log2(9-1) | 1423 const uint16_t Shift = 3; // log2(9-1) |
| 1424 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); | 1424 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1425 _set_dest_nonkillable(); | 1425 _set_dest_nonkillable(); |
| 1426 } | 1426 } |
| (...skipping 335 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1762 // pshufd T3, Src1, {1,0,3,0} | 1762 // pshufd T3, Src1, {1,0,3,0} |
| 1763 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | 1763 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
| 1764 // pmuludq T1, Src1 | 1764 // pmuludq T1, Src1 |
| 1765 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1765 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
| 1766 // pmuludq T2, T3 | 1766 // pmuludq T2, T3 |
| 1767 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | 1767 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
| 1768 // shufps T1, T2, {0,2,0,2} | 1768 // shufps T1, T2, {0,2,0,2} |
| 1769 // pshufd T4, T1, {0,2,1,3} | 1769 // pshufd T4, T1, {0,2,1,3} |
| 1770 // movups Dest, T4 | 1770 // movups Dest, T4 |
| 1771 | 1771 |
| 1772 // Mask that directs pshufd to create a vector with entries | 1772 /// Mask that directs pshufd to create a vector with entries |
|
Karl
2015/07/06 18:08:48
Similar question here (not at declaration level).
ascull
2015/07/06 19:29:09
Done.
| |
| 1773 // Src[1, 0, 3, 0] | 1773 /// Src[1, 0, 3, 0] |
| 1774 const unsigned Constant1030 = 0x31; | 1774 const unsigned Constant1030 = 0x31; |
| 1775 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | 1775 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); |
| 1776 // Mask that directs shufps to create a vector with entries | 1776 /// Mask that directs shufps to create a vector with entries |
| 1777 // Dest[0, 2], Src[0, 2] | 1777 /// Dest[0, 2], Src[0, 2] |
| 1778 const unsigned Mask0202 = 0x88; | 1778 const unsigned Mask0202 = 0x88; |
| 1779 // Mask that directs pshufd to create a vector with entries | 1779 /// Mask that directs pshufd to create a vector with entries |
| 1780 // Src[0, 2, 1, 3] | 1780 /// Src[0, 2, 1, 3] |
| 1781 const unsigned Mask0213 = 0xd8; | 1781 const unsigned Mask0213 = 0xd8; |
| 1782 Variable *T1 = makeReg(IceType_v4i32); | 1782 Variable *T1 = makeReg(IceType_v4i32); |
| 1783 Variable *T2 = makeReg(IceType_v4i32); | 1783 Variable *T2 = makeReg(IceType_v4i32); |
| 1784 Variable *T3 = makeReg(IceType_v4i32); | 1784 Variable *T3 = makeReg(IceType_v4i32); |
| 1785 Variable *T4 = makeReg(IceType_v4i32); | 1785 Variable *T4 = makeReg(IceType_v4i32); |
| 1786 _movp(T1, Src0); | 1786 _movp(T1, Src0); |
| 1787 _pshufd(T2, Src0, Mask1030); | 1787 _pshufd(T2, Src0, Mask1030); |
| 1788 _pshufd(T3, Src1, Mask1030); | 1788 _pshufd(T3, Src1, Mask1030); |
| 1789 _pmuludq(T1, Src1); | 1789 _pmuludq(T1, Src1); |
| 1790 _pmuludq(T2, T3); | 1790 _pmuludq(T2, T3); |
| (...skipping 589 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2380 if (DestTy == IceType_v16i8) { | 2380 if (DestTy == IceType_v16i8) { |
| 2381 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 2381 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2382 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 2382 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
| 2383 Variable *T = makeReg(DestTy); | 2383 Variable *T = makeReg(DestTy); |
| 2384 _movp(T, Src0RM); | 2384 _movp(T, Src0RM); |
| 2385 _pand(T, OneMask); | 2385 _pand(T, OneMask); |
| 2386 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 2386 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
| 2387 _pcmpgt(T, Zeros); | 2387 _pcmpgt(T, Zeros); |
| 2388 _movp(Dest, T); | 2388 _movp(Dest, T); |
| 2389 } else { | 2389 } else { |
| 2390 // width = width(elty) - 1; dest = (src << width) >> width | 2390 /// width = width(elty) - 1; dest = (src << width) >> width |
| 2391 SizeT ShiftAmount = | 2391 SizeT ShiftAmount = |
| 2392 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 2392 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2393 1; | 2393 1; |
| 2394 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 2394 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2395 Variable *T = makeReg(DestTy); | 2395 Variable *T = makeReg(DestTy); |
| 2396 _movp(T, Src0RM); | 2396 _movp(T, Src0RM); |
| 2397 _psll(T, ShiftConstant); | 2397 _psll(T, ShiftConstant); |
| 2398 _psra(T, ShiftConstant); | 2398 _psra(T, ShiftConstant); |
| 2399 _movp(Dest, T); | 2399 _movp(Dest, T); |
| 2400 } | 2400 } |
| (...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2612 if (isVectorType(Dest->getType())) { | 2612 if (isVectorType(Dest->getType())) { |
| 2613 assert(Dest->getType() == IceType_v4f32 && | 2613 assert(Dest->getType() == IceType_v4f32 && |
| 2614 Inst->getSrc(0)->getType() == IceType_v4i32); | 2614 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2615 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2615 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2616 if (llvm::isa<OperandX8632Mem>(Src0RM)) | 2616 if (llvm::isa<OperandX8632Mem>(Src0RM)) |
| 2617 Src0RM = legalizeToVar(Src0RM); | 2617 Src0RM = legalizeToVar(Src0RM); |
| 2618 Variable *T = makeReg(Dest->getType()); | 2618 Variable *T = makeReg(Dest->getType()); |
| 2619 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); | 2619 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); |
| 2620 _movp(Dest, T); | 2620 _movp(Dest, T); |
| 2621 } else if (Inst->getSrc(0)->getType() == IceType_i64) { | 2621 } else if (Inst->getSrc(0)->getType() == IceType_i64) { |
| 2622 // Use a helper for x86-32. | 2622 /// Use a helper for x86-32. |
|
Karl
2015/07/06 18:08:49
Should this be a doxygen comment (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 2623 const SizeT MaxSrcs = 1; | 2623 const SizeT MaxSrcs = 1; |
| 2624 Type DestType = Dest->getType(); | 2624 Type DestType = Dest->getType(); |
| 2625 InstCall *Call = | 2625 InstCall *Call = |
| 2626 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2626 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
| 2627 : H_sitofp_i64_f64, | 2627 : H_sitofp_i64_f64, |
| 2628 Dest, MaxSrcs); | 2628 Dest, MaxSrcs); |
| 2629 // TODO: Call the correct compiler-rt helper function. | 2629 // TODO: Call the correct compiler-rt helper function. |
| 2630 Call->addArg(Inst->getSrc(0)); | 2630 Call->addArg(Inst->getSrc(0)); |
| 2631 lowerCall(Call); | 2631 lowerCall(Call); |
| 2632 return; | 2632 return; |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 2648 Operand *Src0 = Inst->getSrc(0); | 2648 Operand *Src0 = Inst->getSrc(0); |
| 2649 if (isVectorType(Src0->getType())) { | 2649 if (isVectorType(Src0->getType())) { |
| 2650 assert(Dest->getType() == IceType_v4f32 && | 2650 assert(Dest->getType() == IceType_v4f32 && |
| 2651 Src0->getType() == IceType_v4i32); | 2651 Src0->getType() == IceType_v4i32); |
| 2652 const SizeT MaxSrcs = 1; | 2652 const SizeT MaxSrcs = 1; |
| 2653 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2653 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
| 2654 Call->addArg(Src0); | 2654 Call->addArg(Src0); |
| 2655 lowerCall(Call); | 2655 lowerCall(Call); |
| 2656 } else if (Src0->getType() == IceType_i64 || | 2656 } else if (Src0->getType() == IceType_i64 || |
| 2657 Src0->getType() == IceType_i32) { | 2657 Src0->getType() == IceType_i32) { |
| 2658 // Use a helper for x86-32 and x86-64. Also use a helper for | 2658 /// Use a helper for x86-32 and x86-64. Also use a helper for |
| 2659 // i32 on x86-32. | 2659 /// i32 on x86-32. |
|
Karl
2015/07/06 18:08:48
Again, not a declaration. Should it be /// ?
ascull
2015/07/06 19:29:09
Done.
| |
| 2660 const SizeT MaxSrcs = 1; | 2660 const SizeT MaxSrcs = 1; |
| 2661 Type DestType = Dest->getType(); | 2661 Type DestType = Dest->getType(); |
| 2662 IceString TargetString; | 2662 IceString TargetString; |
| 2663 if (isInt32Asserting32Or64(Src0->getType())) { | 2663 if (isInt32Asserting32Or64(Src0->getType())) { |
| 2664 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2664 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
| 2665 : H_uitofp_i32_f64; | 2665 : H_uitofp_i32_f64; |
| 2666 } else { | 2666 } else { |
| 2667 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2667 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
| 2668 : H_uitofp_i64_f64; | 2668 : H_uitofp_i64_f64; |
| 2669 } | 2669 } |
| (...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3228 } | 3228 } |
| 3229 | 3229 |
| 3230 if (Index == 0) { | 3230 if (Index == 0) { |
| 3231 Variable *T = makeReg(Ty); | 3231 Variable *T = makeReg(Ty); |
| 3232 _movp(T, SourceVectRM); | 3232 _movp(T, SourceVectRM); |
| 3233 _movss(T, ElementR); | 3233 _movss(T, ElementR); |
| 3234 _movp(Inst->getDest(), T); | 3234 _movp(Inst->getDest(), T); |
| 3235 return; | 3235 return; |
| 3236 } | 3236 } |
| 3237 | 3237 |
| 3238 // shufps treats the source and destination operands as vectors of | 3238 /// shufps treats the source and destination operands as vectors of |
| 3239 // four doublewords. The destination's two high doublewords are | 3239 /// four doublewords. The destination's two high doublewords are |
| 3240 // selected from the source operand and the two low doublewords are | 3240 /// selected from the source operand and the two low doublewords are |
| 3241 // selected from (the original value of) the destination operand. | 3241 /// selected from (the original value of) the destination operand. |
| 3242 // An insertelement operation can be effected with a sequence of two | 3242 /// An insertelement operation can be effected with a sequence of two |
| 3243 // shufps operations with appropriate masks. In all cases below, | 3243 /// shufps operations with appropriate masks. In all cases below, |
| 3244 // Element[0] is being inserted into SourceVectOperand. Indices are | 3244 /// Element[0] is being inserted into SourceVectOperand. Indices are |
| 3245 // ordered from left to right. | 3245 /// ordered from left to right. |
| 3246 // | 3246 /// |
| 3247 // insertelement into index 1 (result is stored in ElementR): | 3247 /// insertelement into index 1 (result is stored in ElementR): |
| 3248 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] | 3248 /// ElementR := ElementR[0, 0] SourceVectRM[0, 0] |
| 3249 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] | 3249 /// ElementR := ElementR[3, 0] SourceVectRM[2, 3] |
| 3250 // | 3250 /// |
| 3251 // insertelement into index 2 (result is stored in T): | 3251 /// insertelement into index 2 (result is stored in T): |
| 3252 // T := SourceVectRM | 3252 /// T := SourceVectRM |
| 3253 // ElementR := ElementR[0, 0] T[0, 3] | 3253 /// ElementR := ElementR[0, 0] T[0, 3] |
| 3254 // T := T[0, 1] ElementR[0, 3] | 3254 /// T := T[0, 1] ElementR[0, 3] |
| 3255 // | 3255 /// |
| 3256 // insertelement into index 3 (result is stored in T): | 3256 /// insertelement into index 3 (result is stored in T): |
| 3257 // T := SourceVectRM | 3257 /// T := SourceVectRM |
| 3258 // ElementR := ElementR[0, 0] T[0, 2] | 3258 /// ElementR := ElementR[0, 0] T[0, 2] |
| 3259 // T := T[0, 1] ElementR[3, 0] | 3259 /// T := T[0, 1] ElementR[3, 0] |
|
Karl
2015/07/06 18:08:49
Again, inside method. Should this be /// ?
ascull
2015/07/06 19:29:09
Done.
| |
| 3260 const unsigned char Mask1[3] = {0, 192, 128}; | 3260 const unsigned char Mask1[3] = {0, 192, 128}; |
| 3261 const unsigned char Mask2[3] = {227, 196, 52}; | 3261 const unsigned char Mask2[3] = {227, 196, 52}; |
| 3262 | 3262 |
| 3263 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]); | 3263 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]); |
| 3264 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]); | 3264 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]); |
| 3265 | 3265 |
| 3266 if (Index == 1) { | 3266 if (Index == 1) { |
| 3267 _shufps(ElementR, SourceVectRM, Mask1Constant); | 3267 _shufps(ElementR, SourceVectRM, Mask1Constant); |
| 3268 _shufps(ElementR, SourceVectRM, Mask2Constant); | 3268 _shufps(ElementR, SourceVectRM, Mask2Constant); |
| 3269 _movp(Inst->getDest(), ElementR); | 3269 _movp(Inst->getDest(), ElementR); |
| (...skipping 654 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3924 // the end of the loop, since it will be re-used by the loop. | 3924 // the end of the loop, since it will be re-used by the loop. |
| 3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3926 Context.insert(InstFakeUse::create(Func, ValVar)); | 3926 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 3927 } | 3927 } |
| 3928 // The address base (if any) is also reused in the loop. | 3928 // The address base (if any) is also reused in the loop. |
| 3929 if (Variable *Base = Addr->getBase()) | 3929 if (Variable *Base = Addr->getBase()) |
| 3930 Context.insert(InstFakeUse::create(Func, Base)); | 3930 Context.insert(InstFakeUse::create(Func, Base)); |
| 3931 _mov(Dest, T_eax); | 3931 _mov(Dest, T_eax); |
| 3932 } | 3932 } |
| 3933 | 3933 |
| 3934 // Lowers count {trailing, leading} zeros intrinsic. | 3934 /// Lowers count {trailing, leading} zeros intrinsic. |
| 3935 // | 3935 /// |
| 3936 // We could do constant folding here, but that should have | 3936 /// We could do constant folding here, but that should have |
| 3937 // been done by the front-end/middle-end optimizations. | 3937 /// been done by the front-end/middle-end optimizations. |
| 3938 template <class Machine> | 3938 template <class Machine> |
| 3939 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3939 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3940 Operand *FirstVal, | 3940 Operand *FirstVal, |
| 3941 Operand *SecondVal) { | 3941 Operand *SecondVal) { |
| 3942 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3942 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 3943 // Then the instructions will handle the Val == 0 case much more simply | 3943 // Then the instructions will handle the Val == 0 case much more simply |
| 3944 // and won't require conversion from bit position to number of zeros. | 3944 // and won't require conversion from bit position to number of zeros. |
| 3945 // | 3945 // |
| 3946 // Otherwise: | 3946 // Otherwise: |
| 3947 // bsr IF_NOT_ZERO, Val | 3947 // bsr IF_NOT_ZERO, Val |
| (...skipping 315 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4263 lowerAssign(Assign); | 4263 lowerAssign(Assign); |
| 4264 } | 4264 } |
| 4265 | 4265 |
| 4266 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { | 4266 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| 4267 Inst *Inst = Context.getCur(); | 4267 Inst *Inst = Context.getCur(); |
| 4268 Variable *Dest = Inst->getDest(); | 4268 Variable *Dest = Inst->getDest(); |
| 4269 Operand *Addr = Inst->getSrc(0); | 4269 Operand *Addr = Inst->getSrc(0); |
| 4270 Variable *Index = nullptr; | 4270 Variable *Index = nullptr; |
| 4271 uint16_t Shift = 0; | 4271 uint16_t Shift = 0; |
| 4272 int32_t Offset = 0; // TODO: make Constant | 4272 int32_t Offset = 0; // TODO: make Constant |
| 4273 // Vanilla ICE load instructions should not use the segment registers, | 4273 /// Vanilla ICE load instructions should not use the segment registers, |
| 4274 // and computeAddressOpt only works at the level of Variables and Constants, | 4274 /// and computeAddressOpt only works at the level of Variables and Constants, |
| 4275 // not other OperandX8632Mem, so there should be no mention of segment | 4275 /// not other OperandX8632Mem, so there should be no mention of segment |
| 4276 // registers there either. | 4276 /// registers there either. |
|
Karl
2015/07/06 18:08:48
Should this be /// (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 4277 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4277 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4278 OperandX8632Mem::DefaultSegment; | 4278 OperandX8632Mem::DefaultSegment; |
| 4279 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4279 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4280 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4280 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4281 if (Base && Addr != Base) { | 4281 if (Base && Addr != Base) { |
| 4282 Inst->setDeleted(); | 4282 Inst->setDeleted(); |
| 4283 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4283 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4284 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, | 4284 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
| 4285 Shift, SegmentReg); | 4285 Shift, SegmentReg); |
| 4286 Context.insert(InstLoad::create(Func, Dest, Addr)); | 4286 Context.insert(InstLoad::create(Func, Dest, Addr)); |
| (...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4494 } | 4494 } |
| 4495 | 4495 |
| 4496 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { | 4496 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| 4497 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | 4497 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); |
| 4498 Operand *Data = Inst->getData(); | 4498 Operand *Data = Inst->getData(); |
| 4499 Operand *Addr = Inst->getAddr(); | 4499 Operand *Addr = Inst->getAddr(); |
| 4500 Variable *Index = nullptr; | 4500 Variable *Index = nullptr; |
| 4501 uint16_t Shift = 0; | 4501 uint16_t Shift = 0; |
| 4502 int32_t Offset = 0; // TODO: make Constant | 4502 int32_t Offset = 0; // TODO: make Constant |
| 4503 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4503 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4504 // Vanilla ICE store instructions should not use the segment registers, | 4504 /// Vanilla ICE store instructions should not use the segment registers, |
| 4505 // and computeAddressOpt only works at the level of Variables and Constants, | 4505 /// and computeAddressOpt only works at the level of Variables and Constants, |
| 4506 // not other OperandX8632Mem, so there should be no mention of segment | 4506 /// not other OperandX8632Mem, so there should be no mention of segment |
| 4507 // registers there either. | 4507 /// registers there either. |
|
Karl
2015/07/06 18:08:48
Similar here (inside method).
ascull
2015/07/06 19:29:10
Done.
| |
| 4508 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4508 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4509 OperandX8632Mem::DefaultSegment; | 4509 OperandX8632Mem::DefaultSegment; |
| 4510 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4510 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4511 if (Base && Addr != Base) { | 4511 if (Base && Addr != Base) { |
| 4512 Inst->setDeleted(); | 4512 Inst->setDeleted(); |
| 4513 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4513 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4514 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4514 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 4515 Shift, SegmentReg); | 4515 Shift, SegmentReg); |
| 4516 InstStore *NewStore = InstStore::create(Func, Data, Addr); | 4516 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| 4517 if (Inst->getDest()) | 4517 if (Inst->getDest()) |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4590 | 4590 |
| 4591 // Insert the result into position. | 4591 // Insert the result into position. |
| 4592 Variable *DestT = Func->template makeVariable(Ty); | 4592 Variable *DestT = Func->template makeVariable(Ty); |
| 4593 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 4593 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 4594 T = DestT; | 4594 T = DestT; |
| 4595 } | 4595 } |
| 4596 | 4596 |
| 4597 lowerAssign(InstAssign::create(Func, Dest, T)); | 4597 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 4598 } | 4598 } |
| 4599 | 4599 |
| 4600 // The following pattern occurs often in lowered C and C++ code: | 4600 /// The following pattern occurs often in lowered C and C++ code: |
| 4601 // | 4601 /// |
| 4602 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4602 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4603 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4603 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4604 // | 4604 /// |
| 4605 // We can eliminate the sext operation by copying the result of pcmpeqd, | 4605 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 4606 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 4606 /// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 4607 // of the sext operation. | 4607 /// of the sext operation. |
| 4608 template <class Machine> | 4608 template <class Machine> |
| 4609 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 4609 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
| 4610 Variable *SignExtendedResult) { | 4610 Variable *SignExtendedResult) { |
| 4611 if (InstCast *NextCast = | 4611 if (InstCast *NextCast = |
| 4612 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4612 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 4613 if (NextCast->getCastKind() == InstCast::Sext && | 4613 if (NextCast->getCastKind() == InstCast::Sext && |
| 4614 NextCast->getSrc(0) == SignExtendedResult) { | 4614 NextCast->getSrc(0) == SignExtendedResult) { |
| 4615 NextCast->setDeleted(); | 4615 NextCast->setDeleted(); |
| 4616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4617 // Skip over the instruction. | 4617 // Skip over the instruction. |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4703 | 4703 |
| 4704 template <class Machine> | 4704 template <class Machine> |
| 4705 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 4705 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
| 4706 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | 4706 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4707 lowerRMW(RMW); | 4707 lowerRMW(RMW); |
| 4708 } else { | 4708 } else { |
| 4709 TargetLowering::lowerOther(Instr); | 4709 TargetLowering::lowerOther(Instr); |
| 4710 } | 4710 } |
| 4711 } | 4711 } |
| 4712 | 4712 |
| 4713 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4713 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4714 // preserve integrity of liveness analysis. Undef values are also | 4714 /// preserve integrity of liveness analysis. Undef values are also |
| 4715 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4715 /// turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4716 // Undef input. | 4716 /// Undef input. |
| 4717 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4717 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4718 // Pause constant blinding or pooling, blinding or pooling will be done later | 4718 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 4719 // during phi lowering assignments | 4719 // during phi lowering assignments |
| 4720 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4720 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4721 | 4721 |
| 4722 CfgNode *Node = Context.getNode(); | 4722 CfgNode *Node = Context.getNode(); |
| 4723 for (Inst &I : Node->getPhis()) { | 4723 for (Inst &I : Node->getPhis()) { |
| 4724 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4724 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4725 if (Phi->isDeleted()) | 4725 if (Phi->isDeleted()) |
| 4726 continue; | 4726 continue; |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 4752 // because they do in fact need a register to materialize the vector | 4752 // because they do in fact need a register to materialize the vector |
| 4753 // of zeroes into. | 4753 // of zeroes into. |
| 4754 if (llvm::isa<ConstantUndef>(Opnd)) | 4754 if (llvm::isa<ConstantUndef>(Opnd)) |
| 4755 return isScalarFloatingType(Opnd->getType()) || | 4755 return isScalarFloatingType(Opnd->getType()) || |
| 4756 isVectorType(Opnd->getType()); | 4756 isVectorType(Opnd->getType()); |
| 4757 if (llvm::isa<Constant>(Opnd)) | 4757 if (llvm::isa<Constant>(Opnd)) |
| 4758 return isScalarFloatingType(Opnd->getType()); | 4758 return isScalarFloatingType(Opnd->getType()); |
| 4759 return true; | 4759 return true; |
| 4760 } | 4760 } |
| 4761 | 4761 |
| 4762 // Lower the pre-ordered list of assignments into mov instructions. | 4762 /// Lower the pre-ordered list of assignments into mov instructions. |
| 4763 // Also has to do some ad-hoc register allocation as necessary. | 4763 /// Also has to do some ad-hoc register allocation as necessary. |
| 4764 template <class Machine> | 4764 template <class Machine> |
| 4765 void TargetX86Base<Machine>::lowerPhiAssignments( | 4765 void TargetX86Base<Machine>::lowerPhiAssignments( |
| 4766 CfgNode *Node, const AssignList &Assignments) { | 4766 CfgNode *Node, const AssignList &Assignments) { |
| 4767 // Check that this is a properly initialized shell of a node. | 4767 // Check that this is a properly initialized shell of a node. |
| 4768 assert(Node->getOutEdges().size() == 1); | 4768 assert(Node->getOutEdges().size() == 1); |
| 4769 assert(Node->getInsts().empty()); | 4769 assert(Node->getInsts().empty()); |
| 4770 assert(Node->getPhis().empty()); | 4770 assert(Node->getPhis().empty()); |
| 4771 CfgNode *Succ = Node->getOutEdges().front(); | 4771 CfgNode *Succ = Node->getOutEdges().front(); |
| 4772 getContext().init(Node); | 4772 getContext().init(Node); |
| 4773 // Register set setup similar to regAlloc(). | 4773 // Register set setup similar to regAlloc(). |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4947 int32_t RegNum) { | 4947 int32_t RegNum) { |
| 4948 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 4948 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 4949 Ty == IceType_v16i8); | 4949 Ty == IceType_v16i8); |
| 4950 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 4950 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 4951 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 4951 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 4952 SizeT Shift = | 4952 SizeT Shift = |
| 4953 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 4953 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
| 4954 _psll(Reg, Ctx->getConstantInt8(Shift)); | 4954 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 4955 return Reg; | 4955 return Reg; |
| 4956 } else { | 4956 } else { |
| 4957 // SSE has no left shift operation for vectors of 8 bit integers. | 4957 /// SSE has no left shift operation for vectors of 8 bit integers. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 4958 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4958 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 4959 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4959 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 4960 Variable *Reg = makeReg(Ty, RegNum); | 4960 Variable *Reg = makeReg(Ty, RegNum); |
| 4961 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4961 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 4962 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4962 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 4963 return Reg; | 4963 return Reg; |
| 4964 } | 4964 } |
| 4965 } | 4965 } |
| 4966 | 4966 |
| 4967 // Construct a mask in a register that can be and'ed with a | 4967 /// Construct a mask in a register that can be and'ed with a |
| 4968 // floating-point value to mask off its sign bit. The value will be | 4968 /// floating-point value to mask off its sign bit. The value will be |
| 4969 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4969 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| 4970 // for f64. Construct it as vector of ones logically right shifted | 4970 /// for f64. Construct it as vector of ones logically right shifted |
| 4971 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4971 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| 4972 // vector constants in memory. | 4972 /// vector constants in memory. |
| 4973 template <class Machine> | 4973 template <class Machine> |
| 4974 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 4974 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
| 4975 int32_t RegNum) { | 4975 int32_t RegNum) { |
| 4976 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4976 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 4977 _psrl(Reg, Ctx->getConstantInt8(1)); | 4977 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 4978 return Reg; | 4978 return Reg; |
| 4979 } | 4979 } |
| 4980 | 4980 |
| 4981 template <class Machine> | 4981 template <class Machine> |
| 4982 OperandX8632Mem * | 4982 OperandX8632Mem * |
| 4983 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 4983 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 4984 uint32_t Offset) { | 4984 uint32_t Offset) { |
| 4985 // Ensure that Loc is a stack slot. | 4985 // Ensure that Loc is a stack slot. |
| 4986 assert(Slot->getWeight().isZero()); | 4986 assert(Slot->getWeight().isZero()); |
| 4987 assert(Slot->getRegNum() == Variable::NoRegister); | 4987 assert(Slot->getRegNum() == Variable::NoRegister); |
| 4988 // Compute the location of Loc in memory. | 4988 /// Compute the location of Loc in memory. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 4989 // TODO(wala,stichnot): lea should not be required. The address of | 4989 // TODO(wala,stichnot): lea should not be required. The address of |
| 4990 // the stack slot is known at compile time (although not until after | 4990 // the stack slot is known at compile time (although not until after |
| 4991 // addProlog()). | 4991 // addProlog()). |
| 4992 const Type PointerType = IceType_i32; | 4992 const Type PointerType = IceType_i32; |
| 4993 Variable *Loc = makeReg(PointerType); | 4993 Variable *Loc = makeReg(PointerType); |
| 4994 _lea(Loc, Slot); | 4994 _lea(Loc, Slot); |
| 4995 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 4995 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 4996 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 4996 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| 4997 } | 4997 } |
| 4998 | 4998 |
| 4999 // Helper for legalize() to emit the right code to lower an operand to a | 4999 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5000 // register of the appropriate type. | 5000 /// register of the appropriate type. |
| 5001 template <class Machine> | 5001 template <class Machine> |
| 5002 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5002 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| 5003 Type Ty = Src->getType(); | 5003 Type Ty = Src->getType(); |
| 5004 Variable *Reg = makeReg(Ty, RegNum); | 5004 Variable *Reg = makeReg(Ty, RegNum); |
| 5005 if (isVectorType(Ty)) { | 5005 if (isVectorType(Ty)) { |
| 5006 _movp(Reg, Src); | 5006 _movp(Reg, Src); |
| 5007 } else { | 5007 } else { |
| 5008 _mov(Reg, Src); | 5008 _mov(Reg, Src); |
| 5009 } | 5009 } |
| 5010 return Reg; | 5010 return Reg; |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5099 NeedsReg = true; | 5099 NeedsReg = true; |
| 5100 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 5100 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
| 5101 // On x86, FP constants are lowered to mem operands. | 5101 // On x86, FP constants are lowered to mem operands. |
| 5102 NeedsReg = true; | 5102 NeedsReg = true; |
| 5103 if (NeedsReg) { | 5103 if (NeedsReg) { |
| 5104 From = copyToReg(From, RegNum); | 5104 From = copyToReg(From, RegNum); |
| 5105 } | 5105 } |
| 5106 return From; | 5106 return From; |
| 5107 } | 5107 } |
| 5108 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 5108 if (auto Var = llvm::dyn_cast<Variable>(From)) { |
| 5109 // Check if the variable is guaranteed a physical register. This | 5109 /// Check if the variable is guaranteed a physical register. This |
| 5110 // can happen either when the variable is pre-colored or when it is | 5110 /// can happen either when the variable is pre-colored or when it is |
| 5111 // assigned infinite weight. | 5111 /// assigned infinite weight. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 5112 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf()); | 5112 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf()); |
| 5113 // We need a new physical register for the operand if: | 5113 // We need a new physical register for the operand if: |
| 5114 // Mem is not allowed and Var isn't guaranteed a physical | 5114 // Mem is not allowed and Var isn't guaranteed a physical |
| 5115 // register, or | 5115 // register, or |
| 5116 // RegNum is required and Var->getRegNum() doesn't match. | 5116 // RegNum is required and Var->getRegNum() doesn't match. |
| 5117 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5117 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 5118 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5118 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| 5119 From = copyToReg(From, RegNum); | 5119 From = copyToReg(From, RegNum); |
| 5120 } | 5120 } |
| 5121 return From; | 5121 return From; |
| 5122 } | 5122 } |
| 5123 llvm_unreachable("Unhandled operand kind in legalize()"); | 5123 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 5124 return From; | 5124 return From; |
| 5125 } | 5125 } |
| 5126 | 5126 |
| 5127 // Provide a trivial wrapper to legalize() for this common usage. | 5127 /// Provide a trivial wrapper to legalize() for this common usage. |
| 5128 template <class Machine> | 5128 template <class Machine> |
| 5129 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { | 5129 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { |
| 5130 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5130 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 5131 } | 5131 } |
| 5132 | 5132 |
| 5133 // For the cmp instruction, if Src1 is an immediate, or known to be a | 5133 /// For the cmp instruction, if Src1 is an immediate, or known to be a |
| 5134 // physical register, we can allow Src0 to be a memory operand. | 5134 /// physical register, we can allow Src0 to be a memory operand. |
| 5135 // Otherwise, Src0 must be copied into a physical register. | 5135 /// Otherwise, Src0 must be copied into a physical register. |
| 5136 // (Actually, either Src0 or Src1 can be chosen for the physical | 5136 /// (Actually, either Src0 or Src1 can be chosen for the physical |
| 5137 // register, but unfortunately we have to commit to one or the other | 5137 /// register, but unfortunately we have to commit to one or the other |
| 5138 // before register allocation.) | 5138 /// before register allocation.) |
| 5139 template <class Machine> | 5139 template <class Machine> |
| 5140 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 5140 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
| 5141 Operand *Src1) { | 5141 Operand *Src1) { |
| 5142 bool IsSrc1ImmOrReg = false; | 5142 bool IsSrc1ImmOrReg = false; |
| 5143 if (llvm::isa<Constant>(Src1)) { | 5143 if (llvm::isa<Constant>(Src1)) { |
| 5144 IsSrc1ImmOrReg = true; | 5144 IsSrc1ImmOrReg = true; |
| 5145 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 5145 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 5146 if (Var->hasReg()) | 5146 if (Var->hasReg()) |
| 5147 IsSrc1ImmOrReg = true; | 5147 IsSrc1ImmOrReg = true; |
| 5148 } | 5148 } |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5202 inferTwoAddress(); | 5202 inferTwoAddress(); |
| 5203 } | 5203 } |
| 5204 | 5204 |
| 5205 template <class Machine> | 5205 template <class Machine> |
| 5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | 5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation( |
| 5207 llvm::SmallVectorImpl<int32_t> &Permutation, | 5207 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 5208 const llvm::SmallBitVector &ExcludeRegisters) const { | 5208 const llvm::SmallBitVector &ExcludeRegisters) const { |
| 5209 // TODO(stichnot): Declaring Permutation this way loses type/size | 5209 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 5210 // information. Fix this in conjunction with the caller-side TODO. | 5210 // information. Fix this in conjunction with the caller-side TODO. |
| 5211 assert(Permutation.size() >= RegX8632::Reg_NUM); | 5211 assert(Permutation.size() >= RegX8632::Reg_NUM); |
| 5212 // Expected upper bound on the number of registers in a single | 5212 /// Expected upper bound on the number of registers in a single |
| 5213 // equivalence class. For x86-32, this would comprise the 8 XMM | 5213 /// equivalence class. For x86-32, this would comprise the 8 XMM |
| 5214 // registers. This is for performance, not correctness. | 5214 /// registers. This is for performance, not correctness. |
|
Karl
2015/07/06 18:08:49
Why ? (inside method).
ascull
2015/07/06 19:29:09
Done.
| |
| 5215 static const unsigned MaxEquivalenceClassSize = 8; | 5215 static const unsigned MaxEquivalenceClassSize = 8; |
| 5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; | 5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; |
| 5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; | 5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; |
| 5218 EquivalenceClassMap EquivalenceClasses; | 5218 EquivalenceClassMap EquivalenceClasses; |
| 5219 SizeT NumShuffled = 0, NumPreserved = 0; | 5219 SizeT NumShuffled = 0, NumPreserved = 0; |
| 5220 | 5220 |
| 5221 // Build up the equivalence classes of registers by looking at the | 5221 // Build up the equivalence classes of registers by looking at the |
| 5222 // register properties as well as whether the registers should be | 5222 // register properties as well as whether the registers should be |
| 5223 // explicitly excluded from shuffling. | 5223 // explicitly excluded from shuffling. |
| 5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5297 return; | 5297 return; |
| 5298 Ostream &Str = Ctx->getStrEmit(); | 5298 Ostream &Str = Ctx->getStrEmit(); |
| 5299 C->emitPoolLabel(Str); | 5299 C->emitPoolLabel(Str); |
| 5300 } | 5300 } |
| 5301 | 5301 |
| 5302 template <class Machine> | 5302 template <class Machine> |
| 5303 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | 5303 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { |
| 5304 llvm::report_fatal_error("undef value encountered by emitter."); | 5304 llvm::report_fatal_error("undef value encountered by emitter."); |
| 5305 } | 5305 } |
| 5306 | 5306 |
| 5307 // Randomize or pool an Immediate. | 5307 /// Randomize or pool an Immediate. |
| 5308 template <class Machine> | 5308 template <class Machine> |
| 5309 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 5309 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| 5310 int32_t RegNum) { | 5310 int32_t RegNum) { |
| 5311 assert(llvm::isa<ConstantInteger32>(Immediate) || | 5311 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 5312 llvm::isa<ConstantRelocatable>(Immediate)); | 5312 llvm::isa<ConstantRelocatable>(Immediate)); |
| 5313 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5313 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5314 RandomizationPoolingPaused == true) { | 5314 RandomizationPoolingPaused == true) { |
| 5315 // Immediates randomization/pooling off or paused | 5315 // Immediates randomization/pooling off or paused |
| 5316 return Immediate; | 5316 return Immediate; |
| 5317 } | 5317 } |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5496 } | 5496 } |
| 5497 // the offset is not eligible for blinding or pooling, return the original | 5497 // the offset is not eligible for blinding or pooling, return the original |
| 5498 // mem operand | 5498 // mem operand |
| 5499 return MemOperand; | 5499 return MemOperand; |
| 5500 } | 5500 } |
| 5501 | 5501 |
| 5502 } // end of namespace X86Internal | 5502 } // end of namespace X86Internal |
| 5503 } // end of namespace Ice | 5503 } // end of namespace Ice |
| 5504 | 5504 |
| 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |